12 Supplementary Figure 07

12.1 Summary

This is the accessory documentation of Supplementary Figure 07.

The Figure can be recreated by running the R script S07.R:

cd $WORK/3_figures/F_scripts

Rscript --vanilla S07.R
rm Rplots.pdf

12.2 Details of S07.R

In principal the S07.R script is a reduced versions of the F2.R script.

In contrast to F2.R, in this version only the global species comparisons are loaded. Additionally, the outlier threshold is lowered to the 99.90 FST percentile (as compared to the 99.98 percentile).

Is an executable R script that depends on a variety of image manipulation and data managing packages.

library(grid)
library(gridSVG)
library(grImport2)
library(grConvert)
library(tidyverse)
library(cowplot)
library(hrbrthemes)

12.2.1 Reading in Data

Supplementary Figure 07 depends on the results of the FST computation by VCFtools in broad resolution (50 kb windows, 5 kb increments). The results of the individual species pairs are loaded individually, the genomic positions added to the loci and details about the species comparisons added.

Further more, the extent of the individual LGs in loaded.

karyo <- read.csv('../../0_data/0_resources/F2.karyo.txt',sep='\t') %>%
  mutate(GSTART=lag(cumsum(END),n = 1,default = 0),
         GEND=GSTART+END,GROUP=rep(letters[1:2],12)) %>%
  select(CHROM,GSTART,GEND,GROUP)
# global -------------
pn <- read.csv('../../2_output/08_popGen/05_fst/pue-nig.50kb.5kb.windowed.weir.fst',sep='\t') %>%
  merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
  mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PN');

pu <- read.csv('../../2_output/08_popGen/05_fst/pue-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
  merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
  mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PU');

nu <- read.csv('../../2_output/08_popGen/05_fst/nig-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
  merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
  mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NU');

The individual species comparisons are combined into a single data set. Then the order of the species comparisons and of the comparison types are set.

# ------------------------------------------
data <- rbind(pn,pu,nu)
data$RUN <- factor(as.character(data$RUN),
                   levels=c('PN','NU','PU'))
data$COL <- factor(as.character(data$COL),levels=c('PN','PU','NU'))

12.2.2 Computing 99.90 FST percentiles

The upper 99.90 FST percentile is computed per run. Then windows above this threshold are identified for each global run.

threshs <- data %>% group_by(RUN) %>% summarise(thresh=quantile(WEIGHTED_FST,.999))

data2 <- data %>% filter(RUN %in% levels(data$RUN)[1:3]) %>% merge(.,threshs,by='RUN',all.x=T) %>%
  mutate(OUTL = (WEIGHTED_FST>thresh)) %>% filter(OUTL) %>% group_by(RUN) %>%
  mutate(CHECK=cumsum(1-(BIN_START-lag(BIN_START,default = 0)==5000)),ID=paste(RUN,'-',CHECK,sep='')) %>%
  ungroup() %>% group_by(ID) %>%
  summarise(CHROM=CHROM[1],
            xmin = min(BIN_START+GSTART),
            xmax=max(BIN_END+GSTART),
            xmean=mean(GPOS),
            LGmin=min(BIN_START),
            LGmax=min(BIN_END),
            LGmean=min(POS),
            RUN=RUN[1],COL=COL[1]) %>%
  ungroup() %>% mutate(muskS = letters[as.numeric(as.factor(xmin))],
                       musk=c('E',NA,NA,NA,'F',"A",NA,"C",NA,"D","H",       # NU
                              "E",NA,NA,NA,"G","A",NA,"B","C","D","D",      # PN
                              "F","C","D","H",rep(NA,4),NA,NA,"B","C"));    # PU

musks <- data2 %>% group_by(musk) %>% summarise(MUSKmin=min(xmin),
                                                MUSKmax=max(xmax),
                                                MUSKminLG=min(LGmin),
                                                MUSKmaxLG=max(LGmax)) %>%
  merge(data2[,c(1,9,10,12)],.,by='musk') %>% filter(!is.na(musk)) %>%
  mutate(muskX = ifelse(musk %in% c("A","B","E","F"),MUSKmin-1e+07,MUSKmax+1e+07))

For plotting, colors,labels and the scaling of the secondary axis are pre-defined.

clr <- c('#fb8620','#d93327','#1b519c')
annoclr <- rgb(.4,.4,.4)
lgclr <- rgb(.9,.9,.9)
yl <- expression(italic('F'[ST]))

The base plot is done with ggplot().

p1 <- ggplot()+
  # background shading for LGs
  geom_rect(inherit.aes = F,data=karyo,aes(xmin=GSTART,xmax=GEND,
                                           ymin=-Inf,ymax=Inf,fill=GROUP))+
  # vertical lines for outlier windows
  geom_vline(data=data2,aes(xintercept=xmean),col=annoclr,lwd=.2)+
  # Fst values
  geom_point(data=data,aes(x=GPOS,y=WEIGHTED_FST,col=COL),size=.01)+
  # labels for the outlier windows
  geom_text(data=(musks %>% filter(!is.na(musk))),aes(x=muskX,y=.65,label=musk),size=3)+
  # formatting the axes and color map
  scale_y_continuous(name = yl,breaks=0:4*0.2,labels = c(0,'',0.4,'',0.8))+
  scale_x_continuous(expand = c(0,0),breaks = (karyo$GSTART+karyo$GEND)/2,labels = 1:24,position = "top")+
  scale_color_manual(name='comparison',values=clr)+
  scale_fill_manual(values = c(NA,lgclr),guide=F)+
  # general plot theme
  theme_ipsum(grid = F,
              axis_title_just = 'c',
              axis_title_family = 'bold',
              strip_text_size = 9)+
  theme(plot.margin = unit(c(0,22,16,0),'pt'),
        panel.spacing.y = unit(3,'pt'),
        legend.position = 'none',
        axis.title.x = element_blank(),
        axis.ticks.y = element_line(color = annoclr),
        axis.line.y =  element_line(color = annoclr),
        axis.text.y = element_text(margin = unit(c(0,3,0,0),'pt')),
        axis.title.y = element_text(vjust = -1),
        strip.background = element_blank(),
        strip.text = element_blank()
        )+
  facet_grid(RUN~.)

External images are loaded for annotation.

legGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/legend-pw-cairo.svg"))))
carGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/caribbean-cairo.svg"))))
# variables for annotation placement
ys <- .08
ysc <- .0057
yd <- .856
boxes = data.frame(x=.9675,y=ys)

And finally the base plot is combined with the external annotations.

S07 <- ggdraw(p1)+
  geom_rect(data = boxes, aes(xmin = x, xmax = x + .02, ymin = y, ymax = y + yd),
            colour = NA, fill = annoclr)+
  draw_label('Global', x = .9775, y = boxes$y+.42,
             size = 9, angle=-90,col='white')+
  draw_grob(legGrob, 0.01, 0, 1, 0.08)+
  draw_grob(carGrob, 0.9332, boxes$y+.88*yd, .09, .09)

The figure is then exported using ggsave().

ggsave(plot = S07,filename = '../output/S07.png',width = 183,height = 70,dpi = 200,units = 'mm')