12 Supplementary Figure 07
12.1 Summary
This is the accessory documentation of Supplementary Figure 07.
The Figure can be recreated by running the R script S07.R
:
cd $WORK/3_figures/F_scripts
Rscript --vanilla S07.R
rm Rplots.pdf
12.2 Details of S07.R
In principal the S07.R
script is a reduced versions of the F2.R
script.
In contrast to F2.R
, in this version only the global species comparisons are loaded. Additionally, the outlier threshold is lowered to the 99.90 FST percentile (as compared to the 99.98 percentile).
Is an executable R script that depends on a variety of image manipulation and data managing packages.
library(grid)
library(gridSVG)
library(grImport2)
library(grConvert)
library(tidyverse)
library(cowplot)
library(hrbrthemes)
12.2.1 Reading in Data
Supplementary Figure 07 depends on the results of the FST computation by VCFtools
in broad resolution (50 kb windows, 5 kb increments). The results of the individual species pairs are loaded individually, the genomic positions added to the loci and details about the species comparisons added.
Further more, the extent of the individual LGs in loaded.
karyo <- read.csv('../../0_data/0_resources/F2.karyo.txt',sep='\t') %>%
mutate(GSTART=lag(cumsum(END),n = 1,default = 0),
GEND=GSTART+END,GROUP=rep(letters[1:2],12)) %>%
select(CHROM,GSTART,GEND,GROUP)
# global -------------
pn <- read.csv('../../2_output/08_popGen/05_fst/pue-nig.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PN');
pu <- read.csv('../../2_output/08_popGen/05_fst/pue-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PU');
nu <- read.csv('../../2_output/08_popGen/05_fst/nig-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NU');
The individual species comparisons are combined into a single data set. Then the order of the species comparisons and of the comparison types are set.
# ------------------------------------------
data <- rbind(pn,pu,nu)
data$RUN <- factor(as.character(data$RUN),
levels=c('PN','NU','PU'))
data$COL <- factor(as.character(data$COL),levels=c('PN','PU','NU'))
12.2.2 Computing 99.90 FST percentiles
The upper 99.90 FST percentile is computed per run. Then windows above this threshold are identified for each global run.
threshs <- data %>% group_by(RUN) %>% summarise(thresh=quantile(WEIGHTED_FST,.999))
data2 <- data %>% filter(RUN %in% levels(data$RUN)[1:3]) %>% merge(.,threshs,by='RUN',all.x=T) %>%
mutate(OUTL = (WEIGHTED_FST>thresh)) %>% filter(OUTL) %>% group_by(RUN) %>%
mutate(CHECK=cumsum(1-(BIN_START-lag(BIN_START,default = 0)==5000)),ID=paste(RUN,'-',CHECK,sep='')) %>%
ungroup() %>% group_by(ID) %>%
summarise(CHROM=CHROM[1],
xmin = min(BIN_START+GSTART),
xmax=max(BIN_END+GSTART),
xmean=mean(GPOS),
LGmin=min(BIN_START),
LGmax=min(BIN_END),
LGmean=min(POS),
RUN=RUN[1],COL=COL[1]) %>%
ungroup() %>% mutate(muskS = letters[as.numeric(as.factor(xmin))],
musk=c('E',NA,NA,NA,'F',"A",NA,"C",NA,"D","H", # NU
"E",NA,NA,NA,"G","A",NA,"B","C","D","D", # PN
"F","C","D","H",rep(NA,4),NA,NA,"B","C")); # PU
musks <- data2 %>% group_by(musk) %>% summarise(MUSKmin=min(xmin),
MUSKmax=max(xmax),
MUSKminLG=min(LGmin),
MUSKmaxLG=max(LGmax)) %>%
merge(data2[,c(1,9,10,12)],.,by='musk') %>% filter(!is.na(musk)) %>%
mutate(muskX = ifelse(musk %in% c("A","B","E","F"),MUSKmin-1e+07,MUSKmax+1e+07))
For plotting, colors,labels and the scaling of the secondary axis are pre-defined.
clr <- c('#fb8620','#d93327','#1b519c')
annoclr <- rgb(.4,.4,.4)
lgclr <- rgb(.9,.9,.9)
yl <- expression(italic('F'[ST]))
The base plot is done with ggplot()
.
p1 <- ggplot()+
# background shading for LGs
geom_rect(inherit.aes = F,data=karyo,aes(xmin=GSTART,xmax=GEND,
ymin=-Inf,ymax=Inf,fill=GROUP))+
# vertical lines for outlier windows
geom_vline(data=data2,aes(xintercept=xmean),col=annoclr,lwd=.2)+
# Fst values
geom_point(data=data,aes(x=GPOS,y=WEIGHTED_FST,col=COL),size=.01)+
# labels for the outlier windows
geom_text(data=(musks %>% filter(!is.na(musk))),aes(x=muskX,y=.65,label=musk),size=3)+
# formatting the axes and color map
scale_y_continuous(name = yl,breaks=0:4*0.2,labels = c(0,'',0.4,'',0.8))+
scale_x_continuous(expand = c(0,0),breaks = (karyo$GSTART+karyo$GEND)/2,labels = 1:24,position = "top")+
scale_color_manual(name='comparison',values=clr)+
scale_fill_manual(values = c(NA,lgclr),guide=F)+
# general plot theme
theme_ipsum(grid = F,
axis_title_just = 'c',
axis_title_family = 'bold',
strip_text_size = 9)+
theme(plot.margin = unit(c(0,22,16,0),'pt'),
panel.spacing.y = unit(3,'pt'),
legend.position = 'none',
axis.title.x = element_blank(),
axis.ticks.y = element_line(color = annoclr),
axis.line.y = element_line(color = annoclr),
axis.text.y = element_text(margin = unit(c(0,3,0,0),'pt')),
axis.title.y = element_text(vjust = -1),
strip.background = element_blank(),
strip.text = element_blank()
)+
facet_grid(RUN~.)
External images are loaded for annotation.
legGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/legend-pw-cairo.svg"))))
carGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/caribbean-cairo.svg"))))
# variables for annotation placement
ys <- .08
ysc <- .0057
yd <- .856
boxes = data.frame(x=.9675,y=ys)
And finally the base plot is combined with the external annotations.
S07 <- ggdraw(p1)+
geom_rect(data = boxes, aes(xmin = x, xmax = x + .02, ymin = y, ymax = y + yd),
colour = NA, fill = annoclr)+
draw_label('Global', x = .9775, y = boxes$y+.42,
size = 9, angle=-90,col='white')+
draw_grob(legGrob, 0.01, 0, 1, 0.08)+
draw_grob(carGrob, 0.9332, boxes$y+.88*yd, .09, .09)
The figure is then exported using ggsave()
.
ggsave(plot = S07,filename = '../output/S07.png',width = 183,height = 70,dpi = 200,units = 'mm')