4 Figure 2
4.1 Summary
This is the accessory documentation of Figure 2.
The Figure can be recreated by running the R script F2.R
:
cd $WORK/3_figures/F_scripts
Rscript --vanilla F2.R
rm Rplots.pdf
4.2 Details of F2.R
In the following, the individual steps of the R script are documented. Is an executable R script that depends on a variety of image manipulation and data managing packages.
library(grid)
library(gridSVG)
library(grImport2)
library(grConvert)
library(tidyverse)
library(cowplot)
library(hrbrthemes)
4.2.1 Reading in Data
Figure 2 depends on the results of the FST computation by VCFtools
in broad resolution (50 kb windows, 5 kb increments). The results of the individual species pairs are loaded individually, the genomic positions added to the loci and details about the species comparisons added.
Further more, the extent of the individual LGs in loaded.
karyo <- read.csv('../../0_data/0_resources/F2.karyo.txt',sep='\t') %>%
mutate(GSTART=lag(cumsum(END),n = 1,default = 0),
GEND=GSTART+END,GROUP=rep(letters[1:2],12)) %>%
select(CHROM,GSTART,GEND,GROUP)
# Global -------------
pn <- read.csv('../../2_output/08_popGen/05_fst/pue-nig.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PN');
pu <- read.csv('../../2_output/08_popGen/05_fst/pue-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PU');
nu <- read.csv('../../2_output/08_popGen/05_fst/nig-uni.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NU');
# Panama -------------
ppnp <- read.csv('../../2_output/08_popGen/05_fst/pueboc-nigboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PPNP');
ppup <- read.csv('../../2_output/08_popGen/05_fst/pueboc-uniboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PPUP');
npup <- read.csv('../../2_output/08_popGen/05_fst/nigboc-uniboc.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NPUP');
# Belize -------------
pbnb <- read.csv('../../2_output/08_popGen/05_fst/puebel-nigbel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PBNB');
pbub <- read.csv('../../2_output/08_popGen/05_fst/puebel-unibel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PBUB');
nbub <- read.csv('../../2_output/08_popGen/05_fst/nigbel-unibel.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NBUB');
# Honduras -------------
phnh <- read.csv('../../2_output/08_popGen/05_fst/puehon-nighon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PN',RUN='PHNH');
phuh <- read.csv('../../2_output/08_popGen/05_fst/puehon-unihon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='PU',RUN='PHUH');
nhuh <- read.csv('../../2_output/08_popGen/05_fst/nighon-unihon.50kb.5kb.windowed.weir.fst',sep='\t') %>%
merge(.,(karyo %>% select(-GEND,-GROUP)),by='CHROM',allx=T) %>%
mutate(POS=(BIN_START+BIN_END)/2,GPOS=POS+GSTART,COL='NU',RUN='NHUH');
The individual species comparisons are combined into a single data set. Then the order of the species comparisons and of the comparison types are set.
data <- rbind(pn,pu,nu,ppnp,ppup,npup,pbnb,pbub,nbub,phnh,phuh,nhuh)
data$RUN <- factor(as.character(data$RUN),
levels=c('PN','NU','PU','PPNP','NPUP','PPUP','PBNB','NBUB','PBUB','PHNH','NHUH','PHUH'))
data$COL <- factor(as.character(data$COL),levels=c('PN','PU','NU'))
Then, the genome wide weighted mean of FST is loaded.
gwFST <- read.csv('../../2_output/08_popGen/05_fst/genome_wide_weighted_mean_fst.txt',sep='\t')
gwFST$RUN <- factor(gwFST$run,levels=levels(data$RUN))
4.2.2 Computing 99.98 FST percentiles
The upper 99.98 FST percentile is computed per run. Then windows above this threshold are identified for each global run.
threshs <- data %>% group_by(RUN) %>% summarise(thresh=quantile(WEIGHTED_FST,.9998))
data2 <- data %>%
filter(RUN %in% levels(data$RUN)[1:3]) %>%
merge(.,threshs,by='RUN',all.x=T) %>%
mutate(OUTL = (WEIGHTED_FST>thresh)) %>%
filter(OUTL) %>%
group_by(RUN) %>%
mutate(CHECK=cumsum(1-(BIN_START-lag(BIN_START,default = 0)==5000)),ID=paste(RUN,'-',CHECK,sep='')) %>%
ungroup() %>%
group_by(ID) %>%
summarise(CHROM=CHROM[1],
xmin = min(BIN_START+GSTART),
xmax=max(BIN_END+GSTART),
xmean=mean(GPOS),
LGmin=min(BIN_START),
LGmax=min(BIN_END),
LGmean=min(POS),
RUN=RUN[1],COL=COL[1]) %>%
ungroup() %>% mutate(muskS = letters[as.numeric(as.factor(xmin))],
musk=LETTERS[c(1,3,4,4,1,2,2,3)])
musks <- data2 %>% group_by(musk) %>% summarise(MUSKmin=min(xmin),
MUSKmax=max(xmax),
MUSKminLG=min(LGmin),
MUSKmaxLG=max(LGmax)) %>%
merge(data2,.,by='musk') %>% mutate(muskX = ifelse(musk %in% c("A","B"),
MUSKmin-1e+07,MUSKmax+1e+07))
4.2.3 Plotting
For plotting, colors,labels and the scaling of the secondary axis are pre-defined.
clr <- c('#fb8620','#d93327','#1b519c')
annoclr <- rgb(.4,.4,.4)
lgclr <- rgb(.9,.9,.9)
yl <- expression(italic('F'[ST]))
secScale <- 20
The base plot is done with ggplot()
.
p1 <- ggplot()+
# background shading for LGs
geom_rect(inherit.aes = F,data=karyo,aes(xmin=GSTART,xmax=GEND,
ymin=-Inf,ymax=Inf,fill=GROUP))+
# vertical lines for outlier windows
geom_vline(data=data2,aes(xintercept=xmean),col=annoclr,lwd=.2)+
# Fst values
geom_point(data=data,aes(x=GPOS,y=WEIGHTED_FST,col=COL),size=.01)+
# labels for the outlier windows
geom_text(data=musks,aes(x=muskX,y=.65,label=musk))+
# vertical line separating sliding Fst from genome wide Fst
geom_vline(data = data.frame(x=567000000),aes(xintercept=x),col=annoclr,lwd=.2)+
# genome wide Fst
geom_rect(data=gwFST,aes(xmin=570*10^6,xmax=573*10^6,ymin=0,ymax=gwFST*secScale))+
# layout of y axis and secondary y axis
scale_y_continuous(name = yl,limits=c(-.03,.83),
breaks=0:4*0.2,labels = c(0,'',0.4,'',0.8),
sec.axis = sec_axis(~./secScale,labels = c(0,'',.02,'',.04)))+
# layout of x axis
scale_x_continuous(expand = c(0,0),limits = c(0,577*10^6),
breaks = c((karyo$GSTART+karyo$GEND)/2,571*10^6),
labels = c(1:24,"g.w."),position = "top")+
# set color map
scale_color_manual(name='comparison',values=clr)+
# set fill of LG backgrounds
scale_fill_manual(values = c(NA,lgclr),guide=F)+
# pre-defined plot theme
theme_ipsum(grid = F,
axis_title_just = 'c',
axis_title_family = 'bold',
strip_text_size = 9)+
# tweaks of the plot theme
theme(plot.margin = unit(c(2,25,40,0),'pt'),
panel.spacing.y = unit(3,'pt'),
legend.position = 'none',
axis.title.x = element_blank(),
axis.ticks.y = element_line(color = annoclr),
axis.line.y = element_line(color = annoclr),
axis.text.y = element_text(margin = unit(c(0,3,0,0),'pt')),
axis.title.y = element_text(vjust = -1),
strip.background = element_blank(),
strip.text = element_blank())+
# separate the plots of the individual species pairs
facet_grid(RUN~.)
External images are loaded for annotation.
legGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/legend-pw-cairo.svg"))))
carGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/caribbean-cairo.svg"))))
panGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/panama-cairo.svg"))))
belGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/belize-cairo.svg"))))
honGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/honduras-cairo.svg"))))
# variables for annotation placement
ys <- .0765
ysc <- .0057
yd <- .2195
boxes = data.frame(x=rep(.9675,4),y=c(ys,ys+yd+ysc,ys+(yd+ysc)*2,ys+(yd+ysc)*3))
4.2.4 Composing the final figure
And finally the base plot is combined with the external annotations.
F2 <- ggdraw(p1)+
geom_rect(data = boxes, aes(xmin = x, xmax = x + .02, ymin = y, ymax = y + yd),
colour = NA, fill = c(rep(lgclr,3),annoclr))+
draw_label('Global', x = .9775, y = boxes$y[4]+.08,
size = 9, angle=-90,col='white')+
draw_label('Belize', x = .9775, y = boxes$y[2]+.08,
size = 9, angle=-90)+
draw_label('Honduras', x = .9775, y = boxes$y[1]+.08,
size = 9, angle=-90)+
draw_label('Panama', x = .9775, y = boxes$y[3]+.08,
size = 9, angle=-90)+
draw_grob(legGrob, 0.01, 0, 1, 0.08)+
draw_grob(carGrob, 0.954, boxes$y[4]+.78*yd, .045, .045)+
draw_grob(belGrob, 0.954, boxes$y[2]+.78*yd, .045, .045)+
draw_grob(honGrob, 0.954, boxes$y[1]+.78*yd, .045, .045)+
draw_grob(panGrob, 0.954, boxes$y[3]+.78*yd, .045, .045)
The FST outlier windows are then exported to be used as reference in other figures.
write.table(x = data2, file = '../../0_data/0_resources/fst_outlier_windows.txt',sep='\t',quote = F,row.names = F)
write.table(x = musks, file = '../../0_data/0_resources/fst_outlier_ID.txt',sep='\t',quote = F,row.names = F)
The figure is then exported using ggsave()
.
ggsave(plot = F2,filename = '../output/F2.png',width = 183,height = 183,dpi = 200,units = 'mm')
#ggsave('fst_mac02_weight_99.98.eps',width = 183,height = 183,dpi = 200,units = 'mm')