3 Figure 1

3.1 Summary

This is the accessory documentation of Figure 1. The Figure can be recreated by running the R script F1.R:

cd $WORK/3_figures/F_scripts

Rscript --vanilla F1.R
rm Rplots.pdf

3.2 Details of `F1.R`

In the following, the individual steps of the R script are documented. Is an executable R script that depends on a variety of image manipulation and geodata packages. Additionally, the supporting R script (F1.functions.R) in needed:

library(sp)
library(maptools)
library(scales)
library(PBSmapping)
library(RColorBrewer)
library(tidyverse)
library(ggforce)
library(cowplot)
library(scatterpie)
library(hrbrthemes)
library(cowplot)
library(grid)
library(gridSVG)
library(grImport2)
library(grConvert)
library(ggmap)
library(marmap)
source("../../0_data/0_scripts/F1.functions.R")

3.2.1 Fig. 1 A)

First, we load the data sheet containing the sampling location as well as the population specifics for each sample and sub sample it to include only the samples included in the re-sequencing study.

The Coordinates need to be transformed from (deg min sec) to decimal degrees. The sample inventory is summarized over populations and merged with the coordinates later used for the sample pointers.

dataAll <- read.csv('../../0_data/0_resources/F1.sample.txt',sep='\t') %>% mutate(loc=substrRight(as.character(id),3))
data <- dataAll %>% filter(sample=="sample")
dataSum <- data%>% rowwise()%>% mutate(nn = sum(as.numeric(strsplit(as.character(`Latittude.N`),' ')[[1]])*c(1,1/60,1/3600)),
                                       ww = sum(as.numeric(strsplit(as.character(`Longitude.W`),' ')[[1]])*c(1,1/60,1/3600))) %>%
  group_by(loc) %>% summarise(n=mean(nn,na.rm = T),w=-mean(ww,na.rm = T)) %>%
  as.data.frame() %>% cbind(.,read.csv('../../0_data/0_resources/F1.pointer.csv'))

Then, we read in the coordinates of helper points that will be used to draw the bezier curves of the sample pointers. We also set the spacial range of the map.

crvs <- read.csv('../../0_data/0_resources/F1.curve.csv')
xlimW = c(-100,-53); ylimW = c(7,30.8)

The base map relies on the world data from the maps package. This data is loaded and clipped to the extend of the plotting area.

worldmap = map_data("world")
names(worldmap) <- c("X","Y","PID","POS","region","subregion")
worldmap = clipPolys(worldmap, xlim=xlimW,ylim=ylimW, keepExtra=TRUE)

External images are loaded for annotation. This includes the hamlet images, the country flags and legend.

# compas and legend
compGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/north-cairo.svg"))))
legGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/legend-map-cairo.svg"))))
# hamlets
nigGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/nigricans-cairo.svg"))))
pueGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/puella-cairo.svg"))))
uniGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/unicolor-cairo.svg"))))
# flags
belGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/belize-cairo.svg"))))
honGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/honduras-cairo.svg"))))
panGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/panama-cairo.svg"))))
# globe and pairwise legend
globGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/caribbean-cairo.svg"))))
legPGrob <- gTree(children=gList(pictureGrob(readPicture("../../0_data/0_img/legend_pairs-cairo.svg"))))

We create a summary data table, containing the sample size of each population and the position for the annotation within the map.

ypos = c(2,1,0)*1.6;
xpos=c(1,0,1)*4.57
sampSize <- data %>% group_by(spec,loc) %>% summarise(n=length(id)) %>% filter(spec!='gum') %>% arrange(loc) %>%
  cbind(y=c(rep(24.4,3)+ypos, # belize
            rep(13.6,3)+ypos, # panama
            rep(9.2,3)+ypos), # honduras
        x=c(rep(-94.7,3)+xpos, # belize
            rep(-78,3)+xpos, # panama
            rep(-98.75,3)+xpos)) # honduras

Next, we read in the shape files containing the distribution of the hamlet species. One file contains the overlap of the distribution ranges of H. nigricans, H. puella and H. unicolor, while the other contains the distribution range of the whole genus (Hypoplectrus). These distributions rely on the data provided by the Smithsonian Tropical Research Institute that were processed using gimp and qgis (including the Georeferencer Plugin)

uS <- readShapeSpatial('../../0_data/0_resources/F1_shps/F1.npu_distribution.shp');
uS_f <- fortify(uS,region="DN");
names(uS_f) <- c("X","Y","POS","hole","piece","id","PID"); uS_f$PID <- as.numeric(uS_f$PID)
uS_f <- clipPolys(uS_f, xlim=xlimW,ylim=ylimW, keepExtra=TRUE)

hS <- readShapeSpatial('../../0_data/0_resources/F1_shps/F1.hypoplectrus_distribution.shp');
hS_f <- fortify(hS,region="DN");
names(hS_f) <- c("X","Y","POS","hole","piece","id","PID"); hS_f$PID <- as.numeric(hS_f$PID)
hS_f <- clipPolys(hS_f, xlim=xlimW,ylim=ylimW, keepExtra=TRUE)

We set some colors used for plotting:

clrShps <- c("#ffc097","#b1d4f8") # hamlet distributions
cFILL <- rgb(.7,.7,.7) # landmass
ccc<-rgb(0,.4,.8) # sample pointers

Now, we got everything needed for the base map (Fig. 1 a):

p1 <- ggplot()+
  # set coordinates
  coord_map(projection = 'mercator',xlim=xlimW,ylim=ylimW)+
  # axes labels
  labs(y="Latitude",x="Longitude") +
  # preset theme
  theme_mapK +
  # landmass layer
  geom_polygon(data=worldmap,aes(X,Y,group=PID),fill = cFILL,
               col=rgb(0,0,0),lwd=.2)+
  # genus distribution layer
  geom_polygon(data=hS_f,aes(X,Y,group=PID),fill=clrShps[2],col=rgb(.3,.3,.3),lwd=.2)+
  # species distribution layer
  geom_polygon(data=uS_f,aes(X,Y,group=PID),fill=clrShps[1],col=clrShps[1],lwd=.2)+
  # point background
  geom_point(data=sampSize,aes(x=x,y=y),col='white',size=7)+
  # sample size annotation
  geom_text(data=sampSize,aes(x=x,y=y,label=n),size=3)+
  # sample pointer curves
  geom_bezier(data = crvs,aes(x= x, y = y, group = loc),col=ccc,lwd=.4) +
  # sample pointer points
  geom_point(data = crvs[c(3,6,9),],aes(x= x, y = y, group = loc),size=5,col=ccc,shape=21,fill='white')+
  # sample pointer labels
  geom_text(data = crvs[c(3,6,9),],aes(x= x, y = y, label=c('B','P','H')),size=3)+
  # sample point anchors
  geom_point(data=dataSum,aes(x=w,y=n),size=1.5,shape=21,fill='white')

3.2.2 Fig. 1 B)

Next we create the sub figure containing the F_ST information (Fig. 1 B). We first read in the data and define the color palette.

fstdata <- read.csv('../../2_output/08_popGen/05_fst/genome_wide_weighted_mean_fst.txt',
                    sep='\t',skip = 1,head=F,col.names = c('pair','fst')) %>%
  filter(!pair %in% c('NU','PN','PU')) %>%
  arrange(fst) %>%
  mutate(x=row_number()) %>%
  rowwise() %>%
  mutate(loc = substr(as.character(pair),2,2),
         pop1 = substr(as.character(pair),1,1),
         pop2 = substr(as.character(pair),3,3),
         pairclass=paste(pop1,pop2,sep='-'))

# defining the pairwise color palette
clrP <- c('#1b519c','#fb8620','#d93327')

And than generate the bar plot:

p5 <- ggplot(fstdata,aes(x=x,y=fst,fill=pairclass))+
  # bar layer
  geom_bar(stat='identity')+
  # adding annotations
  annotation_custom(legPGrob,xmin =0.15,xmax =5,ymin =.008,ymax = .035)+
  # set color palette
  scale_fill_manual(values = clrP)+
  # y label and axis
  scale_y_continuous(name=expression(italic("F"['ST'])),
                     expand = c(0,0))+
  # x labels and axis
  scale_x_continuous(expand = c(.01,0),breaks = 1:9,
                     labels = c('H','H','B','H','P','B','B','P','P'))+
  # preset theme
  theme_mapK+
  # theme adjustments
  theme(legend.position = 'none',
        panel.grid.major.y = element_line(color=rgb(.9,.9,.9)),
        axis.title.x = element_blank(),
        axis.text.x = element_text(size = 8));

3.2.3 Fig. 1 C)

The last sub figure contains the PCA output for each location. For those, we define the color palette for hamlet species and read in the PCA data generated by pcadapt.

clr<-c('#000000','#d45500','#000000')
fll<-c('#000000','#d45500','#ffffff')
eVclr <- c(rep('darkred',2),rep(cFILL,8))

# Belize data
belPCA <- readRDS('../../2_output/08_popGen/04_pca/bel/belpca.Rds')
# Honduras data
honPCA <- readRDS('../../2_output/08_popGen/04_pca/hon/honpca.Rds')
# Panama data
panPCA <- readRDS('../../2_output/08_popGen/04_pca/pan/panpca.Rds')

The data for each PCA is merged with metadata about the samples and than plotted. Further, the explained variance of each PC is extracted for the axis labels.

# Honduras PCA (data)
dataHON <- cbind(dataAll %>% filter(sample=='sample',loc=='hon')%>% select(id,spec),
                 honPCA$scores); names(dataHON)[3:12]<- paste('PC',1:10,sep='')
exp_varHON <- (honPCA$singular.values[1:10])^2/length(honPCA$maf)
xlabHON <- paste('PC1 (',sprintf("%.1f",exp_varHON[1]*100),'%)')# explained varinace)')
ylabHON <- paste('PC2 (',sprintf("%.1f",exp_varHON[2]*100),'%)')# explained varinace)')

# Honduras PCA (plot)
p2 <- ggplot(dataHON,aes(x=PC1,y=PC2,col=spec,fill=spec))+geom_point(size=1.1,shape=21)+
  scale_color_manual(values=clr,guide=F)+
  ggtitle('Honduras')+
  scale_fill_manual(values=fll,guide=F)+theme_mapK+
  theme(legend.position='bottom',plot.title = element_text(size = 11,hjust = .45),
        panel.border = element_rect(color=rgb(.9,.9,.9),fill=rgb(1,1,1,0)),
        axis.title.y = element_text(vjust = -7),
        axis.title.x = element_text(vjust = 6),
        plot.margin = unit(rep(-10,4),'pt'))+#coord_equal()+
  scale_x_continuous(name=xlabHON,breaks = c(-.2,.5))+
  scale_y_continuous(name=ylabHON,breaks = c(-.4,.2))

# Belize PCA (data)
dataBEL <- cbind(dataAll %>% filter(sample=='sample',loc=='bel') %>% select(id,spec),
                 belPCA$scores); names(dataBEL)[3:12]<- paste('PC',1:10,sep='')

exp_varBEL <- (belPCA$singular.values[1:10])^2/length(belPCA$maf)
xlabBEL <- paste('PC1 (',sprintf("%.1f",exp_varBEL[1]*100),'%)')
ylabBEL <- paste('PC2 (',sprintf("%.1f",exp_varBEL[2]*100),'%)')

# Belize PCA (plot)
p3 <- ggplot(dataBEL,aes(x=PC1,y=PC2,col=spec,fill=spec))+geom_point(size=1.1,shape=21)+
  scale_color_manual(values=clr,guide=F)+
  ggtitle('Belize')+
  scale_fill_manual(values=fll,guide=F)+theme_mapK+
  theme(legend.position='bottom',plot.title = element_text(size = 11,hjust = .45),
        panel.border = element_rect(color=rgb(.9,.9,.9),fill=rgb(1,1,1,0)),
        axis.title.y = element_text(vjust = -7),
        axis.title.x = element_text(vjust = 6),
        plot.margin = unit(rep(-10,4),'pt'))+
  scale_x_continuous(name=xlabBEL,breaks = c(-.15,.25))+
  scale_y_continuous(name=ylabBEL,breaks = c(-.3,.2))

# Panama PCA (data)
dataPAN <- cbind(dataAll %>% filter(sample=='sample',loc=='boc',spec!='gum')%>% select(id,spec),
                 panPCA$scores); names(dataPAN)[3:12]<- paste('PC',1:10,sep='')
exp_varPAN <- (panPCA$singular.values[1:10])^2/length(panPCA$maf)
xlabPAN <- paste('PC1 (',sprintf("%.1f",exp_varPAN[1]*100),'%)')
ylabPAN <- paste('PC2 (',sprintf("%.1f",exp_varPAN[2]*100),'%)')

# Panama PCA (plot)
p4 <- ggplot(dataPAN,aes(x=PC1,y=PC2,col=spec,fill=spec))+geom_point(size=1.1,shape=21)+
  scale_color_manual(values=clr,guide=F)+
  ggtitle('Panama')+
  scale_fill_manual(values=fll,guide=F)+theme_mapK+
  theme(legend.position='bottom',plot.title = element_text(size = 11,hjust = .45),
        panel.border = element_rect(color=rgb(.9,.9,.9),fill=rgb(1,1,1,0)),
        axis.title.y = element_text(vjust = -7),
        axis.title.x = element_text(vjust = 6),
        plot.margin = unit(rep(-10,4),'pt'))+
  scale_x_continuous(name=xlabPAN,breaks = c(-.15,.2))+
  scale_y_continuous(name=ylabPAN,breaks = c(-.2,.2))

3.2.4 Fig 1 (complete)

Finally, we have all the pieces and can put together Fig. 1.

We need a few helper variables for the placing and scaling of the annotations:

# scaling (x,y)
hSC <- c(.07,.07)
# hamlet placing (within group)
yf <- c(0.07,0.035,0.00);xf<-c(0.07,0,0.07)
# hamlet group placing
yS <- c(.485,.577,.805);xS<-c(.09,.417,.156)

Using ggdraw() form the cowplot package, the figure is put together.

F1 <- ggdraw()+
  # include subplots
  draw_plot(plot = p1,0,.38,.81,.64)+
  draw_plot(plot = p5,0,.01,.24,.35)+
  draw_plot(plot = p2,.25,.01,.23,.37)+
  draw_plot(plot = p3,.5,.01,.23,.37)+
  draw_plot(plot = p4,.75,.01,.23,.37)+
  # annotations (legend)
  draw_grob(legGrob, .375,.395,.65,.65)+
  # annotations (compas)
  draw_grob(compGrob, 0.605, 0.482, 0.06, 0.06)+
  # annotations (hamlets)
  draw_grob(nigGrob, xf[1]+xS[1], yf[1]+yS[1], hSC[1], hSC[2])+ # Honduras
  draw_grob(pueGrob, xf[2]+xS[1], yf[2]+yS[1], hSC[1], hSC[2])+ # Honduras
  draw_grob(uniGrob, xf[3]+xS[1], yf[3]+yS[1], hSC[1], hSC[2])+ # Honduras

  draw_grob(nigGrob, xf[1]+xS[2], yf[1]+yS[2], hSC[1], hSC[2])+ # Panama
  draw_grob(pueGrob, xf[2]+xS[2], yf[2]+yS[2], hSC[1], hSC[2])+ # Panama
  draw_grob(uniGrob, xf[3]+xS[2], yf[3]+yS[2], hSC[1], hSC[2])+ # Panama

  draw_grob(nigGrob, xf[1]+xS[3], yf[1]+yS[3], hSC[1], hSC[2])+ # Belize
  draw_grob(pueGrob, xf[2]+xS[3], yf[2]+yS[3], hSC[1], hSC[2])+ # Belize
  draw_grob(uniGrob, xf[3]+xS[3], yf[3]+yS[3], hSC[1], hSC[2])+ # Belize
  # annotations (flags)
  draw_grob(panGrob, 0.3, 0.485, 0.042, 0.042)+
  draw_grob(belGrob, 0.2, 0.67, 0.042, 0.042)+
  draw_grob(honGrob, 0.28, 0.61, 0.042, 0.042)+

  draw_grob(honGrob, 0.285, 0.37, 0.042, 0.042)+
  draw_grob(belGrob, 0.535, 0.37, 0.042, 0.042)+
  draw_grob(panGrob, 0.785, 0.37, 0.042, 0.042)+
  # annotations (subfigure labels)
  draw_plot_label(x = c(.001,.001,.24),
                  y = c(.99,.42,.42),
                  label = letters[1:3])

It is then exported using ggsave().

ggsave(plot = F1,filename = '../output/F1.pdf',width = 183,height = 145,units = 'mm',device = cairo_pdf)