32 Supplementary Figure 5
32.1 Summary
This is the accessory documentation of Figure S5.
The Figure can be recreated by running the R script plot_SF5.R
:
cd $BASE_DIR
Rscript --vanilla R/fig/plot_SF5.R \
\
2_analysis/fst/50k/ 2_analysis/summaries/fst_globals.txt
32.2 Details of plot_SF5.R
In the following, the individual steps of the R script are documented. It is an executable R script that depends on the accessory R package GenomicOriginsScripts, as well as on the packages hypoimg, hypogen and patchwork
32.2.1 Config
The scripts start with a header that contains copy & paste templates to execute or debug the script:
#!/usr/bin/env Rscript
# run from terminal:
# Rscript --vanilla R/fig/plot_SF5.R \
# 2_analysis/fst/50k/ \
# 2_analysis/summaries/fst_globals.txt
# ===============================================================
# This script produces Suppl. Figure 5 of the study "Rapid radiation in a
# highly diverse marine environment" by Hench, Helmkampf, McMillan and Puebla
# ---------------------------------------------------------------
# ===============================================================
# args <- c('2_analysis/fst/50k/', '2_analysis/summaries/fst_globals.txt')
# script_name <- "R/fig/plot_SF5.R"
<- commandArgs(trailingOnly = FALSE) args
The next section processes the input from the command line.
It stores the arguments in the vector args
.
The needed R packages are loaded and the script name and the current working directory are stored inside variables (script_name
, plot_comment
).
This information will later be written into the meta data of the figure to help us tracing back the scripts that created the figures in the future.
Then we drop all the imported information besides the arguments following the script name and print the information to the terminal.
# setup -----------------------
::activate()
renvlibrary(GenomicOriginsScripts)
library(ggforce)
library(hypoimg)
library(hypogen)
library(vroom)
cat('\n')
<- args[5] %>%
script_name str_remove(.,'--file=')
<- script_name %>%
plot_comment str_c('mother-script = ',getwd(),'/',.)
<- process_input(script_name, args) args
#> ── Script: R/fig/plot_SF5.R ────────────────────────────────────────────
#> Parameters read:
#> ★ 1: 2_analysis/fst/50k/
#> ★ 2: 2_analysis/summaries/fst_globals.txt
#> ────────────────────────────────────────── /current/working/directory ──
The directory containing the PCA data is received and stored in a variable. Also the default color scheme is updated and the size of the hamlet ann.
# config -----------------------
<- as.character(args[1])
data_dir <- as.character(args[2]) globals_file
# script -----------------------
# locate data files
<- dir(path = data_dir, pattern = '.50k.windowed.weir.fst.gz') files
# load genome wide average fst data
<- vroom::vroom(globals_file, delim = '\t',
globals col_names = c('loc','run','mean','weighted')) %>%
mutate(run = str_c(loc,'-',run) %>%
reformat_run_name()
)
# prepare data import settings within a data table (tibble)
<- list(file = str_c(data_dir,files),
import_table fst_threshold = c(.5,.4,.3,.2,.1,.05,.02,.01)) %>%
cross_df() %>%
mutate( run = file %>%
str_remove('^.*/') %>%
str_sub(.,1,11) %>%
reformat_run_name())
# import dxy data and compute threshold stats
<- function(file, run, fst_threshold,...){
get_fst_fixed
<- hypogen::hypo_import_windows(file, ...) %>%
data mutate(rank = rank(WEIGHTED_FST, ties.method = "random"))%>%
mutate(thresh = fst_threshold) %>%
mutate(outl = (WEIGHTED_FST > thresh) %>% as.numeric()) %>%
filter(outl == 1 )
if(nrow(data) == 0){
return(tibble(run = run, n = 0, avg_length = NA, med_length = NA, min_length = NA, max_length = NA,
sd_length = NA, overal_length = NA, threshold_value = fst_threshold))
else {
} %>%
data # next, we want to collapse overlapping windows
group_by(CHROM) %>%
# we check for overlap and create 'region' IDs
mutate(check = 1-(lag(BIN_END,default = 0)>BIN_START),
ID = str_c(CHROM,'_',cumsum(check))) %>%
ungroup() %>%
# then we collapse the regions by ID
group_by(ID) %>%
summarise(run = run[1],
run = run[1],
treshold_value = thresh[1],
CHROM = CHROM[1],
BIN_START = min(BIN_START),
BIN_END = max(BIN_END)) %>%
mutate(PEAK_SIZE = BIN_END-BIN_START) %>%
summarize(run = run[1],
run = run[1],
n = length(ID),
avg_length = mean(PEAK_SIZE),
med_length = median(PEAK_SIZE),
min_length = min(PEAK_SIZE),
max_length = max(PEAK_SIZE),
sd_length = sd(PEAK_SIZE),
overal_length = sum(PEAK_SIZE),
threshold_value = treshold_value[1])
} }
# load data and compute statistics based on fixed fst threshold
<- purrr::pmap_dfr(import_table, get_fst_fixed) %>%
data left_join(globals) %>%
mutate(run = fct_reorder(run, weighted))
# pre-format labels
<- data %>%
data2 select(threshold_value,weighted,n,avg_length,overal_length) %>%
mutate(avg_length = avg_length/1000,
overal_length = overal_length/(10^6)) %>%
rename(`atop(Number~of,Regions)` = 'n',
`atop(Average~Region,Length~(kb))` = 'avg_length',
`atop(Cum.~Region,Length~(Mb))` = 'overal_length') %>%
pivot_longer(names_to = 'variable',values_to = 'Value',3:5) %>%
mutate(threshold_value = str_c('italic(F[ST])~threshold:~',
threshold_value),variable = factor(variable, levels = c('atop(Number~of,Regions)',
'atop(Average~Region,Length~(kb))',
'atop(Cum.~Region,Length~(Mb))')))
# set font size
<- "black" base_line_clr
# compile plot
<- data2 %>%
p_done # select thresholds of interest
filter(!(threshold_value %in% (c(0.02,.1,0.2, 0.3, .4) %>%
str_c("italic(F[ST])~threshold:~",.)))) %>%
ggplot(aes(x = weighted, y = Value#, fill = weighted
)+
)# add red line for genome extent in lowest row
geom_hline(data = tibble(variable = factor(c('atop(Cum.~Region,Length~(Mb))',
'atop(Average~Region,Length~(kb))',
'atop(Number~of,Regions)'),
levels = c('atop(Number~of,Regions)',
'atop(Average~Region,Length~(kb))',
'atop(Cum.~Region,Length~(Mb))')),
y = c(559649677/(10^6),NA,NA)),
aes(yintercept = y),
color = rgb(1,0,0,.25))+
# add data points
geom_point(size = plot_size,
color = plot_clr )+
# define plot stucture
facet_grid(variable~threshold_value,
scale='free',
switch = 'y',
labeller = label_parsed)+
# configure scales
scale_x_continuous(name = expression(Whole-genome~differentiation~(weighted~italic(F[ST]))),
breaks = c(0,.05,.1),
limits = c(-.00025,.10025),
labels = c("0", "0.05", "0.1"))+
# configure legend
guides(fill = guide_colorbar(barwidth = unit(150, "pt"),
label.position = "top",
barheight = unit(5,"pt")))+
# tweak plot apperance
theme_minimal()+
theme(axis.text = element_text(size = plot_text_size_small,
color = rgb(.6,.6,.6)),
axis.title.y = element_blank(),
axis.text.x = element_text(vjust = .5, angle = 0),
axis.title.x = element_text(vjust = -2),
panel.background = element_rect(fill = rgb(.95,.95,.95,.5),
color = rgb(.9,.9,.9,.5),
size = .3),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(size = plot_lwd),
legend.position = "bottom",
strip.text = element_text(size = plot_text_size),
legend.direction = "horizontal",
strip.placement = 'outside',
axis.title = element_text(size = plot_text_size),
legend.title = element_text(size = plot_text_size),
strip.background.y = element_blank(),
plot.background = element_blank())
Finally, we can export Figure S5.
# export figure 5
hypo_save(filename = 'figures/SF5.pdf',
plot = p_done,
width = f_width,
height = .5 * f_width,
device = cairo_pdf,
comment = plot_comment,
bg = "transparent")