31 Supplementary Figure 4
31.1 Summary
This is the accessory documentation of Figure S4.
The Figure can be recreated by running the R script plot_SF4.R
:
cd $BASE_DIR
Rscript --vanilla R/fig/plot_SF4.R \
\
2_analysis/fst/50k/ \
2_analysis/summaries/fst_outliers_998.tsv 2_analysis/summaries/fst_globals.txt
31.2 Details of plot_SF4.R
In the following, the individual steps of the R script are documented. It is an executable R script that depends on the accessory R package GenomicOriginsScripts, as well as on the packages hypoimg, hypogen and patchwork
31.2.1 Config
The scripts start with a header that contains copy & paste templates to execute or debug the script:
#!/usr/bin/env Rscript
# run from terminal:
# Rscript --vanilla R/fig/plot_SF4.R \
# 2_analysis/fst/50k/ \
# 2_analysis/summaries/fst_outliers_998.tsv \
# 2_analysis/summaries/fst_globals.txt
# ===============================================================
# This script produces Suppl. Figure 4 of the study "Rapid radiation in a
# highly diverse marine environment" by Hench, Helmkampf, McMillan and Puebla
# ---------------------------------------------------------------
# ===============================================================
# args <- c('2_analysis/fst/50k/',
# '2_analysis/summaries/fst_outliers_998.tsv',
# '2_analysis/summaries/fst_globals.txt')
# script_name <- "R/fig/plot_SF4.R"
<- commandArgs(trailingOnly = FALSE) args
The next section processes the input from the command line.
It stores the arguments in the vector args
.
The needed R packages are loaded and the script name and the current working directory are stored inside variables (script_name
, plot_comment
).
This information will later be written into the meta data of the figure to help us tracing back the scripts that created the figures in the future.
Then we drop all the imported information besides the arguments following the script name and print the information to the terminal.
# setup -----------------------
::activate()
renvlibrary(GenomicOriginsScripts)
library(hypoimg)
library(hypogen)
library(vroom)
library(ggtext)
cat('\n')
<- args[5] %>%
script_name str_remove(.,'--file=')
<- script_name %>%
plot_comment str_c('mother-script = ',getwd(),'/',.)
<- process_input(script_name, args) args
#> ── Script: R/fig/plot_SF4.R ────────────────────────────────────────────
#> Parameters read:
#> ★ 1: 2_analysis/fst/50k/
#> ★ 2: 2_analysis/summaries/fst_outliers_998.tsv
#> ★ 3: 2_analysis/summaries/fst_globals.txt
#> ────────────────────────────────────────── /current/working/directory ──
The directory containing the PCA data is received and stored in a variable. Also the default color scheme is updated and the size of the hamlet ann.
# config -----------------------
<- as.character(args[1])
data_path <- as.character(args[2])
outlier_file <- as.character(args[3]) globals_file
# load data -------------------
# locate fst data files
<- dir(data_path,pattern = '.50k.windowed.weir.fst.gz') files
# extract run names from data file names
<- files %>%
run_files str_sub(.,1,11) %>%
str_replace(.,pattern = '([a-z]{3})-([a-z]{3})-([a-z]{3})', '\\2\\1-\\3\\1')
# load genome wide average fst values for each run
<- vroom::vroom(globals_file, delim = '\t',
globals col_names = c('loc','run','mean','weighted')) %>%
separate(run, into = c('pop1','pop2')) %>%
mutate(run = str_c(pop1,loc,'-',pop2,loc),
run = fct_reorder(run,weighted))
# load all windowed fst data and collapse in to a single data frame
<- purrr::pmap(tibble(file = str_c(data_path,files),
data run = run_files),
%>%
hypo_import_windows) bind_rows() %>%
::set_names(., nm = c('CHROM', 'BIN_START', 'BIN_END', 'N_VARIANTS',
purrr'WEIGHTED_FST', 'MEAN_FST', 'GSTART', 'POS', 'GPOS', 'run')) %>%
mutate(pop1 = str_sub(run,1,3),
pop2 = str_sub(run,8,10),
loc = str_sub(run,4,6),
run_label = str_c("*H. ", sp_names[pop1],"* - *H. ", sp_names[pop2],"*<br>(",loc_names[loc],")" ))
# create table for the indication of genome wide average fst in the plot background
# (rescale covered fst range to the extent of the genome)
<- globals %>%
global_bar select(weighted,run) %>%
mutate(run = as.character(run)) %>%
setNames(.,nm = c('fst','run')) %>%
pmap(.,fst_bar_row_run) %>%
bind_rows() %>%
mutate(pop1 = str_sub(run,1,3),
pop2 = str_sub(run,8,10),
loc = str_sub(run,4,6),
run_label = str_c("*H. ", sp_names[pop1],"* - *H. ", sp_names[pop2],"*<br>(",loc_names[loc],")" ),
run_label = fct_reorder(run_label,xmax_org))
# pre-calculate secondary x-axis breaks
<- scales::cbreaks(c(0,max(globals$weighted)),
sc_ax ::pretty_breaks(4)) scales
# compose final figure
<- ggplot()+
p_done # general plot structure separated by run
facet_grid( run_label ~ ., as.table = TRUE) +
# add genome wide average fst in the background
geom_rect(data = global_bar %>%
mutate(xmax = xmax * hypo_karyotype$GEND[24]),
aes(xmin = 0, xmax = xmax,
ymin = -Inf, ymax = Inf),
color = rgb(1,1,1,0),
fill = clr_below) +
# add LG borders
geom_vline(data = hypogen::hypo_karyotype,
aes(xintercept = GEND),
color = hypo_clr_lg) +
# add fst data points
geom_point(data = data %>%
mutate(run_label = factor(run_label,
levels = levels(global_bar$run_label))),
aes(x = GPOS, y = WEIGHTED_FST),
size=.2,color = plot_clr) +
# axis layout
scale_x_hypo_LG(sec.axis = sec_axis(~ ./hypo_karyotype$GEND[24],
breaks = (sc_ax$breaks/max(globals$weighted)),
labels = sprintf("%.2f", sc_ax$breaks),
name = expression(Genomic~position/~Genome~wide~weighted~italic(F[ST])))) +
scale_y_continuous(name = expression(italic('F'[ST])),
limits = c(-.1,1),
breaks = c(0,.5,1)) +
# general plot layout
theme_hypo() +
theme(strip.text.y = element_markdown(angle = 0),
strip.background = element_blank(),
legend.position = 'none',
axis.title.x = element_text(),
axis.text.x.bottom = element_text(colour = 'darkgray'))
Finally, we can export Figure S4.
# export final figure
hypo_save(filename = 'figures/SF4.png',
plot = p_done,
width = 8,
height = 12,
dpi = 600,
type = "cairo",
comment = plot_comment)
system("convert figures/SF4.png figures/SF4.pdf")
system("rm figures/SF4.png")
<- str_c("exiftool -overwrite_original -Description=\"", plot_comment, "\" figures/SF4.pdf")
create_metadata system(create_metadata)