23 Figure 2
23.1 Summary
This is the accessory documentation of Figure 2.
The Figure can be recreated by running the R script plot_F2.R
from a (bash
terminal):
cd $BASE_DIR
Rscript --vanilla R/fig/plot_F2.R \
\
2_analysis/fst/50k/multi_fst.50k.tsv.gz \
2_analysis/GxP/50000/ \
2_analysis/summaries/fst_outliers_998.tsv \
https://raw.githubusercontent.com/simonhmartin/twisst/master/plot_twisst.R \
2_analysis/twisst/weights/ \
ressources/plugin/trees/ 2_analysis/summaries/fst_globals.txt
23.2 Details of plot_F2.R
In the following, the individual steps of the R script are documented. It is an executable R script that depends on the accessory R package GenomicOriginsScripts, BAMMtools and on the package hypoimg.
23.2.1 Config
The scripts start with a header that contains copy & paste templates to execute interactively or debug the script:
#!/usr/bin/env Rscript
# run from terminal:
# Rscript --vanilla R/fig/plot_F2.R \
# 2_analysis/fst/50k/multi_fst.50k.tsv.gz \
# 2_analysis/GxP/50000/ \
# 2_analysis/summaries/fst_outliers_998.tsv \
# https://raw.githubusercontent.com/simonhmartin/twisst/master/plot_twisst.R \
# 2_analysis/twisst/weights/ \
# ressources/plugin/trees/ \
# 2_analysis/summaries/fst_globals.txt
# ===============================================================
# This script produces Figure 2 of the study "Rapid radiation in a highly
# diverse marine environment" by Hench, Helmkampf, McMillan and Puebla
# ---------------------------------------------------------------
# ===============================================================
# args <- c('2_analysis/fst/50k/multi_fst.50k.tsv.gz',
# '2_analysis/GxP/50000/', '2_analysis/summaries/fst_outliers_998.tsv',
# 'https://raw.githubusercontent.com/simonhmartin/twisst/master/plot_twisst.R',
# '2_analysis/twisst/weights/', 'ressources/plugin/trees/',
# '2_analysis/summaries/fst_globals.txt')
# script_name <- "R/fig/plot_F2.R"
<- commandArgs(trailingOnly = FALSE) args
The next section processes the input from the command line.
It stores the arguments in the vector args
.
The needed R packages are loaded and the script name and the current working directory are stored inside variables (script_name
, plot_comment
).
This information will later be written into the meta data of the figure to help us tracing back the scripts that created the figures in the future.
Then we drop all the imported information besides the arguments following the script name and print the information to the terminal.
# setup -----------------------
::activate()
renvlibrary(GenomicOriginsScripts)
library(hypoimg)
library(hypogen)
cat('\n')
<- args[5] %>%
script_name str_remove(.,'--file=')
<- script_name %>%
plot_comment str_c('mother-script = ',getwd(),'/',.)
<- process_input(script_name, args) args
#> ── Script: R/fig/plot_F2.R ──────────────────────────────────────────────
#> Parameters read:
#> ★ 1: 2_analysis/fst/50k/multi_fst.50k.tsv.gz
#> ★ 2: 2_analysis/GxP/50000/
#> ★ 3: 2_analysis/summaries/fst_outliers_998.tsv
#> ★ 4: https://raw.githubusercontent.com/simonhmartin/twisst/master/plot_twisst.R
#> ★ 5: 2_analysis/twisst/weights/
#> ★ 6: ressources/plugin/trees/
#> ★ 7: 2_analysis/summaries/fst_globals.txt
#> ─────────────────────────────────────────── /current/working/directory ──
The directories for the different data types are received and stored in respective variables. Also, we set a few parameters for the plot layout:
# config -----------------------
<- as.character(args[1])
fst_file <- as.character(args[2])
gxp_dir <- as.character(args[3])
outlier_table <- as.character(args[4])
twisst_script <- as.character(args[5])
w_path <- as.character(args[6])
d_path <- as.character(args[7])
global_fst_file source(twisst_script)
23.2.2 Actual Script Start
# start script -------------------
# import fst data
<- vroom::vroom(fst_file, delim = '\t') %>%
fst_data select(CHROM, BIN_START, BIN_END, N_VARIANTS, WEIGHTED_FST) %>%
setNames(., nm = c('CHROM', 'BIN_START', 'BIN_END', 'n_snps', 'fst') ) %>%
add_gpos() %>%
select(GPOS, fst) %>%
setNames(., nm = c('GPOS','value')) %>%
mutate(window = str_c('bold(',project_case('a'),'):joint~italic(F[ST])'))
# set G x P traits to be imported
<- c("Bars.lm.50k.5k.txt.gz", "Peduncle.lm.50k.5k.txt.gz", "Snout.lm.50k.5k.txt.gz") traits
# set trait figure panels
<- c(Bars = str_c('bold(',project_case('d'),')'),
trait_panels Peduncle = str_c('bold(',project_case('e'),')'),
Snout = str_c('bold(',project_case('f'),')'))
# import G x P data
<- str_c(gxp_dir,traits) %>%
gxp_data ::map(get_gxp) %>%
purrrjoin_list() %>%
gather(key = 'window', value = 'value',2:4)
# import genome wide Fst data summary --------
<- vroom::vroom(global_fst_file, delim = '\t',
globals col_names = c('loc','run','mean','weighted')) %>%
mutate(run = str_c(str_sub(run,1,3),loc,'-',str_sub(run,5,7),loc),
run = fct_reorder(run,weighted))
# dxy and pi are only shown for one exemplary population (/pair)
# select dxy pair run (15 is one of the two central runs of the 28 pairs)
# here, the 15th lowest fst value is identified as "selector"
<- globals %>%
selectors_dxy arrange(weighted) %>%
$weighted %>%
.15] .[
# import topology weighting data
<- tibble(loc = c('bel','hon'),
twisst_data panel = c('b','c') %>% project_case() %>% str_c('bold(',.,')')) %>%
::pmap(match_twisst_files) %>%
purrrbind_rows() %>%
select(GPOS, topo3,topo_rel,window,weight)
# the "null-weighting" is computed for both locations
<- tibble(window = c(str_c('bold(',project_case('b'),'):~italic(w)[bel]'),
twisst_null str_c('bold(',project_case('c'),'):~italic(w)[hon]')),
weight = c(1/15, 1/105))
# combine data types --------
<- bind_rows(fst_data, gxp_data) data
# import fst outliers
<- vroom::vroom(outlier_table, delim = '\t') outliers
# the focal outlier IDs are set
<- c('LG04_1', 'LG12_3', 'LG12_4') outlier_pick
# the table for the outlier labels is created
<- outliers %>%
outlier_label filter(gid %in% outlier_pick) %>%
mutate(label = letters[row_number()] %>% project_inv_case(),
x_shift_label = c(-1,-1.2,1)*10^7,
gpos_label = gpos + x_shift_label,
gpos_label2 = gpos_label - sign(x_shift_label) *.5*10^7,
window = str_c('bold(',project_case('a'),'):joint~italic(F[ST])'))
# the y height of the outlier labels and the corresponding tags is set
<- .45
outlier_y <- .475 outlier_yend
# the icons for the traits of the GxP are loaded
<- tibble(window = c("bold(d):italic(p)[Bars]",
trait_tibble "bold(e):italic(p)[Peduncle]",
"bold(f):italic(p)[Snout]"),
grob = hypo_trait_img$grob_circle[hypo_trait_img$trait %in% c('Bars', 'Peduncle', 'Snout')])
# finally, the figure is being put together
<- ggplot()+
p_done # add gray/white LGs background
geom_hypo_LG()+
# the red highlights for the outlier regions are added
geom_vline(data = outliers, aes(xintercept = gpos), color = outlr_clr)+
# the tags of the outlier labels are added
geom_segment(data = outlier_label,
aes(x = gpos,
xend = gpos_label2, y = outlier_y, yend = outlier_yend),
color = alpha(outlr_clr,1),
size = .2)+
# the outlier labels are added
geom_text(data = outlier_label, aes(x = gpos_label, y = outlier_yend, label = label),
color = alpha(outlr_clr,1),
fontface = 'bold',
size = plot_text_size / ggplot2:::.pt)+
# the fst, delta dxy and gxp data is plotted
geom_point(data = data, aes(x = GPOS, y = value),size = plot_size, color = plot_clr) +
# the topology weighting data is plotted
geom_line(data = twisst_data, aes(x = GPOS, y = weight, color = topo_rel), size = .4) +
# the null weighting is added
geom_hline(data = twisst_null, aes(yintercept = weight), color = rgb(1, 1, 1, .5), size = .4) +
# the trait icons are added
geom_hypo_grob(data = trait_tibble,
aes(grob = grob, angle = 0, height = .65),
inherit.aes = FALSE, x = .95, y = 0.65)+
# setting the scales
scale_fill_hypo_LG_bg() +
scale_x_hypo_LG()+
scale_color_gradient( low = "#f0a830ff", high = "#084082ff", guide = FALSE)+
# organizing the plot across panels
facet_grid(window~.,scales = 'free',switch = 'y', labeller = label_parsed)+
# tweak plot appreance
theme_hypo()+
theme(text = element_text(size = plot_text_size),
legend.position = 'bottom',
axis.title = element_blank(),
strip.text = element_text(size = plot_text_size),
strip.background = element_blank(),
strip.placement = 'outside')
Finally, we can export Figure 2.
hypo_save(p_done,
filename = 'figures/F2.png',
width = f_width,
height = f_width * .5,
dpi = 600,
type = "cairo",
comment = plot_comment)
system("convert figures/F2.png figures/F2.pdf")
system("rm figures/F2.png")
<- str_c("exiftool -overwrite_original -Description=\"", plot_comment, "\" figures/F2.pdf")
create_metadata system(create_metadata)