26 Figure 5
26.1 Summary
This is the accessory documentation of Figure 5.
The Figure can be recreated by running the R script plot_F5.R
from a (bash
terminal):
cd $BASE_DIR
Rscript --vanilla R/fig/plot_F5.R \
\
2_analysis/twisst/weights/ \
ressources/plugin/trees/ \
https://raw.githubusercontent.com/simonhmartin/twisst/master/plot_twisst.R \
2_analysis/summaries/fst_outliers_998.tsv \
2_analysis/dxy/50k/ \
2_analysis/fst/50k/ \
2_analysis/summaries/fst_globals.txt \
2_analysis/GxP/50000/ \
200 \
5 2_analysis/revPoMo/outlier_regions/
26.2 Details of plot_F5.R
In the following, the individual steps of the R script are documented. It is an executable R script that depends on the accessory R package GenomicOriginsScripts, BAMMtools and on the package hypoimg.
26.2.1 Config
The scripts start with a header that contains copy & paste templates to execute interactively or debug the script:
#!/usr/bin/env Rscript
# run from terminal:
# Rscript --vanilla R/fig/plot_F5.R \
# 2_analysis/twisst/weights/ \
# ressources/plugin/trees/ \
# https://raw.githubusercontent.com/simonhmartin/twisst/master/plot_twisst.R \
# 2_analysis/summaries/fst_outliers_998.tsv \
# 2_analysis/dxy/50k/ \
# 2_analysis/fst/50k/ \
# 2_analysis/summaries/fst_globals.txt \
# 2_analysis/GxP/50000/ \
# 200 \
# 5 \
# 2_analysis/revPoMo/outlier_regions/
# ===============================================================
# This script produces Figure 5 of the study "Rapid radiation in a highly
# diverse marine environment" by Hench, Helmkampf, McMillan and Puebla
# ---------------------------------------------------------------
# ===============================================================
# args <- c('2_analysis/twisst/weights/', 'ressources/plugin/trees/',
# 'https://raw.githubusercontent.com/simonhmartin/twisst/master/plot_twisst.R',
# '2_analysis/summaries/fst_outliers_998.tsv',
# '2_analysis/dxy/50k/', '2_analysis/fst/50k/',
# '2_analysis/summaries/fst_globals.txt',
# '2_analysis/GxP/50000/', 200, 5,
# "2_analysis/revPoMo/outlier_regions/")
# script_name <- "R/fig/plot_F5.R"
<- commandArgs(trailingOnly = FALSE) args
The next section processes the input from the command line.
It stores the arguments in the vector args
.
The needed R packages are loaded and the script name and the current working directory are stored inside variables (script_name
, plot_comment
).
This information will later be written into the meta data of the figure to help us tracing back the scripts that created the figures in the future.
Then we drop all the imported information besides the arguments following the script name and print the information to the terminal.
# setup -----------------------
::activate()
renvlibrary(GenomicOriginsScripts)
library(hypoimg)
library(hypogen)
library(furrr)
library(ggtext)
library(ape)
library(ggtree)
library(patchwork)
library(phangorn)
library(igraph)
cat('\n')
<- args[5] %>%
script_name str_remove(.,'--file=')
<- script_name %>%
plot_comment str_c('mother-script = ',getwd(),'/',.)
<- process_input(script_name, args) args
#> ── Script: R/fig/plot_F5.R ──────────────────────────────────────────────
#> Parameters read:
#> ★ 1: 2_analysis/twisst/weights/
#> ★ 2: ressources/plugin/trees/
#> ★ 3: https://raw.githubusercontent.com/simonhmartin/twisst/master/plot_twisst.R
#> ★ 4: 2_analysis/summaries/fst_outliers_998.tsv
#> ★ 5: 2_analysis/dxy/50k/
#> ★ 6: 2_analysis/fst/50k/
#> ★ 7: 2_analysis/summaries/fst_globals.txt
#> ★ 8: 2_analysis/GxP/50000/
#> ★ 9: 200
#> ★ 10: 5
#> ★ 11: 2_analysis/revPoMo/outlier_regions/
#> ─────────────────────────────────────────── /current/working/directory ──
The directories for the different data types are received and stored in respective variables. Also, we set a few parameters for the plot layout:
# config -----------------------
<- as.character(args[1])
w_path <- as.character(args[2])
d_path <- as.character(args[3])
twisst_functions <- as.character(args[4])
out_table <- as.character(args[5])
dxy_dir <- as.character(args[6])
fst_dir <- as.character(args[7])
fst_globals <- as.character(args[8])
gxp_dir <- as.numeric(args[9])
twisst_size <- as.numeric(args[10])
resolution <- as.character(args[11])
pomo_path <- dir(pomo_path, pattern = "155_pop")
pomo_trees
source(twisst_functions, local = TRUE)
plan(multiprocess)
<- 2.5*10^5 window_buffer
26.2.2 Actual Script Start
# actual script =========================================================
# locate dxy data files
<- dir(dxy_dir, pattern = str_c('dxy.*[a-z]{3}.*.', resolution ,'0kb-', resolution ,'kb.tsv.gz')) dxy_files
# import dxy data
<- tibble(file = str_c(dxy_dir, dxy_files)) %>%
dxy_data ::pmap_dfr(get_dxy, kb = str_c(resolution, '0kb')) purrr
# set traits of interest for GxP
<- c('Bars', 'Snout', 'Peduncle') gxp_traits
# import GxP data
<- str_c(gxp_dir,gxp_traits,'.lm.', resolution ,'0k.', resolution ,'k.txt.gz') %>%
gxp_data future_map_dfr(get_gxp_long, kb = 50)
# set topology weighting color scheme
<- c(Blue = "#0140E5", Bars = "#E32210", Butter = "#E4E42E")
twisst_clr # set GxP color scheme
<- c(Bars = "#79009f", Snout = "#E48A00", Peduncle = "#5B9E2D") %>%
gxp_clr darken(factor = .95) %>%
set_names(., nm = gxp_traits)
# compute genome wide average dxy
<- dxy_data %>%
dxy_globals filter(BIN_START %% ( resolution * 10000 ) == 1 ) %>%
group_by( run ) %>%
summarise(mean_global_dxy = sum(dxy*N_SITES)/sum(N_SITES)) %>%
mutate(run = fct_reorder(run,mean_global_dxy))
# import genome wide average fst
# and order population pairs by genomewide average fst
<- vroom::vroom(fst_globals,delim = '\t',
fst_globals col_names = c('loc','run_prep','mean_fst','weighted_fst')) %>%
separate(run_prep,into = c('pop1','pop2'),sep = '-') %>%
mutate(run = str_c(pop1,loc,'-', pop2, loc),
run = fct_reorder(run,weighted_fst))
# locate fst data files
<- dir(fst_dir, pattern = str_c('.', resolution ,'0k.windowed.weir.fst.gz')) fst_files
# import fst data
<- str_c(fst_dir, fst_files) %>%
fst_data future_map_dfr(get_fst, kb = str_c(resolution, '0k')) %>%
left_join(dxy_globals) %>%
left_join(fst_globals) %>%
mutate(run = refactor(., fst_globals),
BIN_MID = (BIN_START+BIN_END)/2)
# order dxy population pairs by genomewide average fst
<- dxy_data %>%
dxy_data left_join(dxy_globals) %>%
left_join(fst_globals) %>%
mutate(run = refactor(dxy_data, fst_globals),
window = 'bold(italic(d[xy]))')
# compute delta dxy
<- dxy_data %>%
data_dxy_summary group_by(GPOS) %>%
summarise(scaffold = CHROM[1],
start = BIN_START[1],
end = BIN_END[1],
mid = BIN_MID[1],
min_dxy = min(dxy),
max_dxy = max(dxy),
mean_dxy = mean(dxy),
median_dxy = median(dxy),
sd_dxy = sd(dxy),
delta_dxy = max(dxy)-min(dxy))
# load fst outlier regions
<- vroom::vroom(out_table, delim = '\t') %>%
outlier_table setNames(., nm = c("outlier_id","lg", "start", "end", "gstart","gend","gpos"))
# set outlier regions of interest
= c('LG04_1', 'LG12_3', 'LG12_4') outlier_pick
# load group-level phylogeny data
<- str_c(pomo_path, pomo_trees) %>%
pomo_data ::map_dfr(import_tree) %>%
purrrmutate(rooted_tree = map2(.x = tree,.y = type, .f = root_hamlets))
# select genes to label
<- c('arl3','kif16b','cdx1','hmcn2',
cool_genes 'sox10','smarca4',
'rorb',
'alox12b','egr1',
'ube4b','casz1',
'hoxc8a','hoxc9','hoxc10a',
'hoxc13a','rarga','rarg',
'snai1','fam83d','mafb','sws2abeta','sws2aalpha','sws2b','lws','grm8')
# load twisst data
<- list(bel = prep_data(loc = 'bel'),
data_tables hon = prep_data(loc = 'hon'))
# set species sampled in belize
<- c('ind', 'may', 'nig', 'pue', 'uni') pops_bel
# set outlier region label
<- tibble(outlier_id = outlier_pick,
region_label_tibbles label = c('A','B','C'))
# load and recolor trait icons
<- tibble(svg = hypoimg::hypo_trait_img$grob_circle[hypoimg::hypo_trait_img$trait %in% gxp_traits],
trait_grob layer = c(4,3,7),
color = gxp_clr[gxp_traits %>% sort()])%>%
pmap(.f = hypo_recolor_svg) %>%
set_names(nm = gxp_traits %>% sort())
# recolor second bars-layer
"Bars"]] <- trait_grob[["Bars"]] %>% hypo_recolor_svg(layer = 7, color = gxp_clr[["Bars"]]) trait_grob[[
<- pomo_data$tree[[1]] %>%
p_pomo1 midpoint() %>%
::ggtree(layout = "circular") %>%
ggtreerotate(node = 18) %>%
$data %>%
.::mutate(support = as.numeric(label),
dplyrsupport_class = cut(support, c(0, 50, 70, 90, 100)) %>%
as.character() %>%
factor(levels = c("(0,50]", "(50,70]", "(70,90]", "(90,100]"))) %>%
plot_tree(higl_node = 21,
angle_in = 168,
color = twisst_clr[["Blue"]],
xlim = c(-.015, .1),
open_angle = 168)
<- pomo_data$tree[[2]] %>%
p_pomo2 midpoint() %>%
::ggtree(layout = "circular") %>%
ggtree$data %>%
.::mutate(support = as.numeric(label),
dplyrsupport_class = cut(support, c(0, 50, 70, 90, 100)) %>%
as.character() %>%
factor(levels = c("(0,50]", "(50,70]", "(70,90]", "(90,100]"))) %>%
plot_tree(higl_node = 27,
color = twisst_clr[["Bars"]],
xlim = c(-.015,.065),
angle_in = 168,
open_angle = 168)
<- pomo_data$tree[[3]] %>%
p_pomo3 midpoint() %>%
::ggtree(layout = "circular") %>%
ggtree$data %>%
.::mutate(support = as.numeric(label),
dplyrsupport_class = cut(support, c(0, 50, 70, 90, 100)) %>%
as.character() %>%
factor(levels = c("(0,50]", "(50,70]", "(70,90]", "(90,100]"))) %>%
plot_tree(higl_node = 28,
color = twisst_clr[["Butter"]],
xlim = c(-.01, .053),
angle_in = 168,
open_angle = 168)
<- list(p_pomo1, p_pomo2, p_pomo3) plot_list
# compose base figure
<- outlier_table %>%
p_single filter(outlier_id %in% outlier_pick) %>%
left_join(region_label_tibbles) %>%
mutate(outlier_nr = row_number(),
text = ifelse(outlier_nr == 1, TRUE, FALSE),
trait = c('Snout', 'Bars', 'Peduncle')) %>%
pmap(plot_curtain, cool_genes = cool_genes, data_tables = data_tables) %>%
c(., plot_list) %>%
::plot_grid(plotlist = ., nrow = 2,
cowplotrel_heights = c(1, .18),
labels = letters[1:length(outlier_pick)] %>% project_case(),
label_size = plot_text_size)
# compile legend
# dummy plot for fst legend
<- outlier_table %>% filter(row_number() == 1) %>%
p_dummy_fst ::pmap(plot_panel_fst) %>%
purrr1]] + guides(color = guide_colorbar(barheight = unit(3, "pt"),
.[[barwidth = unit(100, "pt"),
label.position = "top",
ticks.colour = "black"))
# dummy plot for gxp legend
<- outlier_table %>% filter(row_number() == 1) %>% purrr::pmap(plot_panel_gxp, trait = 'Bars') %>% .[[1]] p_dummy_gxp
# fst legend
<- (p_dummy_fst+theme(legend.position = 'bottom')) %>% get_legend()
p_leg_fst # gxp legend
<- (p_dummy_gxp+theme(legend.position = 'bottom')) %>% get_legend()
p_leg_gxp # create sub-legend 1
<- ((midpoint(pomo_data$tree[[1]]) %>%
p_leg_pomo ggtree(layout = "circular") %>%
$data %>%
.mutate(support = as.numeric(label),
support_class = cut(support, c(0,50,70,90,100)) %>%
as.character() %>%
factor(levels = c("(0,50]", "(50,70]", "(70,90]", "(90,100]")))) %>%
conditional_highlight(tree = .,
higl_node = 21,
highl = FALSE,
support_guide = TRUE) +
theme(text = element_text(size = plot_text_size),
legend.position = "bottom") ) %>%
get_legend()
<- cowplot::plot_grid(p_leg_fst,
p_leg1
p_leg_gxp,
p_leg_pomo,ncol = 1)
# create sub-legend 2 (phylogeny schematics)
<- tibble(spec1 = c('indigo', 'indigo','unicolor'),
p_leg2 spec2 = c('maya', 'puella',NA),
color = twisst_clr %>% unname() %>% darken(.,factor = .8),
mode = c(rep('pair',2),'isolation')) %>%
future_pmap(plot_leg) %>%
::plot_grid(plotlist = .,
cowplotnrow = 1)
# create sub-legend 3
<- cowplot::plot_grid(p_leg1,
p_leg
p_leg2,nrow = 1,
rel_widths = c(.6, 1))
# finalize figure
<- cowplot::plot_grid(p_single, p_leg,
p_done ncol = 1,
rel_heights = c(1, .17))
Finally, we can export Figure 5.
# export figure
hypo_save(plot = p_done,
filename = 'figures/F5.pdf',
width = f_width,
height = f_width * .93,
comment = script_name,
device = cairo_pdf,
bg = "transparent")