<- FALSE # only save PDFs on a local machine
save_pdf <- 10 # modify to get around GitHub actions "Killed"
sequence_scaling
suppressPackageStartupMessages({
library(slendr)
library(dplyr)
library(ggplot2)
library(purrr)
library(tidyr)
library(cowplot)
library(forcats)
})
<- 42
SEED set.seed(SEED)
# placeholder "figure" where a code chunk will be pasted
<- ggplot() +
p_code geom_text(aes(x = 1, y = 1), label = "code will be here") +
theme_void()
<- population("o", time = 1, N = 100)
o <- population("c", time = 2500, N = 100, parent = o)
c <- population("a", time = 2800, N = 100, parent = c)
a <- population("b", time = 3700, N = 100, parent = a)
b <- population("x1", time = 4000, N = 15000, parent = c)
x1 <- population("x2", time = 4300, N = 15000, parent = x1)
x2
<- gene_flow(from = b, to = x1, start = 5400, end = 5800, 0.1)
gf
<- compile_model(
model populations = list(o, a, b, c, x1, x2), gene_flow = gf,
generation_time = 1, simulation_length = 6000
)
plot_model(model, sizes = FALSE, proportions = TRUE)
<- msprime(model, sequence_length = 100e6 / sequence_scaling, recombination_rate = 1e-8,
ts random_seed = SEED) %>%
ts_mutate(mutation_rate = 1e-8)
<- ts_samples(ts) %>% group_by(pop) %>% sample_n(100)
samples
<- ts_divergence(ts, split(samples$name, samples$pop))
divergence
<- ts_f4ratio(
f4ratio X = filter(samples, pop %in% c("x1", "x2"))$name,
ts, A = "a_1", B = "b_1", C = "c_1", O = "o_1"
)
<- slim(model, sequence_length = 100e6 / sequence_scaling, recombination_rate = 1e-8, random_seed = SEED) %>%
ts_slim ts_mutate(mutation_rate = 1e-8)
<- ts_divergence(ts_slim, split(samples$name, samples$pop))
divergence_slim
<- ts_f4ratio(
f4ratio_slim X = filter(samples, pop %in% c("x1", "x2"))$name,
ts_slim, A = "a_1", B = "b_1", C = "c_1", O = "o_1"
)
<- bind_rows(
divergence_both %>% mutate(backend = "msprime"),
divergence %>% mutate(backend = "SLiM")
divergence_slim %>%
) mutate(pair = paste(x, "-", y))
<- bind_rows(
f4ratio_both %>% mutate(backend = "msprime"),
f4ratio %>% mutate(backend = "SLiM")
f4ratio_slim %>% mutate(population = gsub("^(.*)_.*$", "\\1", X), alpha = alpha * 100)
)
<- divergence_both %>%
p_ex1_divergence ggplot(aes(fct_reorder(pair, divergence), color = backend, shape = backend, divergence)) +
geom_point(size = 3) +
xlab("population pair") + ylab("pairwise divergence") +
theme_minimal() +
scale_alpha_manual(values = c(1, 0.25)) +
scale_color_manual(values = c("black", "darkgray")) +
guides(shape = guide_legend("slendr simulation engine used:"),
color = guide_legend("slendr simulation engine used:",
override.aes = list(size = 3))) +
theme(legend.position = "bottom",
legend.text = element_text(size = 10),
axis.text.x = element_text(hjust = 1, angle = 45, size = 9),
axis.title.x = element_blank())
<- f4ratio_both %>%
p_ex1_f4ratio ggplot(aes(population, alpha)) +
geom_hline(yintercept = 0, linetype = 2) +
geom_jitter(aes(color = backend, shape = backend), alpha = 0.75,
position = position_jitterdodge(jitter.width = 0.25)) +
ylab(base::expression(italic("f")[4]~"-ratio ancestry proportion [%]")) +
scale_color_manual(values = c("black", "darkgray")) +
theme_minimal() +
coord_cartesian(ylim = c(0, 20)) +
theme(legend.position = "none",
axis.text.x = element_text(size = 11),
axis.title.x = element_blank(),
panel.grid.major.x = element_blank())#; p_ex3_f4ratio
# let's avoid ggpubr as another dependency:
# https://github.com/kassambara/ggpubr/blob/master/R/as_ggplot.R#L27
<- ggdraw() + draw_grob(grid::grobTree(get_legend(p_ex1_divergence)))
p_ex1_legend
<- plot_model(model, sizes = FALSE, proportions = TRUE)
p_ex1_model
<- plot_grid(
p_ex1
p_code,plot_grid(
p_ex1_model,plot_grid(
+ theme(legend.position = "none"),
p_ex1_divergence
p_ex1_f4ratio,ncol = 2, rel_widths = c(1, 0.8), labels = c("C", "D")
),nrow = 3, rel_heights = c(1, 1, 0.1),
p_ex1_legend, labels = "B"
),nrow = 1, labels = c("A", "")
)
p_ex1
<- world(xrange = c(0, 10), yrange = c(0, 10),
map landscape = region(center = c(5, 5), radius = 5))
<- population("pop1", time = 1, N = 2000, map = map, competition = 0)
p1 <- population("pop2", time = 1, N = 2000, map = map, competition = 9)
p2 <- population("pop3", time = 1, N = 2000, map = map, competition = 6)
p3 <- population("pop4", time = 1, N = 2000, map = map, competition = 5)
p4 <- population("pop5", time = 1, N = 2000, map = map, competition = 4)
p5 <- population("pop6", time = 1, N = 2000, map = map, competition = 3)
p6 <- population("pop7", time = 1, N = 2000, map = map, competition = 2)
p7 <- population("pop8", time = 1, N = 2000, map = map, competition = 1)
p8
<- compile_model(
model populations = list(p1, p2, p3, p4, p5, p6, p7, p8),
generation_time = 1, simulation_length = 5000, resolution = 0.1,
mating = 0.1, dispersal = 0.05
)
<- slim(model, sequence_length = 10e6 / sequence_scaling, recombination_rate = 1e-8, verbose = TRUE) %>%
ts ts_simplify() %>%
ts_mutate(mutation_rate = 1e-7)
<- ts_nodes(ts) %>% filter(time == max(time))
locations
<- ts_samples(ts) %>%
heterozygosity group_by(pop) %>%
sample_n(100) %>%
mutate(pi = ts_diversity(ts, name)$diversity)
<- ts_nodes(ts) %>% filter(time == max(time))
locations
<- ggplot() +
p_ex2_clustering geom_sf(data = map) +
geom_sf(data = locations, aes(color = pop), size = 0.05, alpha = 0.25) +
facet_grid(. ~ pop, switch = "x") +
xlab("spatial distributions emerged in the simulation") +
theme(
strip.background = element_blank(),
strip.text = element_text(size = 11),
panel.grid = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
panel.background = element_blank()
+
) guides(color = "none")
<- ggplot(heterozygosity, aes(pop, pi, color = pop)) +
p_ex2_diversity geom_violin(color = "black") +
geom_jitter(alpha = 0.5) +
labs(y = "individual heterozygosity") +
guides(color = "none") +
theme_minimal() +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(), panel.grid.major.x = element_blank(),
plot.margin = margin(t = 0.2, r = 0.2, b = -0.1, l = 0.2, "cm"))
<- plot_grid(
p_ex2
p_code,plot_grid(
p_ex2_diversity,+
p_ex2_clustering theme(plot.margin = margin(t = 0, r = 0.4, b = 0, l = 1.8, "cm")),
nrow = 2,
rel_heights = c(1, 0.5),
labels = c("B", "C")
),nrow = 2, labels = c("A", ""), rel_heights = c(1.5, 1)
)
p_ex2
<- world(xrange = c(-15, 60), yrange = c(20, 65), crs = 3035)
map
<- region(
R1 "EHG range", map,
polygon = list(c(26, 55), c(38, 53), c(48, 53), c(60, 53),
c(60, 60), c(48, 63), c(38, 63), c(26, 60))
)<- region(
R2 "Europe", map,
polygon = list(
c(-8, 35), c(-5, 36), c(10, 38), c(20, 35), c(25, 35),
c(33, 45), c(20, 58), c(-5, 60), c(-15, 50)
)
)<- region(
R3 "Anatolia", map,
polygon = list(c(28, 35), c(40, 35), c(42, 40),
c(30, 43), c(27, 40), c(25, 38))
)<- join(R2, R3)
R4 <- region(
R5 "YAM range", map,
polygon = list(c(26, 50), c(38, 49), c(48, 50),
c(48, 56), c(38, 59), c(26, 56))
)
<- list(c(40, 30), c(50, 30), c(60, 40), c(45, 55)) ooa_trajectory
<- world(xrange = c(-15, 60), yrange = c(20, 65), crs = 3035)
map
<- population(
ooa "OOA", time = 50000, N = 500, remove = 23000,
map = map, center = c(33, 30), radius = 400e3
%>%
) move(trajectory = ooa_trajectory, start = 50000, end = 40000, snapshots = 30)
<- population(
ehg "EHG", time = 28000, N = 1000, parent = ooa, remove = 6000,
map = map, polygon = R1
)
<- population(
eur "EUR", time = 30000, N = 2000, parent = ooa,
map = map, polygon = R2
%>%
) resize(N = 10000, time = 5000, end = 0, how = "exponential")
<- population(
ana "ANA", time = 25000, N = 4000, parent = ooa, remove = 3000,
map = map, polygon = R3
%>%
) expand_range(by = 3e6, start = 10000, end = 7000, polygon = R4, snapshots = 15)
<- population(
yam "YAM", time = 7000, N = 600, parent = ehg, remove = 2500,
map = m, polygon = R5
%>%
) move(trajectory = list(c(15, 50)), start = 5000, end = 3000, snapshots = 10)
<- list(
gf gene_flow(ana, to = yam, rate = 0.5, start = 6500, end = 5000),
gene_flow(ana, to = eur, rate = 0.6, start = 8000, end = 6000),
gene_flow(yam, to = eur, rate = 0.7, start = 3500, end = 3000)
)
<- compile_model(
model populations = list(ooa, ehg, eur, ana, yam), gene_flow = gf,
generation_time = 30, resolution = 10e3,
competition = 130e3, mating = 100e3,
dispersal = 70e3,
)
<- schedule_sampling(
samples times = seq(0, 50000, by = 1000),
model, list(ehg, 20), list(ana, 20), list(yam, 20), list(eur, 20)
)
plot_model(model, sizes = FALSE)
plot_map(model)
<- slim(
ts burnin = 200000, samples = samples, random_seed = SEED,
model, sequence_length = 200000, recombination_rate = 1e-8
)
<- plot_map(model) +
p_map theme(legend.position = "bottom") +
guides(alpha = "none")
<- plot_grid(
p_ex3
p_code,plot_grid(
plot_model(model, sizes = FALSE),
p_map,labels = c("B", "C"), nrow = 2, rel_heights = c(1, 1)
),ncol = 2, labels = c("A", ""), rel_widths = c(1, 1.2)
)
p_ex3
<- ts_simplify(ts, c("EUR_599", "ANA_322", "EHG_7",
ts_small "EUR_578", "EUR_501", "YAM_30"))
<- ts_phylo(ts_small, i = 10)
tree #> Starting checking the validity of tree...
#> Found number of tips: n = 12
#> Found number of nodes: m = 11
#> Done.
<- ts_nodes(tree)
nodes <- ts_edges(tree)
edges
<- ts_ancestors(ts, "EUR_599") ancestors
library(ggtree)
# prepare annotation table for ggtree linking R phylo node ID (not tskit integer
# ID!) of each node with its population name
<- as_tibble(nodes) %>% select(node = phylo_id, pop)
df
<- as_tibble(nodes) %>% dplyr::filter(name == "EUR_599") %>% .$phylo_id
highlight_nodes <- ggtree(tree, aes(color = pop, fill = pop)) %<+% df +
p_tree geom_tiplab(align = TRUE, geom = "label", offset = 2000,
color = "white", fontface = "bold", size = 3) +
geom_tiplab(align = TRUE, geom = NULL, linetype = "dotted", size = 0) +
geom_point2(aes(subset = (node %in% highlight_nodes)), color = "black", size = 3) +
geom_label2(aes(label = label, subset = !isTip),
color = "black", size = 3) +
theme_tree2() +
theme(legend.position = "none") +
xlab("time before present [years ago]") +
scale_x_continuous(limits = c(-55000, 22000), labels = abs,
breaks = -c(60, 40, 20, 0) * 1000)
<- revts(p_tree)
p_tree
<- ggplot() +
p_map geom_sf(data = map) +
geom_sf(data = edges, aes(color = parent_pop), size = 0.5) +
geom_sf(data = filter(nodes, is.na(name)),
aes(color = pop, shape = pop), size = 5) +
geom_sf_label(data = nodes[!nodes$sampled, ],
aes(label = node_id, fill = pop), size = 3) +
geom_sf_label(data = nodes[nodes$sampled, ],
aes(label = node_id, fill = pop), size = 3,
fontface = "bold", color = "white") +
coord_sf(xlim = c(4047066.1, 8688656.9),
ylim = c(757021.7, 4972983.3), expand = 0) +
labs(x = "longitude", y = "latitude") +
guides(fill = guide_legend("", override.aes = aes(label = ""))) +
guides(color = "none", shape = "none") +
theme_bw() +
theme(legend.position = "bottom")
<- stats::setNames(
chrom_names c("EUR_599 (chromosome 1, node 10)", "EUR_599 (chromosome 2, node 11)"),
unique(ancestors$node_id)
)
<- ggplot() +
p_ancestors geom_sf(data = map) +
geom_sf(data = ancestors, size = 0.5, aes(alpha = parent_time)) +
geom_sf(data = sf::st_set_geometry(ancestors, "parent_location"),
aes(shape = parent_pop, color = parent_pop)) +
geom_sf(data = filter(ts_nodes(ts), name == "EUR_599"), size = 3) +
coord_sf(expand = 0) +
labs(x = "longitude", y = "latitude") +
theme_bw() +
facet_grid(. ~ node_id, labeller = labeller(node_id = chrom_names)) +
theme(legend.position = "none")
<- ggdraw() + draw_grob(grid::grobTree(get_legend(p_map)))
p_legend
<- plot_grid(
p_ex4
p_code,plot_grid(p_tree + theme(legend.position = "none"),
+ theme(legend.position = "none"),
p_map labels = c("B", "C")),
p_ancestors,
p_legend,labels = c("A", "", "D", ""),
nrow = 4, rel_heights = c(0.5, 1, 1, 0.1)
)
p_ex4