Here we quickly show how the data objects of the example data set can be reproduced.
library("recount3")
library("DESeq2")
library("dplyr")
library("stringr")
library("SummarizedExperiment")
library("tidyr")
library("tibble")
library("magrittr")
# Download the example data from recount3 (project SRP093386) and build the
# raw count matrix together with the per-sample metadata.
proj_info <- available_projects() %>%
  filter(project == "SRP093386")
se <- create_rse(proj_info)

# Remove the trailing ".<number>" suffix from the row names
# (presumably the gene ID version -- confirm against the annotation used).
count_mat <- assay(se)
rownames(count_mat) <- str_replace(rownames(count_mat), "\\.[:number:]+$", "")

# The sample title is split on "-" into four fields; only those are kept.
meta <- colData(se) %>%
  as_tibble() %>%
  separate(
    sra.sample_title,
    into = c("cell_line", "treatment", "mutation", "replicate"),
    sep = "-"
  ) %>%
  select(cell_line, treatment, mutation, replicate)

# Name the count columns after treatment, mutation and replicate.
colnames(count_mat) <- paste0(
  meta$treatment, "_", meta$mutation, "_", meta$replicate
)
# subset to cell line T47D
# The count matrix is subset first, because its column mask relies on the
# still-unfiltered `meta$cell_line`.
count_mat <- count_mat[, meta$cell_line == "T47D"]
meta <- meta[meta$cell_line == "T47D", c("treatment", "mutation", "replicate")]

# NOTE(review): make_dds() is not provided by any package attached above;
# presumably it comes from the package this script belongs to -- confirm it
# is loaded before running.
T47D <- make_dds(count_mat, meta, ah_record = "AH89426")

# keep only rows whose total count across all samples is greater than zero
T47D <- T47D[rowSums(assay(T47D)) > 0, ]

# round some numeric data to reduce the size of the data object
rowData(T47D)$gc_content <- round(rowData(T47D)$gc_content, 1)
# Work on a copy named `dds`, filter genes with the thresholds below, and
# set up the design with mutation and treatment as factors.
dds <- T47D
dds <- filter_genes(dds, min_count = 5, min_rep = 4)
dds$mutation <- as.factor(dds$mutation)
dds$treatment <- as.factor(dds$treatment)
design(dds) <- ~ mutation + treatment
# to not run DESeq2 in the main vignette,
# we pre-compute the dispersion plot and diff testing results
dds <- DESeq(dds, parallel = TRUE)

# write the dispersion estimate plot to a PNG file (7 x 5 in, 200 dpi)
png(filename = "disp_ests.png", width = 7, height = 5, units = "in", res = 200)
plotDispEsts(dds)
dev.off()
# Shrunken log fold changes for the "mutation_WT_vs_D538G" coefficient,
# tested against a log2(1.5) fold-change threshold.
T47D_diff_testing <- lfcShrink(
  dds,
  coef = "mutation_WT_vs_D538G",
  lfcThreshold = log2(1.5),
  type = "normal",
  parallel = TRUE
)

# remove the stat, lfcSE and pvalue columns from the stored result
T47D_diff_testing$stat <- NULL
T47D_diff_testing$lfcSE <- NULL
T47D_diff_testing$pvalue <- NULL