Load example DO data from web. For convenience, we attach package ‘ggplot2’ for the autoplot
function. Functions from ‘qtl2’ are explicitly referenced with prefix qtl2::
.
library(qtl2ggplot)
library(ggplot2)
Download ‘qtl2’ cross2
object.
<-
DOex ::read_cross2(
qtl2file.path(
"https://raw.githubusercontent.com/rqtl",
"qtl2data/master/DOex",
"DOex.zip"))
With multiple alleles, it is useful to examine an additive allele model. Download pre-calculated allele probabilities (~5 MB) as follows:
<- tempfile()
tmpfile <- paste0("https://raw.githubusercontent.com/rqtl/",
file "qtl2data/master/DOex/DOex_alleleprobs.rds")
download.file(file, tmpfile)
<- readRDS(tmpfile)
apr unlink(tmpfile)
Alternatively, calculate these directly.
<- qtl2::calc_genoprob(DOex, error_prob=0.002)
pr <- qtl2::genoprob_to_alleleprob(pr) apr
Genome allele scan.
<- qtl2::scan1(apr, DOex$pheno) scan_apr
Summary of peaks.
::find_peaks(scan_apr, DOex$pmap) qtl2
## lodindex lodcolumn chr pos lod
## 1 1 OF_immobile_pct 2 96.84223 10.173313
## 2 1 OF_immobile_pct 3 15.02006 5.971503
## 3 1 OF_immobile_pct X 74.57257 6.939151
New summary method:
summary(scan_apr, DOex$pmap)
## # A tibble: 3 × 5
## pheno chr pos lod marker
## <chr> <fct> <dbl> <dbl> <chr>
## 1 OF_immobile_pct 2 96.8 10.2 backupUNC020000070
## 2 OF_immobile_pct 3 15.0 5.97 backupUNC030729939
## 3 OF_immobile_pct X 74.6 6.94 UNC200000454
The basic plot of genome scan,
plot(scan_apr, DOex$pmap)
and the grammar of graphics (ggplot2
) version.
autoplot(scan_apr, DOex$pmap)
Subset to chr 2.
<- DOex[,"2"]
DOex <- subset(apr, chr = "2") apr
<- qtl2::scan1(apr, DOex$pheno) scan_apr
::find_peaks(scan_apr, DOex$pmap) qtl2
## lodindex lodcolumn chr pos lod
## 1 1 OF_immobile_pct 2 96.84223 10.17331
plot(scan_apr, DOex$pmap)
autoplot(scan_apr, DOex$pmap)
<- qtl2::scan1coef(apr, DOex$pheno) coefs
New summary
method:
summary(coefs, scan_apr, DOex$pmap)
## # A tibble: 1 × 14
## pheno chr pos lod marker A B C D E F G
## <chr> <fct> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 OF_immo… 2 96.8 10.2 backupUN… -6.93 -3.80 -6.27 -5.97 -30.1 -14.3 -3.87
## # … with 2 more variables: H <dbl>, intercept <dbl>
plot(coefs, DOex$pmap, 1:8, col = qtl2::CCcolors)
autoplot(coefs, DOex$pmap)
Plot allele effects over LOD scan.
plot(coefs, DOex$pmap, 1:8, col = qtl2::CCcolors, scan1_output = scan_apr)
autoplot(coefs, DOex$pmap, scan1_output = scan_apr,
legend.position = "none")
Examine just some of the founder effects, without centering.
plot(coefs, DOex$pmap, c(5,8), col = qtl2::CCcolors[c(5,8)])
autoplot(coefs, DOex$pmap, c(5,8))
autoplot(coefs, DOex$pmap, c(5,8), facet = "geno")
plot(coefs, DOex$pmap, 4:5, col = qtl2::CCcolors[4:5], scan1_output = scan_apr)
autoplot(coefs, DOex$pmap, 4:5, scan1_output = scan_apr, legend.position = "none")
For SNP association mapping, be sure to use the genotype allele pair probabilities pr
rather than the additive model allele probabilities apr
. Download pre-calculated genotype probabilities (~19 MB) and subset to Chr 2.
<- tempfile()
tmpfile <- paste0("https://raw.githubusercontent.com/rqtl/",
file "qtl2data/master/DOex/DOex_genoprobs.rds")
download.file(file, tmpfile)
<- readRDS(tmpfile)
pr unlink(tmpfile)
<- subset(pr, chr = "2") pr
Or, alternatively, calculate directly using the subsetted DOex
.
<- qtl2::calc_genoprob(DOex, error_prob=0.002) pr
Download SNP information from web.
<- file.path("https://raw.githubusercontent.com/rqtl",
filename "qtl2data/master/DOex",
"c2_snpinfo.rds")
<- tempfile()
tmpfile download.file(filename, tmpfile, quiet=TRUE)
<- readRDS(tmpfile)
snpinfo unlink(tmpfile)
Or alternatively, use query
function approach.
<- system.file("extdata", "cc_variants_small.sqlite", package="qtl2")
snpdb_file <- qtl2::create_variant_query_func(snpdb_file)
query_variant <- query_variant("2", 96.5, 98.5) snpinfo
The SNP routines in ‘qtl2ggplot’ can distinguish SNP variants artificially add type
to snpinfo
with about 20% DEL
to show how variants get plotted.
<- c("snp","indel","SV","INS","DEL","INV")
variants $type <-
snpinfofactor(
sample(
c(sample(variants[-1], 5000, replace = TRUE),
rep("snp", nrow(snpinfo) - 5000))),
variants)
Perform SNP association mapping. It is possible to use qtl2::scan1snps
instead, which bundles these three routines, but we want to have the SNP probabilities for later use.
<- qtl2::index_snps(DOex$pmap, snpinfo)
snpinfo <- qtl2::genoprob_to_snpprob(pr, snpinfo)
snppr <- qtl2::scan1(snppr, DOex$pheno) scan_snppr
Plot results.
plot(scan_snppr, snpinfo, drop_hilit = 1.5)
autoplot(scan_snppr, snpinfo, drop_hilit = 1.5)
Plot just subset of distinct SNPs
plot(scan_snppr, snpinfo, show_all_snps=FALSE, drop_hilit = 1.5)
autoplot(scan_snppr, snpinfo, show_all_snps=FALSE, drop_hilit = 1.5)
Highlight the top snps (with LOD within 1.5 of max). Show as open circles of size 1.
plot(scan_snppr, snpinfo, drop_hilit=1.5, cex=1, pch=1)
autoplot(scan_snppr, snpinfo, drop_hilit=1.5, cex=2)
SNP assocation mapping is more useful with plots that emphasized the strain distribution pattern (SDP), which separate out SNPs based on their SDP and plot the top patterns. For instance sdp = 52
corresponds to pattern ABDGH:CEF
. That is, the SNP genotype "AA"
resulting from qtl2::genoprob_to_snpprob
applied to pr
corresponds to any of the 36 allele pairs with the two alleles drawn from the reference (ref
) set of ABDGH
(15 pairs: AA, AB, AD, AG, AH, BB, BD, BG, BH, DD, DG, DH, GG, GH, HH
), "BB"
has two alleles from the alternate (alt
) set CEF
(6 pairs: CC, CE, CF, EE, EF, FF
), and "AB"
has one from each for the heterogeneous (het
) set (15 pairs: AC, AE, ..., HF
). There are 255 possible sdp
s, but only a few (4 in our example) that need be examined carefully. One can think of these as a subset of markers for genome scan, where interest is only in those SNPS following a particular sdp
; as with genome scans, we can fill in for missing data. That is, only a few SNPs may show a particular pattern, but key differences might be seen nearby if we impute SNPs of the same pattern. Here we highlight SDPs in SNPs within 3 of max; connect with lines.
autoplot(scan_snppr, snpinfo, patterns="all", drop_hilit=3, cex=2)
Highlight only top SDP patterns in SNPs.
autoplot(scan_snppr, snpinfo, patterns="hilit", drop_hilit=3, cex=2)
Looking at all SNPS is more useful than just focusing on mapped SNPs.
autoplot(scan_snppr, snpinfo, patterns="hilit", drop_hilit=3, cex=2,
show_all_snps = FALSE)
Download Gene info for DOex from web via RDS.
<- file.path("https://raw.githubusercontent.com/rqtl",
filename "qtl2data/master/DOex",
"c2_genes.rds")
<- tempfile()
tmpfile download.file(filename, tmpfile, quiet=TRUE)
<- readRDS(tmpfile)
gene_tbl unlink(tmpfile)
Or alternatively use query
function approach.
<- system.file("extdata", "mouse_genes_small.sqlite", package="qtl2")
dbfile <- qtl2::create_gene_query_func(dbfile, filter="(source=='MGI')")
query_genes <- query_genes("2", 96.5, 98.5) gene_tbl
Plot genes. These can be aligned with the SNP association map or SDP scans.
::plot_genes(gene_tbl, xlim = c(96,99)) qtl2
ggplot_genes(gene_tbl)
Plot routines (except scan patterns for now) can accommodate multiple phenotypes. At present, it is best to stick to under 10. In the preambl of this document, a second phenotype, asin
, was artifically created for illustration purposes.
Create artificial second phenotype as arcsic sqrt of first one.
$pheno <- cbind(DOex$pheno,
DOexasin = asin(sqrt(DOex$pheno[,1] / 100)))
Redo genome allele scans on both phenotypes.
<- qtl2::scan1(apr, DOex$pheno) scan_apr
::find_peaks(scan_apr, DOex$pmap) qtl2
## lodindex lodcolumn chr pos lod
## 1 1 OF_immobile_pct 2 96.84223 10.173313
## 2 2 asin 2 96.84223 9.422816
Similar summary using new summary
method:
summary(scan_apr, DOex$pmap)
## # A tibble: 2 × 5
## pheno chr pos lod marker
## <chr> <fct> <dbl> <dbl> <chr>
## 1 OF_immobile_pct 2 96.8 10.2 backupUNC020000070
## 2 asin 2 96.8 9.42 backupUNC020000070
plot(scan_apr, DOex$pmap, 1)
plot(scan_apr, DOex$pmap, 2, add = TRUE, col = "red")
autoplot(scan_apr, DOex$pmap, 1:2)
autoplot(scan_apr, DOex$pmap, 1:2, facet="pheno", scales = "free_x", shape = "free_x")
Redo SNP scans on both phenotypes.
<- qtl2::scan1(snppr, DOex$pheno) scan_snppr
Using new summary
method. The summary includes a range (min
and max
) for pos
, as there could be multiple SNPs across a range of positions.
summary(scan_snppr, DOex$pmap, snpinfo)
## # A tibble: 8 × 7
## pheno max_pos min_pos lod sdp pattern snp_id
## <chr> <dbl> <dbl> <dbl> <int> <chr> <chr>
## 1 OF_immobile_pct 97.0 96.9 7.31 52 ABDGH:CEF 3 SNPs
## 2 OF_immobile_pct 96.8 96.8 7.04 48 ABCDGH:EF 12 SNPs
## 3 asin 96.8 96.8 6.55 48 ABCDGH:EF 12 SNPs
## 4 asin 97.0 96.9 6.49 52 ABDGH:CEF 3 SNPs
## 5 OF_immobile_pct 98.2 96.9 5.99 16 ABCDFGH:E 25 SNPs
## 6 OF_immobile_pct 96.9 96.9 5.97 20 ABDFGH:CE 9 SNPs
## 7 asin 97.0 96.9 5.56 56 ABCGH:DEF 88 SNPs
## 8 asin 97.0 96.9 5.32 60 ABGH:CDEF 69 SNPs
Plot results.
plot(scan_snppr, snpinfo, lodcolumn=1, cex=1, pch=1, drop_hilit = 1.5)
plot(scan_snppr, snpinfo, lodcolumn=2, cex=1, pch=1, drop_hilit = 1.5)
autoplot(scan_snppr, snpinfo, 1:2, facet="pheno",
drop_hilit = 1.5)
plot(scan_snppr, snpinfo, lodcolumn=1, cex=1, pch=1,
show_all_snps = FALSE, drop_hilit = 1.5)
plot(scan_snppr, snpinfo, lodcolumn=2, cex=1, pch=1,
show_all_snps = FALSE, drop_hilit = 1.5)
autoplot(scan_snppr, snpinfo, 1:2, show_all_snps = FALSE, facet="pheno",
cex=2, drop_hilit = 1.5)
Note that in the autoplot
(using qtl2ggplot
), the hilit
points for the second trait are fewer than with the plot
(using package ‘qtl2’). This is because the maxlod
for the faceted autoplot
is across both traits, and the other points for the second trait are too low.
autoplot(scan_snppr, snpinfo, 2, show_all_snps = FALSE, facet="pheno",
cex=2, drop_hilit = 1.5)
Distinguish high values by color but leave others gray.
autoplot(scan_snppr, snpinfo, 1:2,show_all_snps = FALSE,
facet_var = "pheno", drop_hilit = 2,
col=8, col_hilit=1:2, cex=2) +
geom_hline(yintercept = max(scan_snppr) - 2, col = "darkgrey", linetype = "dashed")
### SDP scans for multiple phenotypes
autoplot(scan_snppr, snpinfo, 2, patterns = "all",
cex=2, drop_hilit=2)
autoplot(scan_snppr, snpinfo, 1:2, patterns = "all", cex=2,
facet = "pheno", drop_hilit=3)
autoplot(scan_snppr, snpinfo, 1:2, patterns = "hilit", cex=2,
drop_hilit=3, facet = "pheno", scales = "free")
autoplot(scan_snppr, snpinfo, 1:2, patterns = "hilit",
show_all_snps = TRUE, cex=2,
drop_hilit=3, facet = "pattern")
<- qtl2::find_peaks(scan_apr, DOex$pmap, drop = 1.5)) (peaks
## lodindex lodcolumn chr pos lod ci_lo ci_hi
## 1 1 OF_immobile_pct 2 96.84223 10.173313 91.60175 103.8778
## 2 2 asin 2 96.84223 9.422816 91.60175 103.8778
::plot_peaks(peaks, DOex$pmap) qtl2
ggplot_peaks(peaks, DOex$pmap)
<- listof_scan1coef(apr, DOex$pheno, center = TRUE) out
New summary method:
summary(out, scan_apr, DOex$pmap)
## # A tibble: 2 × 14
## pheno chr pos lod marker A B C D E F
## <chr> <fct> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 OF_im… 2 96.8 10.2 backup… -7.49 -4.36 -6.83 -6.53 -30.6 -14.8
## 2 asin 2 96.8 9.42 backup… -0.108 -0.0661 -0.0936 -0.109 -0.392 -0.209
## # … with 3 more variables: G <dbl>, H <dbl>, intercept <dbl>
::autoplot(out, DOex$pmap, scales = "free") ggplot2
summary(out, scan_apr, DOex$pmap)
## # A tibble: 2 × 14
## pheno chr pos lod marker A B C D E F
## <chr> <fct> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 OF_im… 2 96.8 10.2 backup… -7.49 -4.36 -6.83 -6.53 -30.6 -14.8
## 2 asin 2 96.8 9.42 backup… -0.108 -0.0661 -0.0936 -0.109 -0.392 -0.209
## # … with 3 more variables: G <dbl>, H <dbl>, intercept <dbl>
This last section shows some very noisy images of coefficients for the 36 allele pairs. Generally, these will not be useful unless the cross is quite large. See also package ‘qtl2pattern’.
QTL effects for 36 allele pair model. Note that they are quite unstable, and the 36 allele pair max LOD is far from the peak for the additive (haplotype) model. Only showing effects with at least one E
allele. Plots are truncated at +/-100 for viewability. Note also that ‘qtl2ggplot’ routines have some centering built in.
Find coefficients for 36 allele pair genome scan.
<- qtl2::scan1coef(pr, DOex$pheno) coefs36
## Warning in qtl2::scan1coef(pr, DOex$pheno): Considering only the first
## phenotype.
All 36 allele pair QTL effects.
plot(coefs36, DOex$pmap, 1:36, col = 1:36, ylim=c(-100,100))
autoplot(coefs36, DOex$pmap, ylim=c(-100,100), colors = NULL, legend.position = "none")
The autoplot
is centered by default (so mean across all alleles is mean of trait) to make coefficient plots easier to view. This can be turned off with the hidden center
option.
autoplot(coefs36, DOex$pmap, ylim=c(-100,100), center = FALSE,
colors = NULL, legend.position = "none")
Only 8 allele pair QTL effects that contain E
.
<- qtl2ggplot:::modify_object(coefs36,
tmp ::str_detect(dimnames(coefs36)[[2]], "E")])
coefs36[, stringrautoplot(tmp, DOex$pmap, ylim=c(-100,100))