# Install NACHO from CRAN:
install.packages("NACHO")
# Or the development version from GitHub:
# install.packages("remotes")
remotes::install_github("mcanouil/NACHO")
NACHO (NAnostring quality
Control dasHbOard) is
developed for NanoString nCounter data.
NanoString nCounter is a messenger-RNA/micro-RNA (mRNA/miRNA)
expression assay that works with fluorescent barcodes.
Each barcode is assigned a mRNA/miRNA, which can be counted after
bonding with its target.
As a result each count of a specific barcode represents the presence of
its target mRNA/miRNA.
NACHO is able to load, visualise and normalise the exported
NanoString nCounter data and facilitates the user in performing a
quality control.
NACHO does this by visualising quality control metrics,
expression of control genes, principal components and sample specific
size factors in an interactive web application.
With the use of two functions, RCC files are summarised and
visualised, namely: load_rcc() and visualise().
The load_rcc() function is used to preprocess the data.
The visualise() function initiates a Shiny-based dashboard that
visualises all relevant QC plots.
NACHO also includes a function normalise(), which (re)calculates sample
specific size factors and normalises the data.
The normalise() function creates a list in which your settings, the raw
counts and normalised counts are stored.
In addition (since v0.6.0) NACHO includes two (three) additional functions:
The render() function renders a full quality-control report (HTML) based
on the results of a call to load_rcc() or normalise() (using print() in
an R Markdown chunk).
The autoplot() function draws any quality-control metrics from
visualise() and render().
For more details, see vignette("NACHO") and vignette("NACHO-analysis").
Canouil M, Bouland GA, Bonnefond A, Froguel P, Hart L, Slieker R (2019). “NACHO: an R package for quality control of NanoString nCounter data.” Bioinformatics. ISSN 1367-4803, doi:10.1093/bioinformatics/btz647.
@Article{,
title = {{NACHO}: an {R} package for quality control of {NanoString} {nCounter} data},
author = {Mickaël Canouil and Gerard A. Bouland and Amélie Bonnefond and Philippe Froguel and Leen Hart and Roderick Slieker},
journal = {Bioinformatics},
address = {Oxford, England},
year = {2019},
month = {aug},
issn = {1367-4803},
doi = {10.1093/bioinformatics/btz647},
}
# Attach NACHO first, then GEOquery. GEOquery is attached quietly and without
# conflict warnings; the startup output recorded below shows BiocGenerics
# masking normalize() from NACHO.
library(NACHO)
library(GEOquery, quietly = TRUE, warn.conflicts = FALSE)
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:NACHO':
##
## normalize
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, append,
## as.data.frame, basename, cbind, colnames, dirname, do.call,
## duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
## lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
# GSE70970 from GEO (or use your own data)
# Directory (under the session temp dir) where the raw files are extracted.
data_directory <- file.path(tempdir(), "GSE70970", "Data")
# Download data
gse <- getGEO("GSE70970")
## Found 1 file(s)
## GSE70970_series_matrix.txt.gz
# Fetch the supplementary files (including GSE70970_RAW.tar) into tempdir().
getGEOSuppFiles(GEO = "GSE70970", baseDir = tempdir())
## size
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_RAW.tar 1986560
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_characteristics_readme.txt.gz 672
## isdir
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_RAW.tar FALSE
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_characteristics_readme.txt.gz FALSE
## mode
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_RAW.tar 644
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_characteristics_readme.txt.gz 644
## mtime
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_RAW.tar 2022-05-31 08:51:26
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_characteristics_readme.txt.gz 2022-05-31 08:51:26
## ctime
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_RAW.tar 2022-05-31 08:51:26
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_characteristics_readme.txt.gz 2022-05-31 08:51:26
## atime
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_RAW.tar 2022-05-31 08:51:23
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_characteristics_readme.txt.gz 2022-05-31 08:51:26
## uid
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_RAW.tar 501
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_characteristics_readme.txt.gz 501
## gid
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_RAW.tar 20
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_characteristics_readme.txt.gz 20
## uname
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_RAW.tar mcanouil
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_characteristics_readme.txt.gz mcanouil
## grname
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_RAW.tar staff
## /var/folders/gn/mxv05rj52wd1yg1hb018s4s40000gn/T//RtmpT9FzPf/GSE70970/GSE70970_characteristics_readme.txt.gz staff
# Unzip data
untar(
  tarfile = file.path(tempdir(), "GSE70970", "GSE70970_RAW.tar"),
  exdir = data_directory
)
# Get phenotypes and add IDs
targets <- pData(phenoData(gse[[1]]))
# NOTE(review): assumes list.files() returns the files in the same order as
# the rows of the phenotype table -- confirm before relying on the pairing.
targets$IDFILE <- list.files(data_directory)
# Import the RCC files; IDFILE is the column of `targets` holding file names.
GSE70970 <- load_rcc(data_directory, targets, id_colname = "IDFILE")
## [NACHO] Importing RCC files.
## Error in load_rcc(data_directory, targets, id_colname = "IDFILE"): [NACHO] Multiple Nanostring file/software versions detected.
## Please provide a set of files with the same version.
## - FileVersion: '1.6', '1.6'
## - SoftwareVersion: '2.1.2.3', '2.1.1.0005'
limma
# Attach limma, which provides lmFit() and eBayes() used further down.
library(limma)
##
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
# Build the phenotype table from the NACHO data: keep the sample ID and the
# phenotype columns, drop duplicated rows, and turn the literal string "NA"
# into a real missing value.
selected_pheno <- GSE70970[["nacho"]][
  j = lapply(unique(.SD), function(x) ifelse(x == "NA", NA, x)),
  .SDcols = c(
    "IDFILE", "age:ch1", "gender:ch1", "chemo:ch1", "disease.event:ch1"
  )
]
## Error in eval(expr, envir, enclos): object 'GSE70970' not found
# Remove rows with any missing phenotype value.
selected_pheno <- na.exclude(selected_pheno)
## Error in na.exclude(selected_pheno): object 'selected_pheno' not found
head(selected_pheno)
## Error in head(selected_pheno): object 'selected_pheno' not found
# Normalised counts of the endogenous genes as a wide matrix:
# one row per gene (row names taken from "Name"), one column per IDFILE.
expr_counts <- GSE70970[["nacho"]][
  i = grepl("Endogenous", CodeClass),
  j = as.matrix(
    dcast(.SD, Name ~ IDFILE, value.var = "Count_Norm"),
    "Name"
  ),
  .SDcols = c("IDFILE", "Name", "Count_Norm")
]
## Error in eval(expr, envir, enclos): object 'GSE70970' not found
## Error in eval(expr, envir, enclos): object 'expr_counts' not found
# Alternatively, "Accession" number is also available.
# Same wide matrix as with "Name", but keyed by the "Accession" column.
GSE70970[["nacho"]][
  i = grepl("Endogenous", CodeClass),
  j = as.matrix(
    dcast(.SD, Accession ~ IDFILE, value.var = "Count_Norm"),
    "Accession"
  ),
  .SDcols = c("IDFILE", "Accession", "Count_Norm")
]
# Keep only the samples present in both the phenotypes and the count matrix,
# so that the design matrix rows line up with the expression columns.
samples_kept <- intersect(selected_pheno[["IDFILE"]], colnames(expr_counts))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'intersect': object 'selected_pheno' not found
expr_counts <- expr_counts[, samples_kept]
## Error in eval(expr, envir, enclos): object 'expr_counts' not found
selected_pheno <- selected_pheno[IDFILE %in% c(samples_kept)]
## Error in eval(expr, envir, enclos): object 'selected_pheno' not found
# Design matrix for the effect of the disease event.
design <- model.matrix(~ `disease.event:ch1`, selected_pheno)
## Error in terms.formula(object, data = data): object 'selected_pheno' not found
limma
# Fit the linear model with lmFit(), then pass the fit to eBayes().
eBayes(
  lmFit(expr_counts, design)
)
## Error in lmFit(expr_counts, design): object 'expr_counts' not found
# lm (or any other model)
# Step 1: endogenous genes only, with counts and phenotypes; duplicated rows
#         dropped and the literal string "NA" turned into a real NA.
# Step 2: restrict to the first ten gene names.
# Step 3: fit one lm() per gene (Name/Accession) and return its coefficient
#         table, with the coefficient names stored in a "term" column.
GSE70970[["nacho"]][
  i = grepl("Endogenous", CodeClass),
  j = lapply(unique(.SD), function(x) ifelse(x == "NA", NA, x)),
  .SDcols = c(
    "IDFILE", "Name", "Accession", "Count", "Count_Norm",
    "age:ch1", "gender:ch1", "chemo:ch1", "disease.event:ch1"
  )
][
  Name %in% head(unique(Name), 10)
][
  j = as.data.table(
    coef(summary(lm(
      formula = Count_Norm ~ `disease.event:ch1`,
      data = na.exclude(.SD)
    ))),
    "term"
  ),
  by = c("Name", "Accession")
]
## Error in eval(expr, envir, enclos): object 'GSE70970' not found