library(minSNPs)
#> The minSNPs version loaded is: 0.0.3
library(Biostrings) # optional
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:parallel':
#>
#> clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
#> clusterExport, clusterMap, parApply, parCapply, parLapply,
#> parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> Filter, Find, Map, Position, Reduce, anyDuplicated, append,
#> as.data.frame, basename, cbind, colnames, dirname, do.call,
#> duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#> lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#> pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
#> tapply, union, unique, unsplit, which, which.max, which.min
#> Loading required package: S4Vectors
#> Loading required package: stats4
#>
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:base':
#>
#> expand.grid
#> Loading required package: IRanges
#> Loading required package: XVector
#>
#> Attaching package: 'Biostrings'
#> The following object is masked from 'package:base':
#>
#> strsplit
library(BiocParallel) # optional, but needed for parallel processing
read_fasta
is provided as a way to read fasta file, equivalent function, e.g., from Biostrings
and read.fasta
from seqinr
can be used.
isolates_from_biostring <- readBStringSet(
system.file("extdata", "Chlamydia_mapped.fasta", package = "minSNPs"))
isolates_from_default <- read_fasta(
system.file("extdata", "Chlamydia_mapped.fasta", package = "minSNPs"))
processed_from_biostring <- process_allele(isolates_from_biostring)
#> Ignored samples:
#>
#> Ignored 0 positions
processed_from_default <- process_allele(isolates_from_default)
#> Ignored samples:
#>
#> Ignored 0 positions
Subsequent analyses can use output from process_allele
.
cat("High D SNPs\n")
#> High D SNPs
output_result(high_d_snps)
#> Result - 1
#> Position(s) Score
#> "1988" 0.734415584415584
#>
#> Groups
#> T "A_D213, H_S1432, Ia_SotoGIa3, Ia_SotoGIa1, C_UW1, A_7249, A_HAR-13, C_Aus10, H_R31975, A_2497, L3_404, K_SotoGK1, A_363, J_6276, A_5291, C_TW3"
#> G "B_Aus3, L1_440, Ba_Aus25, B_TZ1A828, Ba_Apache2, D_UW-3, Ds_2923, L1_SA16, B_Har36, D_SotoGD6, D_SotoGD5, B_Jali20, D_SotoGD1"
#> A "F_SW5, L2b_CV204, L2_LST, L2b_UCH-1, L2b_795, L1_224, G_11074, F_70, L2b_C1, G_SotoGG1, G_11222, L2b_8200, F_SW4, G_9301, L2b_C2, G_9768, L2_434, F_SotoGF3, L1_115, L2b_UCH-2"
#> C "E_150, E_Bour, E_SotoGE4, E_SW2, E_SW3, E_SotoGE8, E_11023"
#>
#>
#> Additional details
#> Metric: simpson
#> Excluded Positions: ""
#> Excluded Positions From process_allele: ""
#> Included Positions: ""
#> Group of interest: ""
#> All analysed sequences: "A_D213, H_S1432, Ia_SotoGIa3, B_Aus3, Ia_SotoGIa1, C_UW1, F_SW5, L2b_CV204, L1_440, A_7249, Ba_Aus25, L2_LST, B_TZ1A828, A_HAR-13, E_150, E_Bour, C_Aus10, H_R31975, E_SotoGE4, L2b_UCH-1, L2b_795, A_2497, Ba_Apache2, E_SW2, L3_404, L1_224, D_UW-3, G_11074, Ds_2923, F_70, K_SotoGK1, E_SW3, L2b_C1, E_SotoGE8, G_SotoGG1, G_11222, A_363, L1_SA16, L2b_8200, J_6276, F_SW4, G_9301, L2b_C2, A_5291, G_9768, L2_434, F_SotoGF3, C_TW3, E_11023, L1_115, B_Har36, L2b_UCH-2, D_SotoGD6, D_SotoGD5, B_Jali20, D_SotoGD1"
cat("SNPs discriminating against A_D213, H_S1432\n")
#> SNPs discriminating against A_D213, H_S1432
output_result(discriminating_snps)
#> Result - 1
#> Position(s) Score
#> "1806" 0.944444444444444
#>
#> Groups
#> *target* - G "A_D213, H_S1432, C_UW1, C_Aus10, C_TW3"
#> C "Ia_SotoGIa3, Ia_SotoGIa1, A_7249, A_HAR-13, A_2497, D_UW-3, Ds_2923, K_SotoGK1, A_363, J_6276, A_5291, D_SotoGD6, D_SotoGD5, D_SotoGD1"
#> A "B_Aus3, F_SW5, L2b_CV204, L1_440, Ba_Aus25, L2_LST, B_TZ1A828, E_150, E_Bour, H_R31975, E_SotoGE4, L2b_UCH-1, L2b_795, Ba_Apache2, E_SW2, L3_404, L1_224, G_11074, F_70, E_SW3, L2b_C1, E_SotoGE8, G_SotoGG1, G_11222, L1_SA16, L2b_8200, F_SW4, G_9301, L2b_C2, G_9768, L2_434, F_SotoGF3, E_11023, L1_115, B_Har36, L2b_UCH-2, B_Jali20"
#> Residuals: "C_UW1 (G), C_Aus10 (G), C_TW3 (G)"
#>
#>
#> Additional details
#> Metric: percent
#> Excluded Positions: ""
#> Excluded Positions From process_allele: ""
#> Included Positions: ""
#> Group of interest: "A_D213, H_S1432"
#> All analysed sequences: "A_D213, H_S1432, Ia_SotoGIa3, B_Aus3, Ia_SotoGIa1, C_UW1, F_SW5, L2b_CV204, L1_440, A_7249, Ba_Aus25, L2_LST, B_TZ1A828, A_HAR-13, E_150, E_Bour, C_Aus10, H_R31975, E_SotoGE4, L2b_UCH-1, L2b_795, A_2497, Ba_Apache2, E_SW2, L3_404, L1_224, D_UW-3, G_11074, Ds_2923, F_70, K_SotoGK1, E_SW3, L2b_C1, E_SotoGE8, G_SotoGG1, G_11222, A_363, L1_SA16, L2b_8200, J_6276, F_SW4, G_9301, L2b_C2, A_5291, G_9768, L2_434, F_SotoGF3, C_TW3, E_11023, L1_115, B_Har36, L2b_UCH-2, D_SotoGD6, D_SotoGD5, B_Jali20, D_SotoGD1"