MinSNPs_Workflow

library(minSNPs) #> The minSNPs version loaded is: 0.0.3 library(Biostrings) # optional #> Loading required package: BiocGenerics #> Loading required package: parallel #> #> Attaching package: 'BiocGenerics' #> The following objects are masked from 'package:parallel': #> #> clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, #> clusterExport, clusterMap, parApply, parCapply, parLapply, #> parLapplyLB, parRapply, parSapply, parSapplyLB #> The following objects are masked from 'package:stats': #> #> IQR, mad, sd, var, xtabs #> The following objects are masked from 'package:base': #> #> Filter, Find, Map, Position, Reduce, anyDuplicated, append, #> as.data.frame, basename, cbind, colnames, dirname, do.call, #> duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted, #> lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin, #> pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table, #> tapply, union, unique, unsplit, which, which.max, which.min #> Loading required package: S4Vectors #> Loading required package: stats4 #> #> Attaching package: 'S4Vectors' #> The following object is masked from 'package:base': #> #> expand.grid #> Loading required package: IRanges #> Loading required package: XVector #> #> Attaching package: 'Biostrings' #> The following object is masked from 'package:base': #> #> strsplit library(BiocParallel) # optional, but needed for parallel processing

Reading & processing input for further analysis

read_fasta is provided as a way to read fasta file, equivalent function, e.g., from Biostrings and read.fasta from seqinr can be used.

isolates_from_biostring <- readBStringSet(
  system.file("extdata", "Chlamydia_mapped.fasta", package = "minSNPs"))
isolates_from_default <- read_fasta(
  system.file("extdata", "Chlamydia_mapped.fasta", package = "minSNPs"))
processed_from_biostring <- process_allele(isolates_from_biostring)
#> Ignored samples: 
#>  
#> Ignored  0  positions
processed_from_default <- process_allele(isolates_from_default)
#> Ignored samples: 
#>  
#> Ignored  0  positions

Subsequent analyses can use output from process_allele.

discriminating_snps <- find_optimised_snps(seqc = processed_from_biostring, metric = "percent", number_of_result = 1, max_depth = 1, included_positions = c(), excluded_positions = c(), goi = c("A_D213", "H_S1432"))

cat("High D SNPs\n") #> High D SNPs output_result(high_d_snps) #> Result - 1 #> Position(s) Score #> "1988" 0.734415584415584 #> #> Groups #> T "A_D213, H_S1432, Ia_SotoGIa3, Ia_SotoGIa1, C_UW1, A_7249, A_HAR-13, C_Aus10, H_R31975, A_2497, L3_404, K_SotoGK1, A_363, J_6276, A_5291, C_TW3" #> G "B_Aus3, L1_440, Ba_Aus25, B_TZ1A828, Ba_Apache2, D_UW-3, Ds_2923, L1_SA16, B_Har36, D_SotoGD6, D_SotoGD5, B_Jali20, D_SotoGD1" #> A "F_SW5, L2b_CV204, L2_LST, L2b_UCH-1, L2b_795, L1_224, G_11074, F_70, L2b_C1, G_SotoGG1, G_11222, L2b_8200, F_SW4, G_9301, L2b_C2, G_9768, L2_434, F_SotoGF3, L1_115, L2b_UCH-2" #> C "E_150, E_Bour, E_SotoGE4, E_SW2, E_SW3, E_SotoGE8, E_11023" #> #> #> Additional details #> Metric: simpson #> Excluded Positions: "" #> Excluded Positions From process_allele: "" #> Included Positions: "" #> Group of interest: "" #> All analysed sequences: "A_D213, H_S1432, Ia_SotoGIa3, B_Aus3, Ia_SotoGIa1, C_UW1, F_SW5, L2b_CV204, L1_440, A_7249, Ba_Aus25, L2_LST, B_TZ1A828, A_HAR-13, E_150, E_Bour, C_Aus10, H_R31975, E_SotoGE4, L2b_UCH-1, L2b_795, A_2497, Ba_Apache2, E_SW2, L3_404, L1_224, D_UW-3, G_11074, Ds_2923, F_70, K_SotoGK1, E_SW3, L2b_C1, E_SotoGE8, G_SotoGG1, G_11222, A_363, L1_SA16, L2b_8200, J_6276, F_SW4, G_9301, L2b_C2, A_5291, G_9768, L2_434, F_SotoGF3, C_TW3, E_11023, L1_115, B_Har36, L2b_UCH-2, D_SotoGD6, D_SotoGD5, B_Jali20, D_SotoGD1" cat("SNPs discriminating against A_D213, H_S1432\n") #> SNPs discriminating against A_D213, H_S1432 output_result(discriminating_snps) #> Result - 1 #> Position(s) Score #> "1806" 0.944444444444444 #> #> Groups #> *target* - G "A_D213, H_S1432, C_UW1, C_Aus10, C_TW3" #> C "Ia_SotoGIa3, Ia_SotoGIa1, A_7249, A_HAR-13, A_2497, D_UW-3, Ds_2923, K_SotoGK1, A_363, J_6276, A_5291, D_SotoGD6, D_SotoGD5, D_SotoGD1" #> A "B_Aus3, F_SW5, L2b_CV204, L1_440, Ba_Aus25, L2_LST, B_TZ1A828, E_150, E_Bour, H_R31975, E_SotoGE4, L2b_UCH-1, L2b_795, Ba_Apache2, E_SW2, L3_404, L1_224, G_11074, F_70, E_SW3, L2b_C1, E_SotoGE8, G_SotoGG1, G_11222, L1_SA16, L2b_8200, F_SW4, G_9301, L2b_C2, G_9768, L2_434, F_SotoGF3, E_11023, L1_115, B_Har36, L2b_UCH-2, B_Jali20" #> Residuals: "C_UW1 (G), C_Aus10 (G), C_TW3 (G)" #> #> #> Additional details #> Metric: percent #> Excluded Positions: "" #> Excluded Positions From process_allele: "" #> Included Positions: "" #> Group of interest: "A_D213, H_S1432" #> All analysed sequences: "A_D213, H_S1432, Ia_SotoGIa3, B_Aus3, Ia_SotoGIa1, C_UW1, F_SW5, L2b_CV204, L1_440, A_7249, Ba_Aus25, L2_LST, B_TZ1A828, A_HAR-13, E_150, E_Bour, C_Aus10, H_R31975, E_SotoGE4, L2b_UCH-1, L2b_795, A_2497, Ba_Apache2, E_SW2, L3_404, L1_224, D_UW-3, G_11074, Ds_2923, F_70, K_SotoGK1, E_SW3, L2b_C1, E_SotoGE8, G_SotoGG1, G_11222, A_363, L1_SA16, L2b_8200, J_6276, F_SW4, G_9301, L2b_C2, A_5291, G_9768, L2_434, F_SotoGF3, C_TW3, E_11023, L1_115, B_Har36, L2b_UCH-2, D_SotoGD6, D_SotoGD5, B_Jali20, D_SotoGD1"

MinSNPs_Workflow

Reading & processing input for further analysis

Identifying SNPs with high Simpson’s index

Identifying SNPs discriminating a group of interest

Displaying/saving result