Automatic ClustAssess pipeline. Generating the ClustAssess shiny-app
Source:vignettes/stability-pipeline-shiny.Rmd
stability-pipeline-shiny.Rmd
ClustAssess
provide the option to run the entire stability pipeline automatically, by choosing the parameters based on their highest stability. Another useful feature is the option to create, based on this object, a shiny app that user can interact with and perform the assessment of the PhenoGraph configuration and of the clusters.
library(Seurat)
#> Loading required package: SeuratObject
#> Loading required package: sp
#>
#> Attaching package: 'SeuratObject'
#> The following objects are masked from 'package:base':
#>
#> intersect, t
library(ClustAssess)
library(SeuratData)
library(ggplot2)
#>
#> Attaching package: 'ggplot2'
#> The following object is masked from 'package:base':
#>
#> is.element
packageVersion("ClustAssess") # should be 1.0.0
#> [1] '1.0.0'
Process the PBMC 3k Seurat object similarly to the Stability-based parameter assessment
vignette.
InstallData("pbmc3k")
#> Warning: The following packages are already installed and will not be
#> reinstalled: pbmc3k
data("pbmc3k")
pbmc3k <- UpdateSeuratObject(pbmc3k)
#> Validating object structure
#> Updating object slots
#> Ensuring keys are in the proper structure
#> Warning: Assay RNA changing from Assay to Assay
#> Ensuring keys are in the proper structure
#> Ensuring feature names don't have underscores or pipes
#> Updating slots in RNA
#> Validating object structure for Assay 'RNA'
#> Object representation is consistent with the most current Seurat version
pbmc3k <- PercentageFeatureSet(pbmc3k, pattern = "^MT-", col.name = "percent.mito")
pbmc3k <- PercentageFeatureSet(pbmc3k, pattern = "^RP[SL][[:digit:]]", col.name = "percent.rp")
# remove MT and RP genes
all.index <- seq_len(nrow(pbmc3k))
MT.index <- grep(pattern = "^MT-", x = rownames(pbmc3k), value = FALSE)
RP.index <- grep(pattern = "^RP[SL][[:digit:]]", x = rownames(pbmc3k), value = FALSE)
pbmc3k <- pbmc3k[!((all.index %in% MT.index) | (all.index %in% RP.index)), ]
pbmc3k <- subset(pbmc3k, nFeature_RNA < 2000 & nCount_RNA < 2500 & percent.mito < 7 & percent.rp > 7)
pbmc3k <- NormalizeData(pbmc3k, verbose = FALSE)
pbmc3k <- FindVariableFeatures(pbmc3k, selection.method = "vst", nfeatures = 3000, verbose = FALSE)
features <- dimnames(pbmc3k@assays$RNA)[[1]]
var_features <- pbmc3k@assays[["RNA"]]@var.features
n_abundant <- 3000
most_abundant_genes <- rownames(pbmc3k@assays$RNA)[order(Matrix::rowSums(pbmc3k@assays$RNA),
decreasing = TRUE
)]
pbmc3k <- ScaleData(pbmc3k, features = features, verbose = FALSE)
pbmc3k <- RunPCA(pbmc3k,
npcs = 30,
approx = FALSE,
verbose = FALSE,
features = intersect(most_abundant_genes, pbmc3k@assays$RNA@var.features)
)
pbmc3k <- RunUMAP(pbmc3k,
reduction = "pca",
dims = 1:30,
n.neighbors = 30,
min.dist = 0.3,
metric = "cosine",
verbose = FALSE
)
#> Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
#> To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
#> This message will be shown once per session
Initialise the parallel backend.
RhpcBLASctl::blas_set_num_threads(1)
ncores <- 1
my_cluster <- parallel::makeCluster(
ncores,
type = "PSOCK"
)
doParallel::registerDoParallel(cl = my_cluster)
Define the feature sets of interest.
features <- dimnames(pbmc3k@assays$RNA)[[1]]
var_features <- pbmc3k@assays[["RNA"]]@var.features
n_abundant <- 3000
most_abundant_genes <- rownames(pbmc3k@assays$RNA)[order(Matrix::rowSums(pbmc3k@assays$RNA),
decreasing = TRUE
)]
steps <- seq(from = 500, to = 3000, by = 500)
ma_hv_genes_intersection_sets <- sapply(steps, function(x) intersect(most_abundant_genes[1:x], var_features[1:x]))
ma_hv_genes_intersection <- Reduce(union, ma_hv_genes_intersection_sets)
ma_hv_steps <- sapply(ma_hv_genes_intersection_sets, length)
Apply the automatic assessment.
automm_output <- automatic_stability_assessment(
expression_matrix = pbmc3k@assays$RNA@scale.data,
n_repetitions = 10,
n_neigh_sequence = seq(from = 5, to = 50, by = 5),
resolution_sequence = seq(from = 0.1, to = 1, by = 0.1),
features_sets = list(
"HV" = var_features,
"MA" = most_abundant_genes[1:3000]
),
steps = list(
"HV" = steps,
"MA" = steps
),
n_top_configs = 2,
umap_arguments = list(
min_dist = 0.3,
n_neighbors = 30,
metric = "cosine"
),
save_temp = FALSE,
verbose = TRUE
)
#> [2024-11-06 19:11:55.687212] Assessing the stability of the dimensionality reduction
#> [1] "HV"
#> [1] "MA"
#> [2024-11-06 19:14:20.65324] Choosing the top 2
#> [2024-11-06 19:14:20.883779] Assessing the stability of the connected components
#> [2024-11-06 19:14:24.894831] Assessing the stability of the graph construction parameters
#> [2024-11-06 19:16:11.24012] Assessing the stability of the graph clustering method
#> [1] "HV 500"
#> [1] "HV 1000"
#> [1] "MA 2000"
#> [1] "MA 3000"
Close the connections opened when using multiple cores.
foreach::registerDoSEQ()
Create the shiny app based on the ClustAssess output. You should also specify either a seurat object or a normalized expression matrix. Note: Please make sure that the directory mentioned in the parameter project_folder
is empty / doesn’t exist.
# generate using a seurat object
write_shiny_app(
object = pbmc3k,
assay_name = "RNA",
clustassess_object = automm_output,
project_folder = "clustassess_app_dir_seurat",
shiny_app_title = "PBMC 3k dataset"
)
# generate using a normalized expression matrix
write_shiny_app(
object = pbmc3k@assays$RNA@data,
metadata = pbmc3k@meta.data,
clustassess_object = automm_output,
project_folder = "clustassess_app_dir_expr",
shiny_app_title = "PBMC 3k dataset"
)
The app can be run using the following command.
shiny::runApp("clustassess_app_dir_seurat")
Session info
sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Europe/Bucharest
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] ggplot2_3.5.1.9000 ClustAssess_1.0.0
#> [3] Seurat_5.1.0 SeuratObject_5.0.2
#> [5] sp_2.1-4 pbmcMultiome.SeuratData_0.1.4
#> [7] pbmc3k.SeuratData_3.1.4 SeuratData_0.2.2.9001
#> [9] devtools_2.4.5 usethis_2.2.3
#>
#> loaded via a namespace (and not attached):
#> [1] RColorBrewer_1.1-3 jsonlite_1.8.8 magrittr_2.0.3
#> [4] spatstat.utils_3.1-0 rmarkdown_2.27 fs_1.6.4
#> [7] ragg_1.3.3 vctrs_0.6.5 ROCR_1.0-11
#> [10] memoise_2.0.1 spatstat.explore_3.2-7 progress_1.2.3
#> [13] htmltools_0.5.8.1 sass_0.4.9 sctransform_0.4.1
#> [16] parallelly_1.37.1 KernSmooth_2.23-22 bslib_0.7.0
#> [19] htmlwidgets_1.6.4 desc_1.4.3 ica_1.0-3
#> [22] plyr_1.8.9 plotly_4.10.4 zoo_1.8-12
#> [25] cachem_1.1.0 igraph_2.0.3 iterators_1.0.14
#> [28] mime_0.12 lifecycle_1.0.4 pkgconfig_2.0.3
#> [31] Matrix_1.7-0 R6_2.5.1 fastmap_1.2.0
#> [34] fitdistrplus_1.1-11 future_1.33.2 shiny_1.8.1.1
#> [37] digest_0.6.35 colorspace_2.1-1 patchwork_1.2.0
#> [40] tensor_1.5 RSpectra_0.16-2 irlba_2.3.5.1
#> [43] pkgload_1.3.4 textshaping_0.4.0 progressr_0.14.0
#> [46] fansi_1.0.6 spatstat.sparse_3.0-3 httr_1.4.7
#> [49] polyclip_1.10-6 abind_1.4-5 compiler_4.4.0
#> [52] remotes_2.5.0 doParallel_1.0.17 withr_3.0.1
#> [55] fastDummies_1.7.3 pkgbuild_1.4.4 MASS_7.3-60
#> [58] rappdirs_0.3.3 sessioninfo_1.2.2 tools_4.4.0
#> [61] lmtest_0.9-40 httpuv_1.6.15 future.apply_1.11.2
#> [64] goftest_1.2-3 glue_1.7.0 nlme_3.1-163
#> [67] promises_1.3.0 grid_4.4.0 Rtsne_0.17
#> [70] cluster_2.1.6 reshape2_1.4.4 generics_0.1.3
#> [73] gtable_0.3.5 spatstat.data_3.0-4 tidyr_1.3.1
#> [76] hms_1.1.3 data.table_1.15.4 utf8_1.2.4
#> [79] BiocGenerics_0.50.0 spatstat.geom_3.2-9 RcppAnnoy_0.0.22
#> [82] foreach_1.5.2 ggrepel_0.9.5 RANN_2.6.1
#> [85] pillar_1.9.0 stringr_1.5.1 spam_2.10-0
#> [88] RcppHNSW_0.6.0 later_1.3.2 splines_4.4.0
#> [91] dplyr_1.1.4 lattice_0.22-5 survival_3.5-8
#> [94] deldir_2.0-4 tidyselect_1.2.1 miniUI_0.1.1.1
#> [97] pbapply_1.7-2 knitr_1.47 gridExtra_2.3
#> [100] scattermore_1.2 RhpcBLASctl_0.23-42 xfun_0.44
#> [103] SharedObject_1.19.1 matrixStats_1.3.0 stringi_1.8.4
#> [106] lazyeval_0.2.2 yaml_2.3.8 evaluate_1.0.0
#> [109] codetools_0.2-19 tibble_3.2.1 cli_3.6.3
#> [112] uwot_0.2.2 xtable_1.8-4 reticulate_1.37.0
#> [115] systemfonts_1.1.0 munsell_0.5.1 jquerylib_0.1.4
#> [118] Rcpp_1.0.13 globals_0.16.3 spatstat.random_3.2-3
#> [121] png_0.1-8 parallel_4.4.0 ellipsis_0.3.2
#> [124] pkgdown_2.1.1 prettyunits_1.2.0 dotCall64_1.1-1
#> [127] profvis_0.3.8 urlchecker_1.0.1 listenv_0.9.1
#> [130] viridisLite_0.4.2 scales_1.3.0 ggridges_0.5.6
#> [133] crayon_1.5.3 leiden_0.4.3.1 purrr_1.0.2
#> [136] rlang_1.1.4 cowplot_1.1.3