IrGSEA
The integration of single cell rank-based gene set enrichment analysis
Install / Use
/learn @chuiqin/IrGSEAREADME
irGSEA
<!-- badges: start --> <!-- badges: end -->Integrates single-cell rank-based gene set enrichment analysis methods and makes the results easy to visualize.
For more details, please view irGSEA. You can also view: Chinese tutorial_1, Chinese tutorial_2
Cite: irGSEA: the integration of single-cell rank-based gene set enrichment analysis, Briefings in Bioinformatics, Volume 25, Issue 4, July 2024, bbae243, https://doi.org/10.1093/bib/bbae243
0.Graphical Abstract
<figure> <img src="./man/figures/Abstrast.jpg" alt="Graphical Abstract" /> <figcaption aria-hidden="true">Graphical Abstract</figcaption> </figure>
1.Installation
# Install irGSEA and its required dependencies.
# Each package is installed only when it is not already available, so the
# script can be re-run safely.

# install packages from CRAN
cran.packages <- c(
  "aplot", "BiocManager", "circlize", "cowplot", "data.table",
  "devtools", "doParallel", "doRNG", "dplyr", "ggfun", "gghalves",
  "ggplot2", "ggplotify", "ggridges", "ggsci", "irlba",
  "magrittr", "Matrix", "msigdbr", "pagoda2", "plyr", "pointr",
  "purrr", "RcppML", "readr", "reshape2", "reticulate",
  "rlang", "RMTstat", "RobustRankAggreg", "roxygen2",
  "Seurat", "SeuratObject", "stringr", "tibble", "tidyr",
  "tidyselect", "tidytree", "VAM"
)
for (i in cran.packages) {
  if (!requireNamespace(i, quietly = TRUE)) {
    # `ask` / `update` are BiocManager::install() options, not parameters of
    # install.packages(), so they are not passed here.
    install.packages(i)
  }
}

# install packages from Bioconductor
bioconductor.packages <- c(
  "AUCell", "BiocParallel", "ComplexHeatmap",
  "decoupleR", "fgsea", "ggtree", "GSEABase",
  "GSVA", "Nebulosa", "scde", "singscore",
  "SummarizedExperiment", "UCell",
  "viper", "sparseMatrixStats"
)
for (i in bioconductor.packages) {
  if (!requireNamespace(i, quietly = TRUE)) {
    # do not prompt and do not update already-installed packages
    BiocManager::install(i, ask = FALSE, update = FALSE)
  }
}

# install packages from Github
if (!requireNamespace("irGSEA", quietly = TRUE)) {
  devtools::install_github("chuiqin/irGSEA", force = TRUE)
}
Optional installation if you want to perform VISION, gficf, Sargent, ssGSEApy, GSVApy, etc
#### install packages from Github
# Optional dependencies; each one is installed only when it is missing.

# VISION
if (!requireNamespace("VISION", quietly = TRUE)) {
  devtools::install_github("YosefLab/VISION", force = TRUE)
}

# mdt need ranger
if (!requireNamespace("ranger", quietly = TRUE)) {
  devtools::install_github("imbs-hl/ranger", force = TRUE)
}

# gficf need RcppML (version > 0.3.7) package
# system.file() returns "" when the package is not installed; checking it
# first avoids the error utils::packageVersion() raises for a missing package,
# and does not load the RcppML namespace before it is (re)installed.
if (!nzchar(system.file(package = "RcppML")) ||
    !utils::packageVersion("RcppML") > "0.3.7") {
  message("The version of RcppML should greater than 0.3.7 and install RcppML package from Github")
  devtools::install_github("zdebruine/RcppML", force = TRUE)
}

# please first `library(RcppML)` if you want to perform gficf
if (!requireNamespace("gficf", quietly = TRUE)) {
  devtools::install_github("gambalab/gficf", force = TRUE)
}

# GSVApy and ssGSEApy need SeuratDisk package
if (!requireNamespace("SeuratDisk", quietly = TRUE)) {
  devtools::install_github("mojaveazure/seurat-disk", force = TRUE)
}

# sargent
if (!requireNamespace("sargent", quietly = TRUE)) {
  # devtools::install_github("Sanofi-Public/PMCB-Sargent", force = TRUE)
  devtools::install_git("https://gitee.com/fan_chuiqin/PMCB-Sargent.git", force = TRUE)
}

# pagoda2 need scde package
if (!requireNamespace("scde", quietly = TRUE)) {
  devtools::install_github("hms-dbmi/scde", force = TRUE)
}

# if error1 (function 'sexp_as_cholmod_sparse' not provided by package 'Matrix')
# or error2 (function 'as_cholmod_sparse' not provided by package 'Matrix') occurs
# when you perform pagoda2, please check the version of irlba and Matrix
# The following combinations were tested and work:
# R 4.2.2 irlba(v 2.3.5.1) Matrix(1.5-3)
# R 4.3.1 irlba(v 2.3.5.1) Matrix(1.6-1.1)
# R 4.3.2 irlba(v 2.3.5.1) Matrix(1.6-3)
#### create conda env
# If error (Unable to find conda binary. Is Anaconda installed) occurs,
# please perform `reticulate::install_miniconda()`
if (!"irGSEA" %in% reticulate::conda_list()$name) {
  reticulate::conda_create("irGSEA")
}

# install every required python package that is not yet in the env
python.package <- reticulate::py_list_packages(envname = "irGSEA")$package
require.package <- c("anndata", "scanpy", "argparse", "gseapy", "decoupler")
# iterate over the package names themselves (not their indices) and install
# only those that are missing
for (i in require.package) {
  if (!i %in% python.package) {
    reticulate::conda_install(envname = "irGSEA", packages = i, pip = TRUE)
  }
}
Some users can speed up installation by using mirrors:
# Same installation as above, but using mirror repositories for
# Bioconductor, CRAN and the git-hosted packages.
options(BioC_mirror = "https://mirrors.tuna.tsinghua.edu.cn/bioconductor/")
options("repos" = c(CRAN = "http://mirrors.cloud.tencent.com/CRAN/"))

# install packages from CRAN
cran.packages <- c(
  "aplot", "BiocManager", "circlize", "cowplot", "data.table",
  "devtools", "doParallel", "doRNG", "dplyr", "ggfun", "gghalves",
  "ggplot2", "ggplotify", "ggridges", "ggsci", "irlba",
  "magrittr", "Matrix", "msigdbr", "pagoda2", "plyr", "pointr",
  "purrr", "RcppML", "readr", "reshape2", "reticulate",
  "rlang", "RMTstat", "RobustRankAggreg", "roxygen2",
  "Seurat", "SeuratObject", "stringr", "tibble", "tidyr",
  "tidyselect", "tidytree", "VAM"
)
for (i in cran.packages) {
  if (!requireNamespace(i, quietly = TRUE)) {
    # `ask` / `update` are BiocManager::install() options, not parameters of
    # install.packages(), so they are not passed here.
    install.packages(i)
  }
}

# install packages from Bioconductor
# (same list as the non-mirror installation, including sparseMatrixStats)
bioconductor.packages <- c(
  "AUCell", "BiocParallel", "ComplexHeatmap",
  "decoupleR", "fgsea", "ggtree", "GSEABase",
  "GSVA", "Nebulosa", "scde", "singscore",
  "SummarizedExperiment", "UCell",
  "viper", "sparseMatrixStats"
)
for (i in bioconductor.packages) {
  if (!requireNamespace(i, quietly = TRUE)) {
    # do not prompt and do not update already-installed packages
    BiocManager::install(i, ask = FALSE, update = FALSE)
  }
}

# install packages from git
if (!requireNamespace("irGSEA", quietly = TRUE)) {
  devtools::install_git("https://gitee.com/fan_chuiqin/irGSEA.git", force = TRUE)
}

# VISION
if (!requireNamespace("VISION", quietly = TRUE)) {
  devtools::install_git("https://gitee.com/fan_chuiqin/VISION.git", force = TRUE)
}

# mdt need ranger
if (!requireNamespace("ranger", quietly = TRUE)) {
  devtools::install_git("https://gitee.com/fan_chuiqin/ranger.git", force = TRUE)
}

# gficf need RcppML (version > 0.3.7) package
# system.file() returns "" when the package is not installed; checking it
# first avoids the error utils::packageVersion() raises for a missing package.
if (!nzchar(system.file(package = "RcppML")) ||
    !utils::packageVersion("RcppML") > "0.3.7") {
  message("The version of RcppML should greater than 0.3.7 and install RcppML package from Git")
  devtools::install_git("https://gitee.com/fan_chuiqin/RcppML.git", force = TRUE)
}

# please first `library(RcppML)` if you want to perform gficf
if (!requireNamespace("gficf", quietly = TRUE)) {
  devtools::install_git("https://gitee.com/fan_chuiqin/gficf.git", force = TRUE)
}

# GSVApy and ssGSEApy need SeuratDisk package
if (!requireNamespace("SeuratDisk", quietly = TRUE)) {
  devtools::install_git("https://gitee.com/fan_chuiqin/seurat-disk.git", force = TRUE)
}

# sargent
if (!requireNamespace("sargent", quietly = TRUE)) {
  devtools::install_git("https://gitee.com/fan_chuiqin/PMCB-Sargent.git", force = TRUE)
}

# pagoda2 need scde package
if (!requireNamespace("scde", quietly = TRUE)) {
  devtools::install_git("https://gitee.com/fan_chuiqin/scde.git", force = TRUE)
}
#### create conda env
# If error (Unable to find conda binary. Is Anaconda installed) occurs,
# please perform `reticulate::install_miniconda()`
if (!("irGSEA" %in% reticulate::conda_list()$name)) {
  reticulate::conda_create("irGSEA")
}

# python packages already present in the env vs. the ones that are needed
python.package <- reticulate::py_list_packages(envname = "irGSEA")$package
require.package <- c("anndata", "scanpy", "argparse", "gseapy", "decoupler")

# install each missing package with pip through the PyPI mirror
for (i in require.package) {
  if (i %in% python.package) {
    next
  }
  reticulate::conda_install(
    envname = "irGSEA",
    packages = i,
    pip = TRUE,
    pip_options = "-i https://pypi.tuna.tsinghua.edu.cn/simple"
  )
}
2.Available method
The time and peak memory consumption associated with 50 Hallmark gene sets across 19 scoring methods for datasets of varying sizes. To address memory peak issues for datasets exceeding 50,000 cells, we implemented a strategy of partitioning them into processing units of 5,000 cells each for scoring. While this strategy mitigates memory peak issues, it extends the processing time.
The time and peak memory consumption associated with 50 Hallmark gene sets across irGSEA (AUCell, UCell, singscore, ssgsea, JASMINE and viper) for datasets of varying sizes when the parameter chunk is used.
3.Example dataset
pre-processing workflow
Start from the 10X output file and organize it into the format required by the irGSEA package
# load library
library(dplyr)
library(Seurat)

# download the pbmc3k dataset
# the dataset is the output of the Cell Ranger pipeline from 10X
# decompress the file to the specified directory
# (the archive lives under .../samples/cell/pbmc3k/ on the 10x server; only
# the local file name below is free to choose)
download.file(
  "https://cf.10xgenomics.com/samples/cell/pbmc3k/pbmc3k_filtered_gene_bc_matrices.tar.gz",
  destfile = "./pbmc3k.final3k_filtered_gene_bc_matrices.tar.gz"
)
untar("./pbmc3k.final3k_filtered_gene_bc_matrices.tar.gz", exdir = "./")

# Load the pbmc3k.final dataset
pbmc3k.final <- Read10X(data.dir = "./filtered_gene_bc_matrices/hg19/")

# create a v3-style assay
options(Seurat.object.assay.version = "v3")

# Initialize the Seurat object with the raw (non-normalized data)
# keep genes detected in at least 3 cells (min.cells) and
# cells with at least 200 detected genes (min.features)
pbmc3k.final <- CreateSeuratObject(
  counts = pbmc3k.final,
  min.cells = 3,
  min.features = 200
)
pbmc3k.final
# filter cells that have unique feature counts over 2500 or less than 200
# filter cells that have >5% mitochondrial counts
# filter cells that have >10% hemoglobin related counts
# Specific filtration criteria need to be adjusted according
Related Skills
node-connect
343.1kDiagnose OpenClaw node connection and pairing failures for Android, iOS, and macOS companion apps
frontend-design
90.0kCreate distinctive, production-grade frontend interfaces with high design quality. Use this skill when the user asks to build web components, pages, or applications. Generates creative, polished code that avoids generic AI aesthetics.
openai-whisper-api
343.1kTranscribe audio via OpenAI Audio Transcriptions API (Whisper).
qqbot-media
343.1kQQBot 富媒体收发能力。使用 <qqmedia> 标签,系统根据文件扩展名自动识别类型(图片/语音/视频/文件)。
