SkillAgentSearch skills...

IrGSEA

The integration of single cell rank-based gene set enrichment analysis

Install / Use

/learn @chuiqin/IrGSEA
About this skill

Quality Score

0/100

Supported Platforms

Universal

README

<!-- README.md is generated from README.Rmd. Please edit that file -->

irGSEA

<!-- badges: start --> <!-- badges: end -->

Integrate all single cell rank-based gene set enrichment analysis and easy to visualize the results.

For more details, please view irGSEA And you can view: Chinese tutorial_1 Chinese tutorial_2

Cite: irGSEA: the integration of single-cell rank-based gene set enrichment analysis, Briefings in Bioinformatics, Volume 25, Issue 4, July 2024, bbae243, https://doi.org/10.1093/bib/bbae243

0.Graph Abstrast

<figure> <img src="./man/figures/Abstrast.jpg" alt="Graph Abstrast" /> <figcaption aria-hidden="true">Graph Abstrast</figcaption> </figure>

1.Installation


# install packages from CRAN
cran.packages <- c("aplot", "BiocManager", "circlize", "cowplot","data.table", 
                   "devtools", "doParallel", "doRNG", "dplyr", "ggfun", "gghalves", 
                   "ggplot2", "ggplotify", "ggridges", "ggsci", "irlba",
                   "magrittr", "Matrix", "msigdbr", "pagoda2", "plyr", "pointr", 
                   "purrr", "RcppML", "readr", "reshape2", "reticulate", 
                   "rlang", "RMTstat", "RobustRankAggreg", "roxygen2", 
                   "Seurat", "SeuratObject", "stringr", "tibble", "tidyr", 
                   "tidyselect", "tidytree", "VAM")

for (i in cran.packages) {
  if (!requireNamespace(i, quietly = TRUE)) {
    install.packages(i, ask = F, update = F)
  }
}

# install packages from Bioconductor
bioconductor.packages <- c("AUCell", "BiocParallel", "ComplexHeatmap", 
                           "decoupleR", "fgsea", "ggtree", "GSEABase", 
                           "GSVA", "Nebulosa", "scde", "singscore",
                           "SummarizedExperiment", "UCell",
                           "viper","sparseMatrixStats")

for (i in bioconductor.packages) {
  if (!requireNamespace(i, quietly = TRUE)) {
    BiocManager::install(i, ask = F, update = F)
  }
}

# install packages from Github
if (!requireNamespace("irGSEA", quietly = TRUE)) { 
    devtools::install_github("chuiqin/irGSEA", force =T)
}

Optional installation if you want to perform VISION, gficf, Sargent, ssGSEApy, GSVApy, etc


#### install packages from Github
# VISION
if (!requireNamespace("VISION", quietly = TRUE)) { 
    devtools::install_github("YosefLab/VISION", force =T)
}

# mdt need ranger
if (!requireNamespace("ranger", quietly = TRUE)) { 
  devtools::install_github("imbs-hl/ranger", force =T)
}

# gficf need RcppML (version > 0.3.7) package
if (!utils::packageVersion("RcppML") > "0.3.7") {
  message("The version of RcppML should greater than 0.3.7 and install RcppML package from Github")
  devtools::install_github("zdebruine/RcppML", force =T)
}

# please first `library(RcppML)` if you want to perform gficf
if (!requireNamespace("gficf", quietly = TRUE)) { 
    devtools::install_github("gambalab/gficf", force =T)
}

# GSVApy and ssGSEApy need SeuratDisk package
if (!requireNamespace("SeuratDisk", quietly = TRUE)) { 
    devtools::install_github("mojaveazure/seurat-disk", force =T)
}

# sargent
if (!requireNamespace("sargent", quietly = TRUE)) { 
    # devtools::install_github("Sanofi-Public/PMCB-Sargent", force =T)
    devtools::install_git("https://gitee.com/fan_chuiqin/PMCB-Sargent.git", force =T)
}

# pagoda2 need scde package
if (!requireNamespace("scde", quietly = TRUE)) { 
  devtools::install_github("hms-dbmi/scde", force =T)
}

# if error1 (functio 'sexp_as_cholmod_sparse' not provided by package 'Matrix')
# or error2 (functio 'as_cholmod_sparse' not provided by package 'Matrix') occurs
# when you perform pagoda2, please check the version of irlba and Matrix
# It's ok when I test as follow:
# R 4.2.2 irlba(v 2.3.5.1) Matrix(1.5-3)
# R 4.3.1 irlba(v 2.3.5.1) Matrix(1.6-1.1)
# R 4.3.2 irlba(v 2.3.5.1) Matrix(1.6-3)


#### create conda env
# If error (Unable to find conda binary. Is Anaconda installed) occurs, 
# please perform `reticulate::install_miniconda()`
if (! "irGSEA" %in% reticulate::conda_list()$name) {
      reticulate::conda_create("irGSEA")
}

# if python package exist
python.package <- reticulate::py_list_packages(envname = "irGSEA")$package
require.package <- c("anndata", "scanpy", "argparse", "gseapy", "decoupler")
for (i in seq_along(require.package)) {
  if (i %in% python.package) {
    reticulate::conda_install(envname = "irGSEA", packages = i, pip = T)
  }
}

Some users can accelerate by mirror

options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor/")
options("repos" = c(CRAN="http://mirrors.cloud.tencent.com/CRAN/"))

# install packages from CRAN
cran.packages <- c("aplot", "BiocManager", "circlize", "cowplot", "data.table",
                   "devtools", "doParallel", "doRNG", "dplyr", "ggfun", "gghalves", 
                   "ggplot2", "ggplotify", "ggridges", "ggsci", "irlba",
                   "magrittr", "Matrix", "msigdbr", "pagoda2", "plyr", "pointr", 
                   "purrr", "RcppML", "readr", "reshape2", "reticulate", 
                   "rlang", "RMTstat", "RobustRankAggreg", "roxygen2", 
                   "Seurat", "SeuratObject", "stringr", "tibble", "tidyr", 
                   "tidyselect", "tidytree", "VAM")

for (i in cran.packages) {
  if (!requireNamespace(i, quietly = TRUE)) {
    install.packages(i, ask = F, update = F)
  }
}

# install packages from Bioconductor
bioconductor.packages <- c("AUCell", "BiocParallel", "ComplexHeatmap", 
                           "decoupleR", "fgsea", "ggtree", "GSEABase", 
                           "GSVA", "Nebulosa", "scde", "singscore",
                           "SummarizedExperiment", "UCell", "viper")

for (i in bioconductor.packages) {
  if (!requireNamespace(i, quietly = TRUE)) {
    BiocManager::install(i, ask = F, update = F)
  }
}

# install packages from git
if (!requireNamespace("irGSEA", quietly = TRUE)) { 
    devtools::install_git("https://gitee.com/fan_chuiqin/irGSEA.git", force =T)
}
# VISION
if (!requireNamespace("VISION", quietly = TRUE)) { 
    devtools::install_git("https://gitee.com/fan_chuiqin/VISION.git", force =T)
}

# mdt need ranger
if (!requireNamespace("ranger", quietly = TRUE)) { 
  devtools::install_git("https://gitee.com/fan_chuiqin/ranger.git", force =T)
}

# gficf need RcppML (version > 0.3.7) package
if (!utils::packageVersion("RcppML") > "0.3.7") {
  message("The version of RcppML should greater than 0.3.7 and install RcppML package from Git")
  devtools::install_git("https://gitee.com/fan_chuiqin/RcppML.git", force =T)
}

# please first `library(RcppML)` if you want to perform gficf
if (!requireNamespace("gficf", quietly = TRUE)) { 
    devtools::install_git("https://gitee.com/fan_chuiqin/gficf.git", force =T)
}

# GSVApy and ssGSEApy need SeuratDisk package
if (!requireNamespace("SeuratDisk", quietly = TRUE)) { 
    devtools::install_git("https://gitee.com/fan_chuiqin/seurat-disk.git", 
                          force =T)}

# sargent
if (!requireNamespace("sargent", quietly = TRUE)) { 
    devtools::install_git("https://gitee.com/fan_chuiqin/PMCB-Sargent.git", 
                          force =T)}

# pagoda2 need scde package
if (!requireNamespace("scde", quietly = TRUE)) { 
  devtools::install_git("https://gitee.com/fan_chuiqin/scde.git", force =T)
}

#### create conda env
# If error (Unable to find conda binary. Is Anaconda installed) occurs, 
# please perform `reticulate::install_miniconda()`
if (! "irGSEA" %in% reticulate::conda_list()$name) {
      reticulate::conda_create("irGSEA")
}

# if python package exist
python.package <- reticulate::py_list_packages(envname = "irGSEA")$package
require.package <- c("anndata", "scanpy", "argparse", "gseapy", "decoupler")
for (i in require.package) {
  if (! i %in% python.package) {
    reticulate::conda_install(envname = "irGSEA", packages = i, pip = T,
     pip_options = "-i https://pypi.tuna.tsinghua.edu.cn/simple")
  }
}

2.Available method

The time and peak memory consumption associated with 50 Hallmark gene sets across 19 scoring methods for datasets of varying sizes.To address memory peak issues for datasets exceeding 50,000 cells, we implemented a strategy of partitioning them into processing units of 5,000 cells each for scoring. While this strategy mitigates memory peak issues, it extends the processing time.

Available method The time and peak memory consumption associated with 50 Hallmark gene sets across irGSEA (AUCell, UCell, singscore, ssgsea, JASMINE and viper) for datasets of varying sizes while the parameter chunk works. Available
method

3.Example dataset

pre-processing workflow

Start from the 10X output file and organize it into the format required by the irGSEA package

# load library
library(dplyr)
library(Seurat)
# download the pbmc3k.final dataset
# the dataset is the output of the Cell Ranger pipeline from 10X
# decompress the file to the specified directory
download.file("https://cf.10xgenomics.com/samples/cell/pbmc3k.final3k/pbmc3k.final3k_filtered_gene_bc_matrices.tar.gz",
              destfile = "./pbmc3k.final3k_filtered_gene_bc_matrices.tar.gz")
untar("./pbmc3k.final3k_filtered_gene_bc_matrices.tar.gz", exdir = "./")

# Load the pbmc3k.final dataset
# Initialize the Seurat object with the raw (non-normalized data)
# filter genes expressed by less than 10 cells 
pbmc3k.final <- Read10X(data.dir = "./filtered_gene_bc_matrices/hg19/")
options(Seurat.object.assay.version = "v3")
pbmc3k.final <- CreateSeuratObject(counts = pbmc3k.final, 
                           min.cells = 3, min.features = 200)
pbmc3k.final

# filter cells that have unique feature counts over 2500 or less than 200
# filter cells that have >5% mitochondrial counts
# filter cells that have >10% hemoglobin related counts
# Specific filtration criteria need to be adjusted according 

Related Skills

View on GitHub
GitHub Stars148
CategoryDevelopment
Updated1d ago
Forks25

Languages

R

Security Score

80/100

Audited on Mar 30, 2026

No findings