Files
openpipeline_spatial/src/workflows/multiomics/spatial_process_samples/config.vsh.yaml
CI 560dea5ec5 Build branch openpipeline_spatial/niche-compass with version niche-compass to openpipeline_spatial on branch niche-compass (0c1677b)
Build pipeline: openpipelines-bio.openpipeline-spatial.niche-compass-z8ftz

Source commit: 0c1677bb93

Source message: trigger ci
2025-12-08 21:24:13 +00:00

312 lines
11 KiB
YAML

# Identity and authorship of the spatial sample pre-processing workflow.
name: spatial_process_samples
namespace: workflows/multiomics
scope: public
description: A pipeline to pre-process multiple spatial omics samples.
# Each author entry merges a shared author file and declares the role(s)
# that person has on this component.
authors:
- __merge__: /src/authors/dries_schaumont.yaml
  roles:
  - author
  - maintainer
- __merge__: /src/authors/dorien_roosen.yaml
  roles:
  - contributor
- __merge__: /src/authors/weiwei_schultz.yaml
  roles:
  - contributor
argument_groups:
# Required sample id and input path, plus optional per-modality input layers.
- name: Inputs
  arguments:
  - name: '--id'
    type: string
    description: ID of the sample.
    required: true
    example: foo
  - name: '--input'
    alternatives:
    - '-i'
    type: file
    description: Path to the sample.
    required: true
    example: input.h5mu
  # Both layer arguments are optional; their descriptions state that .X is
  # used when no layer is given.
  - name: '--rna_layer'
    type: string
    description: 'Input layer for the gene expression modality. If not specified, .X is used.'
    required: false
  - name: '--prot_layer'
    type: string
    description: 'Input layer for the antibody capture modality. If not specified, .X is used.'
    required: false
# Single required output file for the processed sample.
- name: Outputs
  arguments:
  - name: '--output'
    type: file
    direction: output
    description: Destination path to the output.
    required: true
    example: output.h5mu
# Controls whether and how the value of --id is recorded on the MuData object.
- name: Sample ID options
  description: |
    Options for adding the id to .obs on the MuData object. Having a sample
    id present is a requirement of several components for this pipeline.
  arguments:
  - name: '--add_id_to_obs'
    type: boolean
    description: 'Add the value passed with --id to .obs.'
    default: true
  # The default column name matches the default of
  # --highly_variable_features_obs_batch_key ("sample_id").
  - name: '--add_id_obs_output'
    type: string
    description: |
      .obs column to add the sample IDs to. Required and only used when
      --add_id_to_obs is set to 'true'.
    default: sample_id
  - name: '--add_id_make_observation_keys_unique'
    type: boolean
    description: |
      Join the id to the .obs index (.obs_names).
      Only used when --add_id_to_obs is set to 'true'.
    default: true
# Per-cell and per-gene thresholds for the RNA modality. None of these
# arguments declares a default; the values shown are examples only.
- name: RNA filtering options
  arguments:
  - name: '--rna_min_counts'
    type: integer
    description: Minimum number of counts captured per cell.
    min: 1
    example: 200
  - name: '--rna_max_counts'
    type: integer
    description: Maximum number of counts captured per cell.
    min: 1
    example: 5000000
  - name: '--rna_min_genes_per_cell'
    type: integer
    description: Minimum of non-zero values per cell.
    min: 1
    example: 200
  - name: '--rna_max_genes_per_cell'
    type: integer
    description: Maximum of non-zero values per cell.
    min: 1
    example: 1500000
  - name: '--rna_min_cells_per_gene'
    type: integer
    description: Minimum of non-zero values per gene.
    min: 1
    example: 3
  # Fractions are bounded to [0, 1] through min/max.
  - name: '--rna_min_fraction_mito'
    type: double
    description: Minimum fraction of UMIs that are mitochondrial.
    min: 0
    max: 1
    example: 0
  - name: '--rna_max_fraction_mito'
    type: double
    description: Maximum fraction of UMIs that are mitochondrial.
    min: 0
    max: 1
    example: 0.2
  # The ribosomal thresholds previously carried the mitochondrial wording.
  - name: '--rna_min_fraction_ribo'
    type: double
    description: Minimum fraction of UMIs that are ribosomal.
    min: 0
    max: 1
    example: 0
  - name: '--rna_max_fraction_ribo'
    type: double
    description: Maximum fraction of UMIs that are ribosomal.
    min: 0
    max: 1
    example: 0.2
# Per-cell and per-protein thresholds for the protein modality. None of these
# arguments declares a default; the values shown are examples only.
- name: Protein filtering options
  arguments:
  - name: '--prot_min_counts'
    type: integer
    description: Minimum number of counts per cell.
    min: 1
    example: 3
  # Upper bound on counts; the description previously said "Minimum".
  - name: '--prot_max_counts'
    type: integer
    description: Maximum number of counts per cell.
    min: 1
    example: 5000000
  - name: '--prot_min_proteins_per_cell'
    type: integer
    description: Minimum of non-zero values per cell.
    min: 1
    example: 200
  - name: '--prot_max_proteins_per_cell'
    type: integer
    description: Maximum of non-zero values per cell.
    min: 1
    example: 100000000
  - name: '--prot_min_cells_per_protein'
    type: integer
    description: Minimum of non-zero values per protein.
    min: 1
    example: 3
# Output slot and batch key for highly variable feature selection. Each
# argument also accepts a --filter_with_hvg_* alternative name.
- name: Highly variable features detection
  arguments:
  - name: '--highly_variable_features_var_output'
    alternatives:
    - '--filter_with_hvg_var_output'
    type: string
    description: 'In which .var slot to store a boolean array corresponding to the highly variable genes.'
    required: false
    default: filter_with_hvg
  - name: '--highly_variable_features_obs_batch_key'
    alternatives:
    - '--filter_with_hvg_obs_batch_key'
    type: string
    description: |
      If specified, highly-variable genes are selected within each batch separately and merged. This simple
      process avoids the selection of batch-specific genes and acts as a lightweight batch correction method.
    required: false
    default: sample_id
# Regex-based detection of mitochondrial and ribosomal genes, with optional
# .var / .obs outputs for each gene class.
- name: 'Mitochondrial & Ribosomal Gene Detection'
  arguments:
  - name: '--var_gene_names'
    type: string
    description: |
      .var column name to be used to detect mitochondrial/ribosomal genes instead of .var_names (default if not set).
      Gene names matching with the regex value from --mitochondrial_gene_regex or --ribosomal_gene_regex will be
      identified as mitochondrial or ribosomal genes, respectively.
    required: false
    example: "gene_symbol"
  - name: '--var_name_mitochondrial_genes'
    type: string
    description: |
      In which .var slot to store a boolean array corresponding to the mitochondrial genes.
    required: false
  - name: '--obs_name_mitochondrial_fraction'
    type: string
    description: |
      When specified, write the fraction of counts originating from mitochondrial genes
      (based on --mitochondrial_gene_regex) to an .obs column with the specified name.
      Requires --var_name_mitochondrial_genes.
    required: false
  # Default pattern: case-insensitive "mt-" prefix at the start of the name.
  - name: '--mitochondrial_gene_regex'
    type: string
    description: |
      Regex string that identifies mitochondrial genes from --var_gene_names.
      By default will detect human and mouse mitochondrial genes from a gene symbol.
    required: false
    default: "^[mM][tT]-"
  - name: '--var_name_ribosomal_genes'
    type: string
    description: |
      In which .var slot to store a boolean array corresponding to the ribosomal genes.
    required: false
  - name: '--obs_name_ribosomal_fraction'
    type: string
    description: |
      When specified, write the fraction of counts originating from ribosomal genes
      (based on --ribosomal_gene_regex) to an .obs column with the specified name.
      Requires --var_name_ribosomal_genes.
    required: false
  # Default pattern: optional "m", then "rp", then "l" or "s", in any case,
  # at the start of the name.
  - name: '--ribosomal_gene_regex'
    type: string
    description: |
      Regex string that identifies ribosomal genes from --var_gene_names.
      By default will detect human and mouse ribosomal genes from a gene symbol.
    required: false
    default: "^[Mm]?[Rr][Pp][LlSs]"
# Inputs for QC metric calculation. Both arguments accept multiple values
# separated by a comma.
- name: QC metrics calculation options
  arguments:
  - name: '--var_qc_metrics'
    type: string
    description: |
      Keys to select a boolean (containing only True or False) column from .var.
      For each cell, calculate the proportion of total values for genes which are labeled 'True',
      compared to the total sum of the values for all genes. Defaults to the combined values specified for
      --var_name_mitochondrial_genes and --highly_variable_features_var_output.
    # Canonical lowercase boolean, consistent with --top_n_vars below.
    multiple: true
    multiple_sep: ','
    required: false
    example: "ercc,highly_variable"
  - name: '--top_n_vars'
    type: integer
    description: |
      Number of top vars to be used to calculate cumulative proportions.
      If not specified, proportions are not calculated. `--top_n_vars 20,50` finds
      cumulative proportion to the 20th and 50th most expressed vars.
    multiple: true
    multiple_sep: ','
    required: false
    default: [50, 100, 200, 500]
# Single flag controlling whether existing PCA output slots may be overwritten.
- name: PCA options
  arguments:
  - name: '--pca_overwrite'
    type: boolean_true
    description: Allow overwriting slots for PCA output.
# Axis selection for the CLR transformation; optional, defaults to 0.
- name: CLR options
  arguments:
  - name: '--clr_axis'
    type: integer
    description: Axis to perform the CLR transformation on.
    required: false
    default: 0
# Optional scaling of the RNA modality, with separate output slots for the
# scaled layer and the embeddings derived from it.
- name: RNA Scaling options
  description: |
    Options for enabling scaling of the log-normalized data to unit variance and zero mean.
    The scaled data will be output to a different layer, and a representation with reduced dimensions
    will be created and stored in addition to the non-scaled data.
  arguments:
  - name: '--rna_enable_scaling'
    type: boolean_true
    description: Enable scaling for the RNA modality.
  - name: '--rna_scaling_output_layer'
    type: string
    description: Output layer where the scaled log-normalized data will be stored.
    default: scaled
  - name: '--rna_scaling_pca_obsm_output'
    type: string
    description: |
      Name of the .obsm key where the PCA representation of the log-normalized
      and scaled data is stored.
    default: scaled_pca
  - name: '--rna_scaling_pca_loadings_varm_output'
    type: string
    description: |
      Name of the .varm key where the PCA loadings of the log-normalized and scaled
      data is stored.
    default: scaled_pca_loadings
  - name: '--rna_scaling_pca_variance_uns_output'
    type: string
    description: |
      Name of the .uns key where the variance and variance ratio will be stored as a map.
      The map will contain two keys: variance and variance_ratio respectively.
    default: scaled_pca_variance
  # Uses a literal block scalar like the sibling descriptions above.
  - name: '--rna_scaling_umap_obsm_output'
    type: string
    description: |
      Name of the .obsm key where the UMAP representation of the log-normalized
      and scaled data is stored.
    default: scaled_umap
  - name: '--rna_scaling_max_value'
    type: double
    description: 'Clip (truncate) data to this value after scaling. If not specified, do not clip.'
    required: false
  # NOTE(review): type boolean_false together with the "If set, omit ..."
  # wording implies that passing this flag turns zero-centering off; confirm
  # against the Viash argument type documentation.
  - name: '--rna_scaling_zero_center'
    type: boolean_false
    description: 'If set, omit zero-centering variables, which allows sparse input to be handled efficiently.'
# The process_samples workflow from the openpipeline repository, referenced
# in this component under the alias spatial_sample_processing.
dependencies:
- name: workflows/multiomics/process_samples
  alias: spatial_sample_processing
  repository: openpipeline
# Workflow implementation: main.nf, entered through run_wf.
resources:
- type: nextflow_script
  path: main.nf
  entrypoint: run_wf
# Test workflow (test.nf, entered through test_wf) and the data file listed
# alongside it; the data entry declares no explicit type.
test_resources:
- type: nextflow_script
  path: test.nf
  entrypoint: test_wf
- path: /resources_test/xenium/xenium_tiny.h5mu
runners:
- type: nextflow