Build pipeline: openpipelines-bio.openpipeline-spatial.niche-compass-z8ftz
Source commit: 0c1677bb93
Source message: trigger ci
312 lines
11 KiB
YAML
312 lines
11 KiB
YAML
name: "spatial_process_samples"
|
|
namespace: "workflows/multiomics"
|
|
scope: "public"
|
|
description: "A pipeline to pre-process multiple spatial omics samples."
|
|
authors:
|
|
- __merge__: /src/authors/dries_schaumont.yaml
|
|
roles: [ author, maintainer ]
|
|
- __merge__: /src/authors/dorien_roosen.yaml
|
|
roles: [ contributor ]
|
|
- __merge__: /src/authors/weiwei_schultz.yaml
|
|
roles: [ contributor ]
|
|
|
|
argument_groups:
|
|
- name: Inputs
|
|
arguments:
|
|
- name: "--id"
|
|
required: true
|
|
type: string
|
|
description: ID of the sample.
|
|
example: foo
|
|
- name: "--input"
|
|
alternatives: [-i]
|
|
description: Path to the sample.
|
|
required: true
|
|
example: input.h5mu
|
|
type: file
|
|
- name: "--rna_layer"
|
|
type: string
|
|
description: "Input layer for the gene expression modality. If not specified, .X is used."
|
|
required: false
|
|
- name: "--prot_layer"
|
|
type: string
|
|
description: "Input layer for the antibody capture modality. If not specified, .X is used."
|
|
required: false
|
|
|
|
- name: "Outputs"
|
|
arguments:
|
|
- name: "--output"
|
|
type: file
|
|
required: true
|
|
direction: output
|
|
description: Destination path to the output.
|
|
example: output.h5mu
|
|
|
|
- name: "Sample ID options"
|
|
description: |
|
|
Options for adding the id to .obs on the MuData object. Having a sample
|
|
id present is a requirement of several components for this pipeline.
|
|
arguments:
|
|
- name: "--add_id_to_obs"
|
|
description: "Add the value passed with --id to .obs."
|
|
type: boolean
|
|
default: true
|
|
- name: --add_id_obs_output
|
|
description: |
|
|
.obs column to add the sample IDs to. Required and only used when
|
|
--add_id_to_obs is set to 'true'
|
|
type: string
|
|
default: "sample_id"
|
|
- name: "--add_id_make_observation_keys_unique"
|
|
type: boolean
|
|
description: |
|
|
Join the id to the .obs index (.obs_names).
|
|
Only used when --add_id_to_obs is set to 'true'.
|
|
default: true
|
|
|
|
- name: "RNA filtering options"
|
|
arguments:
|
|
- name: "--rna_min_counts"
|
|
example: 200
|
|
min: 1
|
|
type: integer
|
|
description: Minimum number of counts captured per cell.
|
|
- name: "--rna_max_counts"
|
|
example: 5000000
|
|
min: 1
|
|
type: integer
|
|
description: Maximum number of counts captured per cell.
|
|
- name: "--rna_min_genes_per_cell"
|
|
type: integer
|
|
min: 1
|
|
example: 200
|
|
description: Minimum of non-zero values per cell.
|
|
- name: "--rna_max_genes_per_cell"
|
|
example: 1500000
|
|
min: 1
|
|
type: integer
|
|
description: Maximum of non-zero values per cell.
|
|
- name: "--rna_min_cells_per_gene"
|
|
example: 3
|
|
min: 1
|
|
type: integer
|
|
description: Minimum of non-zero values per gene.
|
|
- name: "--rna_min_fraction_mito"
|
|
example: 0
|
|
min: 0
|
|
max: 1
|
|
type: double
|
|
description: Minimum fraction of UMIs that are mitochondrial.
|
|
- name: "--rna_max_fraction_mito"
|
|
type: double
|
|
min: 0
|
|
max: 1
|
|
example: 0.2
|
|
description: Maximum fraction of UMIs that are mitochondrial.
|
|
- name: "--rna_min_fraction_ribo"
|
|
example: 0
|
|
min: 0
|
|
max: 1
|
|
type: double
|
|
description: Minimum fraction of UMIs that are ribosomal.
|
|
- name: "--rna_max_fraction_ribo"
|
|
type: double
|
|
min: 0
|
|
max: 1
|
|
example: 0.2
|
|
description: Maximum fraction of UMIs that are ribosomal.
|
|
|
|
- name: "Protein filtering options"
|
|
arguments:
|
|
- name: "--prot_min_counts"
|
|
description: Minimum number of counts per cell.
|
|
type: integer
|
|
min: 1
|
|
example: 3
|
|
- name: "--prot_max_counts"
|
|
description: Maximum number of counts per cell.
|
|
type: integer
|
|
min: 1
|
|
example: 5000000
|
|
- name: "--prot_min_proteins_per_cell"
|
|
type: integer
|
|
min: 1
|
|
example: 200
|
|
description: Minimum of non-zero values per cell.
|
|
- name: "--prot_max_proteins_per_cell"
|
|
description: Maximum of non-zero values per cell.
|
|
type: integer
|
|
min: 1
|
|
example: 100000000
|
|
- name: "--prot_min_cells_per_protein"
|
|
example: 3
|
|
min: 1
|
|
type: integer
|
|
description: Minimum of non-zero values per protein.
|
|
|
|
- name: "Highly variable features detection"
|
|
arguments:
|
|
- name: "--highly_variable_features_var_output"
|
|
alternatives: ["--filter_with_hvg_var_output"]
|
|
required: false
|
|
type: string
|
|
default: "filter_with_hvg"
|
|
description: In which .var slot to store a boolean array corresponding to the highly variable genes.
|
|
- name: "--highly_variable_features_obs_batch_key"
|
|
alternatives: ["--filter_with_hvg_obs_batch_key"]
|
|
type: string
|
|
default: "sample_id"
|
|
required: false
|
|
description: |
|
|
If specified, highly-variable genes are selected within each batch separately and merged. This simple
|
|
process avoids the selection of batch-specific genes and acts as a lightweight batch correction method.
|
|
- name: "Mitochondrial & Ribosomal Gene Detection"
|
|
arguments:
|
|
- name: "--var_gene_names"
|
|
required: false
|
|
example: "gene_symbol"
|
|
type: string
|
|
description: |
|
|
.var column name to be used to detect mitochondrial/ribosomal genes instead of .var_names (default if not set).
|
|
Gene names matching with the regex value from --mitochondrial_gene_regex or --ribosomal_gene_regex will be
|
|
identified as mitochondrial or ribosomal genes, respectively.
|
|
- name: "--var_name_mitochondrial_genes"
|
|
type: string
|
|
required: false
|
|
description: |
|
|
In which .var slot to store a boolean array corresponding to the mitochondrial genes.
|
|
- name: "--obs_name_mitochondrial_fraction"
|
|
type: string
|
|
required: false
|
|
description: |
|
|
When specified, write the fraction of counts originating from mitochondrial genes
|
|
(based on --mitochondrial_gene_regex) to an .obs column with the specified name.
|
|
Requires --var_name_mitochondrial_genes.
|
|
- name: --mitochondrial_gene_regex
|
|
type: string
|
|
description: |
|
|
Regex string that identifies mitochondrial genes from --var_gene_names.
|
|
By default will detect human and mouse mitochondrial genes from a gene symbol.
|
|
required: false
|
|
default: "^[mM][tT]-"
|
|
- name: "--var_name_ribosomal_genes"
|
|
type: string
|
|
required: false
|
|
description: |
|
|
In which .var slot to store a boolean array corresponding to the ribosomal genes.
|
|
- name: "--obs_name_ribosomal_fraction"
|
|
type: string
|
|
required: false
|
|
description: |
|
|
When specified, write the fraction of counts originating from ribosomal genes
|
|
(based on --ribosomal_gene_regex) to an .obs column with the specified name.
|
|
Requires --var_name_ribosomal_genes.
|
|
- name: --ribosomal_gene_regex
|
|
type: string
|
|
description: |
|
|
Regex string that identifies ribosomal genes from --var_gene_names.
|
|
By default will detect human and mouse ribosomal genes from a gene symbol.
|
|
required: false
|
|
default: "^[Mm]?[Rr][Pp][LlSs]"
|
|
|
|
- name: "QC metrics calculation options"
|
|
arguments:
|
|
- name: "--var_qc_metrics"
|
|
description: |
|
|
Keys to select a boolean (containing only True or False) column from .var.
|
|
For each cell, calculate the proportion of total values for genes which are labeled 'True',
|
|
compared to the total sum of the values for all genes. Defaults to the combined values specified for
|
|
--var_name_mitochondrial_genes and --highly_variable_features_var_output.
|
|
type: string
|
|
multiple: True
|
|
multiple_sep: ','
|
|
required: false
|
|
example: "ercc,highly_variable"
|
|
- name: "--top_n_vars"
|
|
type: integer
|
|
description: |
|
|
Number of top vars to be used to calculate cumulative proportions.
|
|
If not specified, proportions are not calculated. `--top_n_vars 20,50` finds
|
|
cumulative proportion to the 20th and 50th most expressed vars.
|
|
multiple: true
|
|
multiple_sep: ','
|
|
required: false
|
|
default: [50, 100, 200, 500]
|
|
|
|
- name: "PCA options"
|
|
arguments:
|
|
- name: "--pca_overwrite"
|
|
type: boolean_true
|
|
description: "Allow overwriting slots for PCA output."
|
|
|
|
- name: "CLR options"
|
|
arguments:
|
|
- name: "--clr_axis"
|
|
type: integer
|
|
description: "Axis to perform the CLR transformation on."
|
|
default: 0
|
|
required: false
|
|
|
|
- name: "RNA Scaling options"
|
|
description: |
|
|
Options for enabling scaling of the log-normalized data to unit variance and zero mean.
|
|
The scaled data will be output to a different layer, and a representation with reduced dimensions
|
|
will be created and stored in addition to the non-scaled data.
|
|
arguments:
|
|
- name: "--rna_enable_scaling"
|
|
description: "Enable scaling for the RNA modality."
|
|
type: boolean_true
|
|
- name: "--rna_scaling_output_layer"
|
|
type: string
|
|
default: "scaled"
|
|
description: "Output layer where the scaled log-normalized data will be stored."
|
|
- name: "--rna_scaling_pca_obsm_output"
|
|
type: string
|
|
description: |
|
|
Name of the .obsm key where the PCA representation of the log-normalized
|
|
and scaled data is stored.
|
|
default: "scaled_pca"
|
|
- name: "--rna_scaling_pca_loadings_varm_output"
|
|
type: string
|
|
description: |
|
|
Name of the .varm key where the PCA loadings of the log-normalized and scaled
|
|
data is stored.
|
|
default: "scaled_pca_loadings"
|
|
- name: "--rna_scaling_pca_variance_uns_output"
|
|
type: string
|
|
description: |
|
|
Name of the .uns key where the variance and variance ratio will be stored as a map.
|
|
The map will contain two keys: variance and variance_ratio respectively.
|
|
default: "scaled_pca_variance"
|
|
- name: "--rna_scaling_umap_obsm_output"
|
|
type: string
|
|
description:
|
|
Name of the .obsm key where the UMAP representation of the log-normalized
|
|
and scaled data is stored.
|
|
default: "scaled_umap"
|
|
- name: "--rna_scaling_max_value"
|
|
description: "Clip (truncate) data to this value after scaling. If not specified, do not clip."
|
|
required: false
|
|
type: double
|
|
- name: "--rna_scaling_zero_center"
|
|
type: boolean_false
|
|
description: "If set, omit zero-centering variables, which allows sparse input to be handled efficiently."
|
|
|
|
dependencies:
|
|
- name: workflows/multiomics/process_samples
|
|
alias: spatial_sample_processing
|
|
repository: openpipeline
|
|
|
|
resources:
|
|
- type: nextflow_script
|
|
path: main.nf
|
|
entrypoint: run_wf
|
|
|
|
test_resources:
|
|
- type: nextflow_script
|
|
path: test.nf
|
|
entrypoint: test_wf
|
|
- path: /resources_test/xenium/xenium_tiny.h5mu
|
|
|
|
runners:
|
|
- type: nextflow |