# Configuration for the "spatial_process_samples" workflow: declares the
# workflow metadata, its command-line arguments (grouped by topic), the
# workflow it depends on, and the Nextflow entrypoints used to run and test it.
name: "spatial_process_samples"
namespace: "workflows/multiomics"
scope: "public"
description: "A pipeline to pre-process multiple spatial omics samples."
authors:
  - __merge__: /src/authors/dries_schaumont.yaml
    roles: [ author, maintainer ]
  - __merge__: /src/authors/dorien_roosen.yaml
    roles: [ contributor ]
  - __merge__: /src/authors/weiwei_schultz.yaml
    roles: [ contributor ]

argument_groups:
  - name: Inputs
    arguments:
      - name: "--id"
        required: true
        type: string
        description: ID of the sample.
        example: foo
      - name: "--input"
        alternatives: [-i]
        description: Path to the sample.
        required: true
        example: input.h5mu
        type: file
      - name: "--rna_layer"
        type: string
        description: "Input layer for the gene expression modality. If not specified, .X is used."
        required: false
      - name: "--prot_layer"
        type: string
        description: "Input layer for the antibody capture modality. If not specified, .X is used."
        required: false

  - name: "Outputs"
    arguments:
      - name: "--output"
        type: file
        required: true
        direction: output
        description: Destination path to the output.
        example: output.h5mu

  - name: "Sample ID options"
    description: |
      Options for adding the id to .obs on the MuData object.
      Having a sample id present is a requirement of several components for this pipeline.
    arguments:
      - name: "--add_id_to_obs"
        description: "Add the value passed with --id to .obs."
        type: boolean
        default: true
      - name: --add_id_obs_output
        description: |
          .obs column to add the sample IDs to.
          Required and only used when --add_id_to_obs is set to 'true'
        type: string
        default: "sample_id"
      - name: "--add_id_make_observation_keys_unique"
        type: boolean
        description: |
          Join the id to the .obs index (.obs_names).
          Only used when --add_id_to_obs is set to 'true'.
        default: true

  - name: "RNA filtering options"
    arguments:
      - name: "--rna_min_counts"
        example: 200
        min: 1
        type: integer
        description: Minimum number of counts captured per cell.
      - name: "--rna_max_counts"
        example: 5000000
        min: 1
        type: integer
        description: Maximum number of counts captured per cell.
      - name: "--rna_min_genes_per_cell"
        type: integer
        min: 1
        example: 200
        description: Minimum of non-zero values per cell.
      - name: "--rna_max_genes_per_cell"
        example: 1500000
        min: 1
        type: integer
        description: Maximum of non-zero values per cell.
      - name: "--rna_min_cells_per_gene"
        example: 3
        min: 1
        type: integer
        description: Minimum of non-zero values per gene.
      - name: "--rna_min_fraction_mito"
        example: 0
        min: 0
        max: 1
        type: double
        description: Minimum fraction of UMIs that are mitochondrial.
      - name: "--rna_max_fraction_mito"
        type: double
        min: 0
        max: 1
        example: 0.2
        description: Maximum fraction of UMIs that are mitochondrial.
      # The ribosomal thresholds mirror the mitochondrial ones above.
      - name: "--rna_min_fraction_ribo"
        example: 0
        min: 0
        max: 1
        type: double
        description: Minimum fraction of UMIs that are ribosomal.
      - name: "--rna_max_fraction_ribo"
        type: double
        min: 0
        max: 1
        example: 0.2
        description: Maximum fraction of UMIs that are ribosomal.

  - name: "Protein filtering options"
    arguments:
      - name: "--prot_min_counts"
        description: Minimum number of counts per cell.
        type: integer
        min: 1
        example: 3
      - name: "--prot_max_counts"
        description: Maximum number of counts per cell.
        type: integer
        min: 1
        example: 5000000
      - name: "--prot_min_proteins_per_cell"
        type: integer
        min: 1
        example: 200
        description: Minimum of non-zero values per cell.
      - name: "--prot_max_proteins_per_cell"
        description: Maximum of non-zero values per cell.
        type: integer
        min: 1
        example: 100000000
      - name: "--prot_min_cells_per_protein"
        example: 3
        min: 1
        type: integer
        description: Minimum of non-zero values per protein.

  - name: "Highly variable features detection"
    arguments:
      # The "--filter_with_hvg_*" alternatives are accepted as aliases.
      - name: "--highly_variable_features_var_output"
        alternatives: ["--filter_with_hvg_var_output"]
        required: false
        type: string
        default: "filter_with_hvg"
        description: In which .var slot to store a boolean array corresponding to the highly variable genes.
      - name: "--highly_variable_features_obs_batch_key"
        alternatives: ["--filter_with_hvg_obs_batch_key"]
        type: string
        default: "sample_id"
        required: false
        description: |
          If specified, highly-variable genes are selected within each batch separately and merged.
          This simple process avoids the selection of batch-specific genes and acts as a
          lightweight batch correction method.

  - name: "Mitochondrial & Ribosomal Gene Detection"
    arguments:
      - name: "--var_gene_names"
        required: false
        example: "gene_symbol"
        type: string
        description: |
          .var column name to be used to detect mitochondrial/ribosomal genes instead of .var_names (default if not set).
          Gene names matching with the regex value from --mitochondrial_gene_regex or --ribosomal_gene_regex
          will be identified as mitochondrial or ribosomal genes, respectively.
      - name: "--var_name_mitochondrial_genes"
        type: string
        required: false
        description: |
          In which .var slot to store a boolean array corresponding to the mitochondrial genes.
      - name: "--obs_name_mitochondrial_fraction"
        type: string
        required: false
        description: |
          When specified, write the fraction of counts originating from mitochondrial genes
          (based on --mitochondrial_gene_regex) to an .obs column with the specified name.
          Requires --var_name_mitochondrial_genes.
      - name: --mitochondrial_gene_regex
        type: string
        description: |
          Regex string that identifies mitochondrial genes from --var_gene_names.
          By default will detect human and mouse mitochondrial genes from a gene symbol.
        required: false
        default: "^[mM][tT]-"
      - name: "--var_name_ribosomal_genes"
        type: string
        required: false
        description: |
          In which .var slot to store a boolean array corresponding to the ribosomal genes.
      - name: "--obs_name_ribosomal_fraction"
        type: string
        required: false
        description: |
          When specified, write the fraction of counts originating from ribosomal genes
          (based on --ribosomal_gene_regex) to an .obs column with the specified name.
          Requires --var_name_ribosomal_genes.
      - name: --ribosomal_gene_regex
        type: string
        description: |
          Regex string that identifies ribosomal genes from --var_gene_names.
          By default will detect human and mouse ribosomal genes from a gene symbol.
        required: false
        default: "^[Mm]?[Rr][Pp][LlSs]"

  - name: "QC metrics calculation options"
    arguments:
      - name: "--var_qc_metrics"
        description: |
          Keys to select a boolean (containing only True or False) column from .var.
          For each cell, calculate the proportion of total values for genes which are labeled 'True',
          compared to the total sum of the values for all genes. Defaults to the combined values specified for
          --var_name_mitochondrial_genes and --highly_variable_features_var_output.
        type: string
        multiple: true
        multiple_sep: ','
        required: false
        example: "ercc,highly_variable"
      - name: "--top_n_vars"
        type: integer
        description: |
          Number of top vars to be used to calculate cumulative proportions.
          If not specified, proportions are not calculated. `--top_n_vars 20,50` finds
          cumulative proportion to the 20th and 50th most expressed vars.
        multiple: true
        multiple_sep: ','
        required: false
        default: [50, 100, 200, 500]

  - name: "PCA options"
    arguments:
      - name: "--pca_overwrite"
        type: boolean_true
        description: "Allow overwriting slots for PCA output."

  - name: "CLR options"
    arguments:
      - name: "--clr_axis"
        type: integer
        description: "Axis to perform the CLR transformation on."
        default: 0
        required: false

  - name: "RNA Scaling options"
    description: |
      Options for enabling scaling of the log-normalized data to unit variance and zero mean.
      The scaled data will be output to a different layer and a representation with reduced dimensions
      will be created and stored in addition to the non-scaled data.
    arguments:
      - name: "--rna_enable_scaling"
        description: "Enable scaling for the RNA modality."
        type: boolean_true
      - name: "--rna_scaling_output_layer"
        type: string
        default: "scaled"
        description: "Output layer where the scaled log-normalized data will be stored."
      - name: "--rna_scaling_pca_obsm_output"
        type: string
        description: |
          Name of the .obsm key where the PCA representation of the log-normalized
          and scaled data is stored.
        default: "scaled_pca"
      - name: "--rna_scaling_pca_loadings_varm_output"
        type: string
        description: |
          Name of the .varm key where the PCA loadings of the log-normalized and scaled
          data is stored.
        default: "scaled_pca_loadings"
      - name: "--rna_scaling_pca_variance_uns_output"
        type: string
        description: |
          Name of the .uns key where the variance and variance ratio will be stored as a map.
          The map will contain two keys: variance and variance_ratio respectively.
        default: "scaled_pca_variance"
      - name: "--rna_scaling_umap_obsm_output"
        type: string
        description: Name of the .obsm key where the UMAP representation of the log-normalized and scaled data is stored.
        default: "scaled_umap"
      - name: "--rna_scaling_max_value"
        description: "Clip (truncate) data to this value after scaling. If not specified, do not clip."
        required: false
        type: double
      # Declared as boolean_false: the description states that setting the
      # flag omits zero-centering.
      - name: "--rna_scaling_zero_center"
        type: boolean_false
        description: If set, omit zero-centering variables, which allows to handle sparse input efficiently.

# Depends on the process_samples workflow, referenced under a local alias.
dependencies:
  - name: workflows/multiomics/process_samples
    alias: spatial_sample_processing
    repository: openpipeline

resources:
  - type: nextflow_script
    path: main.nf
    entrypoint: run_wf

test_resources:
  - type: nextflow_script
    path: test.nf
    entrypoint: test_wf
  - path: /resources_test/xenium/xenium_tiny.h5mu

runners:
  - type: nextflow