Build pipeline: openpipelines-bio.openpipeline-spatial.build-main-4vs7c
Source commit: 6f308e6a3c
Source message: deploy: 1caa96d9de9a6decd32f44c56ea33f7b9aff9791
421 lines
12 KiB
YAML
421 lines
12 KiB
YAML
name: "spatial_qc"
|
|
namespace: "workflows/qc"
|
|
version: "build_main"
|
|
authors:
|
|
- name: "Dries Schaumont"
|
|
roles:
|
|
- "author"
|
|
- "maintainer"
|
|
info:
|
|
role: "Core Team Member"
|
|
links:
|
|
email: "dries@data-intuitive.com"
|
|
github: "DriesSchaumont"
|
|
orcid: "0000-0002-4389-0440"
|
|
linkedin: "dries-schaumont"
|
|
organizations:
|
|
- name: "Data Intuitive"
|
|
href: "https://www.data-intuitive.com"
|
|
role: "Data Scientist"
|
|
- name: "Dorien Roosen"
|
|
roles:
|
|
- "contributor"
|
|
info:
|
|
role: "Core Team Member"
|
|
links:
|
|
email: "dorien@data-intuitive.com"
|
|
github: "dorien-er"
|
|
linkedin: "dorien-roosen"
|
|
organizations:
|
|
- name: "Data Intuitive"
|
|
href: "https://www.data-intuitive.com"
|
|
role: "Data Scientist"
|
|
- name: "Weiwei Schultz"
|
|
roles:
|
|
- "contributor"
|
|
info:
|
|
role: "Contributor"
|
|
organizations:
|
|
- name: "Janssen R&D US"
|
|
role: "Associate Director Data Sciences"
|
|
argument_groups:
|
|
- name: "Inputs"
|
|
arguments:
|
|
- type: "string"
|
|
name: "--id"
|
|
description: "ID of the sample."
|
|
info: null
|
|
example:
|
|
- "foo"
|
|
required: true
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--input"
|
|
alternatives:
|
|
- "-i"
|
|
description: "Path to the sample."
|
|
info: null
|
|
example:
|
|
- "input.h5mu"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: true
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--modality"
|
|
description: "Which modality to process."
|
|
info: null
|
|
default:
|
|
- "rna"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--layer"
|
|
description: "Use specified layer for calculation of qc metrics. If not specified,\
|
|
\ adata.X is used."
|
|
info: null
|
|
example:
|
|
- "raw_counts"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "Mitochondrial & Ribosomal Gene Detection"
|
|
arguments:
|
|
- type: "string"
|
|
name: "--var_gene_names"
|
|
description: ".var column name to be used to detect mitochondrial/ribosomal genes\
|
|
\ instead of .var_names (default if not set).\nGene names matching with the\
|
|
\ regex value from --mitochondrial_gene_regex or --ribosomal_gene_regex will\
|
|
\ be \nidentified as mitochondrial or ribosomal genes, respectively.\n"
|
|
info: null
|
|
example:
|
|
- "gene_symbol"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--var_name_mitochondrial_genes"
|
|
description: "In which .var slot to store a boolean array corresponding to the mitochondrial\
|
|
\ genes.\n"
|
|
info: null
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--obs_name_mitochondrial_fraction"
|
|
description: ".obs slot to store the fraction of reads found to be mitochondrial.\
|
|
\ Defaults to 'fraction_' suffixed by the value of --var_name_mitochondrial_genes\n"
|
|
info: null
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--mitochondrial_gene_regex"
|
|
description: "Regex string that identifies mitochondrial genes from --var_gene_names.\n\
|
|
By default will detect human and mouse mitochondrial genes from a gene symbol.\n"
|
|
info: null
|
|
default:
|
|
- "^[mM][tT]-"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--var_name_ribosomal_genes"
|
|
description: "In which .var slot to store a boolean array corresponding to the ribosomal\
|
|
\ genes.\n"
|
|
info: null
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--obs_name_ribosomal_fraction"
|
|
description: "When specified, write the fraction of counts originating from ribosomal\
|
|
\ genes \n(based on --ribosomal_gene_regex) to an .obs column with the specified\
|
|
\ name.\nRequires --var_name_ribosomal_genes.\n"
|
|
info: null
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--ribosomal_gene_regex"
|
|
description: "Regex string that identifies ribosomal genes from --var_gene_names.\n\
|
|
By default will detect human and mouse ribosomal genes from a gene symbol.\n"
|
|
info: null
|
|
default:
|
|
- "^[Mm]?[Rr][Pp][LlSs]"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "QC metrics calculation options"
|
|
arguments:
|
|
- type: "string"
|
|
name: "--var_qc_metrics"
|
|
description: "Keys to select a boolean (containing only True or False) column\
|
|
\ from .var.\nFor each cell, calculate the proportion of total values for genes\
|
|
\ which are labeled 'True', \ncompared to the total sum of the values for all\
|
|
\ genes. Defaults to the value from\n--var_name_mitochondrial_genes.\n"
|
|
info: null
|
|
example:
|
|
- "ercc,highly_variable"
|
|
required: false
|
|
direction: "input"
|
|
multiple: true
|
|
multiple_sep: ","
|
|
- type: "integer"
|
|
name: "--top_n_vars"
|
|
description: "Number of top vars to be used to calculate cumulative proportions.\n\
|
|
If not specified, proportions are not calculated. `--top_n_vars 20,50` finds\n\
|
|
cumulative proportion to the 20th and 50th most expressed vars.\n"
|
|
info: null
|
|
default:
|
|
- 50
|
|
- 100
|
|
- 200
|
|
- 500
|
|
required: false
|
|
direction: "input"
|
|
multiple: true
|
|
multiple_sep: ","
|
|
- type: "string"
|
|
name: "--output_obs_num_nonzero_vars"
|
|
description: "Name of column in .obs describing, for each observation, the number\
|
|
\ of stored values\n(including explicit zeroes). In other words, the name of\
|
|
\ the column that counts\nfor each row the number of columns that contain data.\n"
|
|
info: null
|
|
default:
|
|
- "num_nonzero_vars"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--output_obs_total_counts_vars"
|
|
description: "Name of the column for .obs describing, for each observation (row),\n\
|
|
the sum of the stored values in the columns.\n"
|
|
info: null
|
|
default:
|
|
- "total_counts"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--output_var_num_nonzero_obs"
|
|
description: "Name of column describing, for each feature, the number of stored\
|
|
\ values\n(including explicit zeroes). In other words, the name of the column\
|
|
\ that counts\nfor each column the number of rows that contain data.\n"
|
|
info: null
|
|
default:
|
|
- "num_nonzero_obs"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--output_var_total_counts_obs"
|
|
description: "Name of the column in .var describing, for each feature (column),\n\
|
|
the sum of the stored values in the rows.\n"
|
|
info: null
|
|
default:
|
|
- "total_counts"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--output_var_obs_mean"
|
|
description: "Name of the column in .var providing, for each feature, the mean of\
|
|
\ the values across all observations.\n"
|
|
info: null
|
|
default:
|
|
- "obs_mean"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--output_var_pct_dropout"
|
|
description: "Name of the column in .var providing for each feature the percentage\
|
|
\ of\nobservations the feature does not appear on (i.e. is missing). Same as\
|
|
\ `--output_var_num_nonzero_obs`\nbut percentage based.\n"
|
|
info: null
|
|
default:
|
|
- "pct_dropout"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "Outputs"
|
|
arguments:
|
|
- type: "file"
|
|
name: "--output"
|
|
description: "Destination path to the output."
|
|
info: null
|
|
example:
|
|
- "output.h5mu"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: true
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
resources:
|
|
- type: "nextflow_script"
|
|
path: "main.nf"
|
|
is_executable: true
|
|
entrypoint: "run_wf"
|
|
- type: "file"
|
|
path: "nextflow_labels.config"
|
|
dest: "nextflow_labels.config"
|
|
description: "A pipeline to add basic qc statistics to a MuData containing spatial\
|
|
\ data."
|
|
test_resources:
|
|
- type: "nextflow_script"
|
|
path: "test.nf"
|
|
is_executable: true
|
|
entrypoint: "test_wf"
|
|
- type: "file"
|
|
path: "xenium_tiny.h5mu"
|
|
info:
|
|
test_dependencies:
|
|
- name: "qc_test"
|
|
namespace: "test_workflows/qc"
|
|
status: "enabled"
|
|
scope:
|
|
image: "public"
|
|
target: "public"
|
|
dependencies:
|
|
- name: "workflows/qc/qc"
|
|
alias: "spatial_qc_workflow"
|
|
repository:
|
|
type: "github"
|
|
repo: "openpipelines-bio/openpipeline"
|
|
tag: "2.1.2"
|
|
repositories:
|
|
- type: "github"
|
|
name: "openpipeline"
|
|
repo: "openpipelines-bio/openpipeline"
|
|
tag: "2.1.2"
|
|
links:
|
|
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
|
docker_registry: "ghcr.io"
|
|
runners:
|
|
- type: "nextflow"
|
|
id: "nextflow"
|
|
directives:
|
|
tag: "$id"
|
|
auto:
|
|
simplifyInput: true
|
|
simplifyOutput: false
|
|
transcript: false
|
|
publish: false
|
|
config:
|
|
labels:
|
|
mem1gb: "memory = 1000000000.B"
|
|
mem2gb: "memory = 2000000000.B"
|
|
mem5gb: "memory = 5000000000.B"
|
|
mem10gb: "memory = 10000000000.B"
|
|
mem20gb: "memory = 20000000000.B"
|
|
mem50gb: "memory = 50000000000.B"
|
|
mem100gb: "memory = 100000000000.B"
|
|
mem200gb: "memory = 200000000000.B"
|
|
mem500gb: "memory = 500000000000.B"
|
|
mem1tb: "memory = 1000000000000.B"
|
|
mem2tb: "memory = 2000000000000.B"
|
|
mem5tb: "memory = 5000000000000.B"
|
|
mem10tb: "memory = 10000000000000.B"
|
|
mem20tb: "memory = 20000000000000.B"
|
|
mem50tb: "memory = 50000000000000.B"
|
|
mem100tb: "memory = 100000000000000.B"
|
|
mem200tb: "memory = 200000000000000.B"
|
|
mem500tb: "memory = 500000000000000.B"
|
|
mem1gib: "memory = 1073741824.B"
|
|
mem2gib: "memory = 2147483648.B"
|
|
mem4gib: "memory = 4294967296.B"
|
|
mem8gib: "memory = 8589934592.B"
|
|
mem16gib: "memory = 17179869184.B"
|
|
mem32gib: "memory = 34359738368.B"
|
|
mem64gib: "memory = 68719476736.B"
|
|
mem128gib: "memory = 137438953472.B"
|
|
mem256gib: "memory = 274877906944.B"
|
|
mem512gib: "memory = 549755813888.B"
|
|
mem1tib: "memory = 1099511627776.B"
|
|
mem2tib: "memory = 2199023255552.B"
|
|
mem4tib: "memory = 4398046511104.B"
|
|
mem8tib: "memory = 8796093022208.B"
|
|
mem16tib: "memory = 17592186044416.B"
|
|
mem32tib: "memory = 35184372088832.B"
|
|
mem64tib: "memory = 70368744177664.B"
|
|
mem128tib: "memory = 140737488355328.B"
|
|
mem256tib: "memory = 281474976710656.B"
|
|
mem512tib: "memory = 562949953421312.B"
|
|
cpu1: "cpus = 1"
|
|
cpu2: "cpus = 2"
|
|
cpu5: "cpus = 5"
|
|
cpu10: "cpus = 10"
|
|
cpu20: "cpus = 20"
|
|
cpu50: "cpus = 50"
|
|
cpu100: "cpus = 100"
|
|
cpu200: "cpus = 200"
|
|
cpu500: "cpus = 500"
|
|
cpu1000: "cpus = 1000"
|
|
script:
|
|
- "includeConfig(\"nextflow_labels.config\")"
|
|
debug: false
|
|
container: "docker"
|
|
engines:
|
|
- type: "native"
|
|
id: "native"
|
|
build_info:
|
|
config: "src/workflows/qc/spatial_qc/config.vsh.yaml"
|
|
runner: "nextflow"
|
|
engine: "native"
|
|
output: "target/nextflow/workflows/qc/spatial_qc"
|
|
executable: "target/nextflow/workflows/qc/spatial_qc/main.nf"
|
|
viash_version: "0.9.4"
|
|
git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b"
|
|
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
|
dependencies:
|
|
- "target/dependencies/github/openpipelines-bio/openpipeline/2.1.2/nextflow/workflows/qc/qc"
|
|
package_config:
|
|
name: "openpipeline_spatial"
|
|
version: "build_main"
|
|
info:
|
|
test_resources:
|
|
- type: "s3"
|
|
path: "s3://openpipelines-bio/openpipeline_spatial/resources_test"
|
|
dest: "resources_test"
|
|
repositories:
|
|
- type: "github"
|
|
name: "openpipeline"
|
|
repo: "openpipelines-bio/openpipeline"
|
|
tag: "2.1.2"
|
|
viash_version: "0.9.4"
|
|
source: "src"
|
|
target: "target"
|
|
config_mods:
|
|
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
|
|
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
|
|
)'"
|
|
- ".engines += { type: \"native\" }"
|
|
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
|
- ".engines[.type == 'docker'].target_tag := 'build_main'"
|
|
organization: "vsh"
|
|
links:
|
|
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
|
docker_registry: "ghcr.io"
|