---
# Viash component configuration for the `workflows/pre_processing` subworkflow.
#
# NOTE(review): the `build_info` section below suggests this file is a build
# artifact produced from src/workflows/pre_processing/config.vsh.yaml — confirm,
# and apply lasting changes to that source config rather than to this file.
name: "pre_processing"
namespace: "workflows"
version: "main"
argument_groups:
- name: "Inputs"
  arguments:
  - type: "string"
    name: "--id"
    description: "ID of the sample."
    info: null
    example:
    - "foo"
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--fastq_1"
    description: "Path to the sample (or read 1 of paired end sample)."
    info: null
    example:
    - "input.fastq.gz"
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--fastq_2"
    description: "Path to read 2 of the sample."
    info: null
    must_exist: false
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--strandedness"
    description: "Sample strand-specificity. Must be one of unstranded, forward, reverse\
      \ or auto"
    info: null
    default:
    - "auto"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--bbsplit_index"
    description: "BBsplit index"
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--bbsplit_fasta_list"
    description: "Path to comma-separated file containing a list of reference genomes\
      \ to filter reads against with BBSplit. To use BBSplit, \"--skip_bbsplit\" must\
      \ be explicitly set to \"false\". The file should contain 2 (comma separated)\
      \ columns - short name and full path to reference genome(s)"
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--ribo_database_manifest"
    description: "Text file containing paths to fasta files (one per line) that will\
      \ be used to create the database for SortMeRNA."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--transcript_fasta"
    description: "Path to FASTA transcriptome file."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--gtf"
    description: "Path to GTF annotation file."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--salmon_index"
    description: "Path to directory containing the Salmon index"
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--num_trimmed_reads"
    description: "Number of reads after trimming"
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Extra pipeline options"
  arguments:
  - type: "boolean"
    name: "--skip_qc"
    description: "Skip QC steps of the workflow."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "FastQC options"
  arguments:
  - type: "boolean"
    name: "--skip_fastqc"
    description: "Skip FastQC step."
    info: null
    default:
    - false
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "UMI-tools options"
  arguments:
  - type: "boolean"
    name: "--with_umi"
    description: "Enable UMI-based read deduplication."
    info: null
    default:
    - false
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean"
    name: "--skip_umi_extract"
    description: "Skip umi_tools extract step."
    info: null
    default:
    - false
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--umitools_extract_method"
    description: "UMI pattern to use."
    info: null
    default:
    - "string"
    required: false
    choices:
    - "string"
    - "regex"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--umitools_bc_pattern"
    description: "The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the\
      \ first 6 nucleotides of the read are from the UMI."
    info: null
    default:
    - ""
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--umitools_bc_pattern2"
    description: "The UMI barcode pattern to use if the UMI is located in read 2."
    info: null
    default:
    - ""
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--umi_discard_read"
    description: "After UMI barcode extraction discard either R1 or R2 by setting\
      \ this parameter to 1 or 2, respectively."
    info: null
    default:
    - 0
    required: false
    choices:
    - 0
    - 1
    - 2
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--umitools_umi_separator"
    description: "The character that separates the UMI in the read name. Most likely\
      \ a colon if you skipped the extraction with UMI-tools and used other software."
    info: null
    default:
    - "_"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--umitools_grouping_method"
    description: "Method to use to determine read groups by subsuming those with similar\
      \ UMIs. All methods start by identifying the reads with the same mapping position,\
      \ but treat similar yet nonidentical UMIs differently."
    info: null
    default:
    - "directional"
    required: false
    choices:
    - "unique"
    - "percentile"
    - "cluster"
    - "adjacency"
    - "directional"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean"
    name: "--save_umi_intermeds"
    description: "If this option is specified, intermediate FastQ and BAM files produced\
      \ by UMI-tools are also saved in the results directory."
    info: null
    default:
    - false
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Read trimming options"
  arguments:
  - type: "string"
    name: "--trimmer"
    description: "Specify the trimming tool to use."
    info: null
    default:
    - "trimgalore"
    required: false
    choices:
    - "trimgalore"
    - "fastp"
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--extra_trimgalore_args"
    description: "Extra arguments to pass to Trim Galore! command in addition to defaults\
      \ defined by the pipeline."
    info: null
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--min_trimmed_reads"
    description: "Minimum number of trimmed reads below which samples are removed\
      \ from further processing. Some downstream steps in the pipeline will fail if\
      \ this threshold is too low."
    info: null
    default:
    - 10000
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean"
    name: "--skip_trimming"
    description: "Skip the adapter trimming step."
    info: null
    default:
    - false
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "boolean"
    name: "--save_trimmed"
    description: "Save the trimmed FastQ files in the results directory."
    info: null
    default:
    - false
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Alignment options"
  arguments:
  - type: "string"
    name: "--extra_salmon_quant_args"
    description: "Extra arguments to pass to salmon quant command in addition to defaults\
      \ defined by the pipeline."
    info: null
    default:
    - ""
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Read filtering options"
  arguments:
  - type: "boolean_true"
    name: "--skip_bbsplit"
    description: "Skip BBSplit for removal of non-reference genome reads."
    info: null
    direction: "input"
  - type: "boolean_true"
    name: "--remove_ribo_rna"
    description: "Enable the removal of reads derived from ribosomal RNA using SortMeRNA."
    info: null
    direction: "input"
- name: "Other options"
  arguments:
  - type: "string"
    name: "--extra_fq_subsample_args"
    description: "Extra arguments to pass to fq subsample command in addition to defaults\
      \ defined by the pipeline."
    info: null
    default:
    - "--record-count 1000000 --seed 1"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Output"
  arguments:
  - type: "file"
    name: "--qc_output1"
    description: "Path to output directory"
    info: null
    default:
    - "$id.read_1.fastq"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--qc_output2"
    description: "Path to output directory"
    info: null
    default:
    - "$id.read_2.fastq"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--fastqc_html_1"
    description: "FastQC HTML report for read 1."
    info: null
    default:
    - "$id.read_1.fastqc.html"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--fastqc_html_2"
    description: "FastQC HTML report for read 2."
    info: null
    default:
    - "$id.read_2.fastqc.html"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--fastqc_zip_1"
    description: "FastQC report archive for read 1."
    info: null
    default:
    - "$id.read_1.fastqc.zip"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--fastqc_zip_2"
    description: "FastQC report archive for read 2."
    info: null
    default:
    - "$id.read_2.fastqc.zip"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  # NOTE(review): the six --trim_* outputs below carry no description. Judging
  # by their default file names they are presumably trimming reports and
  # post-trimming FastQC results — confirm and document in the source config.
  - type: "file"
    name: "--trim_log_1"
    info: null
    default:
    - "$id.read_1.trimming_report.txt"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--trim_log_2"
    info: null
    default:
    - "$id.read_2.trimming_report.txt"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--trim_html_1"
    info: null
    default:
    - "$id.read_1.trimmed_fastqc.html"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--trim_html_2"
    info: null
    default:
    - "$id.read_2.trimmed_fastqc.html"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--trim_zip_1"
    info: null
    default:
    - "$id.read_1.trimmed_fastqc.zip"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--trim_zip_2"
    info: null
    default:
    - "$id.read_2.trimmed_fastqc.zip"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--sortmerna_log"
    description: "Sortmerna log file."
    info: null
    default:
    - "$id.sortmerna.log"
    must_exist: false
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--salmon_quant_output"
    description: "Results from Salmon quant"
    info: null
    default:
    - "$id.salmon_quant_output"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--trim_json"
    description: "The fastp json format report file name"
    info: null
    default:
    - "$id.fastp_out.json"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--trim_html"
    description: "The fastp html format report file name"
    info: null
    default:
    - "$id.fastp_out.html"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--merged_out"
    description: "File name to store merged fastp output."
    info: null
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
resources:
- type: "nextflow_script"
  path: "main.nf"
  is_executable: true
  entrypoint: "run_wf"
description: "A subworkflow for the pre-processing stage of the nf-core/rnaseq pipeline.\n"
info: null
status: "enabled"
requirements:
  commands:
  - "ps"
dependencies:
- name: "fastqc"
  repository:
    type: "local"
- name: "umitools/umitools_extract"
  repository:
    type: "local"
- name: "umi_tools/umi_tools_extract"
  repository:
    type: "vsh"
    repo: "vsh/biobox"
    tag: "main"
- name: "trimgalore"
  repository:
    type: "local"
- name: "bbmap_bbsplit"
  repository:
    type: "local"
- name: "sortmerna"
  repository:
    type: "local"
- name: "fastp"
  repository:
    type: "vsh"
    repo: "vsh/biobox"
    tag: "main"
- name: "fq_subsample"
  repository:
    type: "local"
- name: "salmon/salmon_quant"
  repository:
    type: "vsh"
    repo: "vsh/biobox"
    tag: "main"
repositories:
- type: "vsh"
  name: "biobox"
  repo: "vsh/biobox"
  tag: "main"
# NOTE(review): this repo path has no "vsh/" prefix, unlike "vsh/biobox"
# above — confirm whether that is intentional.
- type: "vsh"
  name: "craftbox"
  repo: "craftbox"
  tag: "v0.1.0"
runners:
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    # Label name -> resource directive string. Memory labels come in decimal
    # (gb/tb) and binary (gib/tib) series, all expressed in bytes.
    labels:
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
engines:
- type: "native"
  id: "native"
build_info:
  config: "src/workflows/pre_processing/config.vsh.yaml"
  runner: "executable"
  engine: "native"
  output: "target/executable/workflows/pre_processing"
  executable: "target/executable/workflows/pre_processing/pre_processing"
  viash_version: "0.9.0"
  git_commit: "a4f2c7d5a1d0c27cc538c9b06ec4dec5c62f8d74"
  # SECURITY: this URL previously embedded an "x-access-token" credential in
  # its userinfo part. The credential has been removed here; treat the old
  # token as compromised (revoke it) and make sure the build environment
  # records the remote without credentials.
  git_remote: "https://github.com/viash-hub/rnaseq"
  dependencies:
  - "target/nextflow/fastqc"
  - "target/nextflow/umitools/umitools_extract"
  - "target/dependencies/vsh/vsh/biobox/main/nextflow/umi_tools/umi_tools_extract"
  - "target/nextflow/trimgalore"
  - "target/nextflow/bbmap_bbsplit"
  - "target/nextflow/sortmerna"
  - "target/dependencies/vsh/vsh/biobox/main/nextflow/fastp"
  - "target/nextflow/fq_subsample"
  - "target/dependencies/vsh/vsh/biobox/main/nextflow/salmon/salmon_quant"
package_config:
  name: "rnaseq"
  version: "main"
  info:
    test_resources:
    - path: "gs://viash-hub-test-data/rnaseq/v1"
      dest: "testData"
  repositories:
  - type: "vsh"
    name: "biobox"
    repo: "vsh/biobox"
    tag: "main"
  - type: "vsh"
    name: "craftbox"
    repo: "craftbox"
    tag: "v0.1.0"
  viash_version: "0.9.0"
  source: "src"
  target: "target"
  config_mods:
  - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
    \ := '$id'\n"
  - ".engines += { type: \"native\" }"
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  organization: "vsh"