From 9ecdb612f60d919a222471c6d55681238e1fd9b2 Mon Sep 17 00:00:00 2001 From: CI Date: Fri, 13 Jun 2025 09:16:18 +0000 Subject: [PATCH] Build branch prepare_reads with version prepare_reads (2bd8e1b) Build pipeline: viash-hub.rnaseq.prepare-reads-9nvqs Source commit: https://github.com/viash-hub/rnaseq/commit/2bd8e1becf04860c3cf718f765e69a1511ceb7c6 Source message: update mermaid --- README.md | 28 +++++--- README.qmd | 28 +++++--- src/prepare_reads/config.vsh.yaml | 15 +++- src/prepare_reads/main.nf | 33 ++++++++- src/prepare_reads/test.sh | 11 +++ .../nextflow/prepare_genome/.config.vsh.yaml | 2 +- target/nextflow/prepare_genome/main.nf | 2 +- .../nextflow/prepare_reads/.config.vsh.yaml | 29 +++++--- target/nextflow/prepare_reads/main.nf | 72 +++++++++++++++---- .../prepare_reads/nextflow_schema.json | 11 +-- 10 files changed, 179 insertions(+), 52 deletions(-) create mode 100755 src/prepare_reads/test.sh diff --git a/README.md b/README.md index 4387db0..ac227d5 100644 --- a/README.md +++ b/README.md @@ -121,23 +121,29 @@ flowchart TB flowchart TB subgraph inputs[Inputs] - input_r1[Input R1 FastQ] - input_r2[Input R2 FastQ] - sample_metadata[Sample metadata] + input_r1[R1 reads] + input_r2[R2 reads] + input_strandedness[Strandedness] end - input_r1 --> cat_fastq_r1 --> trim_r1 --> infer_strandedness_r1 --> processed_r1 - input_r2 --> cat_fastq_r2 --> trim_r2 --> infer_strandedness_r2 --> processed_r2 + input_r1 --> concat_text_r1[/concat_text/]:::comp + input_r2 --> concat_text_r2[/concat_text/]:::comp + concat_text_r1 & concat_text_r2 --> lint_fastq1[/lint_fastq/]:::comp --> trim_reads[/trim_reads/]:::comp --> lint_fastq2[/lint_fastq/]:::comp --> remove_genome_contaminants[/remove_genome_contaminants/]:::comp --> lint_fastq3[/lint_fastq/]:::comp --> remove_rRNA[/remove_rRNA/]:::comp --> lint_fastq4[/lint_fastq/]:::comp --> infer_strandedness[/infer_strandedness/]:::comp --> processed_r1 & processed_r2 & output_strandedness - processed_r1 & processed_r2 --> fastqc --> prepare_reads_qc_data + input_strandedness --> infer_strandedness + + + concat_text_r1 & concat_text_r2 --> fastqc_raw[/fastqc_raw/]:::comp --> fastqc_raw_multiqc + trim_reads --> fastqc_trimmed[/fastqc_trimmed/]:::comp --> fastqc_trimmed_multiqc + infer_strandedness --> strandedness_multiqc + fastqc_raw_multiqc & fastqc_trimmed_multiqc & strandedness_multiqc --> prepare_reads_multiqc - sample_metadata --> processed_metadata subgraph outputs[Outputs] - processed_r1 - processed_r2 - processed_metadata - prepare_reads_qc_data + processed_r1[R1 reads] + processed_r2[R2 reads] + output_strandedness[Strandedness] + prepare_reads_multiqc[MultiQC files] end classDef info stroke-dasharray: 4 4 diff --git a/README.qmd b/README.qmd index ffc7765..0b3b235 100644 --- a/README.qmd +++ b/README.qmd @@ -127,23 +127,29 @@ flowchart TB flowchart TB subgraph inputs[Inputs] - input_r1[Input R1 FastQ] - input_r2[Input R2 FastQ] - sample_metadata[Sample metadata] + input_r1[R1 reads] + input_r2[R2 reads] + input_strandedness[Strandedness] end - input_r1 --> cat_fastq_r1 --> trim_r1 --> infer_strandedness_r1 --> processed_r1 - input_r2 --> cat_fastq_r2 --> trim_r2 --> infer_strandedness_r2 --> processed_r2 + input_r1 --> concat_text_r1[/concat_text/]:::comp + input_r2 --> concat_text_r2[/concat_text/]:::comp + concat_text_r1 & concat_text_r2 --> lint_fastq1[/lint_fastq/]:::comp --> trim_reads[/trim_reads/]:::comp --> lint_fastq2[/lint_fastq/]:::comp --> remove_genome_contaminants[/remove_genome_contaminants/]:::comp --> lint_fastq3[/lint_fastq/]:::comp --> remove_rRNA[/remove_rRNA/]:::comp --> lint_fastq4[/lint_fastq/]:::comp --> infer_strandedness[/infer_strandedness/]:::comp --> processed_r1 & processed_r2 & output_strandedness - processed_r1 & processed_r2 --> fastqc --> prepare_reads_qc_data + input_strandedness --> infer_strandedness + + + concat_text_r1 & concat_text_r2 --> fastqc_raw[/fastqc_raw/]:::comp --> fastqc_raw_multiqc + trim_reads --> fastqc_trimmed[/fastqc_trimmed/]:::comp --> fastqc_trimmed_multiqc + infer_strandedness --> strandedness_multiqc + fastqc_raw_multiqc & fastqc_trimmed_multiqc & strandedness_multiqc --> prepare_reads_multiqc - sample_metadata --> processed_metadata subgraph outputs[Outputs] - processed_r1 - processed_r2 - processed_metadata - prepare_reads_qc_data + processed_r1[R1 reads] + processed_r2[R2 reads] + output_strandedness[Strandedness] + prepare_reads_multiqc[MultiQC files] end classDef info stroke-dasharray: 4 4 diff --git a/src/prepare_reads/config.vsh.yaml b/src/prepare_reads/config.vsh.yaml index 5a2ee1d..0bc1404 100644 --- a/src/prepare_reads/config.vsh.yaml +++ b/src/prepare_reads/config.vsh.yaml @@ -11,24 +11,35 @@ argument_groups: type: file example: "reads_R2.fastq.gz" multiple: true + required: false + - name: --input_strandedness + type: string + example: "auto" + required: false - name: Outputs arguments: - name: --output_r1 type: file direction: output example: "fastq.gz" + required: true - name: --output_r2 type: file direction: output example: "fastq.gz" + required: false - name: --output_strandedness - type: file + type: string direction: output - example: strandedness.yaml + required: true dependencies: - name: concat_text repository: craftbox + alias: concat_r1 + - name: concat_text + repository: craftbox + alias: concat_r2 resources: - type: nextflow_script diff --git a/src/prepare_reads/main.nf b/src/prepare_reads/main.nf index 42eeb74..1dcdae4 100644 --- a/src/prepare_reads/main.nf +++ b/src/prepare_reads/main.nf @@ -4,7 +4,7 @@ workflow run_wf { main: output_ch = input_ch - | concat_text.run( + | concat_r1.run( fromState: [ input: "input_r1" ], @@ -13,7 +13,7 @@ workflow run_wf { ] ) - | concat_text.run( + | concat_r2.run( fromState: [ input: "input_r2" ], @@ -21,6 +21,35 @@ workflow run_wf { processed_r2: "output" ] ) + + // TODO: add fq linter + + // TODO: run fastqc on raw reads + + // TODO: add fq trimmer (trimgalore or fastp) + + // TODO: run fastqc on trimmed reads + // TODO: lint again? + + // TODO: remove genome contaminant reads (bbmap_bbsplit) + // TODO: lint again? + + // TODO: remove ribosomal RNA reads (sortmerna) + // TODO: lint again? + + // TODO: infer strandedness (if not provided) + | map { id, state -> + def newState = state + ["strandedness": "forward"] + [id, newState] + } + + | setState( + [ + output_r1: "processed_r1", + output_r2: "processed_r2", + output_strandedness: "strandedness" + ] + ) emit: output_ch diff --git a/src/prepare_reads/test.sh b/src/prepare_reads/test.sh new file mode 100755 index 0000000..fdb8814 --- /dev/null +++ b/src/prepare_reads/test.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +export NXF_VER=24.04.5 + +viash ns build + +nextflow run . \ + -main-script target/nextflow/prepare_reads/main.nf \ + --input_r1 resources_test/minimal_test/input_fastq/SRR6357070_1.fastq.gz \ + --input_r2 resources_test/minimal_test/input_fastq/SRR6357070_2.fastq.gz \ + --publish_dir test_results/test_prepare_reads diff --git a/target/nextflow/prepare_genome/.config.vsh.yaml b/target/nextflow/prepare_genome/.config.vsh.yaml index bb1fc18..b65cec3 100644 --- a/target/nextflow/prepare_genome/.config.vsh.yaml +++ b/target/nextflow/prepare_genome/.config.vsh.yaml @@ -161,7 +161,7 @@ build_info: output: "target/nextflow/prepare_genome" executable: "target/nextflow/prepare_genome/main.nf" viash_version: "0.9.4" - git_commit: "4bc267b511202938a752ce09237d0e9808f23639" + git_commit: "2bd8e1becf04860c3cf718f765e69a1511ceb7c6" git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/toolbox/v0.1.1/nextflow/bgzip" diff --git a/target/nextflow/prepare_genome/main.nf b/target/nextflow/prepare_genome/main.nf index e800d82..1762fe3 100644 --- a/target/nextflow/prepare_genome/main.nf +++ b/target/nextflow/prepare_genome/main.nf @@ -3231,7 +3231,7 @@ meta = [ "engine" : "native", "output" : "target/nextflow/prepare_genome", "viash_version" : "0.9.4", - "git_commit" : "4bc267b511202938a752ce09237d0e9808f23639", + "git_commit" : "2bd8e1becf04860c3cf718f765e69a1511ceb7c6", "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { diff --git a/target/nextflow/prepare_reads/.config.vsh.yaml b/target/nextflow/prepare_reads/.config.vsh.yaml index 3621b8e..53c2750 100644 --- a/target/nextflow/prepare_reads/.config.vsh.yaml +++ b/target/nextflow/prepare_reads/.config.vsh.yaml @@ -25,6 +25,15 @@ argument_groups: direction: "input" multiple: true multiple_sep: ";" + - type: "string" + name: "--input_strandedness" + info: null + example: + - "auto" + required: false + direction: "input" + multiple: false + multiple_sep: ";" - name: "Outputs" arguments: - type: "file" @@ -34,7 +43,7 @@ argument_groups: - "fastq.gz" must_exist: true create_parent: true - required: false + required: true direction: "output" multiple: false multiple_sep: ";" @@ -49,14 +58,10 @@ argument_groups: direction: "output" multiple: false multiple_sep: ";" - - type: "file" + - type: "string" name: "--output_strandedness" info: null - example: - - "strandedness.yaml" - must_exist: true - create_parent: true - required: false + required: true direction: "output" multiple: false multiple_sep: ";" @@ -72,6 +77,13 @@ scope: target: "public" dependencies: - name: "concat_text" + alias: "concat_r1" + repository: + type: "vsh" + repo: "craftbox" + tag: "v0.2.0" +- name: "concat_text" + alias: "concat_r2" repository: type: "vsh" repo: "craftbox" @@ -161,10 +173,11 @@ build_info: output: "target/nextflow/prepare_reads" executable: "target/nextflow/prepare_reads/main.nf" viash_version: "0.9.4" - git_commit: "4bc267b511202938a752ce09237d0e9808f23639" + git_commit: "2bd8e1becf04860c3cf718f765e69a1511ceb7c6" git_remote: "https://github.com/viash-hub/rnaseq" dependencies: - "target/dependencies/vsh/vsh/craftbox/v0.2.0/nextflow/concat_text" + - "target/dependencies/vsh/vsh/craftbox/v0.2.0/nextflow/concat_text" package_config: name: "rnaseq" version: "prepare_reads" diff --git a/target/nextflow/prepare_reads/main.nf b/target/nextflow/prepare_reads/main.nf index 6eaa554..c81a46c 100644 --- a/target/nextflow/prepare_reads/main.nf +++ b/target/nextflow/prepare_reads/main.nf @@ -3061,6 +3061,17 @@ meta = [ "direction" : "input", "multiple" : true, "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--input_strandedness", + "example" : [ + "auto" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" } ] }, @@ -3075,7 +3086,7 @@ meta = [ ], "must_exist" : true, "create_parent" : true, - "required" : false, + "required" : true, "direction" : "output", "multiple" : false, "multiple_sep" : ";" @@ -3094,14 +3105,9 @@ meta = [ "multiple_sep" : ";" }, { - "type" : "file", + "type" : "string", "name" : "--output_strandedness", - "example" : [ - "strandedness.yaml" - ], - "must_exist" : true, - "create_parent" : true, - "required" : false, + "required" : true, "direction" : "output", "multiple" : false, "multiple_sep" : ";" @@ -3125,6 +3131,16 @@ meta = [ "dependencies" : [ { "name" : "concat_text", + "alias" : "concat_r1", + "repository" : { + "type" : "vsh", + "repo" : "craftbox", + "tag" : "v0.2.0" + } + }, + { + "name" : "concat_text", + "alias" : "concat_r2", "repository" : { "type" : "vsh", "repo" : "craftbox", @@ -3233,7 +3249,7 @@ meta = [ "engine" : "native", "output" : "target/nextflow/prepare_reads", "viash_version" : "0.9.4", - "git_commit" : "4bc267b511202938a752ce09237d0e9808f23639", + "git_commit" : "2bd8e1becf04860c3cf718f765e69a1511ceb7c6", "git_remote" : "https://github.com/viash-hub/rnaseq" }, "package_config" : { @@ -3282,7 +3298,10 @@ meta = [ // resolve dependencies dependencies (if any) meta["root_dir"] = getRootDir() -include { concat_text } from "${meta.root_dir}/dependencies/vsh/vsh/craftbox/v0.2.0/nextflow/concat_text/main.nf" +include { concat_text as concat_r1_viashalias } from "${meta.root_dir}/dependencies/vsh/vsh/craftbox/v0.2.0/nextflow/concat_text/main.nf" +concat_r1 = concat_r1_viashalias.run(key: "concat_r1") +include { concat_text as concat_r2_viashalias } from "${meta.root_dir}/dependencies/vsh/vsh/craftbox/v0.2.0/nextflow/concat_text/main.nf" +concat_r2 = concat_r2_viashalias.run(key: "concat_r2") // inner workflow // user-provided Nextflow code @@ -3292,7 +3311,7 @@ workflow run_wf { main: output_ch = input_ch - | concat_text.run( + | concat_r1.run( fromState: [ input: "input_r1" ], @@ -3301,7 +3320,7 @@ workflow run_wf { ] ) - | concat_text.run( + | concat_r2.run( fromState: [ input: "input_r2" ], @@ -3309,6 +3328,35 @@ workflow run_wf { processed_r2: "output" ] ) + + // TODO: add fq linter + + // TODO: run fastqc on raw reads + + // TODO: add fq trimmer (trimgalore or fastp) + + // TODO: run fastqc on trimmed reads + // TODO: lint again? + + // TODO: remove genome contaminant reads (bbmap_bbsplit) + // TODO: lint again? + + // TODO: remove ribosomal RNA reads (sortmerna) + // TODO: lint again? + + // TODO: infer strandedness (if not provided) + | map { id, state -> + def newState = state + ["strandedness": "forward"] + [id, newState] + } + + | setState( + [ + output_r1: "processed_r1", + output_r2: "processed_r2", + output_strandedness: "strandedness" + ] + ) emit: output_ch diff --git a/target/nextflow/prepare_reads/nextflow_schema.json b/target/nextflow/prepare_reads/nextflow_schema.json index d6cd115..7b97317 100644 --- a/target/nextflow/prepare_reads/nextflow_schema.json +++ b/target/nextflow/prepare_reads/nextflow_schema.json @@ -26,6 +26,11 @@ "format": "path", "description": "", "help_text": "Type: `file`, multiple: `True`, direction: `input`, example: `[\"reads_R2.fastq.gz\"]`. " + }, + "input_strandedness": { + "type": "string", + "description": "", + "help_text": "Type: `string`, multiple: `False`, example: `\"auto\"`. " } } }, @@ -38,7 +43,7 @@ "type": "string", "format": "path", "description": "", - "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_r1.gz\"`, direction: `output`, example: `\"fastq.gz\"`. ", + "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output_r1.gz\"`, direction: `output`, example: `\"fastq.gz\"`. ", "default": "$id.$key.output_r1.gz" }, "output_r2": { @@ -50,10 +55,8 @@ }, "output_strandedness": { "type": "string", - "format": "path", "description": "", - "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_strandedness.yaml\"`, direction: `output`, example: `\"strandedness.yaml\"`. ", - "default": "$id.$key.output_strandedness.yaml" + "help_text": "Type: `string`, multiple: `False`, required. " } } },