Build branch prepare_reads with version prepare_reads (de5d5ef)

Build pipeline: viash-hub.rnaseq.prepare-reads-9svzd Source commit: de5d5efc69 Source message: run fastqc on the raw fastq files
2025-06-13 11:17:27 +00:00
parent 9ecdb612f6
commit f51bfa2aed
11 changed files with 4892 additions and 7 deletions
--- a/src/prepare_reads/config.vsh.yaml
+++ b/src/prepare_reads/config.vsh.yaml
@@ -40,6 +40,9 @@ dependencies:
  - name: concat_text
    repository: craftbox
    alias: concat_r2
+  - name: fastqc  # FastQC component, resolved from the biobox repository
+    repository: biobox
+    alias: fastqc_raw  # name under which main.nf invokes it (fastqc_raw.run)

 resources:
  - type: nextflow_script
--- a/src/prepare_reads/main.nf
+++ b/src/prepare_reads/main.nf
@@ -24,7 +24,17 @@ workflow run_wf {

      // TODO: add fq linter

-      // TODO: run fastqc on raw reads
+      // run fastqc on raw reads (NOTE(review): input is state.processed_r1/_r2 - confirm these are untrimmed)
+      | fastqc_raw.run(
+        fromState: { id, state ->
+          [
+            input: [state.processed_r1, state.processed_r2]  // NOTE(review): r2 may be null for single-end - confirm
+          ]
+        },
+        toState: [  // must be a map literal: `{ key: "v" }` is a Groovy closure with a label, not a map
+          fastqc_raw_zip: "zip"  // state key <- fastqc output argument
+        ]
+      )

      // TODO: add fq trimmer (trimgalore or fastp)

--- a/src/prepare_reads/test.sh
+++ b/src/prepare_reads/test.sh
@@ -8,4 +8,5 @@ nextflow run . \
  -main-script target/nextflow/prepare_reads/main.nf \
  --input_r1 resources_test/minimal_test/input_fastq/SRR6357070_1.fastq.gz \
  --input_r2 resources_test/minimal_test/input_fastq/SRR6357070_2.fastq.gz \
-  --publish_dir test_results/test_prepare_reads
+  --publish_dir test_results/test_prepare_reads \
+  -profile docker
--- a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/.config.vsh.yaml
+++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/.config.vsh.yaml
@@ -0,0 +1,381 @@
+name: "fastqc"
+version: "v0.3.1"
+authors:
+- name: "Theodoro Gasperin Terra Camargo"
+  roles:
+  - "author"
+  - "maintainer"
+  info:
+    links:
+      email: "theodorogtc@gmail.com"
+      github: "tgaspe"
+      linkedin: "theodoro-gasperin-terra-camargo"
+    organizations:
+    - name: "Data Intuitive"
+      href: "https://www.data-intuitive.com"
+      role: "Bioinformatician"
+argument_groups:
+- name: "Inputs"
+  arguments:
+  - type: "file"
+    name: "--input"
+    description: "FASTQ file(s) to be analyzed.\n"
+    info: null
+    example:
+    - "input.fq"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+- name: "Outputs"
+  description: "At least one of the output options (--html, --zip, --summary, --data)\
+    \ must be used.\n"
+  arguments:
+  - type: "file"
+    name: "--html"
+    description: "Create the HTML report of the results. \n'*' wild card must be provided\
+      \ in the output file name. \nWild card will be replaced by the input file basename.\n\
+      e.g. \n  --input \"sample_1.fq\"\n  --html \"*.html\"\n  would create an output\
+      \ html file named sample_1.html\n"
+    info: null
+    example:
+    - "*.html"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: true
+    multiple_sep: ";"
+  - type: "file"
+    name: "--zip"
+    description: "Create the zip file(s) containing: html report, data, images, icons,\
+      \ summary, etc.\n'*' wild card must be provided in the output file name.\nWild\
+      \ card will be replaced by the input basename.\ne.g. \n  --input \"sample_1.fq\"\
+      \n  --zip \"*.zip\"\n  would create an output zip file named sample_1.zip\n"
+    info: null
+    example:
+    - "*.zip"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: true
+    multiple_sep: ";"
+  - type: "file"
+    name: "--summary"
+    description: "Create the summary file(s).\n'*' wild card must be provided in the\
+      \ output file name.\nWild card will be replaced by the input basename.\ne.g.\
+      \ \n  --input \"sample_1.fq\"\n  --summary \"*_summary.txt\"\n  would create\
+      \ an output summary.txt file named sample_1_summary.txt\n"
+    info: null
+    example:
+    - "*_summary.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: true
+    multiple_sep: ";"
+  - type: "file"
+    name: "--data"
+    description: "Create the data file(s).\n'*' wild card must be provided in the\
+      \ output file name.\nWild card will be replaced by the input basename.\ne.g.\
+      \ \n  --input \"sample_1.fq\"\n  --data \"*_data.txt\"\n  would create an
+      \ output data.txt file named sample_1_data.txt\n"
+    info: null
+    example:
+    - "*_data.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: true
+    multiple_sep: ";"
+- name: "Options"
+  arguments:
+  - type: "boolean_true"
+    name: "--casava"
+    description: "Files come from raw casava output. Files in the same sample\ngroup\
+      \ (differing only by the group number) will be analysed\nas a set rather than\
+      \ individually. Sequences with the filter\nflag set in the header will be excluded\
+      \ from the analysis.\nFiles must have the same names given to them by casava\n\
+      (including being gzipped and ending with .gz) otherwise they\nwon't be grouped\
+      \ together correctly.\n"
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--nano"
+    description: "Files come from nanopore sequences and are in fast5 format. In\n\
+      this mode you can pass in directories to process and the program\nwill take\
+      \ in all fast5 files within those directories and produce\na single output file\
+      \ from the sequences found in all files.\n"
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--nofilter"
+    description: "If running with --casava then don't remove read flagged by\ncasava\
+      \ as poor quality when performing the QC analysis.\n"
+    info: null
+    direction: "input"
+  - type: "boolean_true"
+    name: "--nogroup"
+    description: "Disable grouping of bases for reads >50bp. \nAll reports will show\
+      \ data for every base in the read. \nWARNING: Using this option will cause fastqc\
+      \ to crash \nand burn if you use it on really long reads, and your \nplots may\
+      \ end up a ridiculous size. You have been warned!\n"
+    info: null
+    direction: "input"
+  - type: "integer"
+    name: "--min_length"
+    description: "Sets an artificial lower limit on the length of the \nsequence to\
+      \ be shown in the report. As long as you \nset this to a value greater or equal\
+      \ to your longest \nread length then this will be the sequence length used \n\
+      to create your read groups. This can be useful for making\ndirectly comparable\
+      \ statistics from datasets with somewhat \nvariable read lengths.\n"
+    info: null
+    example:
+    - 0
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--format"
+    alternatives:
+    - "-f"
+    description: "Bypasses the normal sequence file format detection and \nforces\
+      \ the program to use the specified format. \nValid formats are bam, sam, bam_mapped,\
+      \ sam_mapped, and fastq.\n"
+    info: null
+    example:
+    - "bam"
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--contaminants"
+    alternatives:
+    - "-c"
+    description: "Specifies a non-default file which contains the list \nof contaminants\
+      \ to screen overrepresented sequences against. \nThe file must contain sets\
+      \ of named contaminants in the form\nname[tab]sequence. Lines prefixed with\
+      \ a hash will be ignored.\n"
+    info: null
+    example:
+    - "contaminants.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--adapters"
+    alternatives:
+    - "-a"
+    description: "Specifies a non-default file which contains the list of \nadapter\
+      \ sequences which will be explicitly searched against \nthe library. The file\
+      \ must contain sets of named adapters \nin the form name[tab]sequence. Lines\
+      \ prefixed with a hash will be ignored.\n"
+    info: null
+    example:
+    - "adapters.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--limits"
+    alternatives:
+    - "-l"
+    description: "Specifies a non-default file which contains \na set of criteria\
+      \ which will be used to determine \nthe warn/error limits for the various modules.\
+      \ \nThis file can also be used to selectively remove \nsome modules from the\
+      \ output altogether. The format \nneeds to mirror the default limits.txt file\
+      \ found in \nthe Configuration folder.\n"
+    info: null
+    example:
+    - "limits.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--kmers"
+    alternatives:
+    - "-k"
+    description: "Specifies the length of Kmer to look for in the Kmer \ncontent module.\
+      \ Specified Kmer length must be between \n2 and 10. Default length is 7 if not\
+      \ specified.\n"
+    info: null
+    example:
+    - 7
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "boolean_true"
+    name: "--quiet"
+    alternatives:
+    - "-q"
+    description: "Suppress all progress messages on stdout and only report errors.\n"
+    info: null
+    direction: "input"
+resources:
+- type: "bash_script"
+  path: "script.sh"
+  is_executable: true
+description: "FastQC - A high throughput sequence QC analysis tool."
+test_resources:
+- type: "bash_script"
+  path: "test.sh"
+  is_executable: true
+info: null
+status: "enabled"
+scope:
+  image: "public"
+  target: "public"
+requirements:
+  commands:
+  - "ps"
+keywords:
+- "Quality control"
+- "BAM"
+- "SAM"
+- "FASTQ"
+license: "GPL-3.0, Apache-2.0"
+links:
+  repository: "https://github.com/s-andrews/FastQC"
+  homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
+  documentation: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/"
+  issue_tracker: "https://github.com/s-andrews/FastQC/issues"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "biocontainers/fastqc:v0.11.9_cv8"
+  target_registry: "images.viash-hub.com"
+  target_tag: "v0.3.1"
+  namespace_separator: "/"
+  setup:
+  - type: "docker"
+    run:
+    - "echo \"fastqc: $(fastqc --version | sed -n 's/^FastQC //p')\" > /var/software_versions.txt\n"
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/fastqc/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "docker|native"
+  output: "target/nextflow/fastqc"
+  executable: "target/nextflow/fastqc/main.nf"
+  viash_version: "0.9.4"
+  git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223"
+  git_remote: "https://github.com/viash-hub/biobox"
+  git_tag: "v0.3.0-8-g98a5f3c"
+package_config:
+  name: "biobox"
+  version: "v0.3.1"
+  summary: "A curated collection of high-quality, standalone bioinformatics components\
+    \ built with [Viash](https://viash.io).\n"
+  description: "`biobox` offers a suite of reliable bioinformatics components, similar\
+    \ to [nf-core/modules](https://github.com/nf-core/modules) and [snakemake-wrappers/bio](https://github.com/snakemake/snakemake-wrappers/tree/master/bio),\
+    \ but built using the [Viash](https://viash.io) framework.\n\nThis approach emphasizes\
+    \ **reusability**, **reproducibility**, and adherence to **best practices**. Key\
+    \ features of `biobox` components include:\n\n* **Standalone & Nextflow Ready:**\
+    \ Run components directly via the command line or seamlessly integrate them into\
+    \ Nextflow workflows.\n* **High Quality Standards:**\n    * Comprehensive documentation\
+    \ for components and parameters.\n    * Full exposure of underlying tool arguments.\n\
+    \    * Containerized (Docker) for dependency management and reproducibility.\n\
+    \    * Unit tested for verified functionality.\n"
+  info: null
+  viash_version: "0.9.4"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".requirements.commands := ['ps']\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'v0.3.1'"
+  keywords:
+  - "bioinformatics"
+  - "modules"
+  - "sequencing"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/viash-hub/biobox"
+    issue_tracker: "https://github.com/viash-hub/biobox/issues"
--- a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/main.nf
+++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/main.nf
--- a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow.config
+++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow.config
@@ -0,0 +1,126 @@
+manifest {
+  name = 'fastqc'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = 'v0.3.1'
+  description = 'FastQC - A high throughput sequence QC analysis tool.'
+  author = 'Theodoro Gasperin Terra Camargo'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// detect tempdir
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+profiles {
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+
--- a/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow_schema.json
+++ b/target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/nextflow_schema.json
@@ -0,0 +1,257 @@
+{
+"$schema": "http://json-schema.org/draft-07/schema",
+"title": "fastqc",
+"description": "FastQC - A high throughput sequence QC analysis tool.",
+"type": "object",
+"definitions": {
+
+    
+    
+    "inputs" : {
+    "title": "Inputs",
+    "type": "object",
+    "description": "No description",
+    "properties": {
+    
+        
+                "input": {
+                "type":
+                "string",
+                "description": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed",
+                "help_text": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed.\n"
+            
+            }
+    
+
+}
+},
+    
+    
+    "outputs" : {
+    "title": "Outputs",
+    "type": "object",
+    "description": "At least one of the output options (--html, --zip, --summary, --data) must be used.\n",
+    "properties": {
+    
+        
+                "html": {
+                "type":
+                "string",
+                "description": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results",
+                "help_text": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results. \n\u0027*\u0027 wild card must be provided in the output file name. \nWild card will be replaced by the input file basename.\ne.g. \n  --input \"sample_1.fq\"\n  --html \"*.html\"\n  would create an output html file named sample_1.html\n"
+            ,
+                "default":"$id.$key.html_*.html"
+            }
+    
+
+        ,
+                "zip": {
+                "type":
+                "string",
+                "description": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc",
+                "help_text": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc.\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n  --input \"sample_1.fq\"\n  --zip \"*.zip\"\n  would create an output zip file named sample_1.zip\n"
+            ,
+                "default":"$id.$key.zip_*.zip"
+            }
+    
+
+        ,
+                "summary": {
+                "type":
+                "string",
+                "description": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s)",
+                "help_text": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n  --input \"sample_1.fq\"\n  --summary \"*_summary.txt\"\n  would create an output summary.txt file named sample_1_summary.txt\n"
+            ,
+                "default":"$id.$key.summary_*.txt"
+            }
+    
+
+        ,
+                "data": {
+                "type":
+                "string",
+                "description": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s)",
+                "help_text": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n  --input \"sample_1.fq\"\n  --data \"*_data.txt\"\n  would create an output data.txt file named sample_1_data.txt\n"
+            ,
+                "default":"$id.$key.data_*.txt"
+            }
+    
+
+}
+},
+    
+    
+    "options" : {
+    "title": "Options",
+    "type": "object",
+    "description": "No description",
+    "properties": {
+    
+        
+                "casava": {
+                "type":
+                "boolean",
+                "description": "Type: `boolean_true`, default: `false`. Files come from raw casava output",
+                "help_text": "Type: `boolean_true`, default: `false`. Files come from raw casava output. Files in the same sample\ngroup (differing only by the group number) will be analysed\nas a set rather than individually. Sequences with the filter\nflag set in the header will be excluded from the analysis.\nFiles must have the same names given to them by casava\n(including being gzipped and ending with .gz) otherwise they\nwon\u0027t be grouped together correctly.\n"
+            ,
+                "default":false
+            }
+    
+
+        ,
+                "nano": {
+                "type":
+                "boolean",
+                "description": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format",
+                "help_text": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format. In\nthis mode you can pass in directories to process and the program\nwill take in all fast5 files within those directories and produce\na single output file from the sequences found in all files.\n"
+            ,
+                "default":false
+            }
+    
+
+        ,
+                "nofilter": {
+                "type":
+                "boolean",
+                "description": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove read flagged by\ncasava as poor quality when performing the QC analysis",
+                "help_text": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove read flagged by\ncasava as poor quality when performing the QC analysis.\n"
+            ,
+                "default":false
+            }
+    
+
+        ,
+                "nogroup": {
+                "type":
+                "boolean",
+                "description": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp",
+                "help_text": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp. \nAll reports will show data for every base in the read. \nWARNING: Using this option will cause fastqc to crash \nand burn if you use it on really long reads, and your \nplots may end up a ridiculous size. You have been warned!\n"
+            ,
+                "default":false
+            }
+    
+
+        ,
+                "min_length": {
+                "type":
+                "integer",
+                "description": "Type: `integer`, example: `0`. Sets an artificial lower limit on the length of the \nsequence to be shown in the report",
+                "help_text": "Type: `integer`, example: `0`. Sets an artificial lower limit on the length of the \nsequence to be shown in the report. As long as you \nset this to a value greater or equal to your longest \nread length then this will be the sequence length used \nto create your read groups. This can be useful for making\ndirectly comparable statistics from datasets with somewhat \nvariable read lengths.\n"
+            
+            }
+    
+
+        ,
+                "format": {
+                "type":
+                "string",
+                "description": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format",
+                "help_text": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format. \nValid formats are bam, sam, bam_mapped, sam_mapped, and fastq.\n"
+            
+            }
+    
+
+        ,
+                "contaminants": {
+                "type":
+                "string",
+                "description": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against",
+                "help_text": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against. \nThe file must contain sets of named contaminants in the form\nname[tab]sequence. Lines prefixed with a hash will be ignored.\n"
+            
+            }
+    
+
+        ,
+                "adapters": {
+                "type":
+                "string",
+                "description": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library",
+                "help_text": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library. The file must contain sets of named adapters \nin the form name[tab]sequence. Lines prefixed with a hash will be ignored.\n"
+            
+            }
+    
+
+        ,
+                "limits": {
+                "type":
+                "string",
+                "description": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules",
+                "help_text": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules. \nThis file can also be used to selectively remove \nsome modules from the output altogether. The format \nneeds to mirror the default limits.txt file found in \nthe Configuration folder.\n"
+            
+            }
+    
+
+        ,
+                "kmers": {
+                "type":
+                "integer",
+                "description": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module",
+                "help_text": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module. Specified Kmer length must be between \n2 and 10. Default length is 7 if not specified.\n"
+            
+            }
+    
+
+        ,
+                "quiet": {
+                "type":
+                "boolean",
+                "description": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors",
+                "help_text": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors.\n"
+            ,
+                "default":false
+            }
+    
+
+}
+},
+    
+    
+    "nextflow input-output arguments" : {
+    "title": "Nextflow input-output arguments",
+    "type": "object",
+    "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+    "properties": {
+    
+        
+                "publish_dir": {
+                "type":
+                "string",
+                "description": "Type: `string`, required, example: `output/`. Path to an output directory",
+                "help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
+            
+            }
+    
+
+        ,
+                "param_list": {
+                "type":
+                "string",
+                "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
+                "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
+                "hidden": true
+            
+            }
+    
+
+}
+}
+},
+"allOf": [
+
+    {
+    "$ref": "#/definitions/inputs"
+    },
+
+    {
+    "$ref": "#/definitions/outputs"
+    },
+
+    {
+    "$ref": "#/definitions/options"
+    },
+
+    {
+    "$ref": "#/definitions/nextflow input-output arguments"
+    }
+]
+}
--- a/target/nextflow/prepare_genome/.config.vsh.yaml
+++ b/target/nextflow/prepare_genome/.config.vsh.yaml
@@ -161,7 +161,7 @@ build_info:
  output: "target/nextflow/prepare_genome"
  executable: "target/nextflow/prepare_genome/main.nf"
  viash_version: "0.9.4"
-  git_commit: "2bd8e1becf04860c3cf718f765e69a1511ceb7c6"
+  git_commit: "de5d5efc69532c12d3047c1ce244cfa9c6af0a91"
  git_remote: "https://github.com/viash-hub/rnaseq"
  dependencies:
  - "target/dependencies/vsh/vsh/toolbox/v0.1.1/nextflow/bgzip"
--- a/target/nextflow/prepare_genome/main.nf
+++ b/target/nextflow/prepare_genome/main.nf
@@ -3231,7 +3231,7 @@ meta = [
    "engine" : "native",
    "output" : "target/nextflow/prepare_genome",
    "viash_version" : "0.9.4",
-    "git_commit" : "2bd8e1becf04860c3cf718f765e69a1511ceb7c6",
+    "git_commit" : "de5d5efc69532c12d3047c1ce244cfa9c6af0a91",
    "git_remote" : "https://github.com/viash-hub/rnaseq"
  },
  "package_config" : {
--- a/target/nextflow/prepare_reads/.config.vsh.yaml
+++ b/target/nextflow/prepare_reads/.config.vsh.yaml
@@ -88,6 +88,12 @@ dependencies:
    type: "vsh"
    repo: "craftbox"
    tag: "v0.2.0"
+- name: "fastqc"
+  alias: "fastqc_raw"
+  repository:
+    type: "vsh"
+    repo: "biobox"
+    tag: "v0.3.1"
 repositories:
 - type: "vsh"
  name: "biobox"
@@ -173,11 +179,12 @@ build_info:
  output: "target/nextflow/prepare_reads"
  executable: "target/nextflow/prepare_reads/main.nf"
  viash_version: "0.9.4"
-  git_commit: "2bd8e1becf04860c3cf718f765e69a1511ceb7c6"
+  git_commit: "de5d5efc69532c12d3047c1ce244cfa9c6af0a91"
  git_remote: "https://github.com/viash-hub/rnaseq"
  dependencies:
  - "target/dependencies/vsh/vsh/craftbox/v0.2.0/nextflow/concat_text"
  - "target/dependencies/vsh/vsh/craftbox/v0.2.0/nextflow/concat_text"
+  - "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc"
 package_config:
  name: "rnaseq"
  version: "prepare_reads"
--- a/target/nextflow/prepare_reads/main.nf
+++ b/target/nextflow/prepare_reads/main.nf
@@ -3146,6 +3146,15 @@ meta = [
        "repo" : "craftbox",
        "tag" : "v0.2.0"
      }
+    },
+    {
+      "name" : "fastqc",
+      "alias" : "fastqc_raw",
+      "repository" : {
+        "type" : "vsh",
+        "repo" : "biobox",
+        "tag" : "v0.3.1"
+      }
    }
  ],
  "repositories" : [
@@ -3249,7 +3258,7 @@ meta = [
    "engine" : "native",
    "output" : "target/nextflow/prepare_reads",
    "viash_version" : "0.9.4",
-    "git_commit" : "2bd8e1becf04860c3cf718f765e69a1511ceb7c6",
+    "git_commit" : "de5d5efc69532c12d3047c1ce244cfa9c6af0a91",
    "git_remote" : "https://github.com/viash-hub/rnaseq"
  },
  "package_config" : {
@@ -3302,6 +3311,8 @@ include { concat_text as concat_r1_viashalias } from "${meta.root_dir}/dependenc
 concat_r1 = concat_r1_viashalias.run(key: "concat_r1")
 include { concat_text as concat_r2_viashalias } from "${meta.root_dir}/dependencies/vsh/vsh/craftbox/v0.2.0/nextflow/concat_text/main.nf"
 concat_r2 = concat_r2_viashalias.run(key: "concat_r2")
+include { fastqc as fastqc_raw_viashalias } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/main.nf"
+fastqc_raw = fastqc_raw_viashalias.run(key: "fastqc_raw")

 // inner workflow
 // user-provided Nextflow code
@@ -3331,7 +3342,17 @@ workflow run_wf {

      // TODO: add fq linter

-      // TODO: run fastqc on raw reads
+      // run fastqc on raw reads
+      | fastqc_raw.run(
+        fromState: { id, state ->
+          [ input: [state.processed_r1, state.processed_r2] ]
+        },
+        // Map literal, not `{ k: "v" }`: braces make a Groovy closure whose
+        // body is a labelled "zip" string, returned instead of a state map.
+        toState: [
+          fastqc_raw_zip: "zip"
+        ]
+      )

      // TODO: add fq trimmer (trimgalore or fastp)