Build branch main with version main (6e71519)

Build pipeline: viash-hub.demultiplex.main-wxzkf

Source commit: 6e71519815

Source message: Replace Falco with FastQC (#51)
This commit is contained in:
CI
2025-05-28 11:25:23 +00:00
parent 029f2aee81
commit 50ab36217a
44 changed files with 1217 additions and 810 deletions

View File

@@ -1,3 +1,15 @@
# demultiplex v0.4.0
## Breaking changes
* Falco has been replaced with FastQC. Falco generates FastQC compatible output, but fails to run on empty FASTQ files (PR #51).
- `runner` workflow: `falco_output` has been renamed to `output_sample_qc`.
- `demultiplex` workflow: `output_falco` has been renamed to `output_sample_qc`.
- The output file names from the sample QC no longer contain the input file extensions. Instead, the sample name is used.
(for example `sample1_S1_R2_001.fastq.gz_fastqc_report.html` becomes `sample1_S1_R2_001_fastqc_report.html`)
* `demultiplex` workflow: `output_multiqc` argument has been renamed to `multiqc_output` in order to align inner workflow and runner (PR #51).
# demultiplex v0.3.12
## New features

View File

@@ -8,7 +8,7 @@ links:
repository: https://github.com/viash-hub/demultiplex
info:
test_resources:
- path: gs://viash-hub-resources/demultiplex/v3
- path: gs://viash-hub-resources/demultiplex/v4
dest: testData
viash_version: 0.9.4

View File

@@ -16,7 +16,7 @@ argument_groups:
type: file
required: false
multiple: true
- name: "--falco_dir"
- name: "--sample_qc_dir"
type: file
required: true
- name: Output arguments
@@ -31,7 +31,7 @@ argument_groups:
direction: output
multiple: true
required: false
- name: "--output_falco"
- name: "--output_sample_qc"
type: file
direction: output
required: true

View File

@@ -13,12 +13,12 @@ workflow run_wf {
// Gather the following state for all samples
def forward_fastqs = states.collect{it.forward_input}.flatten()
def reverse_fastqs = states.collect{it.reverse_input}.findAll{it != null}.flatten()
def falco_dirs = states.collect{it.falco_dir}
def sample_qc_dirs = states.collect{it.sample_qc_dir}
def resultState = [
"output_forward": forward_fastqs,
"output_reverse": reverse_fastqs,
"output_falco": falco_dirs,
"output_sample_qc": sample_qc_dirs,
// The join ID is the same across all samples from the same run
"_meta": ["join_id": states[0]._meta.join_id]
]

View File

@@ -36,15 +36,15 @@ argument_groups:
direction: output
required: false
default: "$id/fastq"
- name: "--output_falco"
description: Directory to write falco output to
- name: "--output_sample_qc"
description: Directory to write FastQC output to
type: file
direction: output
required: false
multiple: true
default: "$id/qc/fastqc"
- name: "--output_multiqc"
description: Directory to write falco output to
- name: "--multiqc_output"
description: Location to write the MultiQC output to
type: file
direction: output
required: false
@@ -79,6 +79,9 @@ test_resources:
- type: nextflow_script
path: test.nf
entrypoint: test_bases2fastq
- type: nextflow_script
path: test.nf
entrypoint: test_no_index
dependencies:
- name: io/untar
@@ -93,7 +96,7 @@ dependencies:
repository: bb
- name: bases2fastq
repository: bb
- name: falco
- name: fastqc
repository: bb
- name: multiqc
repository: bb

View File

@@ -21,4 +21,11 @@ nextflow run . \
-profile docker,no_publish,local \
-entry test_bases2fastq \
-c src/config/labels.config \
-resume
nextflow run . \
-main-script src/demultiplex/test.nf \
-profile docker,no_publish,local \
-entry test_no_index \
-c src/config/labels.config \
-resume

View File

@@ -180,19 +180,21 @@ workflow run_wf {
)
output_ch = samples_ch
| falco.run(
| fastqc.run(
directives: [label: ["verylowcpu", "lowmem"]],
fromState: {id, state ->
def output_base = "$id/qc/fastqc/*"
[
"input": [state.fastq_forward, state.fastq_reverse],
"outdir": "$id/qc/falco",
"summary_filename": null,
"report_filename": null,
"data_filename": null,
"html": "${output_base}_fastqc_report.html",
"summary": "${output_base}_summary.txt",
"data": "${output_base}_fastqc_data.txt",
]
},
toState: { id, result, state ->
state + [ "output_falco" : result.outdir ]
// The output directory for all files above is the same:
// take the directory from one of the files
state + [ "output_sample_qc": result.html[0].parent ]
}
)
@@ -202,13 +204,13 @@ workflow run_wf {
"id": state.run_id,
"forward_input": state.fastq_forward,
"reverse_input": state.fastq_reverse,
"falco_dir": state.output_falco,
"sample_qc_dir": state.output_sample_qc,
]
},
toState: [
"forward_fastqs": "output_forward",
"reverse_fastqs": "output_reverse",
"output_falco": "output_falco",
"output_sample_qc": "output_sample_qc",
]
)
@@ -216,8 +218,8 @@ workflow run_wf {
directives: [label: ["midcpu", "midmem"]],
fromState: {id, state ->
def new_state = [
"input": state.output_falco,
"output_report": state.output_multiqc,
"input": state.output_sample_qc,
"output_report": state.multiqc_output,
"cl_config": 'sp: {fastqc/data: {fn: "*_fastqc_data.txt"}}'
]
if (state.demultiplexer == "bclconvert") {
@@ -229,7 +231,7 @@ workflow run_wf {
return new_state
},
toState: { id, result, state ->
state + [ "output_multiqc" : result.output_report ]
state + [ "multiqc_output" : result.output_report ]
}
)
@@ -237,8 +239,8 @@ workflow run_wf {
[
//"_meta": "_meta",
"output": "output_demultiplexer",
"output_falco": "output_falco",
"output_multiqc": "output_multiqc",
"output_sample_qc": "output_sample_qc",
"multiqc_output": "multiqc_output",
"output_run_information": "run_information",
"demultiplexer_logs": "demultiplexer_logs"
]

View File

@@ -33,10 +33,10 @@ workflow test_illumina {
assert_ch = output_ch
| map {id, state ->
assert state.output.isDirectory(): "Expected bclconvert output to be a directory"
state.output_falco.each{
assert it.isDirectory(): "Expected falco output to be a directory"
state.output_sample_qc.each{
assert it.isDirectory(): "Expected sample QC output to be a directory"
}
assert state.output_multiqc.isFile(): "Expected multiQC output to be a file"
assert state.multiqc_output.isFile(): "Expected multiQC output to be a file"
fastq_files = state.output.listFiles().collect{it.name}
assert ["Undetermined_S0_L001_R1_001.fastq.gz", "Sample23_S3_L001_R1_001.fastq.gz",
"sampletest_S4_L001_R1_001.fastq.gz", "Sample1_S1_L001_R1_001.fastq.gz",
@@ -101,8 +101,8 @@ workflow test_bases2fastq {
}
| map {id, state ->
assert state.output.isDirectory(): "Expected bases2fastq output to be a directory"
state.output_falco.each{assert it.isDirectory(): "Expected falco output to be a directory"}
assert state.output_multiqc.isFile(): "Expected multiQC output to be a file"
state.output_sample_qc.each{assert it.isDirectory(): "Expected sample QC output to be a directory"}
assert state.multiqc_output.isFile(): "Expected multiQC output to be a file"
def logs_files = state.demultiplexer_logs.listFiles()
println "Logs files: ${logs_files}"
@@ -114,3 +114,88 @@ workflow test_bases2fastq {
"Expected to find bases2fastq info directory"
}
}
// Integration test: runs the demultiplex workflow with bclconvert on a sample sheet
// without index sequences (SampleSheetNoIndex.csv) and checks the resulting state.
workflow test_no_index {
// Test what happens when no index is specified. All the reads go into one sample
// and the "Undetermined" should be empty
output_ch = Channel.fromList([
[
input: params.resources_test + "demultiplex_htrnaseq_meta/SingleCell-RNA_P3_2",
demultiplexer: "bclconvert",
run_information: params.resources_test + "demultiplex_htrnaseq_meta/SingleCell-RNA_P3_2/SampleSheetNoIndex.csv"
]
])
// Turn the state into an [id, state] tuple, using the fixed ID "run"
| map { state -> [ "run", state ] }
| demultiplex.run(
// Keep the workflow output and remember the original input alongside it
toState: { id, output, state ->
output + [ orig_input: state.input ] }
)
| view { output ->
assert output.size() == 2 : "outputs should contain two elements; [id, file]"
"Output: $output"
}
// The single input event must result in exactly one output event
event_count_ch = output_ch
| toSortedList()
| map { state ->
assert state.size() == 1 : "Expected one event in the output channel"
}
// Check the contents of the output state
assert_ch = output_ch
| map {id, state ->
assert state.output.isDirectory(): "Expected bclconvert output to be a directory"
state.output_sample_qc.each{
assert it.isDirectory(): "Expected sample QC output to be a directory"
}
assert state.multiqc_output.isFile(): "Expected multiQC output to be a file"
// Only the file names are collected here, not the file objects
fastq_files = state.output.listFiles().collect{it.name}
assert ["Undetermined_S0_R2_001.fastq.gz", "Undetermined_S0_R1_001.fastq.gz",
"SingleCell-RNA-P3-2-SI-TT-A5_S1_R1_001.fastq.gz", "SingleCell-RNA-P3-2-SI-TT-A5_S1_R2_001.fastq.gz"
].toSet() == fastq_files.toSet(): \
"Output directory should contain the expected FASTQ files"
// NOTE(review): fastq_files holds file names (strings), so it.length() below is the
// length of the name and not the size of the file on disk. Confirm whether a
// file-size check was intended here.
fastq_files.each{
assert it.length() != 0: "Expected FASTQ file to not be empty"
}
assert state.output_run_information.isFile(): "Expected output run information to be a file"
expected_run_information = """[Header],,,,
|FileFormatVersion,2,,,
|RunName,SingleCell-RNA_P3_2,,,
|InstrumentPlatform,NextSeq1k2k,,,
|IndexOrientation,Forward,,,
|,,,,
|[Reads],,,,
|Read1Cycles,28,,,
|Read2Cycles,90,,,
|Index1Cycles,10,,,
|Index2Cycles,10,,,
|,,,,
|[BCLConvert_Settings],,,,
|SoftwareVersion,4.2.7,,,
|TrimUMI,0,,,
|OverrideCycles,U28;N10;N10;Y90,,,
|FastqCompressionFormat,gzip,,,
|NoLaneSplitting,TRUE,,,
|,,,,
|[BCLConvert_Data],,,,
|Sample_ID,Index,Index2,,
|SingleCell-RNA-P3-2-SI-TT-A5,,,,
|,,,,""".stripMargin()
// Convert CRLF line endings to LF before comparing with the expected content
assert state.output_run_information.text.replaceAll("\r\n", "\n") == expected_run_information
println "ID: ${id}"
println "State: ${state}"
assert state.demultiplexer_logs.isDirectory():
"Expected BCL Convert reports to be a directory"
def logs_files = state.demultiplexer_logs.listFiles()
println "Logs files: ${logs_files}"
assert logs_files.size() > 0: "Expected BCL Convert logs dir to contain files"
assert logs_files.find { it.name == "Demultiplex_Stats.csv" }:
"Expected to find BCL Convert Demultiplex_Stats.csv"
assert logs_files.find { it.name == "Logs" }:
"Expected to find BCL Convert Logs directory"
}
}

View File

@@ -25,10 +25,10 @@ do
ls "$output_location"
done
echo "Grouping output from $par_input_falco into $par_output_falco"
mkdir -p "$par_output_falco"
IFS=";" read -ra falco_inputs <<< $par_input_falco
for falco_dir in "${falco_inputs[@]}"; do
echo "Copying contents of $falco_dir"
find -H -D exec "$falco_dir" -type f -maxdepth 1 -exec cp -t "$par_output_falco" {} +
echo "Grouping output from $par_input_sample_qc into $par_output_sample_qc"
mkdir -p "$par_output_sample_qc"
IFS=";" read -ra sample_qc_inputs <<< $par_input_sample_qc
for qc_dir in "${sample_qc_inputs[@]}"; do
echo "Copying contents of $qc_dir"
find -H -D exec "$qc_dir" -type f -maxdepth 1 -exec cp -t "$par_output_sample_qc" {} +
done

View File

@@ -8,8 +8,8 @@ argument_groups:
description: Directory to write fastq data to
type: file
required: true
- name: "--input_falco"
description: Directory to write falco output to
- name: "--input_sample_qc"
description: Directories containing the sample QC output files to be copied
type: file
required: true
multiple: true
@@ -30,7 +30,7 @@ argument_groups:
type: file
direction: output
default: "fastq"
- name: --output_falco
- name: --output_sample_qc
type: file
direction: output
default: "qc/fastqc"

View File

@@ -41,7 +41,7 @@ argument_groups:
type: file
direction: output
default: "fastq"
- name: --falco_output
- name: --sample_qc_output
type: file
direction: output
default: "qc/fastqc"
@@ -64,6 +64,10 @@ resources:
- type: nextflow_script
path: main.nf
entrypoint: run_wf
test_resources:
- type: nextflow_script
path: test.nf
entrypoint: test
dependencies:
- name: demultiplex

16
src/runner/integration_tests.sh Executable file
View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Integration test for the runner workflow: builds the 'runner' component with viash
# and then runs the 'test' entrypoint of src/runner/test.nf with Nextflow.

# Stop at the first failing command (also inside pipelines), so that a failed build
# is never followed by a test run against stale or missing build output.
set -eo pipefail

# get the root of the directory
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

# build the component(s) selected by the 'runner' query before running the test
viash ns build --setup cb -q runner

nextflow run . \
  -main-script src/runner/test.nf \
  -entry test \
  -profile docker,local \
  -c src/config/labels.config \
  -resume

View File

@@ -9,6 +9,17 @@ workflow run_wf {
main:
output_ch = input_ch
| map { id, state ->
// The argument names for this workflow and the demultiplex workflow may overlap
// here, we store a copy in order to make sure to not accidentally overwrite the state.
def new_state = state + [
"fastq_output_workflow": state.fastq_output,
"multiqc_output_workflow": state.multiqc_output,
"sample_qc_output_workflow": state.sample_qc_output,
"demultiplexer_logs_workflow": state.demultiplexer_logs,
]
return [id, new_state]
}
// Extract the ID from the input.
// If the input is a tarball, strip the suffix.
| map{ id, state ->
@@ -26,8 +37,8 @@ workflow run_wf {
"demultiplexer": state.demultiplexer,
"skip_copycomplete_check": state.skip_copycomplete_check,
"output": "$id/fastq",
"output_falco": "$id/qc/fastqc",
"output_multiqc": "$id/qc/multiqc_report.html",
"output_sample_qc": "$id/qc/fastqc",
"multiqc_output": "$id/qc/multiqc_report.html",
"demultiplexer_logs": "$id/demultiplexer_logs",
]
if (state.run_information) {
@@ -45,26 +56,25 @@ workflow run_wf {
def id1 = (state.plain_output) ? id : "${state.run_id}/${date}"
def id2 = (state.plain_output) ? id : "${id1}_demultiplex_${version}"
def fastq_output_1 = (id2 == "run") ? state.fastq_output : "${id2}/" + state.fastq_output
def falco_output_1 = (id2 == "run") ? state.falco_output : "${id2}/" + state.falco_output
def multiqc_output_1 = (id2 == "run") ? state.multiqc_output : "${id2}/" + state.multiqc_output
def run_information_output_1 = (id2 == "run") ? "${state.output_run_information.getName()}" : "${id2}/${state.output_run_information.getName()}"
def demultiplexer_logs_output = (id2 == "run") ? state.demultiplexer_logs : "${id2}/${state.demultiplexer_logs.getName()}"
def prefix = (id2 == "run") ? "" : "${id2}/"
// These output names are determined by arguments.
def fastq_output_1 = "${prefix}${state.fastq_output_workflow}"
def sample_qc_output_1 = "${prefix}${state.sample_qc_output_workflow}"
def multiqc_output_1 = "${prefix}${state.multiqc_output_workflow}"
def demultiplexer_logs_output = "${prefix}${state.demultiplexer_logs_workflow}"
// The name of the output file for the run information is determined by the input file name.
def run_information_output_1 = "${prefix}${state.output_run_information.getName()}"
if (id2 == "run") {
println("Publising to ${params.publish_dir}")
} else {
println("Publising to ${params.publish_dir}/${id2}")
}
println("Publising to ${params.publish_dir}/${prefix}")
[
input: state.output,
input_falco: state.output_falco,
input_multiqc: state.output_multiqc,
input_sample_qc: state.output_sample_qc,
input_multiqc: state.multiqc_output,
input_run_information: state.output_run_information,
input_demultiplexer_logs: state.demultiplexer_logs,
output: fastq_output_1,
output_falco: falco_output_1,
output_sample_qc: sample_qc_output_1,
output_multiqc: multiqc_output_1,
output_run_information: run_information_output_1,
output_demultiplexer_logs: demultiplexer_logs_output,
@@ -84,9 +94,14 @@ workflow run_wf {
output_ch
}
def get_version(inputFile) {
def get_version(input) {
def inputFile = file(input)
if (!inputFile.exists()) {
// When executing tests
return "unknown_version"
}
def yamlSlurper = new groovy.yaml.YamlSlurper()
def loaded_viash_config = yamlSlurper.parse(file(inputFile))
def loaded_viash_config = yamlSlurper.parse(inputFile)
def version = (loaded_viash_config.version) ? loaded_viash_config.version : "unknown_version"
println("Version to be used: ${version}")
return version

View File

@@ -8,5 +8,9 @@ process {
}
}
params {
rootDir = java.nio.file.Paths.get("$projectDir/../../").toAbsolutePath().normalize().toString()
}
// include common settings
includeConfig("${params.rootDir}/src/config/labels.config")

92
src/runner/test.nf Normal file
View File

@@ -0,0 +1,92 @@
import java.nio.file.Files
import nextflow.exception.WorkflowScriptErrorException
// Create temporary directory for the publish_dir if it is not defined
// First fall back to the camelCase 'publishDir' parameter when only that one is set.
if (!params.publish_dir && params.publishDir) {
params.publish_dir = params.publishDir
}
// Neither parameter was provided: publish into a fresh temporary directory instead.
if (!params.publish_dir) {
def tempDir = Files.createTempDirectory("demultiplex_runner_integration_test")
println "Created temp directory: $tempDir"
// Register shutdown hook to delete it on JVM exit
Runtime.runtime.addShutdownHook(new Thread({
try {
// Delete directory recursively
// (reverse order, so that directory contents are removed before the directory itself)
Files.walk(tempDir)
.sorted(Comparator.reverseOrder())
.forEach { Files.delete(it) }
println "Deleted temp directory: $tempDir"
} catch (Exception e) {
// Cleanup is best effort: report the failure but do not rethrow
println "Failed to delete temp directory: $e"
}
}))
params.publish_dir = tempDir
}
// The module inherits the parameters defined before the include statement,
// therefore any parameters set afterwards will not be used by the module.
include { runner } from params.rootDir + "/target/nextflow/runner/main.nf"
// Test resources are read from the 'testData' directory below params.rootDir
params.resources_test = params.rootDir + "/testData/"
// Integration test for the runner workflow: runs it on a tarball from the test resources
// and, once the workflow has completed, checks the files found in params.publish_dir.
workflow test {
output_ch = Channel.fromList([
[
id: "test",
input: params.resources_test + "200624_A00834_0183_BHMTFYDRXX.tar.gz",
]
])
| map {event -> [event.id, event] }
| runner.run(
fromState: {id, state -> state }
)
workflow.onComplete = {
try {
// Nextflow only allows exceptions generated using the 'error' function (which throws WorkflowScriptErrorException).
// So in order for the assert statements to work (or to let any other error fail the test),
// we need to wrap them in a WorkflowScriptErrorException. See https://github.com/nextflow-io/nextflow/pull/4458/files
// The error message will show up in .nextflow.log
def publish_subdir = file("${params.publish_dir}/200624_A00834_0183_BHMTFYDRXX")
assert publish_subdir.isDirectory()
// Exactly one directory is expected below the run directory; its name must end
// with "_demultiplex_unknown_version"
def all_files = publish_subdir.listFiles()
assert all_files.size() == 1
def publish_dir = file(all_files[0])
assert publish_dir.name.endsWith("_demultiplex_unknown_version")
def published_items = publish_dir.listFiles()
assert published_items.size() == 4
assert published_items.collect{it.name}.toSet() == ["demultiplexer_logs", "fastq", "qc", "SampleSheet.csv"].toSet()
// Three QC files (data, HTML report and summary) are expected for every FASTQ file
def fastqc_files = publish_dir.resolve("qc/fastqc").listFiles()
assert fastqc_files.collect{it.name}.toSet() == [
"Sample1_S1_L001_R1_001_fastqc_data.txt",
"Sample1_S1_L001_R1_001_fastqc_report.html",
"Sample1_S1_L001_R1_001_summary.txt",
"Sample23_S3_L001_R1_001_fastqc_data.txt",
"Sample23_S3_L001_R1_001_fastqc_report.html",
"Sample23_S3_L001_R1_001_summary.txt",
"SampleA_S2_L001_R1_001_fastqc_data.txt",
"SampleA_S2_L001_R1_001_fastqc_report.html",
"SampleA_S2_L001_R1_001_summary.txt",
"sampletest_S4_L001_R1_001_fastqc_data.txt",
"sampletest_S4_L001_R1_001_fastqc_report.html",
"sampletest_S4_L001_R1_001_summary.txt",
"Undetermined_S0_L001_R1_001_fastqc_data.txt",
"Undetermined_S0_L001_R1_001_fastqc_report.html",
"Undetermined_S0_L001_R1_001_summary.txt"
].toSet()
assert publish_dir.resolve("qc/multiqc_report.html").exists()
def fastq_files = publish_dir.resolve("fastq").listFiles()
assert fastq_files.collect{it.name}.toSet() == [
"Sample1_S1_L001_R1_001.fastq.gz",
"Sample23_S3_L001_R1_001.fastq.gz",
"SampleA_S2_L001_R1_001.fastq.gz",
"sampletest_S4_L001_R1_001.fastq.gz",
"Undetermined_S0_L001_R1_001.fastq.gz"
].toSet()
assert publish_dir.resolve("SampleSheet.csv").exists()
} catch (Exception e) {
throw new WorkflowScriptErrorException("Integration test failed!", e)
}
}
}

View File

@@ -1,227 +0,0 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "falco",
"description": "A C++ drop-in replacement of FastQC to assess the quality of sequence read data",
"type": "object",
"definitions": {
"input arguments" : {
"title": "Input arguments",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: List of `file`, required, example: `input1.fastq;input2.fastq`, multiple_sep: `\";\"`. input fastq files",
"help_text": "Type: List of `file`, required, example: `input1.fastq;input2.fastq`, multiple_sep: `\";\"`. input fastq files"
}
}
},
"run arguments" : {
"title": "Run arguments",
"type": "object",
"description": "No description",
"properties": {
"nogroup": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp",
"help_text": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp. \nAll reports will show data for every base in \nthe read. WARNING: When using this option, \nyour plots may end up a ridiculous size. You \nhave been warned!\n"
,
"default":false
}
,
"contaminents": {
"type":
"string",
"description": "Type: `file`. Specifies a non-default file which contains \nthe list of contaminants to screen \noverrepresented sequences against",
"help_text": "Type: `file`. Specifies a non-default file which contains \nthe list of contaminants to screen \noverrepresented sequences against. The file \nmust contain sets of named contaminants in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt\n"
}
,
"adapters": {
"type":
"string",
"description": "Type: `file`. Specifies a non-default file which contains \nthe list of adapter sequences which will be \nexplicity searched against the library",
"help_text": "Type: `file`. Specifies a non-default file which contains \nthe list of adapter sequences which will be \nexplicity searched against the library. The \nfile must contain sets of named adapters in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default:\nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt\n"
}
,
"limits": {
"type":
"string",
"description": "Type: `file`. Specifies a non-default file which contains \na set of criteria which will be used to \ndetermine the warn/error limits for the \nvarious modules",
"help_text": "Type: `file`. Specifies a non-default file which contains \na set of criteria which will be used to \ndetermine the warn/error limits for the \nvarious modules. This file can also be used \nto selectively remove some modules from the \noutput all together. The format needs to \nmirror the default limits.txt file found in \nthe Configuration folder. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt\n"
}
,
"subsample": {
"type":
"integer",
"description": "Type: `integer`, example: `10`. [Falco only] makes falco faster (but \npossibly less accurate) by only processing \nreads that are a multiple of this value (using \n0-based indexing to number reads)",
"help_text": "Type: `integer`, example: `10`. [Falco only] makes falco faster (but \npossibly less accurate) by only processing \nreads that are a multiple of this value (using \n0-based indexing to number reads).\n"
}
,
"bisulfite": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. [Falco only] reads are whole genome \nbisulfite sequencing, and more Ts and fewer \nCs are therefore expected and will be \naccounted for in base content",
"help_text": "Type: `boolean_true`, default: `false`. [Falco only] reads are whole genome \nbisulfite sequencing, and more Ts and fewer \nCs are therefore expected and will be \naccounted for in base content.\n"
,
"default":false
}
,
"reverse_complement": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. [Falco only] The input is a \nreverse-complement",
"help_text": "Type: `boolean_true`, default: `false`. [Falco only] The input is a \nreverse-complement. All modules will be \ntested by swapping A/T and C/G\n"
,
"default":false
}
}
},
"output arguments" : {
"title": "Output arguments",
"type": "object",
"description": "No description",
"properties": {
"outdir": {
"type":
"string",
"description": "Type: `file`, required, default: `$id.$key.outdir`, example: `output`. Create all output files in the specified \noutput directory",
"help_text": "Type: `file`, required, default: `$id.$key.outdir`, example: `output`. Create all output files in the specified \noutput directory. FALCO-SPECIFIC: If the \ndirectory does not exists, the program will \ncreate it.\n"
,
"default":"$id.$key.outdir"
}
,
"format": {
"type":
"string",
"description": "Type: `string`, choices: ``bam`, `sam`, `bam_mapped`, `sam_mapped`, `fastq`, `fq`, `fastq.gz`, `fq.gz``. Bypasses the normal sequence file format \ndetection and forces the program to use the \nspecified format",
"help_text": "Type: `string`, choices: ``bam`, `sam`, `bam_mapped`, `sam_mapped`, `fastq`, `fq`, `fastq.gz`, `fq.gz``. Bypasses the normal sequence file format \ndetection and forces the program to use the \nspecified format. Validformats are bam, sam, \nbam_mapped, sam_mapped, fastq, fq, fastq.gz \nor fq.gz.\n",
"enum": ["bam", "sam", "bam_mapped", "sam_mapped", "fastq", "fq", "fastq.gz", "fq.gz"]
}
,
"data_filename": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.data_filename`. [Falco only] Specify filename for FastQC \ndata output (TXT)",
"help_text": "Type: `file`, default: `$id.$key.data_filename`. [Falco only] Specify filename for FastQC \ndata output (TXT). If not specified, it will \nbe called fastq_data.txt in either the input \nfile\u0027s directory or the one specified in the \n--output flag. Only available when running \nfalco with a single input.\n"
,
"default":"$id.$key.data_filename"
}
,
"report_filename": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.report_filename`. [Falco only] Specify filename for FastQC \nreport output (HTML)",
"help_text": "Type: `file`, default: `$id.$key.report_filename`. [Falco only] Specify filename for FastQC \nreport output (HTML). If not specified, it \nwill be called fastq_report.html in either \nthe input file\u0027s directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n"
,
"default":"$id.$key.report_filename"
}
,
"summary_filename": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.summary_filename`. [Falco only] Specify filename for the short \nsummary output (TXT)",
"help_text": "Type: `file`, default: `$id.$key.summary_filename`. [Falco only] Specify filename for the short \nsummary output (TXT). If not specified, it \nwill be called fastq_report.html in either \nthe input file\u0027s directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n"
,
"default":"$id.$key.summary_filename"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input arguments"
},
{
"$ref": "#/definitions/run arguments"
},
{
"$ref": "#/definitions/output arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -1,49 +1,171 @@
name: "falco"
name: "fastqc"
version: "v0.3.1"
authors:
- name: "Toni Verbeiren"
- name: "Theodoro Gasperin Terra Camargo"
roles:
- "author"
- "maintainer"
info:
links:
github: "tverbeiren"
linkedin: "verbeiren"
email: "theodorogtc@gmail.com"
github: "tgaspe"
linkedin: "theodoro-gasperin-terra-camargo"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist and CEO"
role: "Bioinformatician"
argument_groups:
- name: "Input arguments"
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "input fastq files"
description: "FASTQ file(s) to be analyzed.\n"
info: null
example:
- "input1.fastq;input2.fastq"
- "input.fq"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Run arguments"
- name: "Outputs"
description: "At least one of the output options (--html, --zip, --summary, --data)\
\ must be used.\n"
arguments:
- type: "file"
name: "--html"
description: "Create the HTML report of the results. \n'*' wild card must be provided\
\ in the output file name. \nWild card will be replaced by the input file basename.\n\
e.g. \n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output\
\ html file named sample_1.html\n"
info: null
example:
- "*.html"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--zip"
description: "Create the zip file(s) containing: html report, data, images, icons,\
\ summary, etc.\n'*' wild card must be provided in the output file name.\nWild\
\ card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\
\n --zip \"*.zip\"\n would create an output zip file named sample_1.zip\n"
info: null
example:
- "*.zip"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--summary"
description: "Create the summary file(s).\n'*' wild card must be provided in the\
\ output file name.\nWild card will be replaced by the input basename.\ne.g.\
\ \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create\
\ an output summary.txt file named sample_1_summary.txt\n"
info: null
example:
- "*_summary.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--data"
description: "Create the data file(s).\n'*' wild card must be provided in the\
\ output file name.\nWild card will be replaced by the input basename.\ne.g.\
\ \n --input \"sample_1.fq\"\n --data \"*_data.txt\"\n would create an\
\ output data.txt file named sample_1_data.txt\n"
info: null
example:
- "*_data.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- name: "Options"
arguments:
- type: "boolean_true"
name: "--casava"
description: "Files come from raw casava output. Files in the same sample\ngroup\
\ (differing only by the group number) will be analysed\nas a set rather than\
\ individually. Sequences with the filter\nflag set in the header will be excluded\
\ from the analysis.\nFiles must have the same names given to them by casava\n\
(including being gzipped and ending with .gz) otherwise they\nwon't be grouped\
\ together correctly.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nano"
description: "Files come from nanopore sequences and are in fast5 format. In\n\
this mode you can pass in directories to process and the program\nwill take\
\ in all fast5 files within those directories and produce\na single output file\
\ from the sequences found in all files.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nofilter"
description: "If running with --casava then don't remove read flagged by\ncasava\
\ as poor quality when performing the QC analysis.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--nogroup"
description: "Disable grouping of bases for reads >50bp. \nAll reports will show\
\ data for every base in \nthe read. WARNING: When using this option, \nyour\
\ plots may end up a ridiculous size. You \nhave been warned!\n"
\ data for every base in the read. \nWARNING: Using this option will cause fastqc\
\ to crash \nand burn if you use it on really long reads, and your \nplots may\
\ end up a ridiculous size. You have been warned!\n"
info: null
direction: "input"
- type: "file"
name: "--contaminents"
description: "Specifies a non-default file which contains \nthe list of contaminants\
\ to screen \noverrepresented sequences against. The file \nmust contain sets\
\ of named contaminants in \nthe form name[tab]sequence. Lines prefixed \nwith\
\ a hash will be ignored. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt\n"
- type: "integer"
name: "--min_length"
description: "Sets an artificial lower limit on the length of the \nsequence to\
\ be shown in the report. As long as you \nset this to a value greater or equal\
\ to your longest \nread length then this will be the sequence length used \n\
to create your read groups. This can be useful for making\ndirectly comparable\
\ statistics from datasets with somewhat \nvariable read lengths.\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--format"
alternatives:
- "-f"
description: "Bypasses the normal sequence file format detection and \nforces\
\ the program to use the specified format. \nValid formats are bam, sam, bam_mapped,\
\ sam_mapped, and fastq.\n"
info: null
example:
- "bam"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--contaminants"
alternatives:
- "-c"
description: "Specifies a non-default file which contains the list \nof contaminants\
\ to screen overrepresented sequences against. \nThe file must contain sets\
\ of named contaminants in the form\nname[tab]sequence. Lines prefixed with\
\ a hash will be ignored.\n"
info: null
example:
- "contaminants.txt"
must_exist: true
create_parent: true
required: false
@@ -52,11 +174,15 @@ argument_groups:
multiple_sep: ";"
- type: "file"
name: "--adapters"
description: "Specifies a non-default file which contains \nthe list of adapter\
\ sequences which will be \nexplicity searched against the library. The \nfile\
\ must contain sets of named adapters in \nthe form name[tab]sequence. Lines\
\ prefixed \nwith a hash will be ignored. Default:\nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt\n"
alternatives:
- "-a"
description: "Specifies a non-default file which contains the list of \nadapter\
\ sequences which will be explicitly searched against \nthe library. The file\
\ must contain sets of named adapters \nin the form name[tab]sequence. Lines\
\ prefixed with a hash will be ignored.\n"
info: null
example:
- "adapters.txt"
must_exist: true
create_parent: true
required: false
@@ -65,12 +191,16 @@ argument_groups:
multiple_sep: ";"
- type: "file"
name: "--limits"
alternatives:
- "-l"
description: "Specifies a non-default file which contains \na set of criteria\
\ which will be used to \ndetermine the warn/error limits for the \nvarious\
\ modules. This file can also be used \nto selectively remove some modules from\
\ the \noutput all together. The format needs to \nmirror the default limits.txt\
\ file found in \nthe Configuration folder. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt\n"
\ which will be used to determine \nthe warn/error limits for the various modules.\
\ \nThis file can also be used to selectively remove \nsome modules from the\
\ output altogether. The format \nneeds to mirror the default limits.txt file\
\ found in \nthe Configuration folder.\n"
info: null
example:
- "limits.txt"
must_exist: true
create_parent: true
required: false
@@ -78,125 +208,31 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--subsample"
name: "--kmers"
alternatives:
- "-s"
description: "[Falco only] makes falco faster (but \npossibly less accurate) by\
\ only processing \nreads that are a multiple of this value (using \n0-based\
\ indexing to number reads).\n"
- "-k"
description: "Specifies the length of Kmer to look for in the Kmer \ncontent module.\
\ Specified Kmer length must be between \n2 and 10. Default length is 7 if not\
\ specified.\n"
info: null
example:
- 10
- 7
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--bisulfite"
name: "--quiet"
alternatives:
- "-b"
description: "[Falco only] reads are whole genome \nbisulfite sequencing, and\
\ more Ts and fewer \nCs are therefore expected and will be \naccounted for\
\ in base content.\n"
- "-q"
description: "Suppress all progress messages on stdout and only report errors.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--reverse_complement"
alternatives:
- "-r"
description: "[Falco only] The input is a \nreverse-complement. All modules will\
\ be \ntested by swapping A/T and C/G\n"
info: null
direction: "input"
- name: "Output arguments"
arguments:
- type: "file"
name: "--outdir"
alternatives:
- "-o"
description: "Create all output files in the specified \noutput directory. FALCO-SPECIFIC:\
\ If the \ndirectory does not exists, the program will \ncreate it.\n"
info: null
example:
- "output"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--format"
alternatives:
- "-f"
description: "Bypasses the normal sequence file format \ndetection and forces\
\ the program to use the \nspecified format. Validformats are bam, sam, \nbam_mapped,\
\ sam_mapped, fastq, fq, fastq.gz \nor fq.gz.\n"
info: null
required: false
choices:
- "bam"
- "sam"
- "bam_mapped"
- "sam_mapped"
- "fastq"
- "fq"
- "fastq.gz"
- "fq.gz"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--data_filename"
alternatives:
- "-D"
description: "[Falco only] Specify filename for FastQC \ndata output (TXT). If\
\ not specified, it will \nbe called fastq_data.txt in either the input \nfile's\
\ directory or the one specified in the \n--output flag. Only available when\
\ running \nfalco with a single input.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--report_filename"
alternatives:
- "-R"
description: "[Falco only] Specify filename for FastQC \nreport output (HTML).\
\ If not specified, it \nwill be called fastq_report.html in either \nthe input\
\ file's directory or the one \nspecified in the --output flag. Only \navailable\
\ when running falco with a single \ninput.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--summary_filename"
alternatives:
- "-S"
description: "[Falco only] Specify filename for the short \nsummary output (TXT).\
\ If not specified, it \nwill be called fastq_report.html in either \nthe input\
\ file's directory or the one \nspecified in the --output flag. Only \navailable\
\ when running falco with a single \ninput.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "A C++ drop-in replacement of FastQC to assess the quality of sequence\
\ read data"
description: "FastQC - A high throughput sequence QC analysis tool."
test_resources:
- type: "bash_script"
path: "test.sh"
@@ -210,16 +246,16 @@ requirements:
commands:
- "ps"
keywords:
- "qc"
- "fastqc"
- "sequencing"
license: "GPL-3.0"
references:
doi:
- "10.12688/f1000research.21142.2"
- "Quality control"
- "BAM"
- "SAM"
- "FASTQ"
license: "GPL-3.0, Apache-2.0"
links:
repository: "https://github.com/smithlabcode/falco"
documentation: "https://falco.readthedocs.io/en/latest/"
repository: "https://github.com/s-andrews/FastQC"
homepage: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
documentation: "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/"
issue_tracker: "https://github.com/s-andrews/FastQC/issues"
runners:
- type: "executable"
id: "executable"
@@ -288,37 +324,24 @@ runners:
engines:
- type: "docker"
id: "docker"
image: "debian:trixie-slim"
image: "biocontainers/fastqc:v0.11.9_cv8"
target_registry: "images.viash-hub.com"
target_tag: "v0.3.1"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "wget"
- "build-essential"
- "g++"
- "zlib1g-dev"
- "procps"
interactive: false
- type: "docker"
run:
- "wget https://github.com/smithlabcode/falco/releases/download/v1.2.2/falco-1.2.2.tar.gz\
\ -O /tmp/falco.tar.gz && \\\ncd /tmp && \\\ntar xvf falco.tar.gz && \\\ncd\
\ falco-1.2.2 && \\\n./configure && \\\nmake all && \\\nmake install\n"
- type: "docker"
run:
- "echo \"falco: \\\"$(falco -v | sed -n 's/^falco //p')\\\"\" > /var/software_versions.txt\n"
- "echo \"fastqc: $(fastqc --version | sed -n 's/^FastQC //p')\" > /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/falco/config.vsh.yaml"
config: "src/fastqc/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/falco"
executable: "target/nextflow/falco/main.nf"
output: "target/nextflow/fastqc"
executable: "target/nextflow/fastqc/main.nf"
viash_version: "0.9.4"
git_commit: "98a5f3cc745525a65c10263d25cf414eb1093223"
git_remote: "https://github.com/viash-hub/biobox"

View File

@@ -1,4 +1,4 @@
// falco v0.3.1
// fastqc v0.3.1
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -10,7 +10,7 @@
// files.
//
// Component authors:
// * Toni Verbeiren (author, maintainer)
// * Theodoro Gasperin Terra Camargo (author, maintainer)
////////////////////////////
// VDSL3 helper functions //
@@ -3033,25 +3033,26 @@ nextflow.enable.dsl=2
meta = [
"resources_dir": moduleDir.toRealPath().normalize(),
"config": processConfig(readJsonBlob('''{
"name" : "falco",
"name" : "fastqc",
"version" : "v0.3.1",
"authors" : [
{
"name" : "Toni Verbeiren",
"name" : "Theodoro Gasperin Terra Camargo",
"roles" : [
"author",
"maintainer"
],
"info" : {
"links" : {
"github" : "tverbeiren",
"linkedin" : "verbeiren"
"email" : "theodorogtc@gmail.com",
"github" : "tgaspe",
"linkedin" : "theodoro-gasperin-terra-camargo"
},
"organizations" : [
{
"name" : "Data Intuitive",
"href" : "https://www.data-intuitive.com",
"role" : "Data Scientist and CEO"
"role" : "Bioinformatician"
}
]
}
@@ -3059,14 +3060,14 @@ meta = [
],
"argument_groups" : [
{
"name" : "Input arguments",
"name" : "Inputs",
"arguments" : [
{
"type" : "file",
"name" : "--input",
"description" : "input fastq files",
"description" : "FASTQ file(s) to be analyzed.\n",
"example" : [
"input1.fastq;input2.fastq"
"input.fq"
],
"must_exist" : true,
"create_parent" : true,
@@ -3078,18 +3079,131 @@ meta = [
]
},
{
"name" : "Run arguments",
"name" : "Outputs",
"description" : "At least one of the output options (--html, --zip, --summary, --data) must be used.\n",
"arguments" : [
{
"type" : "boolean_true",
"name" : "--nogroup",
"description" : "Disable grouping of bases for reads >50bp. \nAll reports will show data for every base in \nthe read. WARNING: When using this option, \nyour plots may end up a ridiculous size. You \nhave been warned!\n",
"direction" : "input"
"type" : "file",
"name" : "--html",
"description" : "Create the HTML report of the results. \n'*' wild card must be provided in the output file name. \nWild card will be replaced by the input file basename.\ne.g. \n --input \\"sample_1.fq\\"\n --html \\"*.html\\"\n would create an output html file named sample_1.html\n",
"example" : [
"*.html"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : true,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--contaminents",
"description" : "Specifies a non-default file which contains \nthe list of contaminants to screen \noverrepresented sequences against. The file \nmust contain sets of named contaminants in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/contaminant_list.txt\n",
"name" : "--zip",
"description" : "Create the zip file(s) containing: html report, data, images, icons, summary, etc.\n'*' wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \\"sample_1.fq\\"\n --zip \\"*.zip\\"\n would create an output zip file named sample_1.zip\n",
"example" : [
"*.zip"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : true,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--summary",
"description" : "Create the summary file(s).\n'*' wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \\"sample_1.fq\\"\n --summary \\"*_summary.txt\\"\n would create an output summary.txt file named sample_1_summary.txt\n",
"example" : [
"*_summary.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : true,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--data",
"description" : "Create the data file(s).\n'*' wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \\"sample_1.fq\\"\n --data \\"*_data.txt\\"\n would create an output data.txt file named sample_1_data.txt\n",
"example" : [
"*_data.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : true,
"multiple_sep" : ";"
}
]
},
{
"name" : "Options",
"arguments" : [
{
"type" : "boolean_true",
"name" : "--casava",
"description" : "Files come from raw casava output. Files in the same sample\ngroup (differing only by the group number) will be analysed\nas a set rather than individually. Sequences with the filter\nflag set in the header will be excluded from the analysis.\nFiles must have the same names given to them by casava\n(including being gzipped and ending with .gz) otherwise they\nwon't be grouped together correctly.\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--nano",
"description" : "Files come from nanopore sequences and are in fast5 format. In\nthis mode you can pass in directories to process and the program\nwill take in all fast5 files within those directories and produce\na single output file from the sequences found in all files.\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--nofilter",
"description" : "If running with --casava then don't remove reads flagged by\ncasava as poor quality when performing the QC analysis.\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--nogroup",
"description" : "Disable grouping of bases for reads >50bp. \nAll reports will show data for every base in the read. \nWARNING: Using this option will cause fastqc to crash \nand burn if you use it on really long reads, and your \nplots may end up a ridiculous size. You have been warned!\n",
"direction" : "input"
},
{
"type" : "integer",
"name" : "--min_length",
"description" : "Sets an artificial lower limit on the length of the \nsequence to be shown in the report. As long as you \nset this to a value greater or equal to your longest \nread length then this will be the sequence length used \nto create your read groups. This can be useful for making\ndirectly comparable statistics from datasets with somewhat \nvariable read lengths.\n",
"example" : [
0
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--format",
"alternatives" : [
"-f"
],
"description" : "Bypasses the normal sequence file format detection and \nforces the program to use the specified format. \nValid formats are bam, sam, bam_mapped, sam_mapped, and fastq.\n",
"example" : [
"bam"
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--contaminants",
"alternatives" : [
"-c"
],
"description" : "Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against. \nThe file must contain sets of named contaminants in the form\nname[tab]sequence. Lines prefixed with a hash will be ignored.\n",
"example" : [
"contaminants.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
@@ -3100,7 +3214,13 @@ meta = [
{
"type" : "file",
"name" : "--adapters",
"description" : "Specifies a non-default file which contains \nthe list of adapter sequences which will be \nexplicity searched against the library. The \nfile must contain sets of named adapters in \nthe form name[tab]sequence. Lines prefixed \nwith a hash will be ignored. Default:\nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/adapter_list.txt\n",
"alternatives" : [
"-a"
],
"description" : "Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library. The file must contain sets of named adapters \nin the form name[tab]sequence. Lines prefixed with a hash will be ignored.\n",
"example" : [
"adapters.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
@@ -3111,7 +3231,13 @@ meta = [
{
"type" : "file",
"name" : "--limits",
"description" : "Specifies a non-default file which contains \na set of criteria which will be used to \ndetermine the warn/error limits for the \nvarious modules. This file can also be used \nto selectively remove some modules from the \noutput all together. The format needs to \nmirror the default limits.txt file found in \nthe Configuration folder. Default: \nhttps://github.com/smithlabcode/falco/blob/v1.2.2/Configuration/limits.txt\n",
"alternatives" : [
"-l"
],
"description" : "Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules. \nThis file can also be used to selectively remove \nsome modules from the output altogether. The format \nneeds to mirror the default limits.txt file found in \nthe Configuration folder.\n",
"example" : [
"limits.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : false,
@@ -3121,13 +3247,13 @@ meta = [
},
{
"type" : "integer",
"name" : "--subsample",
"name" : "--kmers",
"alternatives" : [
"-s"
"-k"
],
"description" : "[Falco only] makes falco faster (but \npossibly less accurate) by only processing \nreads that are a multiple of this value (using \n0-based indexing to number reads).\n",
"description" : "Specifies the length of Kmer to look for in the Kmer \ncontent module. Specified Kmer length must be between \n2 and 10. Default length is 7 if not specified.\n",
"example" : [
10
7
],
"required" : false,
"direction" : "input",
@@ -3136,107 +3262,12 @@ meta = [
},
{
"type" : "boolean_true",
"name" : "--bisulfite",
"name" : "--quiet",
"alternatives" : [
"-b"
"-q"
],
"description" : "[Falco only] reads are whole genome \nbisulfite sequencing, and more Ts and fewer \nCs are therefore expected and will be \naccounted for in base content.\n",
"description" : "Suppress all progress messages on stdout and only report errors.\n",
"direction" : "input"
},
{
"type" : "boolean_true",
"name" : "--reverse_complement",
"alternatives" : [
"-r"
],
"description" : "[Falco only] The input is a \nreverse-complement. All modules will be \ntested by swapping A/T and C/G\n",
"direction" : "input"
}
]
},
{
"name" : "Output arguments",
"arguments" : [
{
"type" : "file",
"name" : "--outdir",
"alternatives" : [
"-o"
],
"description" : "Create all output files in the specified \noutput directory. FALCO-SPECIFIC: If the \ndirectory does not exists, the program will \ncreate it.\n",
"example" : [
"output"
],
"must_exist" : true,
"create_parent" : true,
"required" : true,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--format",
"alternatives" : [
"-f"
],
"description" : "Bypasses the normal sequence file format \ndetection and forces the program to use the \nspecified format. Validformats are bam, sam, \nbam_mapped, sam_mapped, fastq, fq, fastq.gz \nor fq.gz.\n",
"required" : false,
"choices" : [
"bam",
"sam",
"bam_mapped",
"sam_mapped",
"fastq",
"fq",
"fastq.gz",
"fq.gz"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--data_filename",
"alternatives" : [
"-D"
],
"description" : "[Falco only] Specify filename for FastQC \ndata output (TXT). If not specified, it will \nbe called fastq_data.txt in either the input \nfile's directory or the one specified in the \n--output flag. Only available when running \nfalco with a single input.\n",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--report_filename",
"alternatives" : [
"-R"
],
"description" : "[Falco only] Specify filename for FastQC \nreport output (HTML). If not specified, it \nwill be called fastq_report.html in either \nthe input file's directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--summary_filename",
"alternatives" : [
"-S"
],
"description" : "[Falco only] Specify filename for the short \nsummary output (TXT). If not specified, it \nwill be called fastq_report.html in either \nthe input file's directory or the one \nspecified in the --output flag. Only \navailable when running falco with a single \ninput.\n",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
}
]
}
@@ -3248,7 +3279,7 @@ meta = [
"is_executable" : true
}
],
"description" : "A C++ drop-in replacement of FastQC to assess the quality of sequence read data",
"description" : "FastQC - A high throughput sequence QC analysis tool.",
"test_resources" : [
{
"type" : "bash_script",
@@ -3267,19 +3298,17 @@ meta = [
]
},
"keywords" : [
"qc",
"fastqc",
"sequencing"
"Quality control",
"BAM",
"SAM",
"FASTQ"
],
"license" : "GPL-3.0",
"references" : {
"doi" : [
"10.12688/f1000research.21142.2"
]
},
"license" : "GPL-3.0, Apache-2.0",
"links" : {
"repository" : "https://github.com/smithlabcode/falco",
"documentation" : "https://falco.readthedocs.io/en/latest/"
"repository" : "https://github.com/s-andrews/FastQC",
"homepage" : "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/",
"documentation" : "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/",
"issue_tracker" : "https://github.com/s-andrews/FastQC/issues"
},
"runners" : [
{
@@ -3359,32 +3388,15 @@ meta = [
{
"type" : "docker",
"id" : "docker",
"image" : "debian:trixie-slim",
"image" : "biocontainers/fastqc:v0.11.9_cv8",
"target_registry" : "images.viash-hub.com",
"target_tag" : "v0.3.1",
"namespace_separator" : "/",
"setup" : [
{
"type" : "apt",
"packages" : [
"wget",
"build-essential",
"g++",
"zlib1g-dev",
"procps"
],
"interactive" : false
},
{
"type" : "docker",
"run" : [
"wget https://github.com/smithlabcode/falco/releases/download/v1.2.2/falco-1.2.2.tar.gz -O /tmp/falco.tar.gz && \\\\\ncd /tmp && \\\\\ntar xvf falco.tar.gz && \\\\\ncd falco-1.2.2 && \\\\\n./configure && \\\\\nmake all && \\\\\nmake install\n"
]
},
{
"type" : "docker",
"run" : [
"echo \\"falco: \\\\\\"$(falco -v | sed -n 's/^falco //p')\\\\\\"\\" > /var/software_versions.txt\n"
"echo \\"fastqc: $(fastqc --version | sed -n 's/^FastQC //p')\\" > /var/software_versions.txt\n"
]
}
]
@@ -3395,10 +3407,10 @@ meta = [
}
],
"build_info" : {
"config" : "/workdir/root/repo/src/falco/config.vsh.yaml",
"config" : "/workdir/root/repo/src/fastqc/config.vsh.yaml",
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/falco",
"output" : "target/nextflow/fastqc",
"viash_version" : "0.9.4",
"git_commit" : "98a5f3cc745525a65c10263d25cf414eb1093223",
"git_remote" : "https://github.com/viash-hub/biobox",
@@ -3442,21 +3454,26 @@ def innerWorkflowFactory(args) {
def rawScript = '''set -e
tempscript=".viash_script.sh"
cat > "$tempscript" << VIASHMAIN
#!/bin/bash
## VIASH START
# The following code has been auto-generated by Viash.
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
$( if [ ! -z ${VIASH_PAR_HTML+x} ]; then echo "${VIASH_PAR_HTML}" | sed "s#'#'\\"'\\"'#g;s#.*#par_html='&'#" ; else echo "# par_html="; fi )
$( if [ ! -z ${VIASH_PAR_ZIP+x} ]; then echo "${VIASH_PAR_ZIP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_zip='&'#" ; else echo "# par_zip="; fi )
$( if [ ! -z ${VIASH_PAR_SUMMARY+x} ]; then echo "${VIASH_PAR_SUMMARY}" | sed "s#'#'\\"'\\"'#g;s#.*#par_summary='&'#" ; else echo "# par_summary="; fi )
$( if [ ! -z ${VIASH_PAR_DATA+x} ]; then echo "${VIASH_PAR_DATA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data='&'#" ; else echo "# par_data="; fi )
$( if [ ! -z ${VIASH_PAR_CASAVA+x} ]; then echo "${VIASH_PAR_CASAVA}" | sed "s#'#'\\"'\\"'#g;s#.*#par_casava='&'#" ; else echo "# par_casava="; fi )
$( if [ ! -z ${VIASH_PAR_NANO+x} ]; then echo "${VIASH_PAR_NANO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nano='&'#" ; else echo "# par_nano="; fi )
$( if [ ! -z ${VIASH_PAR_NOFILTER+x} ]; then echo "${VIASH_PAR_NOFILTER}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nofilter='&'#" ; else echo "# par_nofilter="; fi )
$( if [ ! -z ${VIASH_PAR_NOGROUP+x} ]; then echo "${VIASH_PAR_NOGROUP}" | sed "s#'#'\\"'\\"'#g;s#.*#par_nogroup='&'#" ; else echo "# par_nogroup="; fi )
$( if [ ! -z ${VIASH_PAR_CONTAMINENTS+x} ]; then echo "${VIASH_PAR_CONTAMINENTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_contaminents='&'#" ; else echo "# par_contaminents="; fi )
$( if [ ! -z ${VIASH_PAR_MIN_LENGTH+x} ]; then echo "${VIASH_PAR_MIN_LENGTH}" | sed "s#'#'\\"'\\"'#g;s#.*#par_min_length='&'#" ; else echo "# par_min_length="; fi )
$( if [ ! -z ${VIASH_PAR_FORMAT+x} ]; then echo "${VIASH_PAR_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_format='&'#" ; else echo "# par_format="; fi )
$( if [ ! -z ${VIASH_PAR_CONTAMINANTS+x} ]; then echo "${VIASH_PAR_CONTAMINANTS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_contaminants='&'#" ; else echo "# par_contaminants="; fi )
$( if [ ! -z ${VIASH_PAR_ADAPTERS+x} ]; then echo "${VIASH_PAR_ADAPTERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_adapters='&'#" ; else echo "# par_adapters="; fi )
$( if [ ! -z ${VIASH_PAR_LIMITS+x} ]; then echo "${VIASH_PAR_LIMITS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_limits='&'#" ; else echo "# par_limits="; fi )
$( if [ ! -z ${VIASH_PAR_SUBSAMPLE+x} ]; then echo "${VIASH_PAR_SUBSAMPLE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_subsample='&'#" ; else echo "# par_subsample="; fi )
$( if [ ! -z ${VIASH_PAR_BISULFITE+x} ]; then echo "${VIASH_PAR_BISULFITE}" | sed "s#'#'\\"'\\"'#g;s#.*#par_bisulfite='&'#" ; else echo "# par_bisulfite="; fi )
$( if [ ! -z ${VIASH_PAR_REVERSE_COMPLEMENT+x} ]; then echo "${VIASH_PAR_REVERSE_COMPLEMENT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_reverse_complement='&'#" ; else echo "# par_reverse_complement="; fi )
$( if [ ! -z ${VIASH_PAR_OUTDIR+x} ]; then echo "${VIASH_PAR_OUTDIR}" | sed "s#'#'\\"'\\"'#g;s#.*#par_outdir='&'#" ; else echo "# par_outdir="; fi )
$( if [ ! -z ${VIASH_PAR_FORMAT+x} ]; then echo "${VIASH_PAR_FORMAT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_format='&'#" ; else echo "# par_format="; fi )
$( if [ ! -z ${VIASH_PAR_DATA_FILENAME+x} ]; then echo "${VIASH_PAR_DATA_FILENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_data_filename='&'#" ; else echo "# par_data_filename="; fi )
$( if [ ! -z ${VIASH_PAR_REPORT_FILENAME+x} ]; then echo "${VIASH_PAR_REPORT_FILENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_report_filename='&'#" ; else echo "# par_report_filename="; fi )
$( if [ ! -z ${VIASH_PAR_SUMMARY_FILENAME+x} ]; then echo "${VIASH_PAR_SUMMARY_FILENAME}" | sed "s#'#'\\"'\\"'#g;s#.*#par_summary_filename='&'#" ; else echo "# par_summary_filename="; fi )
$( if [ ! -z ${VIASH_PAR_KMERS+x} ]; then echo "${VIASH_PAR_KMERS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_kmers='&'#" ; else echo "# par_kmers="; fi )
$( if [ ! -z ${VIASH_PAR_QUIET+x} ]; then echo "${VIASH_PAR_QUIET}" | sed "s#'#'\\"'\\"'#g;s#.*#par_quiet='&'#" ; else echo "# par_quiet="; fi )
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
$( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "${VIASH_META_RESOURCES_DIR}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_resources_dir='&'#" ; else echo "# meta_resources_dir="; fi )
@@ -3477,30 +3494,87 @@ $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "${VIASH_META_MEMORY_TIB}"
$( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "${VIASH_META_MEMORY_PIB}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_memory_pib='&'#" ; else echo "# meta_memory_pib="; fi )
## VIASH END
#!/bin/bash
# exit on error
set -eo pipefail
[[ "\\$par_nogroup" == "false" ]] && unset par_nogroup
[[ "\\$par_bisulfite" == "false" ]] && unset par_bisulfite
[[ "\\$par_reverse_complement" == "false" ]] && unset par_reverse_complement
# Check if all output arguments are empty; at least one must be passed.
if [[ -z "\\$par_html" ]] && [[ -z "\\$par_zip" ]] && [[ -z "\\$par_summary" ]] && [[ -z "\\$par_data" ]]; then
echo "Error: At least one of the output arguments (--html, --zip, --summary, and --data) must be passed."
exit 1
fi
# unset flags
unset_if_false=(
par_casava
par_nano
par_nofilter
par_extract
par_noextract
par_nogroup
par_quiet
)
for par in \\${unset_if_false[@]}; do
test_val="\\${!par}"
[[ "\\$test_val" == "false" ]] && unset \\$par
done
tmpdir=\\$(mktemp -d "\\${meta_temp_dir}/\\${meta_name}-XXXXXXXX")
function clean_up {
rm -rf "\\$tmpdir"
}
trap clean_up EXIT
# Create input array
IFS=";" read -ra input <<< \\$par_input
\\$(which falco) \\\\
# Run fastqc
fastqc \\\\
--extract \\\\
\\${par_casava:+--casava} \\\\
\\${par_nano:+--nano} \\\\
\\${par_nofilter:+--nofilter} \\\\
\\${par_nogroup:+--nogroup} \\\\
\\${par_min_length:+--min_length "\\$par_min_length"} \\\\
\\${par_format:+--format "\\$par_format"} \\\\
\\${par_contaminants:+--contaminants "\\$par_contaminants"} \\\\
\\${par_adapters:+--adapters "\\$par_adapters"} \\\\
\\${par_limits:+--limits "\\$par_limits"} \\\\
\\${par_subsample:+-subsample \\$par_subsample} \\\\
\\${par_bisulfite:+-bisulfite} \\\\
\\${par_reverse_complement:+-reverse-complement} \\\\
\\${par_outdir:+--outdir "\\$par_outdir"} \\\\
\\${par_format:+--format "\\$par_format"} \\\\
\\${par_data_filename:+-data-filename "\\$par_data_filename"} \\\\
\\${par_report_filename:+-report-filename "\\$par_report_filename"} \\\\
\\${par_summary_filename:+-summary-filename "\\$par_summary_filename"} \\\\
\\${input[*]}
\\${par_kmers:+--kmers "\\$par_kmers"} \\\\
\\${par_quiet:+--quiet} \\\\
\\${meta_cpus:+--threads "\\$meta_cpus"} \\\\
\\${meta_temp_dir:+--dir "\\$meta_temp_dir"} \\\\
--outdir "\\${tmpdir}" \\\\
"\\${input[@]}"
# Move output files
for file in "\\${input[@]}"; do
# Removes everything after the first dot of the basename
sample_name=\\$(basename "\\${file}" | sed 's/\\\\..*\\$//')
if [[ -n "\\$par_html" ]]; then
input_html="\\${tmpdir}/\\${sample_name}_fastqc.html"
html_file="\\${par_html//\\\\*/\\$sample_name}"
mv "\\$input_html" "\\$html_file"
fi
if [[ -n "\\$par_zip" ]]; then
input_zip="\\${tmpdir}/\\${sample_name}_fastqc.zip"
zip_file="\\${par_zip//\\\\*/\\$sample_name}"
mv "\\$input_zip" "\\$zip_file"
fi
if [[ -n "\\$par_summary" ]]; then
summary_file="\\${tmpdir}/\\${sample_name}_fastqc/summary.txt"
new_summary="\\${par_summary//\\\\*/\\$sample_name}"
mv "\\$summary_file" "\\$new_summary"
fi
if [[ -n "\\$par_data" ]]; then
data_file="\\${tmpdir}/\\${sample_name}_fastqc/fastqc_data.txt"
new_data="\\${par_data//\\\\*/\\$sample_name}"
mv "\\$data_file" "\\$new_data"
fi
# Remove the extracted directory
rm -r "\\${tmpdir}/\\${sample_name}_fastqc"
done
VIASHMAIN
bash "$tempscript"
'''
@@ -3882,7 +3956,7 @@ meta["defaults"] = [
directives: readJsonBlob('''{
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/biobox/falco",
"image" : "vsh/biobox/fastqc",
"tag" : "v0.3.1"
},
"tag" : "$id"

View File

@@ -1,10 +1,10 @@
manifest {
name = 'falco'
name = 'fastqc'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v0.3.1'
description = 'A C++ drop-in replacement of FastQC to assess the quality of sequence read data'
author = 'Toni Verbeiren'
description = 'FastQC - A high throughput sequence QC analysis tool.'
author = 'Theodoro Gasperin Terra Camargo'
}
process.container = 'nextflow/bash:latest'

View File

@@ -0,0 +1,257 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "fastqc",
"description": "FastQC - A high throughput sequence QC analysis tool.",
"type": "object",
"definitions": {
"inputs" : {
"title": "Inputs",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed",
"help_text": "Type: List of `file`, required, example: `input.fq`, multiple_sep: `\";\"`. FASTQ file(s) to be analyzed.\n"
}
}
},
"outputs" : {
"title": "Outputs",
"type": "object",
"description": "At least one of the output options (--html, --zip, --summary, --data) must be used.\n",
"properties": {
"html": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results",
"help_text": "Type: List of `file`, default: `$id.$key.html_*.html`, example: `*.html`, multiple_sep: `\";\"`. Create the HTML report of the results. \n\u0027*\u0027 wild card must be provided in the output file name. \nWild card will be replaced by the input file basename.\ne.g. \n --input \"sample_1.fq\"\n --html \"*.html\"\n would create an output html file named sample_1.html\n"
,
"default":"$id.$key.html_*.html"
}
,
"zip": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc",
"help_text": "Type: List of `file`, default: `$id.$key.zip_*.zip`, example: `*.zip`, multiple_sep: `\";\"`. Create the zip file(s) containing: html report, data, images, icons, summary, etc.\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --zip \"*.zip\"\n would create an output zip file named sample_1.zip\n"
,
"default":"$id.$key.zip_*.zip"
}
,
"summary": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s)",
"help_text": "Type: List of `file`, default: `$id.$key.summary_*.txt`, example: `*_summary.txt`, multiple_sep: `\";\"`. Create the summary file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --summary \"*_summary.txt\"\n would create an output summary.txt file named sample_1_summary.txt\n"
,
"default":"$id.$key.summary_*.txt"
}
,
"data": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s)",
"help_text": "Type: List of `file`, default: `$id.$key.data_*.txt`, example: `*_data.txt`, multiple_sep: `\";\"`. Create the data file(s).\n\u0027*\u0027 wild card must be provided in the output file name.\nWild card will be replaced by the input basename.\ne.g. \n --input \"sample_1.fq\"\n --data \"*_data.txt\"\n would create an output data.txt file named sample_1_data.txt\n"
,
"default":"$id.$key.data_*.txt"
}
}
},
"options" : {
"title": "Options",
"type": "object",
"description": "No description",
"properties": {
"casava": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Files come from raw casava output",
"help_text": "Type: `boolean_true`, default: `false`. Files come from raw casava output. Files in the same sample\ngroup (differing only by the group number) will be analysed\nas a set rather than individually. Sequences with the filter\nflag set in the header will be excluded from the analysis.\nFiles must have the same names given to them by casava\n(including being gzipped and ending with .gz) otherwise they\nwon\u0027t be grouped together correctly.\n"
,
"default":false
}
,
"nano": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format",
"help_text": "Type: `boolean_true`, default: `false`. Files come from nanopore sequences and are in fast5 format. In\nthis mode you can pass in directories to process and the program\nwill take in all fast5 files within those directories and produce\na single output file from the sequences found in all files.\n"
,
"default":false
}
,
"nofilter": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove reads flagged by\ncasava as poor quality when performing the QC analysis",
"help_text": "Type: `boolean_true`, default: `false`. If running with --casava then don\u0027t remove reads flagged by\ncasava as poor quality when performing the QC analysis.\n"
,
"default":false
}
,
"nogroup": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp",
"help_text": "Type: `boolean_true`, default: `false`. Disable grouping of bases for reads \u003e50bp. \nAll reports will show data for every base in the read. \nWARNING: Using this option will cause fastqc to crash \nand burn if you use it on really long reads, and your \nplots may end up a ridiculous size. You have been warned!\n"
,
"default":false
}
,
"min_length": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. Sets an artificial lower limit on the length of the \nsequence to be shown in the report",
"help_text": "Type: `integer`, example: `0`. Sets an artificial lower limit on the length of the \nsequence to be shown in the report. As long as you \nset this to a value greater or equal to your longest \nread length then this will be the sequence length used \nto create your read groups. This can be useful for making\ndirectly comparable statistics from datasets with somewhat \nvariable read lengths.\n"
}
,
"format": {
"type":
"string",
"description": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format",
"help_text": "Type: `string`, example: `bam`. Bypasses the normal sequence file format detection and \nforces the program to use the specified format. \nValid formats are bam, sam, bam_mapped, sam_mapped, and fastq.\n"
}
,
"contaminants": {
"type":
"string",
"description": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against",
"help_text": "Type: `file`, example: `contaminants.txt`. Specifies a non-default file which contains the list \nof contaminants to screen overrepresented sequences against. \nThe file must contain sets of named contaminants in the form\nname[tab]sequence. Lines prefixed with a hash will be ignored.\n"
}
,
"adapters": {
"type":
"string",
"description": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library",
"help_text": "Type: `file`, example: `adapters.txt`. Specifies a non-default file which contains the list of \nadapter sequences which will be explicitly searched against \nthe library. The file must contain sets of named adapters \nin the form name[tab]sequence. Lines prefixed with a hash will be ignored.\n"
}
,
"limits": {
"type":
"string",
"description": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules",
"help_text": "Type: `file`, example: `limits.txt`. Specifies a non-default file which contains \na set of criteria which will be used to determine \nthe warn/error limits for the various modules. \nThis file can also be used to selectively remove \nsome modules from the output altogether. The format \nneeds to mirror the default limits.txt file found in \nthe Configuration folder.\n"
}
,
"kmers": {
"type":
"integer",
"description": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module",
"help_text": "Type: `integer`, example: `7`. Specifies the length of Kmer to look for in the Kmer \ncontent module. Specified Kmer length must be between \n2 and 10. Default length is 7 if not specified.\n"
}
,
"quiet": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors",
"help_text": "Type: `boolean_true`, default: `false`. Suppress all progress messages on stdout and only report errors.\n"
,
"default":false
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/inputs"
},
{
"$ref": "#/definitions/outputs"
},
{
"$ref": "#/definitions/options"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -157,16 +157,16 @@ build_info:
output: "target/executable/io/interop_summary_to_csv"
executable: "target/executable/io/interop_summary_to_csv/interop_summary_to_csv"
viash_version: "0.9.4"
git_commit: "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
git_commit: "6e71519815566a057711019a23a56a22479dd655"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-30-g219bd58"
git_tag: "v0.1.1-31-g6e71519"
package_config:
name: "demultiplex"
version: "main"
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-resources/demultiplex/v3"
- path: "gs://viash-hub-resources/demultiplex/v4"
dest: "testData"
viash_version: "0.9.4"
source: "src"

View File

@@ -454,9 +454,9 @@ tar -C /tmp/ --no-same-owner --no-same-permissions -xvf /tmp/interop.tar.gz && \
mv /tmp/interop-1.3.1-Linux-GNU/bin/index-summary /tmp/interop-1.3.1-Linux-GNU/bin/summary /usr/local/bin/
LABEL org.opencontainers.image.description="Companion container for running component io interop_summary_to_csv"
LABEL org.opencontainers.image.created="2025-05-26T10:28:13Z"
LABEL org.opencontainers.image.created="2025-05-28T11:02:52Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex"
LABEL org.opencontainers.image.revision="219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
LABEL org.opencontainers.image.revision="6e71519815566a057711019a23a56a22479dd655"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -15,8 +15,8 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_falco"
description: "Directory to write falco output to"
name: "--input_sample_qc"
description: "Directory to write sample QC output to"
info: null
must_exist: true
create_parent: true
@@ -67,7 +67,7 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_falco"
name: "--output_sample_qc"
info: null
default:
- "qc/fastqc"
@@ -219,16 +219,16 @@ build_info:
output: "target/executable/io/publish"
executable: "target/executable/io/publish/publish"
viash_version: "0.9.4"
git_commit: "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
git_commit: "6e71519815566a057711019a23a56a22479dd655"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-30-g219bd58"
git_tag: "v0.1.1-31-g6e71519"
package_config:
name: "demultiplex"
version: "main"
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-resources/demultiplex/v3"
- path: "gs://viash-hub-resources/demultiplex/v4"
dest: "testData"
viash_version: "0.9.4"
source: "src"

View File

@@ -450,9 +450,9 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*
LABEL org.opencontainers.image.description="Companion container for running component io publish"
LABEL org.opencontainers.image.created="2025-05-26T10:28:13Z"
LABEL org.opencontainers.image.created="2025-05-28T11:02:51Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex"
LABEL org.opencontainers.image.revision="219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
LABEL org.opencontainers.image.revision="6e71519815566a057711019a23a56a22479dd655"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER
@@ -579,9 +579,9 @@ function ViashHelp {
echo " type: file, required parameter, file must exist"
echo " Directory to write fastq data to"
echo ""
echo " --input_falco"
echo " --input_sample_qc"
echo " type: file, required parameter, multiple values allowed, file must exist"
echo " Directory to write falco output to"
echo " Directory to write sample QC output to"
echo ""
echo " --input_multiqc"
echo " type: file, required parameter, file must exist"
@@ -599,7 +599,7 @@ function ViashHelp {
echo " type: file, output, file must exist"
echo " default: fastq"
echo ""
echo " --output_falco"
echo " --output_sample_qc"
echo " type: file, output, file must exist"
echo " default: qc/fastqc"
echo ""
@@ -676,20 +676,20 @@ while [[ $# -gt 0 ]]; do
VIASH_PAR_INPUT=$(ViashRemoveFlags "$1")
shift 1
;;
--input_falco)
if [ -z "$VIASH_PAR_INPUT_FALCO" ]; then
VIASH_PAR_INPUT_FALCO="$2"
--input_sample_qc)
if [ -z "$VIASH_PAR_INPUT_SAMPLE_QC" ]; then
VIASH_PAR_INPUT_SAMPLE_QC="$2"
else
VIASH_PAR_INPUT_FALCO="$VIASH_PAR_INPUT_FALCO;""$2"
VIASH_PAR_INPUT_SAMPLE_QC="$VIASH_PAR_INPUT_SAMPLE_QC;""$2"
fi
[ $# -lt 2 ] && ViashError Not enough arguments passed to --input_falco. Use "--help" to get more information on the parameters. && exit 1
[ $# -lt 2 ] && ViashError Not enough arguments passed to --input_sample_qc. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--input_falco=*)
if [ -z "$VIASH_PAR_INPUT_FALCO" ]; then
VIASH_PAR_INPUT_FALCO=$(ViashRemoveFlags "$1")
--input_sample_qc=*)
if [ -z "$VIASH_PAR_INPUT_SAMPLE_QC" ]; then
VIASH_PAR_INPUT_SAMPLE_QC=$(ViashRemoveFlags "$1")
else
VIASH_PAR_INPUT_FALCO="$VIASH_PAR_INPUT_FALCO;"$(ViashRemoveFlags "$1")
VIASH_PAR_INPUT_SAMPLE_QC="$VIASH_PAR_INPUT_SAMPLE_QC;"$(ViashRemoveFlags "$1")
fi
shift 1
;;
@@ -737,15 +737,15 @@ while [[ $# -gt 0 ]]; do
VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1")
shift 1
;;
--output_falco)
[ -n "$VIASH_PAR_OUTPUT_FALCO" ] && ViashError Bad arguments for option \'--output_falco\': \'$VIASH_PAR_OUTPUT_FALCO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_OUTPUT_FALCO="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --output_falco. Use "--help" to get more information on the parameters. && exit 1
--output_sample_qc)
[ -n "$VIASH_PAR_OUTPUT_SAMPLE_QC" ] && ViashError Bad arguments for option \'--output_sample_qc\': \'$VIASH_PAR_OUTPUT_SAMPLE_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_OUTPUT_SAMPLE_QC="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --output_sample_qc. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--output_falco=*)
[ -n "$VIASH_PAR_OUTPUT_FALCO" ] && ViashError Bad arguments for option \'--output_falco=*\': \'$VIASH_PAR_OUTPUT_FALCO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_OUTPUT_FALCO=$(ViashRemoveFlags "$1")
--output_sample_qc=*)
[ -n "$VIASH_PAR_OUTPUT_SAMPLE_QC" ] && ViashError Bad arguments for option \'--output_sample_qc=*\': \'$VIASH_PAR_OUTPUT_SAMPLE_QC\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_OUTPUT_SAMPLE_QC=$(ViashRemoveFlags "$1")
shift 1
;;
--output_multiqc)
@@ -957,8 +957,8 @@ if [ -z ${VIASH_PAR_INPUT+x} ]; then
ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
exit 1
fi
if [ -z ${VIASH_PAR_INPUT_FALCO+x} ]; then
ViashError '--input_falco' is a required argument. Use "--help" to get more information on the parameters.
if [ -z ${VIASH_PAR_INPUT_SAMPLE_QC+x} ]; then
ViashError '--input_sample_qc' is a required argument. Use "--help" to get more information on the parameters.
exit 1
fi
if [ -z ${VIASH_PAR_INPUT_MULTIQC+x} ]; then
@@ -1002,8 +1002,8 @@ fi
if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
VIASH_PAR_OUTPUT="fastq"
fi
if [ -z ${VIASH_PAR_OUTPUT_FALCO+x} ]; then
VIASH_PAR_OUTPUT_FALCO="qc/fastqc"
if [ -z ${VIASH_PAR_OUTPUT_SAMPLE_QC+x} ]; then
VIASH_PAR_OUTPUT_SAMPLE_QC="qc/fastqc"
fi
if [ -z ${VIASH_PAR_OUTPUT_MULTIQC+x} ]; then
VIASH_PAR_OUTPUT_MULTIQC="qc/multiqc_report.html"
@@ -1020,10 +1020,10 @@ if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then
ViashError "Input file '$VIASH_PAR_INPUT' does not exist."
exit 1
fi
if [ ! -z "$VIASH_PAR_INPUT_FALCO" ]; then
if [ ! -z "$VIASH_PAR_INPUT_SAMPLE_QC" ]; then
IFS=';'
set -f
for file in $VIASH_PAR_INPUT_FALCO; do
for file in $VIASH_PAR_INPUT_SAMPLE_QC; do
unset IFS
if [ ! -e "$file" ]; then
ViashError "Input file '$file' does not exist."
@@ -1123,8 +1123,8 @@ fi
if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then
mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")"
fi
if [ ! -z "$VIASH_PAR_OUTPUT_FALCO" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_FALCO")" ]; then
mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_FALCO")"
if [ ! -z "$VIASH_PAR_OUTPUT_SAMPLE_QC" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_SAMPLE_QC")" ]; then
mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_SAMPLE_QC")"
fi
if [ ! -z "$VIASH_PAR_OUTPUT_MULTIQC" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_MULTIQC")" ]; then
mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_MULTIQC")"
@@ -1152,16 +1152,16 @@ if [ ! -z "$VIASH_PAR_INPUT" ]; then
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" )
VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT")
fi
if [ ! -z "$VIASH_PAR_INPUT_FALCO" ]; then
VIASH_TEST_INPUT_FALCO=()
if [ ! -z "$VIASH_PAR_INPUT_SAMPLE_QC" ]; then
VIASH_TEST_INPUT_SAMPLE_QC=()
IFS=';'
for var in $VIASH_PAR_INPUT_FALCO; do
for var in $VIASH_PAR_INPUT_SAMPLE_QC; do
unset IFS
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" )
var=$(ViashDockerAutodetectMount "$var")
VIASH_TEST_INPUT_FALCO+=( "$var" )
VIASH_TEST_INPUT_SAMPLE_QC+=( "$var" )
done
VIASH_PAR_INPUT_FALCO=$(IFS=';' ; echo "${VIASH_TEST_INPUT_FALCO[*]}")
VIASH_PAR_INPUT_SAMPLE_QC=$(IFS=';' ; echo "${VIASH_TEST_INPUT_SAMPLE_QC[*]}")
fi
if [ ! -z "$VIASH_PAR_INPUT_MULTIQC" ]; then
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT_MULTIQC")" )
@@ -1180,10 +1180,10 @@ if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT")
VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" )
fi
if [ ! -z "$VIASH_PAR_OUTPUT_FALCO" ]; then
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_FALCO")" )
VIASH_PAR_OUTPUT_FALCO=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_FALCO")
VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_FALCO" )
if [ ! -z "$VIASH_PAR_OUTPUT_SAMPLE_QC" ]; then
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_SAMPLE_QC")" )
VIASH_PAR_OUTPUT_SAMPLE_QC=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_SAMPLE_QC")
VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_SAMPLE_QC" )
fi
if [ ! -z "$VIASH_PAR_OUTPUT_MULTIQC" ]; then
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_MULTIQC")" )
@@ -1270,12 +1270,12 @@ cat > "\$tempscript" << 'VIASHMAIN'
## VIASH START
# The following code has been auto-generated by Viash.
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_FALCO+x} ]; then echo "${VIASH_PAR_INPUT_FALCO}" | sed "s#'#'\"'\"'#g;s#.*#par_input_falco='&'#" ; else echo "# par_input_falco="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_SAMPLE_QC+x} ]; then echo "${VIASH_PAR_INPUT_SAMPLE_QC}" | sed "s#'#'\"'\"'#g;s#.*#par_input_sample_qc='&'#" ; else echo "# par_input_sample_qc="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_MULTIQC+x} ]; then echo "${VIASH_PAR_INPUT_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_input_multiqc='&'#" ; else echo "# par_input_multiqc="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_RUN_INFORMATION+x} ]; then echo "${VIASH_PAR_INPUT_RUN_INFORMATION}" | sed "s#'#'\"'\"'#g;s#.*#par_input_run_information='&'#" ; else echo "# par_input_run_information="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS+x} ]; then echo "${VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS}" | sed "s#'#'\"'\"'#g;s#.*#par_input_demultiplexer_logs='&'#" ; else echo "# par_input_demultiplexer_logs="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_FALCO+x} ]; then echo "${VIASH_PAR_OUTPUT_FALCO}" | sed "s#'#'\"'\"'#g;s#.*#par_output_falco='&'#" ; else echo "# par_output_falco="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_SAMPLE_QC+x} ]; then echo "${VIASH_PAR_OUTPUT_SAMPLE_QC}" | sed "s#'#'\"'\"'#g;s#.*#par_output_sample_qc='&'#" ; else echo "# par_output_sample_qc="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_MULTIQC+x} ]; then echo "${VIASH_PAR_OUTPUT_MULTIQC}" | sed "s#'#'\"'\"'#g;s#.*#par_output_multiqc='&'#" ; else echo "# par_output_multiqc="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_RUN_INFORMATION+x} ]; then echo "${VIASH_PAR_OUTPUT_RUN_INFORMATION}" | sed "s#'#'\"'\"'#g;s#.*#par_output_run_information='&'#" ; else echo "# par_output_run_information="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS+x} ]; then echo "${VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS}" | sed "s#'#'\"'\"'#g;s#.*#par_output_demultiplexer_logs='&'#" ; else echo "# par_output_demultiplexer_logs="; fi )
@@ -1326,12 +1326,12 @@ do
ls "\$output_location"
done
echo "Grouping output from \$par_input_falco into \$par_output_falco"
mkdir -p "\$par_output_falco"
IFS=";" read -ra falco_inputs <<< \$par_input_falco
for falco_dir in "\${falco_inputs[@]}"; do
echo "Copying contents of \$falco_dir"
find -H -D exec "\$falco_dir" -type f -maxdepth 1 -exec cp -t "\$par_output_falco" {} +
echo "Grouping output from \$par_input_sample_qc into \$par_output_sample_qc"
mkdir -p "\$par_output_sample_qc"
IFS=";" read -ra sample_qc_inputs <<< \$par_input_sample_qc
for qc_dir in "\${sample_qc_inputs[@]}"; do
echo "Copying contents of \$qc_dir"
find -H -D exec "\$qc_dir" -type f -maxdepth 1 -exec cp -t "\$par_output_sample_qc" {} +
done
VIASHMAIN
bash "\$tempscript" &
@@ -1346,18 +1346,18 @@ if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
if [ ! -z "$VIASH_PAR_INPUT" ]; then
VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT")
fi
if [ ! -z "$VIASH_PAR_INPUT_FALCO" ]; then
unset VIASH_TEST_INPUT_FALCO
if [ ! -z "$VIASH_PAR_INPUT_SAMPLE_QC" ]; then
unset VIASH_TEST_INPUT_SAMPLE_QC
IFS=';'
for var in $VIASH_PAR_INPUT_FALCO; do
for var in $VIASH_PAR_INPUT_SAMPLE_QC; do
unset IFS
if [ -z "$VIASH_TEST_INPUT_FALCO" ]; then
VIASH_TEST_INPUT_FALCO="$(ViashDockerStripAutomount "$var")"
if [ -z "$VIASH_TEST_INPUT_SAMPLE_QC" ]; then
VIASH_TEST_INPUT_SAMPLE_QC="$(ViashDockerStripAutomount "$var")"
else
VIASH_TEST_INPUT_FALCO="$VIASH_TEST_INPUT_FALCO;""$(ViashDockerStripAutomount "$var")"
VIASH_TEST_INPUT_SAMPLE_QC="$VIASH_TEST_INPUT_SAMPLE_QC;""$(ViashDockerStripAutomount "$var")"
fi
done
VIASH_PAR_INPUT_FALCO="$VIASH_TEST_INPUT_FALCO"
VIASH_PAR_INPUT_SAMPLE_QC="$VIASH_TEST_INPUT_SAMPLE_QC"
fi
if [ ! -z "$VIASH_PAR_INPUT_MULTIQC" ]; then
VIASH_PAR_INPUT_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT_MULTIQC")
@@ -1371,8 +1371,8 @@ if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT")
fi
if [ ! -z "$VIASH_PAR_OUTPUT_FALCO" ]; then
VIASH_PAR_OUTPUT_FALCO=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_FALCO")
if [ ! -z "$VIASH_PAR_OUTPUT_SAMPLE_QC" ]; then
VIASH_PAR_OUTPUT_SAMPLE_QC=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_SAMPLE_QC")
fi
if [ ! -z "$VIASH_PAR_OUTPUT_MULTIQC" ]; then
VIASH_PAR_OUTPUT_MULTIQC=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_MULTIQC")
@@ -1403,8 +1403,8 @@ if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then
ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist."
exit 1
fi
if [ ! -z "$VIASH_PAR_OUTPUT_FALCO" ] && [ ! -e "$VIASH_PAR_OUTPUT_FALCO" ]; then
ViashError "Output file '$VIASH_PAR_OUTPUT_FALCO' does not exist."
if [ ! -z "$VIASH_PAR_OUTPUT_SAMPLE_QC" ] && [ ! -e "$VIASH_PAR_OUTPUT_SAMPLE_QC" ]; then
ViashError "Output file '$VIASH_PAR_OUTPUT_SAMPLE_QC' does not exist."
exit 1
fi
if [ ! -z "$VIASH_PAR_OUTPUT_MULTIQC" ] && [ ! -e "$VIASH_PAR_OUTPUT_MULTIQC" ]; then

View File

@@ -156,16 +156,16 @@ build_info:
output: "target/executable/io/untar"
executable: "target/executable/io/untar/untar"
viash_version: "0.9.4"
git_commit: "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
git_commit: "6e71519815566a057711019a23a56a22479dd655"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-30-g219bd58"
git_tag: "v0.1.1-31-g6e71519"
package_config:
name: "demultiplex"
version: "main"
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-resources/demultiplex/v3"
- path: "gs://viash-hub-resources/demultiplex/v4"
dest: "testData"
viash_version: "0.9.4"
source: "src"

View File

@@ -450,9 +450,9 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*
LABEL org.opencontainers.image.description="Companion container for running component io untar"
LABEL org.opencontainers.image.created="2025-05-26T10:28:12Z"
LABEL org.opencontainers.image.created="2025-05-28T11:02:51Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/demultiplex"
LABEL org.opencontainers.image.revision="219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
LABEL org.opencontainers.image.revision="6e71519815566a057711019a23a56a22479dd655"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -31,7 +31,7 @@ argument_groups:
multiple: true
multiple_sep: ";"
- type: "file"
name: "--falco_dir"
name: "--sample_qc_dir"
info: null
must_exist: true
create_parent: true
@@ -60,7 +60,7 @@ argument_groups:
multiple: true
multiple_sep: ";"
- type: "file"
name: "--output_falco"
name: "--output_sample_qc"
info: null
must_exist: true
create_parent: true
@@ -165,16 +165,16 @@ build_info:
output: "target/nextflow/dataflow/combine_samples"
executable: "target/nextflow/dataflow/combine_samples/main.nf"
viash_version: "0.9.4"
git_commit: "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
git_commit: "6e71519815566a057711019a23a56a22479dd655"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-30-g219bd58"
git_tag: "v0.1.1-31-g6e71519"
package_config:
name: "demultiplex"
version: "main"
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-resources/demultiplex/v3"
- path: "gs://viash-hub-resources/demultiplex/v4"
dest: "testData"
viash_version: "0.9.4"
source: "src"

View File

@@ -3068,7 +3068,7 @@ meta = [
},
{
"type" : "file",
"name" : "--falco_dir",
"name" : "--sample_qc_dir",
"must_exist" : true,
"create_parent" : true,
"required" : true,
@@ -3103,7 +3103,7 @@ meta = [
},
{
"type" : "file",
"name" : "--output_falco",
"name" : "--output_sample_qc",
"must_exist" : true,
"create_parent" : true,
"required" : true,
@@ -3230,9 +3230,9 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/dataflow/combine_samples",
"viash_version" : "0.9.4",
"git_commit" : "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880",
"git_commit" : "6e71519815566a057711019a23a56a22479dd655",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-30-g219bd58"
"git_tag" : "v0.1.1-31-g6e71519"
},
"package_config" : {
"name" : "demultiplex",
@@ -3241,7 +3241,7 @@ meta = [
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-resources/demultiplex/v3",
"path" : "gs://viash-hub-resources/demultiplex/v4",
"dest" : "testData"
}
]
@@ -3291,12 +3291,12 @@ workflow run_wf {
// Gather the following state for all samples
def forward_fastqs = states.collect{it.forward_input}.flatten()
def reverse_fastqs = states.collect{it.reverse_input}.findAll{it != null}.flatten()
def falco_dirs = states.collect{it.falco_dir}
def sample_qc_dirs = states.collect{it.sample_qc_dir}
def resultState = [
"output_forward": forward_fastqs,
"output_reverse": reverse_fastqs,
"output_falco": falco_dirs,
"output_sample_qc": sample_qc_dirs,
// The join ID is the same across all samples from the same run
"_meta": ["join_id": states[0]._meta.join_id]
]

View File

@@ -33,7 +33,7 @@
"description": "",
"help_text": "Type: `file`, multiple: `True`, direction: `input`. "
},
"falco_dir": {
"sample_qc_dir": {
"type": "string",
"format": "path",
"exists": true,
@@ -67,15 +67,15 @@
"help_text": "Type: `file`, multiple: `True`, default: `\"$id.$key.output_reverse_*\"`, direction: `output`. ",
"default": "$id.$key.output_reverse_*"
},
"output_falco": {
"output_sample_qc": {
"type": "array",
"items": {
"type": "string"
},
"format": "path",
"description": "",
"help_text": "Type: `file`, multiple: `True`, required, default: `\"$id.$key.output_falco_*\"`, direction: `output`. ",
"default": "$id.$key.output_falco_*"
"help_text": "Type: `file`, multiple: `True`, required, default: `\"$id.$key.output_sample_qc_*\"`, direction: `output`. ",
"default": "$id.$key.output_sample_qc_*"
}
}
},

View File

@@ -156,16 +156,16 @@ build_info:
output: "target/nextflow/dataflow/gather_fastqs_and_validate"
executable: "target/nextflow/dataflow/gather_fastqs_and_validate/main.nf"
viash_version: "0.9.4"
git_commit: "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
git_commit: "6e71519815566a057711019a23a56a22479dd655"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-30-g219bd58"
git_tag: "v0.1.1-31-g6e71519"
package_config:
name: "demultiplex"
version: "main"
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-resources/demultiplex/v3"
- path: "gs://viash-hub-resources/demultiplex/v4"
dest: "testData"
viash_version: "0.9.4"
source: "src"

View File

@@ -3227,9 +3227,9 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/dataflow/gather_fastqs_and_validate",
"viash_version" : "0.9.4",
"git_commit" : "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880",
"git_commit" : "6e71519815566a057711019a23a56a22479dd655",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-30-g219bd58"
"git_tag" : "v0.1.1-31-g6e71519"
},
"package_config" : {
"name" : "demultiplex",
@@ -3238,7 +3238,7 @@ meta = [
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-resources/demultiplex/v3",
"path" : "gs://viash-hub-resources/demultiplex/v4",
"dest" : "testData"
}
]

View File

@@ -62,8 +62,8 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_falco"
description: "Directory to write falco output to"
name: "--output_sample_qc"
description: "Directory to write FastQC output to"
info: null
default:
- "$id/qc/fastqc"
@@ -74,8 +74,8 @@ argument_groups:
multiple: true
multiple_sep: ";"
- type: "file"
name: "--output_multiqc"
description: "Directory to write falco output to"
name: "--multiqc_output"
description: "Location where to write MultiQC output to"
info: null
default:
- "$id/qc/multiqc_report.html"
@@ -133,6 +133,10 @@ test_resources:
path: "test.nf"
is_executable: true
entrypoint: "test_bases2fastq"
- type: "nextflow_script"
path: "test.nf"
is_executable: true
entrypoint: "test_no_index"
info: null
status: "enabled"
scope:
@@ -164,7 +168,7 @@ dependencies:
type: "vsh"
repo: "biobox"
tag: "v0.3.1"
- name: "falco"
- name: "fastqc"
repository:
type: "vsh"
repo: "biobox"
@@ -258,9 +262,9 @@ build_info:
output: "target/nextflow/demultiplex"
executable: "target/nextflow/demultiplex/main.nf"
viash_version: "0.9.4"
git_commit: "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
git_commit: "6e71519815566a057711019a23a56a22479dd655"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-30-g219bd58"
git_tag: "v0.1.1-31-g6e71519"
dependencies:
- "target/nextflow/io/untar"
- "target/nextflow/dataflow/gather_fastqs_and_validate"
@@ -268,7 +272,7 @@ build_info:
- "target/nextflow/dataflow/combine_samples"
- "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert"
- "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq"
- "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco"
- "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc"
- "target/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc"
package_config:
name: "demultiplex"
@@ -276,7 +280,7 @@ package_config:
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-resources/demultiplex/v3"
- path: "gs://viash-hub-resources/demultiplex/v4"
dest: "testData"
viash_version: "0.9.4"
source: "src"

View File

@@ -3101,8 +3101,8 @@ meta = [
},
{
"type" : "file",
"name" : "--output_falco",
"description" : "Directory to write falco output to",
"name" : "--output_sample_qc",
"description" : "Directory to write FastQC output to",
"default" : [
"$id/qc/fastqc"
],
@@ -3115,8 +3115,8 @@ meta = [
},
{
"type" : "file",
"name" : "--output_multiqc",
"description" : "Directory to write falco output to",
"name" : "--multiqc_output",
"description" : "Location where to write MultiQC output to",
"default" : [
"$id/qc/multiqc_report.html"
],
@@ -3193,6 +3193,12 @@ meta = [
"path" : "test.nf",
"is_executable" : true,
"entrypoint" : "test_bases2fastq"
},
{
"type" : "nextflow_script",
"path" : "test.nf",
"is_executable" : true,
"entrypoint" : "test_no_index"
}
],
"status" : "enabled",
@@ -3247,7 +3253,7 @@ meta = [
}
},
{
"name" : "falco",
"name" : "fastqc",
"repository" : {
"type" : "vsh",
"repo" : "biobox",
@@ -3363,9 +3369,9 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/demultiplex",
"viash_version" : "0.9.4",
"git_commit" : "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880",
"git_commit" : "6e71519815566a057711019a23a56a22479dd655",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-30-g219bd58"
"git_tag" : "v0.1.1-31-g6e71519"
},
"package_config" : {
"name" : "demultiplex",
@@ -3374,7 +3380,7 @@ meta = [
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-resources/demultiplex/v3",
"path" : "gs://viash-hub-resources/demultiplex/v4",
"dest" : "testData"
}
]
@@ -3412,7 +3418,7 @@ include { interop_summary_to_csv } from "${meta.resources_dir}/../../nextflow/io
include { combine_samples } from "${meta.resources_dir}/../../nextflow/dataflow/combine_samples/main.nf"
include { bcl_convert } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bcl_convert/main.nf"
include { bases2fastq } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/bases2fastq/main.nf"
include { falco } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/falco/main.nf"
include { fastqc } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/fastqc/main.nf"
include { multiqc } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.1/nextflow/multiqc/main.nf"
// inner workflow
@@ -3599,19 +3605,21 @@ workflow run_wf {
)
output_ch = samples_ch
| falco.run(
| fastqc.run(
directives: [label: ["verylowcpu", "lowmem"]],
fromState: {id, state ->
def output_base = "$id/qc/fastqc/*"
[
"input": [state.fastq_forward, state.fastq_reverse],
"outdir": "$id/qc/falco",
"summary_filename": null,
"report_filename": null,
"data_filename": null,
"html": "${output_base}_fastqc_report.html",
"summary": "${output_base}_summary.txt",
"data": "${output_base}_fastqc_data.txt",
]
},
toState: { id, result, state ->
state + [ "output_falco" : result.outdir ]
// The output directory for all files above is the same:
// take the directory from one of the files
state + [ "output_sample_qc": result.html[0].parent ]
}
)
@@ -3621,13 +3629,13 @@ workflow run_wf {
"id": state.run_id,
"forward_input": state.fastq_forward,
"reverse_input": state.fastq_reverse,
"falco_dir": state.output_falco,
"sample_qc_dir": state.output_sample_qc,
]
},
toState: [
"forward_fastqs": "output_forward",
"reverse_fastqs": "output_reverse",
"output_falco": "output_falco",
"output_sample_qc": "output_sample_qc",
]
)
@@ -3635,8 +3643,8 @@ workflow run_wf {
directives: [label: ["midcpu", "midmem"]],
fromState: {id, state ->
def new_state = [
"input": state.output_falco,
"output_report": state.output_multiqc,
"input": state.output_sample_qc,
"output_report": state.multiqc_output,
"cl_config": 'sp: {fastqc/data: {fn: "*_fastqc_data.txt"}}'
]
if (state.demultiplexer == "bclconvert") {
@@ -3648,7 +3656,7 @@ workflow run_wf {
return new_state
},
toState: { id, result, state ->
state + [ "output_multiqc" : result.output_report ]
state + [ "multiqc_output" : result.output_report ]
}
)
@@ -3656,8 +3664,8 @@ workflow run_wf {
[
//"_meta": "_meta",
"output": "output_demultiplexer",
"output_falco": "output_falco",
"output_multiqc": "output_multiqc",
"output_sample_qc": "output_sample_qc",
"multiqc_output": "multiqc_output",
"output_run_information": "run_information",
"demultiplexer_logs": "demultiplexer_logs"
]

View File

@@ -50,22 +50,22 @@
"help_text": "Type: `file`, multiple: `False`, default: `\"$id/fastq\"`, direction: `output`. ",
"default": "$id/fastq"
},
"output_falco": {
"output_sample_qc": {
"type": "array",
"items": {
"type": "string"
},
"format": "path",
"description": "Directory to write falco output to",
"description": "Directory to write FastQC output to",
"help_text": "Type: `file`, multiple: `True`, default: `[\"$id/qc/fastqc\"]`, direction: `output`. ",
"default": [
"$id/qc/fastqc"
]
},
"output_multiqc": {
"multiqc_output": {
"type": "string",
"format": "path",
"description": "Directory to write falco output to",
"description": "Location where to write MultiQC output to",
"help_text": "Type: `file`, multiple: `False`, default: `\"$id/qc/multiqc_report.html\"`, direction: `output`. ",
"default": "$id/qc/multiqc_report.html"
},

View File

@@ -157,16 +157,16 @@ build_info:
output: "target/nextflow/io/interop_summary_to_csv"
executable: "target/nextflow/io/interop_summary_to_csv/main.nf"
viash_version: "0.9.4"
git_commit: "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
git_commit: "6e71519815566a057711019a23a56a22479dd655"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-30-g219bd58"
git_tag: "v0.1.1-31-g6e71519"
package_config:
name: "demultiplex"
version: "main"
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-resources/demultiplex/v3"
- path: "gs://viash-hub-resources/demultiplex/v4"
dest: "testData"
viash_version: "0.9.4"
source: "src"

View File

@@ -3228,9 +3228,9 @@ meta = [
"engine" : "docker|native",
"output" : "target/nextflow/io/interop_summary_to_csv",
"viash_version" : "0.9.4",
"git_commit" : "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880",
"git_commit" : "6e71519815566a057711019a23a56a22479dd655",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-30-g219bd58"
"git_tag" : "v0.1.1-31-g6e71519"
},
"package_config" : {
"name" : "demultiplex",
@@ -3239,7 +3239,7 @@ meta = [
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-resources/demultiplex/v3",
"path" : "gs://viash-hub-resources/demultiplex/v4",
"dest" : "testData"
}
]

View File

@@ -15,8 +15,8 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_falco"
description: "Directory to write falco output to"
name: "--input_sample_qc"
description: "Directories containing the sample QC output"
info: null
must_exist: true
create_parent: true
@@ -67,7 +67,7 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_falco"
name: "--output_sample_qc"
info: null
default:
- "qc/fastqc"
@@ -219,16 +219,16 @@ build_info:
output: "target/nextflow/io/publish"
executable: "target/nextflow/io/publish/main.nf"
viash_version: "0.9.4"
git_commit: "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
git_commit: "6e71519815566a057711019a23a56a22479dd655"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-30-g219bd58"
git_tag: "v0.1.1-31-g6e71519"
package_config:
name: "demultiplex"
version: "main"
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-resources/demultiplex/v3"
- path: "gs://viash-hub-resources/demultiplex/v4"
dest: "testData"
viash_version: "0.9.4"
source: "src"

View File

@@ -3050,8 +3050,8 @@ meta = [
},
{
"type" : "file",
"name" : "--input_falco",
"description" : "Directory to write falco output to",
"name" : "--input_sample_qc",
"description" : "Directories containing the sample QC output",
"must_exist" : true,
"create_parent" : true,
"required" : true,
@@ -3111,7 +3111,7 @@ meta = [
},
{
"type" : "file",
"name" : "--output_falco",
"name" : "--output_sample_qc",
"default" : [
"qc/fastqc"
],
@@ -3297,9 +3297,9 @@ meta = [
"engine" : "docker|native",
"output" : "target/nextflow/io/publish",
"viash_version" : "0.9.4",
"git_commit" : "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880",
"git_commit" : "6e71519815566a057711019a23a56a22479dd655",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-30-g219bd58"
"git_tag" : "v0.1.1-31-g6e71519"
},
"package_config" : {
"name" : "demultiplex",
@@ -3308,7 +3308,7 @@ meta = [
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-resources/demultiplex/v3",
"path" : "gs://viash-hub-resources/demultiplex/v4",
"dest" : "testData"
}
]
@@ -3350,12 +3350,12 @@ cat > "$tempscript" << VIASHMAIN
## VIASH START
# The following code has been auto-generated by Viash.
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_FALCO+x} ]; then echo "${VIASH_PAR_INPUT_FALCO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_falco='&'#" ; else echo "# par_input_falco="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_SAMPLE_QC+x} ]; then echo "${VIASH_PAR_INPUT_SAMPLE_QC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_sample_qc='&'#" ; else echo "# par_input_sample_qc="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_MULTIQC+x} ]; then echo "${VIASH_PAR_INPUT_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_multiqc='&'#" ; else echo "# par_input_multiqc="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_RUN_INFORMATION+x} ]; then echo "${VIASH_PAR_INPUT_RUN_INFORMATION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_run_information='&'#" ; else echo "# par_input_run_information="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS+x} ]; then echo "${VIASH_PAR_INPUT_DEMULTIPLEXER_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_demultiplexer_logs='&'#" ; else echo "# par_input_demultiplexer_logs="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_FALCO+x} ]; then echo "${VIASH_PAR_OUTPUT_FALCO}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_falco='&'#" ; else echo "# par_output_falco="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_SAMPLE_QC+x} ]; then echo "${VIASH_PAR_OUTPUT_SAMPLE_QC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_sample_qc='&'#" ; else echo "# par_output_sample_qc="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_MULTIQC+x} ]; then echo "${VIASH_PAR_OUTPUT_MULTIQC}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_multiqc='&'#" ; else echo "# par_output_multiqc="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_RUN_INFORMATION+x} ]; then echo "${VIASH_PAR_OUTPUT_RUN_INFORMATION}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_run_information='&'#" ; else echo "# par_output_run_information="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS+x} ]; then echo "${VIASH_PAR_OUTPUT_DEMULTIPLEXER_LOGS}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output_demultiplexer_logs='&'#" ; else echo "# par_output_demultiplexer_logs="; fi )
@@ -3406,12 +3406,12 @@ do
ls "\\$output_location"
done
echo "Grouping output from \\$par_input_falco into \\$par_output_falco"
mkdir -p "\\$par_output_falco"
IFS=";" read -ra falco_inputs <<< \\$par_input_falco
for falco_dir in "\\${falco_inputs[@]}"; do
echo "Copying contents of \\$falco_dir"
find -H -D exec "\\$falco_dir" -type f -maxdepth 1 -exec cp -t "\\$par_output_falco" {} +
echo "Grouping output from \\$par_input_sample_qc into \\$par_output_sample_qc"
mkdir -p "\\$par_output_sample_qc"
IFS=";" read -ra sample_qc_inputs <<< \\$par_input_sample_qc
for qc_dir in "\\${sample_qc_inputs[@]}"; do
echo "Copying contents of \\$qc_dir"
find -H -D exec "\\$qc_dir" -type f -maxdepth 1 -exec cp -t "\\$par_output_sample_qc" {} +
done
VIASHMAIN
bash "$tempscript"

View File

@@ -16,14 +16,14 @@
"description": "Directory to write fastq data to",
"help_text": "Type: `file`, multiple: `False`, required, direction: `input`. "
},
"input_falco": {
"input_sample_qc": {
"type": "array",
"items": {
"type": "string"
},
"format": "path",
"exists": true,
"description": "Directory to write falco output to",
"description": "Directories containing the sample QC output",
"help_text": "Type: `file`, multiple: `True`, required, direction: `input`. "
},
"input_multiqc": {
@@ -61,7 +61,7 @@
"help_text": "Type: `file`, multiple: `False`, default: `\"fastq\"`, direction: `output`. ",
"default": "fastq"
},
"output_falco": {
"output_sample_qc": {
"type": "string",
"format": "path",
"description": "",

View File

@@ -156,16 +156,16 @@ build_info:
output: "target/nextflow/io/untar"
executable: "target/nextflow/io/untar/main.nf"
viash_version: "0.9.4"
git_commit: "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
git_commit: "6e71519815566a057711019a23a56a22479dd655"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-30-g219bd58"
git_tag: "v0.1.1-31-g6e71519"
package_config:
name: "demultiplex"
version: "main"
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-resources/demultiplex/v3"
- path: "gs://viash-hub-resources/demultiplex/v4"
dest: "testData"
viash_version: "0.9.4"
source: "src"

View File

@@ -3227,9 +3227,9 @@ meta = [
"engine" : "docker|native",
"output" : "target/nextflow/io/untar",
"viash_version" : "0.9.4",
"git_commit" : "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880",
"git_commit" : "6e71519815566a057711019a23a56a22479dd655",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-30-g219bd58"
"git_tag" : "v0.1.1-31-g6e71519"
},
"package_config" : {
"name" : "demultiplex",
@@ -3238,7 +3238,7 @@ meta = [
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-resources/demultiplex/v3",
"path" : "gs://viash-hub-resources/demultiplex/v4",
"dest" : "testData"
}
]

View File

@@ -63,7 +63,7 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "file"
name: "--falco_output"
name: "--sample_qc_output"
info: null
default:
- "qc/fastqc"
@@ -112,6 +112,11 @@ resources:
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Runner for demultiplexing of raw sequencing data"
test_resources:
- type: "nextflow_script"
path: "test.nf"
is_executable: true
entrypoint: "test"
info: null
status: "enabled"
scope:
@@ -206,9 +211,9 @@ build_info:
output: "target/nextflow/runner"
executable: "target/nextflow/runner/main.nf"
viash_version: "0.9.4"
git_commit: "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880"
git_commit: "6e71519815566a057711019a23a56a22479dd655"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-30-g219bd58"
git_tag: "v0.1.1-31-g6e71519"
dependencies:
- "target/nextflow/demultiplex"
- "target/nextflow/io/publish"
@@ -218,7 +223,7 @@ package_config:
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-resources/demultiplex/v3"
- path: "gs://viash-hub-resources/demultiplex/v4"
dest: "testData"
viash_version: "0.9.4"
source: "src"

View File

@@ -3102,7 +3102,7 @@ meta = [
},
{
"type" : "file",
"name" : "--falco_output",
"name" : "--sample_qc_output",
"default" : [
"qc/fastqc"
],
@@ -3167,6 +3167,14 @@ meta = [
}
],
"description" : "Runner for demultiplexing of raw sequencing data",
"test_resources" : [
{
"type" : "nextflow_script",
"path" : "test.nf",
"is_executable" : true,
"entrypoint" : "test"
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
@@ -3283,9 +3291,9 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/runner",
"viash_version" : "0.9.4",
"git_commit" : "219bd5816e7e6a6c80b4d6f6c1d21b3ffe53c880",
"git_commit" : "6e71519815566a057711019a23a56a22479dd655",
"git_remote" : "https://github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-30-g219bd58"
"git_tag" : "v0.1.1-31-g6e71519"
},
"package_config" : {
"name" : "demultiplex",
@@ -3294,7 +3302,7 @@ meta = [
"info" : {
"test_resources" : [
{
"path" : "gs://viash-hub-resources/demultiplex/v3",
"path" : "gs://viash-hub-resources/demultiplex/v4",
"dest" : "testData"
}
]
@@ -3342,6 +3350,17 @@ workflow run_wf {
main:
output_ch = input_ch
| map { id, state ->
// The argument names of this workflow and of the demultiplex workflow may overlap.
// Store a copy of this workflow's values under separate keys so that they are
// not accidentally overwritten in the state.
def new_state = state + [
"fastq_output_workflow": state.fastq_output,
"multiqc_output_workflow": state.multiqc_output,
"sample_qc_output_workflow": state.sample_qc_output,
"demultiplexer_logs_workflow": state.demultiplexer_logs,
]
return [id, new_state]
}
// Extract the ID from the input.
// If the input is a tarball, strip the suffix.
| map{ id, state ->
@@ -3359,8 +3378,8 @@ workflow run_wf {
"demultiplexer": state.demultiplexer,
"skip_copycomplete_check": state.skip_copycomplete_check,
"output": "$id/fastq",
"output_falco": "$id/qc/fastqc",
"output_multiqc": "$id/qc/multiqc_report.html",
"output_sample_qc": "$id/qc/fastqc",
"multiqc_output": "$id/qc/multiqc_report.html",
"demultiplexer_logs": "$id/demultiplexer_logs",
]
if (state.run_information) {
@@ -3378,26 +3397,25 @@ workflow run_wf {
def id1 = (state.plain_output) ? id : "${state.run_id}/${date}"
def id2 = (state.plain_output) ? id : "${id1}_demultiplex_${version}"
def fastq_output_1 = (id2 == "run") ? state.fastq_output : "${id2}/" + state.fastq_output
def falco_output_1 = (id2 == "run") ? state.falco_output : "${id2}/" + state.falco_output
def multiqc_output_1 = (id2 == "run") ? state.multiqc_output : "${id2}/" + state.multiqc_output
def run_information_output_1 = (id2 == "run") ? "${state.output_run_information.getName()}" : "${id2}/${state.output_run_information.getName()}"
def demultiplexer_logs_output = (id2 == "run") ? state.demultiplexer_logs : "${id2}/${state.demultiplexer_logs.getName()}"
def prefix = (id2 == "run") ? "" : "${id2}/"
// These output names are determined by arguments.
def fastq_output_1 = "${prefix}${state.fastq_output_workflow}"
def sample_qc_output_1 = "${prefix}${state.sample_qc_output_workflow}"
def multiqc_output_1 = "${prefix}${state.multiqc_output_workflow}"
def demultiplexer_logs_output = "${prefix}${state.demultiplexer_logs_workflow}"
// The name of the output file for the run information is determined by the input file name.
def run_information_output_1 = "${prefix}${state.output_run_information.getName()}"
if (id2 == "run") {
println("Publising to ${params.publish_dir}")
} else {
println("Publising to ${params.publish_dir}/${id2}")
}
println("Publising to ${params.publish_dir}/${prefix}")
[
input: state.output,
input_falco: state.output_falco,
input_multiqc: state.output_multiqc,
input_sample_qc: state.output_sample_qc,
input_multiqc: state.multiqc_output,
input_run_information: state.output_run_information,
input_demultiplexer_logs: state.demultiplexer_logs,
output: fastq_output_1,
output_falco: falco_output_1,
output_sample_qc: sample_qc_output_1,
output_multiqc: multiqc_output_1,
output_run_information: run_information_output_1,
output_demultiplexer_logs: demultiplexer_logs_output,
@@ -3417,9 +3435,14 @@ workflow run_wf {
output_ch
}
def get_version(inputFile) {
def get_version(input) {
def inputFile = file(input)
if (!inputFile.exists()) {
// When executing tests
return "unknown_version"
}
def yamlSlurper = new groovy.yaml.YamlSlurper()
def loaded_viash_config = yamlSlurper.parse(file(inputFile))
def loaded_viash_config = yamlSlurper.parse(inputFile)
def version = (loaded_viash_config.version) ? loaded_viash_config.version : "unknown_version"
println("Version to be used: ${version}")
return version

View File

@@ -58,7 +58,7 @@
"help_text": "Type: `file`, multiple: `False`, default: `\"fastq\"`, direction: `output`. ",
"default": "fastq"
},
"falco_output": {
"sample_qc_output": {
"type": "string",
"format": "path",
"description": "",