Build branch outflow with version outflow (850924d)

Build pipeline: viash-hub.demultiplex.outflow-sk9tm

Source commit: 850924de0c

Source message: Fixes
This commit is contained in:
CI
2024-12-10 14:42:05 +00:00
parent 19858ae245
commit d1913fc8b9
60 changed files with 15064 additions and 327 deletions

View File

@@ -19,7 +19,7 @@ argument_groups:
create_parent: true
required: true
direction: "input"
multiple: false
multiple: true
multiple_sep: ";"
- type: "file"
name: "--reverse_input"
@@ -28,7 +28,7 @@ argument_groups:
create_parent: true
required: false
direction: "input"
multiple: false
multiple: true
multiple_sep: ";"
- name: "Output arguments"
arguments:
@@ -139,9 +139,9 @@ build_info:
output: "target/nextflow/dataflow/combine_samples"
executable: "target/nextflow/dataflow/combine_samples/main.nf"
viash_version: "0.9.0"
git_commit: "e616759998e9686e363aedef2987bf06cae8207d"
git_remote: "https://x-access-token:ghs_VeQIhYEEY5ZJUc7oNW47Y7OvUp3Zbp4VAud9@github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-7-ge616759"
git_commit: "850924de0ce1af49300d7f0b3dbe620e4f97c916"
git_remote: "https://x-access-token:ghs_In7H7ob9B5hTOzNDr8gE6y2z2kDr5e3ZuuxO@github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-16-g850924d"
package_config:
name: "demultiplex"
version: "outflow"

View File

@@ -2827,7 +2827,7 @@ meta = [
"create_parent" : true,
"required" : true,
"direction" : "input",
"multiple" : false,
"multiple" : true,
"multiple_sep" : ";"
},
{
@@ -2837,7 +2837,7 @@ meta = [
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple" : true,
"multiple_sep" : ";"
}
]
@@ -2972,9 +2972,9 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/dataflow/combine_samples",
"viash_version" : "0.9.0",
"git_commit" : "e616759998e9686e363aedef2987bf06cae8207d",
"git_remote" : "https://x-access-token:ghs_VeQIhYEEY5ZJUc7oNW47Y7OvUp3Zbp4VAud9@github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-7-ge616759"
"git_commit" : "850924de0ce1af49300d7f0b3dbe620e4f97c916",
"git_remote" : "https://x-access-token:ghs_In7H7ob9B5hTOzNDr8gE6y2z2kDr5e3ZuuxO@github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-16-g850924d"
},
"package_config" : {
"name" : "demultiplex",
@@ -3031,8 +3031,8 @@ workflow run_wf {
| groupTuple(by: 0, sort: "hash")
| map {run_id, states ->
// Gather the following state for all samples
def forward_fastqs = states.collect{it.forward_input}
def reverse_fastqs = states.collect{it.reverse_input}.findAll{it != null}
def forward_fastqs = states.collect{it.forward_input}.flatten()
def reverse_fastqs = states.collect{it.reverse_input}.findAll{it != null}.flatten()
def resultState = [
"output_forward": forward_fastqs,

View File

@@ -27,8 +27,8 @@
"forward_input": {
"type":
"string",
"description": "Type: `file`, required. ",
"help_text": "Type: `file`, required. "
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. ",
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. "
}
@@ -37,8 +37,8 @@
"reverse_input": {
"type":
"string",
"description": "Type: `file`. ",
"help_text": "Type: `file`. "
"description": "Type: List of `file`, multiple_sep: `\";\"`. ",
"help_text": "Type: List of `file`, multiple_sep: `\";\"`. "
}

View File

@@ -33,7 +33,7 @@ argument_groups:
create_parent: true
required: true
direction: "output"
multiple: false
multiple: true
multiple_sep: ";"
- type: "file"
name: "--fastq_reverse"
@@ -42,7 +42,7 @@ argument_groups:
create_parent: true
required: false
direction: "output"
multiple: false
multiple: true
multiple_sep: ";"
resources:
- type: "nextflow_script"
@@ -133,9 +133,9 @@ build_info:
output: "target/nextflow/dataflow/gather_fastqs_and_validate"
executable: "target/nextflow/dataflow/gather_fastqs_and_validate/main.nf"
viash_version: "0.9.0"
git_commit: "e616759998e9686e363aedef2987bf06cae8207d"
git_remote: "https://x-access-token:ghs_VeQIhYEEY5ZJUc7oNW47Y7OvUp3Zbp4VAud9@github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-7-ge616759"
git_commit: "850924de0ce1af49300d7f0b3dbe620e4f97c916"
git_remote: "https://x-access-token:ghs_In7H7ob9B5hTOzNDr8gE6y2z2kDr5e3ZuuxO@github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-16-g850924d"
package_config:
name: "demultiplex"
version: "outflow"

View File

@@ -2845,7 +2845,7 @@ meta = [
"create_parent" : true,
"required" : true,
"direction" : "output",
"multiple" : false,
"multiple" : true,
"multiple_sep" : ";"
},
{
@@ -2855,7 +2855,7 @@ meta = [
"create_parent" : true,
"required" : false,
"direction" : "output",
"multiple" : false,
"multiple" : true,
"multiple_sep" : ";"
}
]
@@ -2965,9 +2965,9 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/dataflow/gather_fastqs_and_validate",
"viash_version" : "0.9.0",
"git_commit" : "e616759998e9686e363aedef2987bf06cae8207d",
"git_remote" : "https://x-access-token:ghs_VeQIhYEEY5ZJUc7oNW47Y7OvUp3Zbp4VAud9@github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-7-ge616759"
"git_commit" : "850924de0ce1af49300d7f0b3dbe620e4f97c916",
"git_remote" : "https://x-access-token:ghs_In7H7ob9B5hTOzNDr8gE6y2z2kDr5e3ZuuxO@github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-16-g850924d"
},
"package_config" : {
"name" : "demultiplex",
@@ -3027,9 +3027,11 @@ workflow run_wf {
def original_id = id
// Parse sample sheet for sample IDs
println "Processing run information file ${sample_sheet}"
csv_lines = sample_sheet.splitCsv(header: false, sep: ',')
csv_lines.any { csv_items ->
if (csv_items.isEmpty()) {
// skip empty line
return
}
def possible_header = csv_items[0]
@@ -3037,22 +3039,40 @@ workflow run_wf {
if (header) {
if (start_parsing) {
  // A new section header after parsing has started marks the end of the
  // sample section. Returning true makes the enclosing `any` stop iterating.
  // Report the header that was encountered (start_parsing is just a flag).
  println "Encountered next header '[${header}]', stopping parsing."
  return true
}
if (header == "Data") {
// [Data] for illumina
// [Samples] for Element Biosciences
if (header in ["Data", "Samples"]) {
println "Found header [${header}], start parsing."
start_parsing = true
return
}
}
if (start_parsing) {
if ( !sample_id_column_index ) {
sample_id_column_index = csv_items.findIndexValues{it == "Sample_ID"}
assert sample_id_column_index != -1:
"Could not find column 'Sample_ID' in sample sheet!"
if ( sample_id_column_index == null) {
println "Looking for sample name column."
sample_id_column_index = csv_items.findIndexValues{it == "Sample_ID" || it == "SampleName"}
assert (!sample_id_column_index.isEmpty()):
"Could not find column 'Sample_ID' (Illumina) or 'SampleName' " +
"(Element Biosciences) in run information! Found: ${sample_id_column_index}"
assert sample_id_column_index.size() == 1, "Expected run information file to contain " +
"a column 'Sample_ID' or 'SampleName', not both. Found: ${sample_id_column_index}"
sample_id_column_index = sample_id_column_index[0]
println "Found sample names column '${csv_items[sample_id_column_index]}'."
return
}
samples += csv_items[sample_id_column_index]
}
// This return is important! (If 'true' is returned, the parsing stops.)
return
}
assert start_parsing:
"Sample information file does not contain [Data] or [Samples] header!"
assert samples.size() > 1:
"Sample information file does not seem to contain any information about the samples!"
println "Finished processing run information file, found samples: ${samples}."
println "Looking for fastq files in ${state.input}."
def allfastqs = state.input.listFiles().findAll{it.isFile() && it.name ==~ /^.+\.fastq.gz$/}
println "Found ${allfastqs.size()} fastq files, matching them to the following samples: ${samples}."
@@ -3061,17 +3081,15 @@ workflow run_wf {
def reverse_regex = ~/^${sample_id}_S(\d+)_(L(\d+)_)?R2_(\d+)\.fastq\.gz$/
def forward_fastq = state.input.listFiles().findAll{it.isFile() && it.name ==~ forward_regex}
def reverse_fastq = state.input.listFiles().findAll{it.isFile() && it.name ==~ reverse_regex}
assert forward_fastq : "No forward fastq files were found for sample ${sample_id}"
assert forward_fastq.size() < 2:
"Found multiple forward fastq files corresponding to sample ${sample_id}: ${forward_fastq}"
assert reverse_fastq.size() < 2:
"Found multiple reverse fastq files corresponding to sample ${sample_id}: ${reverse_fastq}."
assert !forward_fastq.isEmpty():
"Expected a forward fastq file to have been created correspondig to sample ${sample_id}."
// TODO: if one sample had reverse reads, the others must as well.
reverse_fastq = !reverse_fastq.isEmpty() ? reverse_fastq[0] : null
assert forward_fastq && !forward_fastq.isEmpty(): "No forward fastq files were found for sample ${sample_id}. " +
"All fastq files in directory: ${allfastqs.collect{it.name}}"
assert (reverse_fastq.isEmpty() || (forward_fastq.size() == reverse_fastq.size())):
"Expected equal number of forward and reverse fastq files for sample ${sample_id}. " +
"Found forward: ${forward_fastq} and reverse: ${reverse_fastq}."
println "Found ${forward_fastq.size()} forward and ${reverse_fastq.size()} reverse " +
"fastq files for sample ${sample_id}"
def fastqs_state = [
"fastq_forward": forward_fastq[0],
"fastq_forward": forward_fastq,
"fastq_reverse": reverse_fastq,
"_meta": [ "join_id": original_id ],
]

View File

@@ -47,10 +47,10 @@
"fastq_forward": {
"type":
"string",
"description": "Type: `file`, required, default: `$id.$key.fastq_forward.fastq_forward`. ",
"help_text": "Type: `file`, required, default: `$id.$key.fastq_forward.fastq_forward`. "
"description": "Type: List of `file`, required, default: `$id.$key.fastq_forward_*.fastq_forward_*`, multiple_sep: `\";\"`. ",
"help_text": "Type: List of `file`, required, default: `$id.$key.fastq_forward_*.fastq_forward_*`, multiple_sep: `\";\"`. "
,
"default":"$id.$key.fastq_forward.fastq_forward"
"default":"$id.$key.fastq_forward_*.fastq_forward_*"
}
@@ -58,10 +58,10 @@
"fastq_reverse": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.fastq_reverse.fastq_reverse`. ",
"help_text": "Type: `file`, default: `$id.$key.fastq_reverse.fastq_reverse`. "
"description": "Type: List of `file`, default: `$id.$key.fastq_reverse_*.fastq_reverse_*`, multiple_sep: `\";\"`. ",
"help_text": "Type: List of `file`, default: `$id.$key.fastq_reverse_*.fastq_reverse_*`, multiple_sep: `\";\"`. "
,
"default":"$id.$key.fastq_reverse.fastq_reverse"
"default":"$id.$key.fastq_reverse_*.fastq_reverse_*"
}

View File

@@ -3,6 +3,14 @@ version: "outflow"
argument_groups:
- name: "Input arguments"
arguments:
- type: "string"
name: "--id"
description: "Unique identifier for the run"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input"
description: "Directory containing raw sequencing data"
@@ -14,9 +22,11 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "file"
name: "--sample_sheet"
description: "Sample sheet as input for BCL Convert. If not specified,\nwill try\
\ to autodetect the sample sheet in the input directory\n"
name: "--run_information"
description: "CSV file containing sample information, which will be used as \n\
input for the demultiplexer. Canonically called 'SampleSheet.csv' (Illumina)\n\
or 'RunManifest.csv' (Element Biosciences). If not specified,\nwill try to autodetect\
\ the sample sheet in the input directory.\nRequires --demultiplexer to be set.\n"
info: null
must_exist: true
create_parent: true
@@ -24,6 +34,19 @@ argument_groups:
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--demultiplexer"
description: "Demultiplexer to use, choice depends on the provider\nof the instrument\
\ that was used to generate the data.\nWhen not using --run_information, specifying\
\ this argument is not\nrequired.\n"
info: null
required: false
choices:
- "bases2fastq"
- "bclconvert"
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output arguments"
arguments:
- type: "file"
@@ -72,7 +95,11 @@ test_resources:
- type: "nextflow_script"
path: "test.nf"
is_executable: true
entrypoint: "test_wf"
entrypoint: "test_illumina"
- type: "nextflow_script"
path: "test.nf"
is_executable: true
entrypoint: "test_bases2fastq"
info: null
status: "enabled"
requirements:
@@ -95,22 +122,27 @@ dependencies:
repository:
type: "vsh"
repo: "biobox"
tag: "v0.2.0"
tag: "v0.3.0"
- name: "bases2fastq"
repository:
type: "vsh"
repo: "biobox"
tag: "v0.3.0"
- name: "falco"
repository:
type: "vsh"
repo: "biobox"
tag: "v0.2.0"
tag: "v0.3.0"
- name: "multiqc"
repository:
type: "vsh"
repo: "biobox"
tag: "v0.2.0"
tag: "v0.3.0"
repositories:
- type: "vsh"
name: "bb"
repo: "biobox"
tag: "v0.2.0"
tag: "v0.3.0"
license: "MIT"
links:
repository: "https://github.com/viash-hub/demultiplex"
@@ -188,17 +220,18 @@ build_info:
output: "target/nextflow/demultiplex"
executable: "target/nextflow/demultiplex/main.nf"
viash_version: "0.9.0"
git_commit: "e616759998e9686e363aedef2987bf06cae8207d"
git_remote: "https://x-access-token:ghs_VeQIhYEEY5ZJUc7oNW47Y7OvUp3Zbp4VAud9@github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-7-ge616759"
git_commit: "850924de0ce1af49300d7f0b3dbe620e4f97c916"
git_remote: "https://x-access-token:ghs_In7H7ob9B5hTOzNDr8gE6y2z2kDr5e3ZuuxO@github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-16-g850924d"
dependencies:
- "target/nextflow/io/untar"
- "target/nextflow/dataflow/gather_fastqs_and_validate"
- "target/nextflow/io/interop_summary_to_csv"
- "target/nextflow/dataflow/combine_samples"
- "target/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/bcl_convert"
- "target/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/falco"
- "target/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/multiqc"
- "target/dependencies/vsh/vsh/biobox/v0.3.0/nextflow/bcl_convert"
- "target/dependencies/vsh/vsh/biobox/v0.3.0/nextflow/bases2fastq"
- "target/dependencies/vsh/vsh/biobox/v0.3.0/nextflow/falco"
- "target/dependencies/vsh/vsh/biobox/v0.3.0/nextflow/multiqc"
package_config:
name: "demultiplex"
version: "outflow"

View File

@@ -2810,6 +2810,15 @@ meta = [
{
"name" : "Input arguments",
"arguments" : [
{
"type" : "string",
"name" : "--id",
"description" : "Unique identifier for the run",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--input",
@@ -2823,14 +2832,27 @@ meta = [
},
{
"type" : "file",
"name" : "--sample_sheet",
"description" : "Sample sheet as input for BCL Convert. If not specified,\nwill try to autodetect the sample sheet in the input directory\n",
"name" : "--run_information",
"description" : "CSV file containing sample information, which will be used as \ninput for the demultiplexer. Canonically called 'SampleSheet.csv' (Illumina)\nor 'RunManifest.csv' (Element Biosciences). If not specified,\nwill try to autodetect the sample sheet in the input directory.\nRequires --demultiplexer to be set.\n",
"must_exist" : true,
"create_parent" : true,
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--demultiplexer",
"description" : "Demultiplexer to use, choice depends on the provider\nof the instrument that was used to generate the data.\nWhen not using --run_information, specifying this argument is not\nrequired.\n",
"required" : false,
"choices" : [
"bases2fastq",
"bclconvert"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
@@ -2896,7 +2918,13 @@ meta = [
"type" : "nextflow_script",
"path" : "test.nf",
"is_executable" : true,
"entrypoint" : "test_wf"
"entrypoint" : "test_illumina"
},
{
"type" : "nextflow_script",
"path" : "test.nf",
"is_executable" : true,
"entrypoint" : "test_bases2fastq"
}
],
"status" : "enabled",
@@ -2935,7 +2963,15 @@ meta = [
"repository" : {
"type" : "vsh",
"repo" : "biobox",
"tag" : "v0.2.0"
"tag" : "v0.3.0"
}
},
{
"name" : "bases2fastq",
"repository" : {
"type" : "vsh",
"repo" : "biobox",
"tag" : "v0.3.0"
}
},
{
@@ -2943,7 +2979,7 @@ meta = [
"repository" : {
"type" : "vsh",
"repo" : "biobox",
"tag" : "v0.2.0"
"tag" : "v0.3.0"
}
},
{
@@ -2951,7 +2987,7 @@ meta = [
"repository" : {
"type" : "vsh",
"repo" : "biobox",
"tag" : "v0.2.0"
"tag" : "v0.3.0"
}
}
],
@@ -2960,7 +2996,7 @@ meta = [
"type" : "vsh",
"name" : "bb",
"repo" : "biobox",
"tag" : "v0.2.0"
"tag" : "v0.3.0"
}
],
"license" : "MIT",
@@ -3052,9 +3088,9 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/demultiplex",
"viash_version" : "0.9.0",
"git_commit" : "e616759998e9686e363aedef2987bf06cae8207d",
"git_remote" : "https://x-access-token:ghs_VeQIhYEEY5ZJUc7oNW47Y7OvUp3Zbp4VAud9@github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-7-ge616759"
"git_commit" : "850924de0ce1af49300d7f0b3dbe620e4f97c916",
"git_remote" : "https://x-access-token:ghs_In7H7ob9B5hTOzNDr8gE6y2z2kDr5e3ZuuxO@github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-16-g850924d"
},
"package_config" : {
"name" : "demultiplex",
@@ -3099,22 +3135,23 @@ include { untar } from "${meta.resources_dir}/../../nextflow/io/untar/main.nf"
include { gather_fastqs_and_validate } from "${meta.resources_dir}/../../nextflow/dataflow/gather_fastqs_and_validate/main.nf"
include { interop_summary_to_csv } from "${meta.resources_dir}/../../nextflow/io/interop_summary_to_csv/main.nf"
include { combine_samples } from "${meta.resources_dir}/../../nextflow/dataflow/combine_samples/main.nf"
include { bcl_convert } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/bcl_convert/main.nf"
include { falco } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/falco/main.nf"
include { multiqc } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.2.0/nextflow/multiqc/main.nf"
include { bcl_convert } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.0/nextflow/bcl_convert/main.nf"
include { bases2fastq } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.0/nextflow/bases2fastq/main.nf"
include { falco } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.0/nextflow/falco/main.nf"
include { multiqc } from "${meta.root_dir}/dependencies/vsh/vsh/biobox/v0.3.0/nextflow/multiqc/main.nf"
// inner workflow
// user-provided Nextflow code
def date = new Date().format('yyyyMMdd_hhmmss')
workflow run_wf {
take:
input_ch
main:
samples_ch = input_ch
// untar input if needed
| untar.run(
directives: [label: ["lowmem", "lowcpu"]],
runIf: {id, state ->
def inputStr = state.input.toString()
inputStr.endsWith(".tar.gz") || \
@@ -3131,22 +3168,80 @@ workflow run_wf {
// Gather input files from folder
| map {id, state ->
def newState = [:]
if (!state.sample_sheet) {
def sample_sheet = state.input.resolve("SampleSheet.csv")
assert (sample_sheet && sample_sheet.isFile()): "Could not find 'SampleSheet.csv' file in input directory."
newState["sample_sheet"] = sample_sheet
println("Provided run information: ${state.run_information} and demultiplexer: ${state.demultiplexer}")
if (!state.run_information) {
println("Run information was not specified, auto-detecting...")
// The supported_platforms hashmap must be a 1-on-1 mapping.
// Also, its keys must be present in the 'choices' field
// for the 'demultiplexer' argument in the viash config.
def supported_platforms = [
"bclconvert": "SampleSheet.csv", // Illumina
"bases2fastq": "RunManifest.csv" // Element Biosciences
]
def found_sample_information = supported_platforms.collectEntries{demultiplexer, filename ->
println("Checking if ${filename} can be found in input folder ${state.input}.")
def resolved_filename = state.input.resolve(filename)
if (!resolved_filename.isFile()) {
resolved_filename = null
}
println("Result after looking for run information for ${demultiplexer}: ${resolved_filename}.")
[demultiplexer, resolved_filename]
}
def demultiplexer = null
def run_information = null
found_sample_information.each{demultiplexer_candidate, file_path ->
if (file_path) {
// At this point, a candidate run information file was found.
assert !run_information: "Autodetection of run information " +
"(SampleSheet, RunManifest) failed: " +
"multiple candidate files found in input folder. " +
"Please specify one using --run_information."
run_information = file_path
demultiplexer = demultiplexer_candidate
}
}
// When autodetecting, the run information should have been found
assert run_information: "No run information file (SampleSheet, RunManifest) " +
"found in input directory."
// When autodetecting, the demultiplexer must be set if the run information was found
assert demultiplexer, "State error: the demultiplexer should have been autodetected. " +
"Please report this as a bug."
// When autodetecting, the found demultiplexer must match
// with the demultiplexer that the user has provided (in case it was provided).
if (state.demultiplexer) {
  // A user-provided demultiplexer must agree with the one implied by the
  // autodetected run information file.
  // Every message fragment except the last ends with '+': a string literal
  // that starts a new line without it is parsed as a separate statement and
  // would be missing from the assertion message.
  assert state.demultiplexer == demultiplexer,
    "Requested to use demultiplexer ${state.demultiplexer} " +
    "but demultiplexer based on the autodetected run information " +
    "file ${run_information} seems to indicate that the demultiplexer " +
    "should be ${demultiplexer}. Either avoid specifying the demultiplexer " +
    "or override the autodetection of the run information by providing " +
    "the file."
}
println("Using run information ${run_information} and demultiplexer ${demultiplexer}")
// At this point, the autodetected state can override the user provided state.
newState = newState + [
"run_information": run_information,
"demultiplexer": demultiplexer,
]
}
// Do not add InterOp to state because we generate the summary csv's in the next
// step based on the run dir, not the InterOp dir.
def interop_dir = state.input.resolve("InterOp")
assert interop_dir.isDirectory(): "Expected InterOp directory to be present."
if (newState.demultiplexer in ["bclconvert"]) {
// Do not add InterOp to state because we generate the summary csv's in the next
// step based on the run dir, not the InterOp dir.
def interop_dir = state.input.resolve("InterOp")
assert interop_dir.isDirectory(): "Expected InterOp directory to be present."
}
def resultState = state + newState
[id, resultState]
}
| interop_summary_to_csv.run(
runIf: {id, state -> state.demultiplexer in ["bclconvert"]},
directives: [label: ["lowmem", "verylowcpu"]],
fromState: [
"input": "input",
],
@@ -3157,10 +3252,12 @@ workflow run_wf {
)
// run bcl_convert
| bcl_convert.run(
runIf: {id, state -> state.demultiplexer in ["bclconvert"]},
directives: [label: ["highmem", "midcpu"]],
fromState: { id, state ->
[
bcl_input_directory: state.input,
sample_sheet: state.sample_sheet,
sample_sheet: state.run_information,
output_directory: state.output,
reports: "reports",
logs: "logs"
@@ -3168,8 +3265,31 @@ workflow run_wf {
},
toState: {id, result, state ->
def toAdd = [
"output_bclconvert" : result.output_directory,
"bclconvert_reports": result.reports,
"output_demultiplexer" : result.output_directory,
"run_id": id,
]
def newState = state + toAdd
return newState
}
)
// run bases2fastq
| bases2fastq.run(
runIf: {id, state -> state.demultiplexer in ["bases2fastq"]},
directives: [label: ["highmem", "midcpu"]],
fromState: [
"analysis_directory": "input",
"run_manifest": "run_information",
"output_directory": "output",
],
args: [
"no_projects": true, // Do not put output files in a subfolder for project
//"split_lanes": true,
"legacy_fastq": true, // Illumina style output names
"group_fastq": true, // No subdir per sample
],
toState: {id, result, state ->
def toAdd = [
"output_demultiplexer" : result.output_directory,
"run_id": id,
]
def newState = state + toAdd
@@ -3178,8 +3298,8 @@ workflow run_wf {
)
| gather_fastqs_and_validate.run(
fromState: [
"input": "output_bclconvert",
"sample_sheet": "sample_sheet",
"input": "output_demultiplexer",
"sample_sheet": "run_information",
],
toState: [
"fastq_forward": "fastq_forward",
@@ -3188,6 +3308,8 @@ workflow run_wf {
)
output_ch = samples_ch
| combine_samples.run(
fromState: { id, state ->
[
@@ -3202,6 +3324,7 @@ workflow run_wf {
]
)
| falco.run(
directives: [label: ["lowcpu", "lowmem"]],
fromState: {id, state ->
reverse_fastqs_list = state.reverse_fastqs ? state.reverse_fastqs : []
[
@@ -3217,18 +3340,20 @@ workflow run_wf {
}
)
| multiqc.run(
directives: [label: ["midcpu", "midmem"]],
fromState: {id, state ->
[
"input": [
state.output_falco,
def new_state = [
"input": [state.output_falco],
"output_report": state.output_multiqc,
"cl_config": 'sp: {fastqc/data: {fn: "*_fastqc_data.txt"}}'
]
if (state.demultiplexer == "bclconvert") {
new_state["input"] += [
state.interop_run_summary.getParent(),
state.interop_index_summary.getParent()
],
"output_report": state.output_multiqc,
"output_data": null,
"output_plots": null,
"cl_config": 'sp: {fastqc/data: {fn: "*_fastqc_data.txt"}}',
]
]
}
return new_state
},
toState: { id, result, state ->
state + [ "output_multiqc" : result.output_report ]
@@ -3236,16 +3361,12 @@ workflow run_wf {
)
| setState(
[
"output": "output_bclconvert",
//"_meta": "_meta",
"output": "output_demultiplexer",
"output_falco": "output_falco",
"output_multiqc": "output_multiqc"
]
)
| map{ id, state ->
def newId = "${date}/".toString()
[ newId, state + [ _meta: [ join_id: id ] ] ]
}
| niceView()
emit:
output_ch

View File

@@ -14,6 +14,16 @@
"properties": {
"id": {
"type":
"string",
"description": "Type: `string`. Unique identifier for the run",
"help_text": "Type: `string`. Unique identifier for the run"
}
,
"input": {
"type":
"string",
@@ -24,11 +34,23 @@
,
"sample_sheet": {
"run_information": {
"type":
"string",
"description": "Type: `file`. Sample sheet as input for BCL Convert",
"help_text": "Type: `file`. Sample sheet as input for BCL Convert. If not specified,\nwill try to autodetect the sample sheet in the input directory\n"
"description": "Type: `file`. CSV file containing sample information, which will be used as \ninput for the demultiplexer",
"help_text": "Type: `file`. CSV file containing sample information, which will be used as \ninput for the demultiplexer. Canonically called \u0027SampleSheet.csv\u0027 (Illumina)\nor \u0027RunManifest.csv\u0027 (Element Biosciences). If not specified,\nwill try to autodetect the sample sheet in the input directory.\nRequires --demultiplexer to be set.\n"
}
,
"demultiplexer": {
"type":
"string",
"description": "Type: `string`, choices: ``bases2fastq`, `bclconvert``. Demultiplexer to use, choice depends on the provider\nof the instrument that was used to generate the data",
"help_text": "Type: `string`, choices: ``bases2fastq`, `bclconvert``. Demultiplexer to use, choice depends on the provider\nof the instrument that was used to generate the data.\nWhen not using --run_information, specifying this argument is not\nrequired.\n",
"enum": ["bases2fastq", "bclconvert"]
}

View File

@@ -141,9 +141,9 @@ build_info:
output: "target/nextflow/io/interop_summary_to_csv"
executable: "target/nextflow/io/interop_summary_to_csv/main.nf"
viash_version: "0.9.0"
git_commit: "e616759998e9686e363aedef2987bf06cae8207d"
git_remote: "https://x-access-token:ghs_VeQIhYEEY5ZJUc7oNW47Y7OvUp3Zbp4VAud9@github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-7-ge616759"
git_commit: "850924de0ce1af49300d7f0b3dbe620e4f97c916"
git_remote: "https://x-access-token:ghs_In7H7ob9B5hTOzNDr8gE6y2z2kDr5e3ZuuxO@github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-16-g850924d"
package_config:
name: "demultiplex"
version: "outflow"

View File

@@ -2977,9 +2977,9 @@ meta = [
"engine" : "docker|native",
"output" : "target/nextflow/io/interop_summary_to_csv",
"viash_version" : "0.9.0",
"git_commit" : "e616759998e9686e363aedef2987bf06cae8207d",
"git_remote" : "https://x-access-token:ghs_VeQIhYEEY5ZJUc7oNW47Y7OvUp3Zbp4VAud9@github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-7-ge616759"
"git_commit" : "850924de0ce1af49300d7f0b3dbe620e4f97c916",
"git_remote" : "https://x-access-token:ghs_In7H7ob9B5hTOzNDr8gE6y2z2kDr5e3ZuuxO@github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-16-g850924d"
},
"package_config" : {
"name" : "demultiplex",

View File

@@ -0,0 +1,201 @@
name: "publish"
namespace: "io"
version: "outflow"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input"
description: "Directory to write fastq data to"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_falco"
description: "Directory to write falco output to"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_multiqc"
description: "Directory to write multiqc output to"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output arguments"
arguments:
- type: "file"
name: "--output"
info: null
default:
- "fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_falco"
info: null
default:
- "qc/fastqc"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_multiqc"
info: null
default:
- "qc/multiqc_report.html"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "code.sh"
is_executable: true
description: "Publish the processed results of the run"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/demultiplex"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-20220912"
target_registry: "images.viash-hub.com"
target_tag: "outflow"
namespace_separator: "/"
setup:
- type: "docker"
run:
- "apt update && apt install -y procps"
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/io/publish/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/io/publish"
executable: "target/nextflow/io/publish/main.nf"
viash_version: "0.9.0"
git_commit: "850924de0ce1af49300d7f0b3dbe620e4f97c916"
git_remote: "https://x-access-token:ghs_In7H7ob9B5hTOzNDr8gE6y2z2kDr5e3ZuuxO@github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-16-g850924d"
package_config:
name: "demultiplex"
version: "outflow"
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'outflow'"
keywords:
- "bioinformatics"
- "sequence"
- "demultiplexing"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/demultiplex"
issue_tracker: "https://github.com/viash-hub/demultiplex/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
// Pipeline metadata for the 'io/publish' component.
manifest {
name = 'io/publish'
mainScript = 'main.nf'
// The leading '!' makes the minimum-version requirement strict: Nextflow
// stops instead of only warning when the running version does not match.
nextflowVersion = '!>=20.12.1-edge'
version = 'outflow'
description = 'Publish the processed results of the run'
}
// Container image set at the process scope (applies unless a process overrides it).
process.container = 'nextflow/bash:latest'
// detect tempdir
// Takes the first of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR that is set and
// non-empty (Groovy's ?: falls through on null and on empty strings),
// defaults to '/tmp', and converts the result to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Named configuration profiles.
profiles {
// Turns off publishDir for every process (the '.*' selector matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Points the docker, podman and charliecloud temp setting at tempDir.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Each profile below enables exactly one container engine and explicitly
// disables the other four.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Besides enabling singularity, this profile also switches on autoMounts.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Resource presets applied to processes by label.
//   mem<N>gb  / mem<N>tb  : decimal units (1 GB  = 10^9 bytes, 1 TB  = 10^12 bytes)
//   mem<N>gib / mem<N>tib : binary units  (1 GiB = 2^30 bytes, 1 TiB = 2^40 bytes)
//   cpu<N>                : N CPUs
process{
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,137 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "publish",
"description": "Publish the processed results of the run",
"type": "object",
"definitions": {
"input arguments" : {
"title": "Input arguments",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: `file`, required. Directory to write fastq data to",
"help_text": "Type: `file`, required. Directory to write fastq data to"
}
,
"input_falco": {
"type":
"string",
"description": "Type: `file`, required. Directory to write falco output to",
"help_text": "Type: `file`, required. Directory to write falco output to"
}
,
"input_multiqc": {
"type":
"string",
"description": "Type: `file`, required. MultiQC report (HTML file) to publish",
"help_text": "Type: `file`, required. MultiQC report (HTML file) to publish"
}
}
},
"output arguments" : {
"title": "Output arguments",
"type": "object",
"description": "No description",
"properties": {
"output": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.output.output`. ",
"help_text": "Type: `file`, default: `$id.$key.output.output`. "
,
"default":"$id.$key.output.output"
}
,
"output_falco": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.output_falco.output_falco`. ",
"help_text": "Type: `file`, default: `$id.$key.output_falco.output_falco`. "
,
"default":"$id.$key.output_falco.output_falco"
}
,
"output_multiqc": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.output_multiqc.html`. ",
"help_text": "Type: `file`, default: `$id.$key.output_multiqc.html`. "
,
"default":"$id.$key.output_multiqc.html"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input arguments"
},
{
"$ref": "#/definitions/output arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -148,9 +148,9 @@ build_info:
output: "target/nextflow/io/untar"
executable: "target/nextflow/io/untar/main.nf"
viash_version: "0.9.0"
git_commit: "e616759998e9686e363aedef2987bf06cae8207d"
git_remote: "https://x-access-token:ghs_VeQIhYEEY5ZJUc7oNW47Y7OvUp3Zbp4VAud9@github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-7-ge616759"
git_commit: "850924de0ce1af49300d7f0b3dbe620e4f97c916"
git_remote: "https://x-access-token:ghs_In7H7ob9B5hTOzNDr8gE6y2z2kDr5e3ZuuxO@github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-16-g850924d"
package_config:
name: "demultiplex"
version: "outflow"

View File

@@ -2989,9 +2989,9 @@ meta = [
"engine" : "docker|native",
"output" : "target/nextflow/io/untar",
"viash_version" : "0.9.0",
"git_commit" : "e616759998e9686e363aedef2987bf06cae8207d",
"git_remote" : "https://x-access-token:ghs_VeQIhYEEY5ZJUc7oNW47Y7OvUp3Zbp4VAud9@github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-7-ge616759"
"git_commit" : "850924de0ce1af49300d7f0b3dbe620e4f97c916",
"git_remote" : "https://x-access-token:ghs_In7H7ob9B5hTOzNDr8gE6y2z2kDr5e3ZuuxO@github.com/viash-hub/demultiplex",
"git_tag" : "v0.1.1-16-g850924d"
},
"package_config" : {
"name" : "demultiplex",

View File

@@ -0,0 +1,191 @@
name: "runner"
version: "outflow"
argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input"
description: "Base directory of the form `s3://<bucket>/Sequencing/<Sequencer>/<RunID>/`"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Annotation flags"
arguments:
- type: "boolean_true"
name: "--add_date_time"
description: "Add date and time to the output directory name. This is useful\n\
when running the same pipeline multiple times on the same input\ndirectory.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--add_workflow_id"
description: "Add a workflow identifier to the output directory name.\n"
info: null
direction: "input"
- name: "Output arguments"
arguments:
- type: "file"
name: "--fastq_output"
info: null
default:
- "fastq"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--falco_output"
info: null
default:
- "qc/fastqc"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--multiqc_output"
info: null
default:
- "qc/multiqc_report.html"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
description: "Runner for demultiplexing of raw sequencing data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
dependencies:
- name: "demultiplex"
repository:
type: "local"
- name: "io/publish"
repository:
type: "local"
license: "MIT"
links:
repository: "https://github.com/viash-hub/demultiplex"
runners:
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
- type: "native"
id: "native"
build_info:
config: "src/runner/config.vsh.yaml"
runner: "nextflow"
engine: "native|native"
output: "target/nextflow/runner"
executable: "target/nextflow/runner/main.nf"
viash_version: "0.9.0"
git_commit: "850924de0ce1af49300d7f0b3dbe620e4f97c916"
git_remote: "https://github.com/viash-hub/demultiplex"
git_tag: "v0.1.1-16-g850924d"
dependencies:
- "target/nextflow/demultiplex"
- "target/nextflow/io/publish"
package_config:
name: "demultiplex"
version: "outflow"
description: "Demultiplexing pipeline\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/demultiplex/v2/"
dest: "testData"
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
\ := '$id'\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'outflow'"
keywords:
- "bioinformatics"
- "sequence"
- "demultiplexing"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/demultiplex"
issue_tracker: "https://github.com/viash-hub/demultiplex/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
// Pipeline metadata for the 'runner' component.
manifest {
name = 'runner'
mainScript = 'main.nf'
// The leading '!' makes the minimum-version requirement strict: Nextflow
// stops instead of only warning when the running version does not match.
nextflowVersion = '!>=20.12.1-edge'
version = 'outflow'
description = 'Runner for demultiplexing of raw sequencing data'
}
// Container image set at the process scope (applies unless a process overrides it).
process.container = 'nextflow/bash:latest'
// detect tempdir
// Takes the first of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR that is set and
// non-empty (Groovy's ?: falls through on null and on empty strings),
// defaults to '/tmp', and converts the result to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Named configuration profiles.
profiles {
// Turns off publishDir for every process (the '.*' selector matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Points the docker, podman and charliecloud temp setting at tempDir.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Each profile below enables exactly one container engine and explicitly
// disables the other four.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Besides enabling singularity, this profile also switches on autoMounts.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Resource presets applied to processes by label.
//   mem<N>gb  / mem<N>tb  : decimal units (1 GB  = 10^9 bytes, 1 TB  = 10^12 bytes)
//   mem<N>gib / mem<N>tib : binary units  (1 GiB = 2^30 bytes, 1 TiB = 2^40 bytes)
//   cpu<N>                : N CPUs
process{
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}

View File

@@ -0,0 +1,153 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "runner",
"description": "Runner for demultiplexing of raw sequencing data",
"type": "object",
"definitions": {
"input arguments" : {
"title": "Input arguments",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: `file`, required. Base directory of the form `s3://\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/`",
"help_text": "Type: `file`, required. Base directory of the form `s3://\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/`"
}
}
},
"annotation flags" : {
"title": "Annotation flags",
"type": "object",
"description": "No description",
"properties": {
"add_date_time": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Add date and time to the output directory name",
"help_text": "Type: `boolean_true`, default: `false`. Add date and time to the output directory name. This is useful\nwhen running the same pipeline multiple times on the same input\ndirectory.\n"
,
"default":false
}
,
"add_workflow_id": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Add a workflow identifier to the output directory name",
"help_text": "Type: `boolean_true`, default: `false`. Add a workflow identifier to the output directory name.\n"
,
"default":false
}
}
},
"output arguments" : {
"title": "Output arguments",
"type": "object",
"description": "No description",
"properties": {
"fastq_output": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.fastq_output.fastq_output`. ",
"help_text": "Type: `file`, default: `$id.$key.fastq_output.fastq_output`. "
,
"default":"$id.$key.fastq_output.fastq_output"
}
,
"falco_output": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.falco_output.falco_output`. ",
"help_text": "Type: `file`, default: `$id.$key.falco_output.falco_output`. "
,
"default":"$id.$key.falco_output.falco_output"
}
,
"multiqc_output": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.multiqc_output.html`. ",
"help_text": "Type: `file`, default: `$id.$key.multiqc_output.html`. "
,
"default":"$id.$key.multiqc_output.html"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input arguments"
},
{
"$ref": "#/definitions/annotation flags"
},
{
"$ref": "#/definitions/output arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}