Build branch save-params with version save-params (05ac6a3)

Build pipeline: viash-hub.htrnaseq.save-params-h88fh

Source commit: 05ac6a3d24

Source message: updated filepathpath parsing
This commit is contained in:
CI
2025-04-01 16:03:23 +00:00
parent 8bf692c37b
commit 8ca2889a4d
86 changed files with 11389 additions and 2343 deletions

View File

@@ -1,3 +1,57 @@
# htrnaseq v0.7.0
## Breaking changes
The `runner` and `htrnaseq` workflows now output FASTQ files corresponding to the barcodes per input ID (per sequencing run).
Previously, when multiple input folders or multiple input FASTQ files were provided
(for the `runner` and `htrnaseq` workflows respectively), the demultiplexed FASTQ files for these inputs were concatenated
and provided as output. For the `htrnaseq` workflow, reads can still be combined by using a newly added `sampleID` argument.
This means that two lists of FASTQ files can be provided for a single sample, and by assigning the same `sampleID`,
these reads will be joined. For example, with other arguments left out for brevity:
```yaml
- id: sample1_run1
input_r1: [sample_1_L001_1_R1.fastq, sample_1_L002_1_R1.fastq]
input_r2: [sample_1_L001_1_R2.fastq, sample_1_L002_1_R2.fastq]
sampleID: "sample_1"
- id: sample1_run2
input_r1: [sample_1_L001_1_R1.fastq, sample_1_L002_1_R1.fastq]
input_r2: [sample_1_L001_1_R2.fastq, sample_1_L002_1_R2.fastq]
sampleID: "sample_1"
- id: sample_2
input_r1: [sample_2_L001_1_R1.fastq, sample_2_L002_1_R1.fastq]
input_r2: [sample_2_L001_1_R2.fastq, sample_2_L002_1_R2.fastq]
```
For the runner, concatenation of data across samples is automatically inferred. Previously, multiple IDs (events) could be
provided which were processed in parallel. This is no longer possible, as providing multiple IDs will cause the matching
samples for these runs to be concatenated.
For example, the following old parameter yaml
```yaml
- id: run1
input: ["run_folder_1/", run_folder_2/]
```
should now be provided as:
```yaml
- id: run1
input: "run_folder_1/"
- id: run2
input: run_folder_2/
```
## Minor changes
* Updated viash to `0.9.2` (PR #49)
# htrnaseq v0.6.0
## Breaking changes
* `runner`: a subdirectory `data_processed` is now added to the output structure, in between
the experiment ID and the directory with the workflow date and version (PR #45).
# htrnaseq v0.5.5
## New functionality

View File

@@ -7,7 +7,7 @@ links:
issue_tracker: https://github.com/viash-hub/htrnaseq/issues
repository: https://github.com/viash-hub/htrnaseq
viash_version: 0.9.0
viash_version: 0.9.2
info:
test_resources:

View File

@@ -8,13 +8,8 @@ mkdir -p "$par_output" && echo "$par_output created"
echo
echo "Copying files..."
IFS=";" read -ra input_r1 <<<$par_input_r1
IFS=";" read -ra input_r2 <<<$par_input_r2
IFS=";" read -ra input <<<$par_input
for i in "${input_r1[@]}"; do
for i in "${input[@]}"; do
cp -rL "$i" "$par_output/"
done
for i in "${input_r2[@]}"; do
cp -rL "$i" "$par_output/"
done
done

View File

@@ -4,13 +4,8 @@ description: "Publish the fastq files per well"
argument_groups:
- name: Input arguments
arguments:
- name: --input_r1
description: Directory to write R1 fastq data to
type: file
multiple: true
required: true
- name: --input_r2
description: Directory to write R2 fastq data to
- name: --input
description: Directory to write fastq data to
type: file
multiple: true
required: true

View File

@@ -114,7 +114,8 @@ for barcode_index in "${!barcodes[@]}"; do
fi
done
echo "Did not find FASTQ files files for well ${well_id}! "\
"Make sure that the input files have the correct file name format."
"Make sure that the input files have the correct file name format."\
"Input files: ${input_r1[@]}"
exit 1
done

View File

@@ -0,0 +1,43 @@
# Component configuration for the `concatRuns` workflow in the `utils` namespace.
name: concatRuns
namespace: utils
description: |
Concatenate well FASTQ files from different runs in order to increase sequencing depth.
arguments:
# Inputs: lists of R1 and R2 FASTQ files, plus the sample ID they belong to.
- name: "--input_r1"
type: file
required: true
multiple: true
- name: "--input_r2"
type: file
required: true
multiple: true
- name: "--sample_id"
type: string
required: true
# Outputs: lists of R1 and R2 files.
- name: "--output_r1"
type: file
multiple: true
description: Path to read 1 fastq/fasta file
direction: output
- name: "--output_r2"
type: file
multiple: true
description: Path to read 2 fastq/fasta file
direction: output
# The workflow is implemented in main.nf; `run_wf` is its entrypoint.
resources:
- type: nextflow_script
path: main.nf
entrypoint: run_wf
# `concat_text` comes from the `cb` repository declared under `repositories`.
dependencies:
- name: concat_text
repository: cb
repositories:
- name: cb
type: vsh
repo: craftbox
tag: v0.1.0
runners:
- type: nextflow
engines:
- type: native

View File

@@ -0,0 +1,128 @@
// Concatenate the per-well FASTQ files of events (runs) that share a sample ID.
//
// Input events:  [id, state], where the state holds `input_r1` and `input_r2`
//                (equally sized lists of per-well FASTQ files) and `sample_id`.
// Output events: one event per sample ID, with the state keys `output_r1`,
//                `output_r2` and `_meta` (whose `join_id` is the ID of one of
//                the events that contributed to the sample).
workflow run_wf {
  take:
    input_ch

  main:
    // Count the number of input events per sample.
    // Results from events with the same sample ID need to be concatenated.
    event_counts_ch = input_ch
      | map {id, state ->
        def new_state = state + ["event_id": id]
        def new_event = [state.sample_id, new_state]
        return new_event
      }
      | groupTuple(by: 0)
      | flatMap { id, states ->
        // Emit one event per original event ID, carrying the number of
        // events that share its sample ID.
        def orig_event_ids = states.collect{it.event_id}
        def new_events = orig_event_ids.collect{ orig_event_id ->
          [orig_event_id, ["n_events": states.size()]]
        }
        return new_events
      }

    // The number of events per sample is passed to `groupKey()` so that
    // `groupTuple()` can emit the sample as soon as it is ready. This makes sure
    // that the samples are processed asynchronously.
    output_ch = input_ch.join(event_counts_ch)
      | flatMap {id, state_demultiplex, state_event_counts ->
        assert state_demultiplex.input_r1.size() == state_demultiplex.input_r2.size():
          "Expected output from well demultiplexing to contain equal amount of forward and reverse FASTQ files."
        // Create one event per (R1, R2) pair, i.e. one event per well.
        def new_states = [state_demultiplex.input_r1, state_demultiplex.input_r2].transpose().collect{ fastq_files ->
          def (r1_file, r2_file) = fastq_files
          // The first capture group is used as the well ID.
          def regex = ~/^(\w+)_R[12]{1}_001\.fastq(\.gz)?$/
          def parsed_file_name = r1_file.name =~ regex
          def parsed_file_name_r2 = r2_file.name =~ regex
          // Fail with a readable message instead of an IndexOutOfBoundsException
          // when a file name does not follow the expected format.
          assert parsed_file_name.matches() && parsed_file_name_r2.matches():
            "Unexpected FASTQ file name(s) '${r1_file.name}' and/or '${r2_file.name}': expected the format '<well_id>_R[12]_001.fastq(.gz)'."
          def well_id = parsed_file_name[0][1]
          def well_id_r2 = parsed_file_name_r2[0][1]
          assert (well_id.length() != 0) && (well_id == well_id_r2):
            "Well IDs of paired FASTQ files do not match: '${r1_file.name}' versus '${r2_file.name}'."
          def new_state = state_demultiplex + [
            "input_r1": r1_file,
            "input_r2": r2_file,
            "event_id": id,
          ]
          // Group per sample and well; the expected group size is the number
          // of events that share the sample ID.
          def group_settings = groupKey("${state_demultiplex.sample_id}_${well_id}", state_event_counts.n_events)
          return [group_settings, new_state]
        }
        return new_states
      }
      | groupTuple(by: 0, sort: "hash", remainder: true)
      | map {group_settings, sample_states ->
        // Collect the FASTQ files of one well across all events of the sample.
        def input_r1 = sample_states.collect{it.input_r1}.flatten()
        def input_r2 = sample_states.collect{it.input_r2}.flatten()
        def event_ids = sample_states.collect{it.event_id}
        def sample_id_list = sample_states.collect{it.sample_id}.unique()
        assert sample_id_list.size() == 1:
          "Expected exactly one sample ID per group of wells, found: ${sample_id_list}"
        def sample_id = sample_id_list[0]
        assert input_r1.size() == input_r2.size():
          "Expected an equal number of forward and reverse FASTQ files for sample ${sample_id}."
        def new_state = [
          "input_r1": input_r1,
          "input_r2": input_r2,
          "event_id": event_ids,
          "sample_id": sample_id,
        ]
        return [group_settings.target, new_state]
      }
      // Concatenate the forward reads; skipped when there is only one file.
      | concat_text.run(
        directives: [label: ["lowmem", "lowcpu"]],
        key: "concat_samples_r1",
        runIf: {id, state -> state.input_r1.size() > 1},
        fromState: { id, state ->
          // The output is named after the first input file.
          def output_file_name = state.input_r1[0].name
          [
            input: state.input_r1,
            gzip_output: false,
            output: output_file_name
          ]
        },
        toState: { id, result, state ->
          def newState = state + [ input_r1: [ result.output ] ]
          return newState
        }
      )
      // Concatenate the reverse reads; skipped when there is only one file.
      | concat_text.run(
        directives: [label: ["lowmem", "lowcpu"]],
        key: "concat_samples_r2",
        runIf: {id, state -> state.input_r2.size() > 1},
        fromState: { id, state ->
          // The output is named after the first input file.
          def output_file_name = state.input_r2[0].name
          [
            input: state.input_r2,
            gzip_output: false,
            output: output_file_name
          ]
        },
        toState: { id, result, state ->
          def newState = state + [ input_r2: [ result.output ] ]
          return newState
        }
      )
      // Gather the wells of each sample back into a single event per sample ID.
      | map {id, state ->
        def new_state = [state.sample_id, state]
        return new_state
      }
      | groupTuple(by: 0, sort: 'hash')
      | map {id, states ->
        def new_state = [
          "input_r1": states.collect{it.input_r1}.flatten(),
          "input_r2": states.collect{it.input_r2}.flatten(),
          // `event_id` is a list here; its first entry is used as the join ID.
          "_meta": ["join_id": states[0].event_id[0]]
        ]
        return [id, new_state]
      }
      | setState(
        [
          "output_r1": "input_r1",
          "output_r2": "input_r2",
          "_meta": "_meta"
        ]
      )

  emit:
    output_ch
}

View File

@@ -11,11 +11,10 @@ argument_groups:
The id of the job
type: string
required: true
- name: "--params"
- name: "--params_yaml"
description: |
The state to save
base64 encoded yaml file containing the state
type: string
multiple: true
required: true
- name: Outputs

View File

@@ -1,26 +1,12 @@
import re
import yaml
import base64
## VIASH START
par = {
"id": "sample_one",
"params": ['workflow_id=sample_one',
'umi_length=10',
'fastq_output_r1[0]=fastq/*_R1_001.fastq',
'fastq_output_r2[0]=fastq/*_R2_001.fastq',
'star_output[0]=star.$id/*',
'nrReadsNrGenesPerChrom=nrReadsNrGenesPerChrom.$id.txt',
'star_qc_metrics=starLogs.$id.txt',
'eset=eset.$id.rds',
'f_data=fData.$id.tsv',
'p_data=pData.$id.tsv',
'html_report=report.$id.html',
'input_r1[0]=/home/jakubmajercik/Data_Intuitive/htrnaseq/v1/100k/SRR14730301/VH02001612_S9_R1_001.fastq',
'input_r1[1]=/home/jakubmajercik/Data_Intuitive/htrnaseq/v1/100k/SRR14730301/VH02001612_S9_R2_001.fastq',
'barcodesFasta=/home/jakubmajercik/Data_Intuitive/htrnaseq/v1/360-wells-with-ids.fasta',
'genomeDir=/home/jakubmajercik/Data_Intuitive/htrnaseq/v1/genomeDir/gencode.v41.star.sparse',
'annotation=/home/jakubmajercik/Data_Intuitive/htrnaseq/v1/genomeDir/gencode.v41.annotation.gtf.gz'],
"output": 'params_out.yaml'
"params_yaml": "cGFyYW1zX3lhbWw6IHt9Cg==",
"output": "output.yaml"
}
## VIASH END
@@ -28,26 +14,41 @@ class Dumper(yaml.Dumper):
def increase_indent(self, flow=False, indentless=False):
return super(Dumper, self).increase_indent(flow, False)
params = {}
for param in par['params']:
param = param.replace('$id', par['id'])
key, value = param.split('=')
def decode_params_yaml(encoded_yaml):
# Step 1: Decode from Base64
yaml_bytes = base64.b64decode(encoded_yaml)
array_match = re.match(r'(.+)\[(\d+)\]$', key)
if array_match:
base_key = array_match.group(1)
index = int(array_match.group(2))
if base_key not in params:
params[base_key] = []
while len(params[base_key]) <= index:
params[base_key].append(None)
params[base_key][index] = value
else:
params[key] = value
# Step 2: Convert bytes to string
yaml_string = yaml_bytes.decode('utf-8')
# Step 3: Extract pattern for Java path objects
# Find pattern: !!sun.nio.fs.UnixPath /path/to/file
pattern = r'!!sun\.nio\.fs\.UnixPath\s+([^\n]+)'
# Replace with the actual path string (captured group)
yaml_string = re.sub(pattern, r'\1', yaml_string)
# Handle any remaining empty UnixPath objects
yaml_string = yaml_string.replace('!!sun.nio.fs.UnixPath {}', '""')
# Step 4: Parse YAML
yaml_data = yaml.safe_load(yaml_string)
return yaml_data
def replace_id(value, sample_id):
if isinstance(value, str):
return value.replace('$id', sample_id)
elif isinstance(value, list):
return [replace_id(item, sample_id) for item in value]
return value
print(par['params_yaml'])
params = decode_params_yaml(par['params_yaml'])
for key, value in params.items():
params[key] = replace_id(value, par["id"])
with open(par["output"], 'w') as f:
yaml.dump(params, f, default_flow_style=False, Dumper=Dumper)
yaml.dump(params, f, default_flow_style=False, Dumper=Dumper)

View File

View File

@@ -8,14 +8,14 @@ argument_groups:
arguments:
- name: --input_r1
description: |
Forward reads in FASTQ format. Multiple files can be provided which will
Forward reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will
be demultiplexed separately before joining the results for each individual well.
type: file
required: true
multiple: true
- name: --input_r2
description: |
Reverse reads in FASTQ format. Multiple files can be provided which will
Reverse reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will
be demultiplexed separately before joining the results for each individual well.
type: file
required: true
@@ -35,22 +35,22 @@ argument_groups:
- name: --annotation
type: file
required: true
- name: --sample_id
type: string
required: false
description: |
Sample ID for the provided input files. If not provided, the value of --id
will be used. Input files will always be demultiplexed separately,
but the FASTQs for wells with matching sample IDs will be concatenated before mapping.
- name: Output arguments
arguments:
- name: --fastq_output_r1
description: List of demultiplexed fastq files
- name: "--fastq_output"
description: "Directory containing output fastq files"
type: file
direction: output
multiple: true
required: true
default: "fastq/*_R1_001.fastq"
- name: --fastq_output_r2
description: List of demultiplexed fastq files
type: file
default: "fastq/*"
direction: output
multiple: true
required: true
default: "fastq/*_R2_001.fastq"
- name: --star_output
description: Output from mapping with STAR
type: file
@@ -120,6 +120,8 @@ dependencies:
repository: local
- name: report/create_report
repository: local
- name: utils/concatRuns
repository: local
- name: utils/save_params
repository: local
repositories:

View File

@@ -1,63 +1,146 @@
workflow run_wf {
take:
input_ch
raw_ch
main:
input_ch = raw_ch
// Use the event ID as the default for the sample ID
| map {id, state ->
def sample_id = state.sample_id ?: id
def newState = state + ["sample_id": sample_id, "run_id": id]
return [id, newState]
}
// The featureData only has one requirement: the genome annotation.
// It can be generated straight away.
// It can be generated straight away. Most of the time, there is one shared
// annotation for all of the inputs and the fData should only be calculated once.
// The state is manipulated in such a way that there is one event created per unique
// input annotation file. In turn, the featureData file can be joined into the original input
// channel which allows it to be shared across events if required.
f_data_ch = input_ch
| save_params.run(
fromState: {id, state ->
// Convert state to list of key=value parameters
def params_list = []
// Add each parameter as key=value
state.each { key, value ->
if (value != null) {
// Handle different types of values
if (value instanceof Collection) {
// For collections, add multiple entries with array notation
value.eachWithIndex { item, index ->
params_list.add("${key}[${index}]=${item}")
}
} else {
// For simple values, just add key=value
params_list.add("${key}=${value}")
}
}
fromState: {id, state ->
// Define the function before using it
def convertPaths
convertPaths = { value ->
if (value instanceof java.nio.file.Path)
return value.toUriString()
else if (value instanceof Collection)
return value.collect { convertPaths(it) }
else
return value
}
// Apply conversion to all state values
def convertedState = state.collectEntries { k, v -> [(k): convertPaths(v)] }
def yaml = new org.yaml.snakeyaml.Yaml()
def yamlString = yaml.dump(convertedState)
def encodedYaml = yamlString.bytes.encodeBase64().toString()
return [
"id": id,
"params": params_list,
"params_yaml": encodedYaml,
"output": "${id}_parameters.yaml"
]
},
toState: ["parameters": "output"]
)
| toSortedList()
| flatMap {ids_and_states ->
def annotation_files = ids_and_states.inject([:]){ old_state, id_and_state ->
def (id, state) = id_and_state
def annotation_file = state.annotation
def new_state = old_state + [(annotation_file): (old_state.getOrDefault(annotation_file, []) + [id])]
return new_state
}
def file_names = annotation_files.keySet().collect{it.name}
assert (file_names.toSet().size() == file_names.size()):
"Please make sure that the annotation files have unique file names."
def new_states = annotation_files.collect{annotation_file, value ->
def new_state = [annotation_file.name , ["annotation": annotation_file, "event_ids": value]]
return new_state
}
return new_states
}
| create_fdata.run(
directives: [label: ["lowmem", "lowcpu"]],
fromState: [
"gtf": "annotation",
"output": "f_data"
],
toState: {id, result, state -> ["f_data": result.output]}
toState: ["f_data": "output"]
)
| flatMap {_, state ->
def new_states = state.event_ids.collect{event_id ->
[event_id, ["f_data": state.f_data]]
}
return new_states
}
// Perform mapping of each well.
mapping_ch = input_ch
demultiplex_ch = input_ch
| well_demultiplex.run(
fromState: [
"input_r1": "input_r1",
"input_r2": "input_r2",
"barcodesFasta": "barcodesFasta",
],
toState: [
"input_r1": "output_r1",
"input_r2": "output_r2",
]
toState: {id, result, state ->
def all_fastq = result.output_r1 + result.output_r2
def output_dir = all_fastq.collect{it.parent}.unique()
assert output_dir.size() == 1: "Expected output from well demultiplexing to reside into one directory."
def new_state = state + [
"input_r1": result.output_r1,
"input_r2": result.output_r2,
"fastq_output_directory": output_dir[0],
]
return new_state
}
)
fastq_output_directory_ch = demultiplex_ch
| map {id, state ->
def new_event = [state.sample_id, state]
return new_event
}
| groupTuple(by: 0, sort: "hash")
| map {id, states ->
def fastq_output_dirs = states.collect{it.fastq_output_directory}
def new_state = ["fastq_output_directory": fastq_output_dirs]
def new_event = [id, new_state]
return [id, new_state]
}
concat_samples_ch = demultiplex_ch.join(f_data_ch)
| map {id, demutliplex_state, f_data_state ->
def newState = demutliplex_state + ["f_data": f_data_state["f_data"]]
[id, newState]
}
| concatRuns.run(
fromState: [
"input_r1": "input_r1",
"input_r2": "input_r2",
"sample_id": "sample_id",
],
toState: {id, result, state ->
def state_overwite = [
"input_r1": result.output_r1,
"input_r2": result.output_r2,
"_meta": ["join_id": state.run_id]
]
return state + state_overwite
}
)
pool_ch = concat_samples_ch.join(fastq_output_directory_ch)
| map {id, demux_state, fastq_output_directory_state ->
def new_state = demux_state + fastq_output_directory_state
return [id, new_state]
}
| parallel_map.run(
directives: ["label": ["highmem", "lowcpu"]],
fromState: {id, state ->
@@ -74,9 +157,6 @@ workflow run_wf {
"star_output": "output",
]
)
// From the mapped wells, create statistics based on the BAM files.
pool_ch = mapping_ch
// Split the events from 1 event per pool into events per well
// and add extra metadata about the wells to the state.
| well_metadata.run(
@@ -130,10 +210,10 @@ workflow run_wf {
// Gather the keys from all states. For some state items,
// we need to gather all the different items from across the states
def barcodes = states.collect{it.barcode}
assert barcodes.clone().unique().size() == barcodes.size(), \
assert barcodes.clone().unique().size() == barcodes.size(): \
"Error when gathering information for pool ${id}, barcodes are not unique!"
def well_ids = states.collect{it.well_id}
assert well_ids.clone().unique().size() == well_ids.size(), \
assert well_ids.clone().unique().size() == well_ids.size(): \
"Error when gathering information for pool ${id}, well IDs are not unique!"
def custom_state = [
"input_r1": states.collect{it.input_r1},
@@ -154,7 +234,7 @@ workflow run_wf {
// All other state should have a unique value
def old_state_items = other_state_keys.inject([:]){ old_state, argument_name ->
argument_values = states.collect{it.get(argument_name)}.unique()
assert argument_values.size() == 1, "Arguments should be the same across modalities. Please report this \
assert argument_values.size() == 1: "Arguments should be the same across modalities. Please report this \
as a bug. Argument name: $argument_name, \
argument value: $argument_values"
def argument_value
@@ -197,7 +277,7 @@ workflow run_wf {
]
)
p_data_ch = star_logs_ch.join(pool_statistics_ch, remainder: true)
eset_ch = star_logs_ch.join(pool_statistics_ch, remainder: true)
| map {id, star_logs_state, pool_statistics_state ->
def newState = star_logs_state + ["nrReadsNrGenesPerChromPool": pool_statistics_state.nrReadsNrGenesPerChromPool]
return [id, newState]
@@ -211,12 +291,6 @@ workflow run_wf {
],
toState: ["p_data": "output"],
)
eset_ch = p_data_ch.join(f_data_ch, remainder: true)
| map {id, p_data_state, f_data_state ->
def newState = p_data_state + ["f_data": f_data_state["f_data"]]
[id, newState]
}
| create_eset.run(
directives: [label: ["lowmem", "lowcpu"]],
fromState: [
@@ -258,13 +332,14 @@ workflow run_wf {
output_ch = eset_ch.join(report_channel)
| map {id, state_eset, state_report ->
def new_state = state_eset + ["html_report": state_report.html_report]
def new_state = state_eset + [
"html_report": state_report.html_report,
]
[id, new_state]
}
| setState([
"star_output": "star_output",
"fastq_output_r1": "input_r1",
"fastq_output_r2": "input_r2",
"star_output": "star_output",
"fastq_output": "fastq_output_directory",
"star_output": "star_output",
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChromPool",
"star_qc_metrics": "star_qc_metrics",
@@ -272,7 +347,8 @@ workflow run_wf {
"f_data": "f_data",
"p_data": "p_data",
"html_report": "html_report",
"parameters": "parameters"
"parameters": "parameters",
"_meta": "_meta",
])

View File

@@ -6,7 +6,6 @@ argument_groups:
arguments:
- name: --input
description: Base directory of the form `s3:/<bucket>/Sequencing/<Sequencer>/<RunID>/<demultiplex_dir>`
multiple: true
type: file
required: true
- name: --barcodesFasta

View File

@@ -8,19 +8,13 @@ workflow run_wf {
input_ch
main:
output_ch = input_ch
// Multiple runs can be provided, and the reads for these runs will
// be concatenated. Here, we gather the FASTQ files from each input directory first.
| flatMap {id, state ->
// Create an input event per input directory
def new_state = state.input.withIndex().collect{input_dir, id_index ->
def state_item = state + ["input": input_dir, "index": id_index, "run_id": id]
return ["${id}_${id_index}".toString(), state_item]
}
return new_state
}
htrnaseq_ch = input_ch
// List the FASTQ files per input directory
// Be careful: an event per lane is created!
| map {id, state ->
def new_state = state + ["run_id": id]
return [id, new_state]
}
| listInputDir.run(
fromState: [
"input": "input",
@@ -38,13 +32,11 @@ workflow run_wf {
// there might be multiple FASTQs for a single sample that correspond to the
// lanes. So the fastq files must be gathered across lanes and input folders
// in order to create input lists for R1 and R2.
| map {id, state -> [state.sample_id, state]}
| groupTuple(by: 0, sort: { state1, state2 ->
if (state1.index == state2.index) {
return state1.lane <=> state2.lane
}
return state1.index <=> state2.index
})
// The ID of the event here is important! It determines the name of the output
// folders for the FASTQ files and these folders are published as-is later.
// The folder in which the FASTQ files are stored should be named after the run ID.
| map {id, state -> ["${state.sample_id}/${state.run_id}".toString(), state]}
| groupTuple(by: 0, sort: "hash")
| map {id, states ->
def new_r1 = states.collect{it.r1_output}
def new_r2 = states.collect{it.r2_output}
@@ -53,7 +45,7 @@ workflow run_wf {
// TODO: this can be asserted.
def new_state = states[0] + [
"r1": new_r1,
"r2": new_r2
"r2": new_r2,
]
return [id, new_state]
}
@@ -62,8 +54,7 @@ workflow run_wf {
f_data: 'fData/$id.txt',
p_data: 'pData/$id.txt',
star_output: 'star_output/$id/*',
fastq_output_r1: 'fastq/*_R1_001.fastq',
fastq_output_r2: 'fastq/*_R1_001.fastq',
fastq_output: 'fastq/*',
eset: 'esets/$id.rds',
nrReadsNrGenesPerChrom: 'nrReadsNrGenesPerChrom/$id.txt',
star_qc_metrics: 'starLogs/$id.txt',
@@ -76,32 +67,32 @@ workflow run_wf {
genomeDir: "genomeDir",
annotation: "annotation",
umi_length: "umi_length",
sample_id: "sample_id",
],
toState: { id, result, state -> state + result }
)
// The HT-RNAseq workflow outputs multiple events, one per 'pool' (usually a plate)
// but for publishing the results, this is not handy because we want to use the $id
// variable as a pointer to the target data.
//
// So, we should combine everything together
//
// project_id / experiment_id / date_workflow
// project_id / experiment_id / "data_processed" / date_workflow
grouped_ch = htrnaseq_ch
| toSortedList
| map{ vs ->
def all_fastqs
[
vs[0][1].run_id, // The original ID
[
star_output: reduce_paths(vs.collect{ it[1].star_output }.flatten()),
fastq_output_r1: reduce_paths(vs.collect{ it[1].fastq_output_r1 }.flatten(), 1),
fastq_output_r2: reduce_paths(vs.collect{ it[1].fastq_output_r2 }.flatten(), 1),
nrReadsNrGenesPerChrom: reduce_paths(vs.collect{ it[1].nrReadsNrGenesPerChrom }),
star_qc_metrics: reduce_paths(vs.collect{ it[1].star_qc_metrics }),
eset: reduce_paths(vs.collect{ it[1].eset }),
f_data: reduce_paths(vs.collect{ it[1].f_data }),
p_data: reduce_paths(vs.collect{ it[1].p_data }),
fastq_output: vs.collect{ it[1].fastq_output }.flatten().unique(),
html_report: vs.collect{ it[1].html_report }[0], // The report is for all pools
plain_output: vs.collect{ it[1].plain_output }[0],
project_id: vs.collect{ it[1].project_id }[0],
@@ -110,12 +101,13 @@ workflow run_wf {
]
}
results_publish_ch = grouped_ch
| publish_results.run(
fromState: { id, state ->
def project = (state.plain_output) ? id : "${state.project_id}"
def experiment = (state.plain_output) ? id : "${state.experiment_id}"
def id0 = "${project}/${experiment}"
def id1 = (state.plain_output) ? id : "${id0}/${date}"
def id1 = (state.plain_output) ? id : "${id0}/data_processed/${date}"
def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}"
if (id == id2) {
@@ -146,14 +138,24 @@ workflow run_wf {
]
)
fastq_publish_ch = grouped_ch
| flatMap{id, state ->
def new_states = state.fastq_output.collect{fastq_dir ->
def new_id = fastq_dir.name // The folder name corresponds to the run
def fastq_files = fastq_dir.listFiles()
def new_state = [
"fastq_output": fastq_files
]
return [new_id, new_state]
}
return new_states
}
| publish_fastqs.run(
fromState: { id, state ->
def id0 = "${id}"
def id1 = (state.plain_output) ? id : "${id0}/${date}"
def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}"
println(state.plain_output)
if (id == id2) {
println("Publising fastqs to ${params.fastq_publish_dir}")
} else {
@@ -161,8 +163,7 @@ workflow run_wf {
}
[
input_r1: state.fastq_output_r1,
input_r2: state.fastq_output_r2,
input: state.fastq_output,
output: "${id2}",
]
},
@@ -177,7 +178,7 @@ workflow run_wf {
)
emit:
output_ch
grouped_ch
| map{ id, state -> [ id, [ _meta: [ join_id: state.run_id ] ] ] }
}

View File

@@ -60,6 +60,8 @@ workflow run_wf {
output: new_output,
error_rate: 0.10,
demultiplex_mode: "single",
output_r1: state.output_r1,
output_r2: state.output_r2,
]
},
toState: { id, result, state ->

View File

@@ -94,6 +94,9 @@ test_resources:
path: "mapping_dir"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -182,11 +185,13 @@ engines:
bioc:
- "Seurat"
bioc_force_install: false
warnings_as_errors: true
test_setup:
- type: "r"
cran:
- "testthat"
bioc_force_install: false
warnings_as_errors: true
entrypoint: []
cmd: null
- type: "native"
@@ -197,8 +202,8 @@ build_info:
engine: "docker|native"
output: "target/executable/eset/create_eset"
executable: "target/executable/eset/create_eset/create_eset"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -208,7 +213,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# create_eset save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -173,27 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "create_eset save-params"
echo ""
echo "Arguments:"
echo " --pDataFile"
echo " type: file, required parameter, file must exist"
echo ""
echo " --fDataFile"
echo " type: file, required parameter, file must exist"
echo ""
echo " --mappingDir"
echo " type: file, required parameter, multiple values allowed, file must exist"
echo ""
echo " --poolName"
echo " type: string, required parameter"
echo ""
echo " --output"
echo " type: file, required parameter, output, file must exist"
echo " default: eset.\$id.rds"
}
# initialise variables
VIASH_MODE='run'
@@ -470,16 +449,16 @@ function ViashDockerfile {
cat << 'VIASHDOCKER'
FROM rocker/r2u:24.04
ENTRYPOINT []
RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
Rscript -e 'if (!requireNamespace("Seurat", quietly = TRUE)) BiocManager::install("Seurat")' && \
Rscript -e 'remotes::install_cran(c("data.table", "nlcv"), repos = "https://cran.rstudio.com")'
RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
Rscript -e 'options(warn = 2); if (!requireNamespace("Seurat", quietly = TRUE)) BiocManager::install("Seurat")' && \
Rscript -e 'options(warn = 2); remotes::install_cran(c("data.table", "nlcv"), repos = "https://cran.rstudio.com")'
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
LABEL org.opencontainers.image.description="Companion container for running component eset create_eset"
LABEL org.opencontainers.image.created="2025-03-25T15:33:57Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:02Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -594,6 +573,53 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "create_eset save-params"
echo ""
echo "Arguments:"
echo " --pDataFile"
echo " type: file, required parameter, file must exist"
echo ""
echo " --fDataFile"
echo " type: file, required parameter, file must exist"
echo ""
echo " --mappingDir"
echo " type: file, required parameter, multiple values allowed, file must exist"
echo ""
echo " --poolName"
echo " type: string, required parameter"
echo ""
echo " --output"
echo " type: file, required parameter, output, file must exist"
echo " default: eset.\$id.rds"
echo ""
echo "Viash built in Computational Requirements:"
echo " ---cpus=INT"
echo " Number of CPUs to use"
echo " ---memory=STRING"
echo " Amount of memory to use. Examples: 4GB, 3MiB."
echo ""
echo "Viash built in Docker:"
echo " ---setup=STRATEGY"
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
echo " Default: ifneedbepullelsecachedbuild"
echo " ---dockerfile"
echo " Print the dockerfile to stdout."
echo " ---docker_run_args=ARG"
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
echo " ---docker_image_id"
echo " Print the docker image id to stdout."
echo " ---debug"
echo " Enter the docker container for debugging purposes."
echo ""
echo "Viash built in Engines:"
echo " ---engine=ENGINE_ID"
echo " Specify the engine to use. Options are: docker, native."
echo " Default: docker"
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -70,6 +70,9 @@ test_resources:
path: "test_annotation.gtf"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -176,8 +179,8 @@ build_info:
engine: "docker|native"
output: "target/executable/eset/create_fdata"
executable: "target/executable/eset/create_fdata/create_fdata"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -187,7 +190,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# create_fdata save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -173,26 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of the create_fdata executable to stdout,
  # one array element per output line.
  local -a help_text=(
    # Title and summary
    "create_fdata save-params"
    ""
    "Create a fdata file"
    ""
    # Component arguments
    "Arguments:"
    " --gtf"
    " type: file, required parameter, file must exist"
    " Genome annotation file in GTF format."
    ""
    " --output"
    " type: file, output, file must exist"
    # \$ keeps the placeholder literal: the text printed is "$id"
    " default: fData.\$id.txt"
    " Tab-delimited text file containing information about the 'gene' or"
    " 'transcript'"
    " entries from the input GTF file. The 'transcript' entries are used in"
    " case the source"
    " of the GTF was 'refGene' or 'ncbiRefSeq'."
  )
  printf '%s\n' "${help_text[@]}"
}
# initialise variables
VIASH_MODE='run'
@@ -478,9 +458,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
LABEL org.opencontainers.image.description="Companion container for running component eset create_fdata"
LABEL org.opencontainers.image.created="2025-03-25T15:33:57Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:02Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -595,6 +575,52 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of the create_fdata executable to stdout:
  # component arguments first, then the viash built-in flags.
  # Each array element is emitted as one line.
  local -a help_text=(
    # Title and summary
    "create_fdata save-params"
    ""
    "Create a fdata file"
    ""
    # Component arguments
    "Arguments:"
    " --gtf"
    " type: file, required parameter, file must exist"
    " Genome annotation file in GTF format."
    ""
    " --output"
    " type: file, output, file must exist"
    # \$ keeps the placeholder literal: the text printed is "$id"
    " default: fData.\$id.txt"
    " Tab-delimited text file containing information about the 'gene' or"
    " 'transcript'"
    " entries from the input GTF file. The 'transcript' entries are used in"
    " case the source"
    " of the GTF was 'refGene' or 'ncbiRefSeq'."
    ""
    # Resource flags (triple-dash options)
    "Viash built in Computational Requirements:"
    " ---cpus=INT"
    " Number of CPUs to use"
    " ---memory=STRING"
    " Amount of memory to use. Examples: 4GB, 3MiB."
    ""
    # Docker engine flags
    "Viash built in Docker:"
    " ---setup=STRATEGY"
    " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
    " Default: ifneedbepullelsecachedbuild"
    " ---dockerfile"
    " Print the dockerfile to stdout."
    " ---docker_run_args=ARG"
    # Escaped backticks are printed literally, not run as a command substitution
    " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
    " ---docker_image_id"
    " Print the docker image id to stdout."
    " ---debug"
    " Enter the docker container for debugging purposes."
    ""
    # Engine selection
    "Viash built in Engines:"
    " ---engine=ENGINE_ID"
    " Specify the engine to use. Options are: docker, native."
    " Default: docker"
  )
  printf '%s\n' "${help_text[@]}"
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -84,6 +84,9 @@ test_resources:
path: "starLogs.txt"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -190,8 +193,8 @@ build_info:
engine: "docker|native"
output: "target/executable/eset/create_pdata"
executable: "target/executable/eset/create_pdata/create_pdata"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -201,7 +204,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# create_pdata save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -173,36 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of the create_pdata executable to stdout,
  # one array element per output line.
  local -a help_text=(
    # Title and summary
    "create_pdata save-params"
    ""
    "Create a pdata file by combining the mapping statistics"
    ""
    # Component arguments
    "Arguments:"
    " --star_stats_file"
    " type: file, required parameter, file must exist"
    " Tab-delimited text file containing statistics (per column) that were"
    " generated"
    " from the STAR log files (Log.final.out, Summary.csv,"
    " ReadsPerGene.out.tab)."
    " Each entry (row) in the file describes the values for one well"
    " (barcode)."
    ""
    " --nrReadsNrGenesPerChromPool"
    " type: file, required parameter, file must exist"
    " Pivot table in tsv format of the combined nrReadsNrGenesPerChrom files"
    " from STAR."
    " Describes per chromosome (as columns) the number of reads, as well as"
    " the total number"
    " of reads per cell barcode and the percentage of nuclear, ERCC and"
    " mitochondrial"
    " reads."
    ""
    " --output"
    " type: file, output, file must exist"
    # \$ keeps the placeholder literal: the text printed is "$id"
    " default: pData.\$id.txt"
  )
  printf '%s\n' "${help_text[@]}"
}
# initialise variables
VIASH_MODE='run'
@@ -488,9 +458,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
LABEL org.opencontainers.image.description="Companion container for running component eset create_pdata"
LABEL org.opencontainers.image.created="2025-03-25T15:33:57Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:03Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -605,6 +575,62 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of the create_pdata executable to stdout:
  # component arguments first, then the viash built-in flags.
  # Each array element is emitted as one line.
  local -a help_text=(
    # Title and summary
    "create_pdata save-params"
    ""
    "Create a pdata file by combining the mapping statistics"
    ""
    # Component arguments
    "Arguments:"
    " --star_stats_file"
    " type: file, required parameter, file must exist"
    " Tab-delimited text file containing statistics (per column) that were"
    " generated"
    " from the STAR log files (Log.final.out, Summary.csv,"
    " ReadsPerGene.out.tab)."
    " Each entry (row) in the file describes the values for one well"
    " (barcode)."
    ""
    " --nrReadsNrGenesPerChromPool"
    " type: file, required parameter, file must exist"
    " Pivot table in tsv format of the combined nrReadsNrGenesPerChrom files"
    " from STAR."
    " Describes per chromosome (as columns) the number of reads, as well as"
    " the total number"
    " of reads per cell barcode and the percentage of nuclear, ERCC and"
    " mitochondrial"
    " reads."
    ""
    " --output"
    " type: file, output, file must exist"
    # \$ keeps the placeholder literal: the text printed is "$id"
    " default: pData.\$id.txt"
    ""
    # Resource flags (triple-dash options)
    "Viash built in Computational Requirements:"
    " ---cpus=INT"
    " Number of CPUs to use"
    " ---memory=STRING"
    " Amount of memory to use. Examples: 4GB, 3MiB."
    ""
    # Docker engine flags
    "Viash built in Docker:"
    " ---setup=STRATEGY"
    " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
    " Default: ifneedbepullelsecachedbuild"
    " ---dockerfile"
    " Print the dockerfile to stdout."
    " ---docker_run_args=ARG"
    # Escaped backticks are printed literally, not run as a command substitution
    " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
    " ---docker_image_id"
    " Print the docker image id to stdout."
    " ---debug"
    " Enter the docker container for debugging purposes."
    ""
    # Engine selection
    "Viash built in Engines:"
    " ---engine=ENGINE_ID"
    " Specify the engine to use. Options are: docker, native."
    " Default: docker"
  )
  printf '%s\n' "${help_text[@]}"
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -50,6 +50,9 @@ resources:
description: "This component test the ExpressionSet object as output by the main pipeline."
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -137,6 +140,7 @@ engines:
bioc:
- "Biobase"
bioc_force_install: false
warnings_as_errors: true
entrypoint: []
cmd: null
- type: "native"
@@ -147,8 +151,8 @@ build_info:
engine: "docker|native"
output: "target/executable/integration_test_components/htrnaseq/check_eset"
executable: "target/executable/integration_test_components/htrnaseq/check_eset/check_eset"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -158,7 +162,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# check_eset save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -172,21 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of the check_eset executable to stdout,
  # one array element per output line.
  local -a help_text=(
    # Title and summary
    "check_eset save-params"
    ""
    "This component test the ExpressionSet object as output by the main pipeline."
    ""
    # Component inputs
    "Inputs:"
    " --eset"
    " type: file, required parameter, file must exist"
    " example: eset.rds"
    " Path to an ExpressionSet object."
    ""
    " --star_output"
    " type: file, required parameter, multiple values allowed, file must exist"
  )
  printf '%s\n' "${help_text[@]}"
}
# initialise variables
VIASH_MODE='run'
@@ -463,16 +448,16 @@ function ViashDockerfile {
cat << 'VIASHDOCKER'
FROM bioconductor/bioconductor_docker:3.19
ENTRYPOINT []
RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
Rscript -e 'if (!requireNamespace("Biobase", quietly = TRUE)) BiocManager::install("Biobase")' && \
Rscript -e 'remotes::install_cran(c("bit64"), repos = "https://cran.rstudio.com")'
RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
Rscript -e 'options(warn = 2); if (!requireNamespace("Biobase", quietly = TRUE)) BiocManager::install("Biobase")' && \
Rscript -e 'options(warn = 2); remotes::install_cran(c("bit64"), repos = "https://cran.rstudio.com")'
LABEL org.opencontainers.image.authors="Dries Schaumont"
LABEL org.opencontainers.image.description="Companion container for running component integration_test_components/htrnaseq check_eset"
LABEL org.opencontainers.image.created="2025-03-25T15:33:56Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:01Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -587,6 +572,47 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of the check_eset executable to stdout:
  # component inputs first, then the viash built-in flags.
  # Each array element is emitted as one line.
  local -a help_text=(
    # Title and summary
    "check_eset save-params"
    ""
    "This component test the ExpressionSet object as output by the main pipeline."
    ""
    # Component inputs
    "Inputs:"
    " --eset"
    " type: file, required parameter, file must exist"
    " example: eset.rds"
    " Path to an ExpressionSet object."
    ""
    " --star_output"
    " type: file, required parameter, multiple values allowed, file must exist"
    ""
    # Resource flags (triple-dash options)
    "Viash built in Computational Requirements:"
    " ---cpus=INT"
    " Number of CPUs to use"
    " ---memory=STRING"
    " Amount of memory to use. Examples: 4GB, 3MiB."
    ""
    # Docker engine flags
    "Viash built in Docker:"
    " ---setup=STRATEGY"
    " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
    " Default: ifneedbepullelsecachedbuild"
    " ---dockerfile"
    " Print the dockerfile to stdout."
    " ---docker_run_args=ARG"
    # Escaped backticks are printed literally, not run as a command substitution
    " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
    " ---docker_image_id"
    " Print the docker image id to stdout."
    " ---debug"
    " Enter the docker container for debugging purposes."
    ""
    # Engine selection
    "Viash built in Engines:"
    " ---engine=ENGINE_ID"
    " Specify the engine to use. Options are: docker, native."
    " Default: docker"
  )
  printf '%s\n' "${help_text[@]}"
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -57,6 +57,9 @@ resources:
description: "This component test the cutadapt output from the well_demultiplex subworkflow."
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -157,8 +160,8 @@ build_info:
engine: "docker|native"
output: "target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output"
executable: "target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/check_cutadapt_output"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -168,7 +171,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# check_cutadapt_output save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -172,25 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of the check_cutadapt_output executable to stdout,
  # one array element per output line.
  local -a help_text=(
    # Title and summary
    "check_cutadapt_output save-params"
    ""
    "This component test the cutadapt output from the well_demultiplex subworkflow."
    ""
    # Component inputs
    "Inputs:"
    " --fastq_r1"
    " type: file, required parameter, multiple values allowed, file must exist"
    " Path to the forward reads to test."
    ""
    " --fastq_r2"
    " type: file, required parameter, multiple values allowed, file must exist"
    " Path to the reverse reads to test."
    ""
    " --ids"
    " type: string, required parameter, multiple values allowed"
    " Well IDs for the corresponding fastq input"
  )
  printf '%s\n' "${help_text[@]}"
}
# initialise variables
VIASH_MODE='run'
@@ -476,9 +457,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dries Schaumont"
LABEL org.opencontainers.image.description="Companion container for running component integration_test_components/well_demultiplexing check_cutadapt_output"
LABEL org.opencontainers.image.created="2025-03-25T15:33:58Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:03Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -593,6 +574,51 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of the check_cutadapt_output executable to stdout:
  # component inputs first, then the viash built-in flags.
  # Each array element is emitted as one line.
  local -a help_text=(
    # Title and summary
    "check_cutadapt_output save-params"
    ""
    "This component test the cutadapt output from the well_demultiplex subworkflow."
    ""
    # Component inputs
    "Inputs:"
    " --fastq_r1"
    " type: file, required parameter, multiple values allowed, file must exist"
    " Path to the forward reads to test."
    ""
    " --fastq_r2"
    " type: file, required parameter, multiple values allowed, file must exist"
    " Path to the reverse reads to test."
    ""
    " --ids"
    " type: string, required parameter, multiple values allowed"
    " Well IDs for the corresponding fastq input"
    ""
    # Resource flags (triple-dash options)
    "Viash built in Computational Requirements:"
    " ---cpus=INT"
    " Number of CPUs to use"
    " ---memory=STRING"
    " Amount of memory to use. Examples: 4GB, 3MiB."
    ""
    # Docker engine flags
    "Viash built in Docker:"
    " ---setup=STRATEGY"
    " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
    " Default: ifneedbepullelsecachedbuild"
    " ---dockerfile"
    " Print the dockerfile to stdout."
    " ---docker_run_args=ARG"
    # Escaped backticks are printed literally, not run as a command substitution
    " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
    " ---docker_image_id"
    " Print the docker image id to stdout."
    " ---debug"
    " Enter the docker container for debugging purposes."
    ""
    # Engine selection
    "Viash built in Engines:"
    " ---engine=ENGINE_ID"
    " Specify the engine to use. Options are: docker, native."
    " Default: docker"
  )
  printf '%s\n' "${help_text[@]}"
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -5,18 +5,8 @@ argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input_r1"
description: "Directory to write R1 fastq data to"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--input_r2"
description: "Directory to write R2 fastq data to"
name: "--input"
description: "Directory to write fastq data to"
info: null
must_exist: true
create_parent: true
@@ -47,6 +37,9 @@ resources:
description: "Publish the fastq files per well"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -142,8 +135,8 @@ build_info:
engine: "docker|native"
output: "target/executable/io/publish_fastqs"
executable: "target/executable/io/publish_fastqs/publish_fastqs"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -153,7 +146,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# publish_fastqs save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -169,26 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of the publish_fastqs executable to stdout,
  # one array element per output line.
  local -a help_text=(
    # Title and summary
    "publish_fastqs save-params"
    ""
    "Publish the fastq files per well"
    ""
    # Inputs
    "Input arguments:"
    " --input_r1"
    " type: file, required parameter, multiple values allowed, file must exist"
    " Directory to write R1 fastq data to"
    ""
    " --input_r2"
    " type: file, required parameter, multiple values allowed, file must exist"
    " Directory to write R2 fastq data to"
    ""
    # Outputs
    "Output arguments:"
    " --output"
    " type: file, output, file must exist"
    # \$ keeps the placeholder literal: the text printed is "$id"
    " default: \$id"
  )
  printf '%s\n' "${help_text[@]}"
}
# initialise variables
VIASH_MODE='run'
@@ -470,9 +450,9 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*
LABEL org.opencontainers.image.description="Companion container for running component io publish_fastqs"
LABEL org.opencontainers.image.created="2025-03-25T15:33:56Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:01Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -587,6 +567,48 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of the publish_fastqs executable to stdout:
  # component arguments first, then the viash built-in flags.
  # Each array element is emitted as one line.
  #
  # Fix: the --input description previously read "Directory to write fastq
  # data to". --input is the *source*: the component script copies every
  # --input entry into --output with `cp -rL`, so the help text now says so.
  # NOTE(review): this wrapper is generated by viash; the same wording should
  # also be corrected in the component config so it survives regeneration.
  local -a help_text=(
    # Title and summary
    "publish_fastqs save-params"
    ""
    "Publish the fastq files per well"
    ""
    # Inputs
    "Input arguments:"
    " --input"
    " type: file, required parameter, multiple values allowed, file must exist"
    " Fastq files or directories to copy into the output directory"
    ""
    # Outputs
    "Output arguments:"
    " --output"
    " type: file, output, file must exist"
    # \$ keeps the placeholder literal: the text printed is "$id"
    " default: \$id"
    ""
    # Resource flags (triple-dash options)
    "Viash built in Computational Requirements:"
    " ---cpus=INT"
    " Number of CPUs to use"
    " ---memory=STRING"
    " Amount of memory to use. Examples: 4GB, 3MiB."
    ""
    # Docker engine flags
    "Viash built in Docker:"
    " ---setup=STRATEGY"
    " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
    " Default: ifneedbepullelsecachedbuild"
    " ---dockerfile"
    " Print the dockerfile to stdout."
    " ---docker_run_args=ARG"
    # Escaped backticks are printed literally, not run as a command substitution
    " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
    " ---docker_image_id"
    " Print the docker image id to stdout."
    " ---debug"
    " Enter the docker container for debugging purposes."
    ""
    # Engine selection
    "Viash built in Engines:"
    " ---engine=ENGINE_ID"
    " Specify the engine to use. Options are: docker, native."
    " Default: docker"
  )
  printf '%s\n' "${help_text[@]}"
}
# initialise array
VIASH_POSITIONAL_ARGS=''
@@ -612,37 +634,20 @@ while [[ $# -gt 0 ]]; do
echo "publish_fastqs save-params"
exit
;;
--input_r1)
if [ -z "$VIASH_PAR_INPUT_R1" ]; then
VIASH_PAR_INPUT_R1="$2"
--input)
if [ -z "$VIASH_PAR_INPUT" ]; then
VIASH_PAR_INPUT="$2"
else
VIASH_PAR_INPUT_R1="$VIASH_PAR_INPUT_R1;""$2"
VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2"
fi
[ $# -lt 2 ] && ViashError Not enough arguments passed to --input_r1. Use "--help" to get more information on the parameters. && exit 1
[ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--input_r1=*)
if [ -z "$VIASH_PAR_INPUT_R1" ]; then
VIASH_PAR_INPUT_R1=$(ViashRemoveFlags "$1")
--input=*)
if [ -z "$VIASH_PAR_INPUT" ]; then
VIASH_PAR_INPUT=$(ViashRemoveFlags "$1")
else
VIASH_PAR_INPUT_R1="$VIASH_PAR_INPUT_R1;"$(ViashRemoveFlags "$1")
fi
shift 1
;;
--input_r2)
if [ -z "$VIASH_PAR_INPUT_R2" ]; then
VIASH_PAR_INPUT_R2="$2"
else
VIASH_PAR_INPUT_R2="$VIASH_PAR_INPUT_R2;""$2"
fi
[ $# -lt 2 ] && ViashError Not enough arguments passed to --input_r2. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--input_r2=*)
if [ -z "$VIASH_PAR_INPUT_R2" ]; then
VIASH_PAR_INPUT_R2=$(ViashRemoveFlags "$1")
else
VIASH_PAR_INPUT_R2="$VIASH_PAR_INPUT_R2;"$(ViashRemoveFlags "$1")
VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1")
fi
shift 1
;;
@@ -829,12 +834,8 @@ fi
# check whether required parameters exist
if [ -z ${VIASH_PAR_INPUT_R1+x} ]; then
ViashError '--input_r1' is a required argument. Use "--help" to get more information on the parameters.
exit 1
fi
if [ -z ${VIASH_PAR_INPUT_R2+x} ]; then
ViashError '--input_r2' is a required argument. Use "--help" to get more information on the parameters.
if [ -z ${VIASH_PAR_INPUT+x} ]; then
ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
exit 1
fi
if [ -z ${VIASH_META_NAME+x} ]; then
@@ -868,22 +869,10 @@ if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
fi
# check whether required files exist
if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then
if [ ! -z "$VIASH_PAR_INPUT" ]; then
IFS=';'
set -f
for file in $VIASH_PAR_INPUT_R1; do
unset IFS
if [ ! -e "$file" ]; then
ViashError "Input file '$file' does not exist."
exit 1
fi
done
set +f
fi
if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then
IFS=';'
set -f
for file in $VIASH_PAR_INPUT_R2; do
for file in $VIASH_PAR_INPUT; do
unset IFS
if [ ! -e "$file" ]; then
ViashError "Input file '$file' does not exist."
@@ -984,27 +973,16 @@ fi
if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
# detect volumes from file arguments
VIASH_CHOWN_VARS=()
if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then
VIASH_TEST_INPUT_R1=()
if [ ! -z "$VIASH_PAR_INPUT" ]; then
VIASH_TEST_INPUT=()
IFS=';'
for var in $VIASH_PAR_INPUT_R1; do
for var in $VIASH_PAR_INPUT; do
unset IFS
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" )
var=$(ViashDockerAutodetectMount "$var")
VIASH_TEST_INPUT_R1+=( "$var" )
VIASH_TEST_INPUT+=( "$var" )
done
VIASH_PAR_INPUT_R1=$(IFS=';' ; echo "${VIASH_TEST_INPUT_R1[*]}")
fi
if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then
VIASH_TEST_INPUT_R2=()
IFS=';'
for var in $VIASH_PAR_INPUT_R2; do
unset IFS
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" )
var=$(ViashDockerAutodetectMount "$var")
VIASH_TEST_INPUT_R2+=( "$var" )
done
VIASH_PAR_INPUT_R2=$(IFS=';' ; echo "${VIASH_TEST_INPUT_R2[*]}")
VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}")
fi
if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" )
@@ -1080,8 +1058,7 @@ trap interrupt INT SIGINT
cat > "\$tempscript" << 'VIASHMAIN'
## VIASH START
# The following code has been auto-generated by Viash.
$( if [ ! -z ${VIASH_PAR_INPUT_R1+x} ]; then echo "${VIASH_PAR_INPUT_R1}" | sed "s#'#'\"'\"'#g;s#.*#par_input_r1='&'#" ; else echo "# par_input_r1="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "${VIASH_PAR_INPUT_R2}" | sed "s#'#'\"'\"'#g;s#.*#par_input_r2='&'#" ; else echo "# par_input_r2="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
@@ -1113,14 +1090,9 @@ mkdir -p "\$par_output" && echo "\$par_output created"
echo
echo "Copying files..."
IFS=";" read -ra input_r1 <<<\$par_input_r1
IFS=";" read -ra input_r2 <<<\$par_input_r2
IFS=";" read -ra input <<<\$par_input
for i in "\${input_r1[@]}"; do
cp -rL "\$i" "\$par_output/"
done
for i in "\${input_r2[@]}"; do
for i in "\${input[@]}"; do
cp -rL "\$i" "\$par_output/"
done
VIASHMAIN
@@ -1133,31 +1105,18 @@ VIASHEOF
if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
# strip viash automount from file paths
if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then
unset VIASH_TEST_INPUT_R1
if [ ! -z "$VIASH_PAR_INPUT" ]; then
unset VIASH_TEST_INPUT
IFS=';'
for var in $VIASH_PAR_INPUT_R1; do
for var in $VIASH_PAR_INPUT; do
unset IFS
if [ -z "$VIASH_TEST_INPUT_R1" ]; then
VIASH_TEST_INPUT_R1="$(ViashDockerStripAutomount "$var")"
if [ -z "$VIASH_TEST_INPUT" ]; then
VIASH_TEST_INPUT="$(ViashDockerStripAutomount "$var")"
else
VIASH_TEST_INPUT_R1="$VIASH_TEST_INPUT_R1;""$(ViashDockerStripAutomount "$var")"
VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashDockerStripAutomount "$var")"
fi
done
VIASH_PAR_INPUT_R1="$VIASH_TEST_INPUT_R1"
fi
if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then
unset VIASH_TEST_INPUT_R2
IFS=';'
for var in $VIASH_PAR_INPUT_R2; do
unset IFS
if [ -z "$VIASH_TEST_INPUT_R2" ]; then
VIASH_TEST_INPUT_R2="$(ViashDockerStripAutomount "$var")"
else
VIASH_TEST_INPUT_R2="$VIASH_TEST_INPUT_R2;""$(ViashDockerStripAutomount "$var")"
fi
done
VIASH_PAR_INPUT_R2="$VIASH_TEST_INPUT_R2"
VIASH_PAR_INPUT="$VIASH_TEST_INPUT"
fi
if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT")

View File

@@ -91,6 +91,9 @@ resources:
description: "Publish the results"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -186,8 +189,8 @@ build_info:
engine: "docker|native"
output: "target/executable/io/publish_results"
executable: "target/executable/io/publish_results/publish_results"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -197,7 +200,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# publish_results save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -169,40 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Write the usage text of the publish_results executable to stdout.
  # The here-document delimiter is quoted, so the text below is emitted
  # verbatim (no parameter or command expansion takes place).
  cat <<'VIASH_HELP_TEXT'
publish_results save-params

Publish the results

Input arguments:
 --star_output
 type: file, required parameter, multiple values allowed, file must exist
 Output from mapping with STAR

 --nrReadsNrGenesPerChrom
 type: file, required parameter, multiple values allowed, file must exist

 --star_qc_metrics
 type: file, required parameter, multiple values allowed, file must exist

 --eset
 type: file, required parameter, multiple values allowed, file must exist

 --f_data
 type: file, required parameter, multiple values allowed, file must exist

 --p_data
 type: file, required parameter, multiple values allowed, file must exist

 --html_report
 type: file, required parameter, file must exist

Output arguments:
 --output
 type: file, output, file must exist
 default: $id
VIASH_HELP_TEXT
}
# initialise variables
VIASH_MODE='run'
@@ -484,9 +450,9 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*
LABEL org.opencontainers.image.description="Companion container for running component io publish_results"
LABEL org.opencontainers.image.created="2025-03-25T15:33:57Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:02Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -601,6 +567,66 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Write the usage text of the publish_results executable to stdout:
  # first the component's own arguments, then the sections describing the
  # triple-dash options (resources, Docker, engine selection).
  # The here-document delimiter is quoted, so the text is emitted verbatim.
  cat <<'VIASH_HELP_TEXT'
publish_results save-params

Publish the results

Input arguments:
 --star_output
 type: file, required parameter, multiple values allowed, file must exist
 Output from mapping with STAR

 --nrReadsNrGenesPerChrom
 type: file, required parameter, multiple values allowed, file must exist

 --star_qc_metrics
 type: file, required parameter, multiple values allowed, file must exist

 --eset
 type: file, required parameter, multiple values allowed, file must exist

 --f_data
 type: file, required parameter, multiple values allowed, file must exist

 --p_data
 type: file, required parameter, multiple values allowed, file must exist

 --html_report
 type: file, required parameter, file must exist

Output arguments:
 --output
 type: file, output, file must exist
 default: $id

Viash built in Computational Requirements:
 ---cpus=INT
 Number of CPUs to use
 ---memory=STRING
 Amount of memory to use. Examples: 4GB, 3MiB.

Viash built in Docker:
 ---setup=STRATEGY
 Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing.
 Default: ifneedbepullelsecachedbuild
 ---dockerfile
 Print the dockerfile to stdout.
 ---docker_run_args=ARG
 Provide runtime arguments to Docker. See the documentation on `docker run` for more information.
 ---docker_image_id
 Print the docker image id to stdout.
 ---debug
 Enter the docker container for debugging purposes.

Viash built in Engines:
 ---engine=ENGINE_ID
 Specify the engine to use. Options are: docker, native.
 Default: docker
VIASH_HELP_TEXT
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -164,6 +164,9 @@ test_resources:
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -278,8 +281,8 @@ build_info:
engine: "docker|native"
output: "target/executable/parallel_map"
executable: "target/executable/parallel_map/parallel_map"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -289,7 +292,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# parallel_map save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -173,85 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Write the usage text of the parallel_map executable to stdout.
  # Text fixes relative to the previous wording:
  #  - --input_r2 now asks for the matching *forward* reads in 'input_r1'
  #    (it used to say "reverse reads", contradicting the --input_r1 entry);
  #  - "forwards reads" -> "forward reads";
  #  - the --output type line is no longer split into a stray "must exist" line;
  #  - removed a duplicated "STAR" and fixed "DO NOT match" in the --output text.
  echo "parallel_map save-params"
  echo ""
  echo "Map wells in batch, using STAR"
  echo "Spliced Transcripts Alignment to a Reference (C) Alexander Dobin"
  echo "https://github.com/alexdobin/STAR"
  echo ""
  echo "Input arguments:"
  echo " --input_r1"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " Input FASTQ files for the forward reads. All FASTQ file names must start"
  echo " with the prefix '{well_id}_R1', where"
  echo " 'well_id' can be found as the sequence identifier in the barcodes FASTA"
  echo " file (see 'barcodesFasta' argument)."
  echo " For each FASTQ file, a matching FASTQ file for the reverse reads must be"
  echo " provided to the 'input_r2' argument,"
  echo " meaning that their 'well_id' prefix must match. The number of items"
  echo " provided for 'input_r1' must be equal"
  echo " to the number of items for 'input_r2'."
  echo ""
  echo " --input_r2"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " Input FASTQ files for the reverse reads. All FASTQ file names must start"
  echo " with the prefix '{well_id}_R2', where"
  echo " 'well_id' can be found as the sequence identifier in the barcodes FASTA"
  echo " file (see 'barcodesFasta' argument)."
  echo " For each FASTQ file, a matching FASTQ file for the forward reads must be"
  echo " provided to the 'input_r1' argument,"
  echo " meaning that their 'well_id' prefix must match. The number of items"
  echo " provided for 'input_r1' must be equal"
  echo " to the number of items for 'input_r2'."
  echo ""
  echo " --genomeDir"
  echo " type: file, required parameter, file must exist"
  echo " Reference genome to match to. Can be generated from genomic FASTA"
  echo " sequences and a genome annotation"
  echo " by using STAR with '--runMode genomeGenerate'."
  echo ""
  echo " --barcodesFasta"
  echo " type: file, required parameter, file must exist"
  echo " FASTA file where each entry specifies a unique barcode sequence present"
  echo " at the start of the forward input reads"
  echo " (input_r1). The IDs of each barcode (the start of the FASTA headers up"
  echo " until the first whitespace character) must"
  echo " match with the start of one input FASTQ pair."
  echo ""
  echo "Barcode arguments:"
  echo " --umiLength"
  echo " type: integer, required parameter"
  echo " Length of the Unique Molecular Identifiers (UMI). The UMI are expected"
  echo " to be located after the barcodes in the"
  echo " forward reads."
  echo ""
  echo " --limitBAMsortRAM"
  echo " type: string"
  echo " default: 10000000000"
  echo ""
  echo "Runtime arguments:"
  echo " --runThreadN"
  echo " type: integer"
  echo " default: 1"
  echo " Number of threads to use for a single STAR execution."
  echo ""
  echo "Output arguments:"
  echo " --output"
  echo " type: file, required parameter, multiple values allowed, output, file must exist"
  echo " default: ./*"
  echo " A list of output folders which are the result of using STAR to map each"
  echo " input FASTQ pair to the reference genome."
  echo " The order of the items does NOT match with the order of the entries in the"
  echo " barcodes FASTA file or the input FASTQ pairs."
  echo ""
  echo " --joblog"
  echo " type: file, output, file must exist"
  echo " default: execution_log.txt"
  echo " Where to store the log file listing all the jobs."
}
# initialise variables
VIASH_MODE='run'
@@ -540,9 +461,9 @@ ENV STAR_BINARY=STAR
COPY STAR /usr/local/bin/$STAR_BINARY
LABEL org.opencontainers.image.authors="Dries Schaumont, Toni Verbeiren"
LABEL org.opencontainers.image.description="Companion container for running component parallel_map"
LABEL org.opencontainers.image.created="2025-03-25T15:33:58Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:03Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -657,6 +578,111 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Write the usage text of the parallel_map executable to stdout:
  # the component's own arguments followed by the sections describing the
  # triple-dash options (resources, Docker, engine selection).
  # Text fixes relative to the previous wording:
  #  - --input_r2 now asks for the matching *forward* reads in 'input_r1'
  #    (it used to say "reverse reads", contradicting the --input_r1 entry);
  #  - "forwards reads" -> "forward reads";
  #  - the --output type line is no longer split into a stray "must exist" line;
  #  - removed a duplicated "STAR" and fixed "DO NOT match" in the --output text.
  echo "parallel_map save-params"
  echo ""
  echo "Map wells in batch, using STAR"
  echo "Spliced Transcripts Alignment to a Reference (C) Alexander Dobin"
  echo "https://github.com/alexdobin/STAR"
  echo ""
  echo "Input arguments:"
  echo " --input_r1"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " Input FASTQ files for the forward reads. All FASTQ file names must start"
  echo " with the prefix '{well_id}_R1', where"
  echo " 'well_id' can be found as the sequence identifier in the barcodes FASTA"
  echo " file (see 'barcodesFasta' argument)."
  echo " For each FASTQ file, a matching FASTQ file for the reverse reads must be"
  echo " provided to the 'input_r2' argument,"
  echo " meaning that their 'well_id' prefix must match. The number of items"
  echo " provided for 'input_r1' must be equal"
  echo " to the number of items for 'input_r2'."
  echo ""
  echo " --input_r2"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " Input FASTQ files for the reverse reads. All FASTQ file names must start"
  echo " with the prefix '{well_id}_R2', where"
  echo " 'well_id' can be found as the sequence identifier in the barcodes FASTA"
  echo " file (see 'barcodesFasta' argument)."
  echo " For each FASTQ file, a matching FASTQ file for the forward reads must be"
  echo " provided to the 'input_r1' argument,"
  echo " meaning that their 'well_id' prefix must match. The number of items"
  echo " provided for 'input_r1' must be equal"
  echo " to the number of items for 'input_r2'."
  echo ""
  echo " --genomeDir"
  echo " type: file, required parameter, file must exist"
  echo " Reference genome to match to. Can be generated from genomic FASTA"
  echo " sequences and a genome annotation"
  echo " by using STAR with '--runMode genomeGenerate'."
  echo ""
  echo " --barcodesFasta"
  echo " type: file, required parameter, file must exist"
  echo " FASTA file where each entry specifies a unique barcode sequence present"
  echo " at the start of the forward input reads"
  echo " (input_r1). The IDs of each barcode (the start of the FASTA headers up"
  echo " until the first whitespace character) must"
  echo " match with the start of one input FASTQ pair."
  echo ""
  echo "Barcode arguments:"
  echo " --umiLength"
  echo " type: integer, required parameter"
  echo " Length of the Unique Molecular Identifiers (UMI). The UMI are expected"
  echo " to be located after the barcodes in the"
  echo " forward reads."
  echo ""
  echo " --limitBAMsortRAM"
  echo " type: string"
  echo " default: 10000000000"
  echo ""
  echo "Runtime arguments:"
  echo " --runThreadN"
  echo " type: integer"
  echo " default: 1"
  echo " Number of threads to use for a single STAR execution."
  echo ""
  echo "Output arguments:"
  echo " --output"
  echo " type: file, required parameter, multiple values allowed, output, file must exist"
  echo " default: ./*"
  echo " A list of output folders which are the result of using STAR to map each"
  echo " input FASTQ pair to the reference genome."
  echo " The order of the items does NOT match with the order of the entries in the"
  echo " barcodes FASTA file or the input FASTQ pairs."
  echo ""
  echo " --joblog"
  echo " type: file, output, file must exist"
  echo " default: execution_log.txt"
  echo " Where to store the log file listing all the jobs."
  echo ""
  echo "Viash built in Computational Requirements:"
  echo " ---cpus=INT"
  echo " Number of CPUs to use"
  echo " ---memory=STRING"
  echo " Amount of memory to use. Examples: 4GB, 3MiB."
  echo ""
  echo "Viash built in Docker:"
  echo " ---setup=STRATEGY"
  echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
  echo " Default: ifneedbepullelsecachedbuild"
  echo " ---dockerfile"
  echo " Print the dockerfile to stdout."
  echo " ---docker_run_args=ARG"
  echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
  echo " ---docker_image_id"
  echo " Print the docker image id to stdout."
  echo " ---debug"
  echo " Enter the docker container for debugging purposes."
  echo ""
  echo "Viash built in Engines:"
  echo " ---engine=ENGINE_ID"
  echo " Specify the engine to use. Options are: docker, native."
  echo " Default: docker"
}
# initialise array
VIASH_POSITIONAL_ARGS=''
@@ -1421,7 +1447,8 @@ for barcode_index in "\${!barcodes[@]}"; do
fi
done
echo "Did not find FASTQ files files for well \${well_id}! "\\
"Make sure that the input files have the correct file name format."
"Make sure that the input files have the correct file name format."\\
"Input files: \${input_r1[@]}"
exit 1
done

View File

@@ -75,6 +75,9 @@ test_resources:
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -184,12 +187,14 @@ engines:
- "install.packages(\"oaStyle\", repos = c(rdepot = \"https://repos.openanalytics.eu/repo/public\"\
, getOption(\"repos\")))"
bioc_force_install: false
warnings_as_errors: true
test_setup:
- type: "r"
packages:
- "testthat"
- "R.utils"
bioc_force_install: false
warnings_as_errors: true
entrypoint: []
cmd: null
- type: "native"
@@ -200,8 +205,8 @@ build_info:
engine: "docker|native"
output: "target/executable/report/create_report"
executable: "target/executable/report/create_report/create_report"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -211,7 +216,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# create_report save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -173,20 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Write the usage text of the create_report executable to stdout.
  # The here-document delimiter is quoted, so the text is emitted verbatim.
  cat <<'VIASH_HELP_TEXT'
create_report save-params

Create a basic QC report in HTML format based on a number of esets.

Arguments:
 --eset
 type: file, required parameter, multiple values allowed, file must exist

 --output_report
 type: file, required parameter, output, file must exist
 example: report.html
VIASH_HELP_TEXT
}
# initialise variables
VIASH_MODE='run'
@@ -467,18 +453,18 @@ RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y procps pandoc && \
rm -rf /var/lib/apt/lists/*
RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
Rscript -e 'if (!requireNamespace("Biobase", quietly = TRUE)) BiocManager::install("Biobase")' && \
Rscript -e 'if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) BiocManager::install("ComplexHeatmap")' && \
Rscript -e 'remotes::install_cran(c("ggplot2", "knitr", "gridExtra", "RColorBrewer", "processx", "whisker", "rmarkdown", "bookdown", "data.table", "platetools", "htmltools", "DT", "logger", "bit64"), repos = "https://cran.rstudio.com")' && \
Rscript -e 'install.packages("oaStyle", repos = c(rdepot = "https://repos.openanalytics.eu/repo/public", getOption("repos")))'
RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
Rscript -e 'options(warn = 2); if (!requireNamespace("Biobase", quietly = TRUE)) BiocManager::install("Biobase")' && \
Rscript -e 'options(warn = 2); if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) BiocManager::install("ComplexHeatmap")' && \
Rscript -e 'options(warn = 2); remotes::install_cran(c("ggplot2", "knitr", "gridExtra", "RColorBrewer", "processx", "whisker", "rmarkdown", "bookdown", "data.table", "platetools", "htmltools", "DT", "logger", "bit64"), repos = "https://cran.rstudio.com")' && \
Rscript -e 'options(warn = 2); install.packages("oaStyle", repos = c(rdepot = "https://repos.openanalytics.eu/repo/public", getOption("repos")))'
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
LABEL org.opencontainers.image.description="Companion container for running component report create_report"
LABEL org.opencontainers.image.created="2025-03-25T15:33:58Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:03Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -593,6 +579,46 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Write the usage text of the create_report executable to stdout:
  # the component's own arguments followed by the sections describing the
  # triple-dash options (resources, Docker, engine selection).
  # The here-document delimiter is quoted, so the text is emitted verbatim.
  cat <<'VIASH_HELP_TEXT'
create_report save-params

Create a basic QC report in HTML format based on a number of esets.

Arguments:
 --eset
 type: file, required parameter, multiple values allowed, file must exist

 --output_report
 type: file, required parameter, output, file must exist
 example: report.html

Viash built in Computational Requirements:
 ---cpus=INT
 Number of CPUs to use
 ---memory=STRING
 Amount of memory to use. Examples: 4GB, 3MiB.

Viash built in Docker:
 ---setup=STRATEGY
 Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing.
 Default: ifneedbepullelsecachedbuild
 ---dockerfile
 Print the dockerfile to stdout.
 ---docker_run_args=ARG
 Provide runtime arguments to Docker. See the documentation on `docker run` for more information.
 ---docker_image_id
 Print the docker image id to stdout.
 ---debug
 Enter the docker container for debugging purposes.

Viash built in Engines:
 ---engine=ENGINE_ID
 Specify the engine to use. Options are: docker, native.
 Default: docker
VIASH_HELP_TEXT
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -91,6 +91,9 @@ test_resources:
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -197,8 +200,8 @@ build_info:
engine: "docker|native"
output: "target/executable/stats/combine_star_logs"
executable: "target/executable/stats/combine_star_logs/combine_star_logs"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -208,7 +211,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# combine_star_logs save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -172,39 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Write the usage text of the combine_star_logs executable to stdout.
  # Text fix: the --barcodes description read "Barcodes responding to the
  # respective log files"; the intended word is "corresponding".
  echo "combine_star_logs save-params"
  echo ""
  echo "Arguments:"
  echo " --barcodes"
  echo " type: string, required parameter, multiple values allowed"
  echo " Barcodes corresponding to the respective log files."
  echo ""
  echo " --star_logs"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " example: Log.final.out"
  echo " Paths to the STAR log files (most frequently called Log.final.out)"
  echo ""
  echo " --gene_summary_logs"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " example: Summary.txt"
  echo " Paths to the Summary.csv files from the STAR Solo output. Can be found"
  echo " in"
  echo " the 'Solo.out/Gene' folder relative to the root of the STAR output"
  echo " directory."
  echo ""
  echo " --reads_per_gene_logs"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " Paths to the 'ReadsPerGene.out.tab' files as output by STAR."
  echo ""
  echo " --output"
  echo " type: file, output, file must exist"
  echo " default: starLogs.txt"
  echo " Tab-delimited file describing for each barcode (as the rows), the"
  echo " metrics (as columns)"
  echo " gathered from the different input files."
}
# initialise variables
VIASH_MODE='run'
@@ -490,9 +457,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dries Schaumont"
LABEL org.opencontainers.image.description="Companion container for running component stats combine_star_logs"
LABEL org.opencontainers.image.created="2025-03-25T15:33:56Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:01Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -607,6 +574,65 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Write the usage text of the combine_star_logs executable to stdout:
  # the component's own arguments followed by the sections describing the
  # triple-dash options (resources, Docker, engine selection).
  # Text fix: the --barcodes description read "Barcodes responding to the
  # respective log files"; the intended word is "corresponding".
  echo "combine_star_logs save-params"
  echo ""
  echo "Arguments:"
  echo " --barcodes"
  echo " type: string, required parameter, multiple values allowed"
  echo " Barcodes corresponding to the respective log files."
  echo ""
  echo " --star_logs"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " example: Log.final.out"
  echo " Paths to the STAR log files (most frequently called Log.final.out)"
  echo ""
  echo " --gene_summary_logs"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " example: Summary.txt"
  echo " Paths to the Summary.csv files from the STAR Solo output. Can be found"
  echo " in"
  echo " the 'Solo.out/Gene' folder relative to the root of the STAR output"
  echo " directory."
  echo ""
  echo " --reads_per_gene_logs"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " Paths to the 'ReadsPerGene.out.tab' files as output by STAR."
  echo ""
  echo " --output"
  echo " type: file, output, file must exist"
  echo " default: starLogs.txt"
  echo " Tab-delimited file describing for each barcode (as the rows), the"
  echo " metrics (as columns)"
  echo " gathered from the different input files."
  echo ""
  echo "Viash built in Computational Requirements:"
  echo " ---cpus=INT"
  echo " Number of CPUs to use"
  echo " ---memory=STRING"
  echo " Amount of memory to use. Examples: 4GB, 3MiB."
  echo ""
  echo "Viash built in Docker:"
  echo " ---setup=STRATEGY"
  echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
  echo " Default: ifneedbepullelsecachedbuild"
  echo " ---dockerfile"
  echo " Print the dockerfile to stdout."
  echo " ---docker_run_args=ARG"
  echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
  echo " ---docker_image_id"
  echo " Print the docker image id to stdout."
  echo " ---debug"
  echo " Enter the docker container for debugging purposes."
  echo ""
  echo "Viash built in Engines:"
  echo " ---engine=ENGINE_ID"
  echo " Specify the engine to use. Options are: docker, native."
  echo " Default: docker"
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -75,6 +75,9 @@ test_resources:
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -181,8 +184,8 @@ build_info:
engine: "docker|native"
output: "target/executable/stats/generate_pool_statistics"
executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -192,7 +195,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# generate_pool_statistics save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -173,31 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Write the usage text of the generate_pool_statistics executable to stdout.
  # The here-document delimiter is quoted, so the text is emitted verbatim.
  cat <<'VIASH_HELP_TEXT'
generate_pool_statistics save-params

Arguments:
 --nrReadsNrGenesPerChrom
 type: file, multiple values allowed, file must exist
 default: processedBamFile_well1.tsv;processedBamfile_well2.tsv
 Path to an output file that contains a .tsv formatted table describing
 per chromosome the number of reads that were mapped to that chromosome
 (NumberOfReads
 column) and the number of genes on that chromosome that had at least one
 read mapped to it (NumberOfGenes).

 --nrReadsNrGenesPerChromPool
 type: file, output, file must exist
 example: nrReadsNrGenesPerChrom.txt
 Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom
 files. Describes
 per chromosome (as columns) the number of reads, as well as the total
 number
 of reads per cell barcode and the percentage of nuclear, ERCC and
 mitochondrial
 reads.
VIASH_HELP_TEXT
}
# initialise variables
VIASH_MODE='run'
@@ -483,9 +458,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
LABEL org.opencontainers.image.description="Companion container for running component stats generate_pool_statistics"
LABEL org.opencontainers.image.created="2025-03-25T15:33:56Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:02Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -600,6 +575,57 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Write the usage text of the generate_pool_statistics executable to stdout:
  # the component's own arguments followed by the sections describing the
  # triple-dash options (resources, Docker, engine selection).
  # The here-document delimiter is quoted, so the text is emitted verbatim.
  cat <<'VIASH_HELP_TEXT'
generate_pool_statistics save-params

Arguments:
 --nrReadsNrGenesPerChrom
 type: file, multiple values allowed, file must exist
 default: processedBamFile_well1.tsv;processedBamfile_well2.tsv
 Path to an output file that contains a .tsv formatted table describing
 per chromosome the number of reads that were mapped to that chromosome
 (NumberOfReads
 column) and the number of genes on that chromosome that had at least one
 read mapped to it (NumberOfGenes).

 --nrReadsNrGenesPerChromPool
 type: file, output, file must exist
 example: nrReadsNrGenesPerChrom.txt
 Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom
 files. Describes
 per chromosome (as columns) the number of reads, as well as the total
 number
 of reads per cell barcode and the percentage of nuclear, ERCC and
 mitochondrial
 reads.

Viash built in Computational Requirements:
 ---cpus=INT
 Number of CPUs to use
 ---memory=STRING
 Amount of memory to use. Examples: 4GB, 3MiB.

Viash built in Docker:
 ---setup=STRATEGY
 Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing.
 Default: ifneedbepullelsecachedbuild
 ---dockerfile
 Print the dockerfile to stdout.
 ---docker_run_args=ARG
 Provide runtime arguments to Docker. See the documentation on `docker run` for more information.
 ---docker_image_id
 Print the docker image id to stdout.
 ---debug
 Enter the docker container for debugging purposes.

Viash built in Engines:
 ---engine=ENGINE_ID
 Specify the engine to use. Options are: docker, native.
 Default: docker
VIASH_HELP_TEXT
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -146,6 +146,9 @@ test_resources:
path: "empty.sam"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -263,8 +266,8 @@ build_info:
engine: "docker|native"
output: "target/executable/stats/generate_well_statistics"
executable: "target/executable/stats/generate_well_statistics/generate_well_statistics"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -274,7 +277,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# generate_well_statistics save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -173,65 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "generate_well_statistics save-params"
echo ""
echo "Generate summary statistics from BAM files generated by STAR solo."
echo ""
echo "Arguments:"
echo " --input"
echo " type: file, file must exist"
echo " example: input.bam"
echo " The .bam file as returned by the mapping tool STAR."
echo ""
echo " --barcode"
echo " type: string, required parameter"
echo " The barcode for the well that is being processed. Is only used to add a"
echo " metadata"
echo " column to all output files."
echo ""
echo " --well_id"
echo " type: string, required parameter"
echo " ID of this well. Only used to add a metadata column to the output files."
echo ""
echo " --processedBAMFile"
echo " type: file, output, file must exist"
echo " default: processedBamFile.txt"
echo " Path to a .tsv file listing, per read in the BAM file,"
echo " the value for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the"
echo " chromsome to which the read was mapped to."
echo ""
echo " --nrReadsNrGenesPerChrom"
echo " type: file, output, file must exist"
echo " default: nrReadsNrGenesPerChrom.txt"
echo " Path to an output file that contains a .tsv formatted table describing"
echo " per chromosome the number of reads that were mapped to that chromosome"
echo " (NumberOfReads"
echo " column) and the number of genes on that chromosome that had at least one"
echo " read mapped to it (NumberOfGenes)."
echo ""
echo " --nrReadsNrUMIsPerCB"
echo " type: file, output, file must exist"
echo " default: nrReadsNrUMIsPerCB.txt"
echo " Path to an output file that contains a .tsv formatted table describing"
echo " per barcode the number of UMI's (nrUMIs) and the total number of reads"
echo " (NumberOfReads)."
echo ""
echo " --umiFreqTop"
echo " type: file, output, file must exist"
echo " default: umiFreqTop100.txt"
echo " Path to an output file that contains a .tsv formatted table describing"
echo " per UMI (column UB) the frequency at which they occur in the reads"
echo " (column"
echo " N). Only the top 100 UMIs are included."
echo ""
echo " --threads"
echo " type: integer"
echo " default: 1"
echo " min: 1"
echo " Number of threads to use for decompressing BAM files."
}
# initialise variables
VIASH_MODE='run'
@@ -520,9 +461,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
LABEL org.opencontainers.image.description="Companion container for running component stats generate_well_statistics"
LABEL org.opencontainers.image.created="2025-03-25T15:33:56Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:01Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -637,6 +578,91 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of generate_well_statistics to stdout: the component
  # arguments first, followed by the Viash built-in resource, Docker and
  # engine options.
  echo "generate_well_statistics save-params"
  echo ""
  echo "Generate summary statistics from BAM files generated by STAR solo."
  echo ""
  echo "Arguments:"
  echo " --input"
  echo " type: file, file must exist"
  echo " example: input.bam"
  echo " The .bam file as returned by the mapping tool STAR."
  echo ""
  echo " --barcode"
  echo " type: string, required parameter"
  echo " The barcode for the well that is being processed. Is only used to add a"
  echo " metadata"
  echo " column to all output files."
  echo ""
  echo " --well_id"
  echo " type: string, required parameter"
  echo " ID of this well. Only used to add a metadata column to the output files."
  echo ""
  echo " --processedBAMFile"
  echo " type: file, output, file must exist"
  echo " default: processedBamFile.txt"
  echo " Path to a .tsv file listing, per read in the BAM file,"
  echo " the value for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the"
  echo " chromosome to which the read was mapped."
  echo ""
  echo " --nrReadsNrGenesPerChrom"
  echo " type: file, output, file must exist"
  echo " default: nrReadsNrGenesPerChrom.txt"
  echo " Path to an output file that contains a .tsv formatted table describing"
  echo " per chromosome the number of reads that were mapped to that chromosome"
  echo " (NumberOfReads"
  echo " column) and the number of genes on that chromosome that had at least one"
  echo " read mapped to it (NumberOfGenes)."
  echo ""
  echo " --nrReadsNrUMIsPerCB"
  echo " type: file, output, file must exist"
  echo " default: nrReadsNrUMIsPerCB.txt"
  echo " Path to an output file that contains a .tsv formatted table describing"
  echo " per barcode the number of UMI's (nrUMIs) and the total number of reads"
  echo " (NumberOfReads)."
  echo ""
  echo " --umiFreqTop"
  echo " type: file, output, file must exist"
  echo " default: umiFreqTop100.txt"
  echo " Path to an output file that contains a .tsv formatted table describing"
  echo " per UMI (column UB) the frequency at which they occur in the reads"
  echo " (column"
  echo " N). Only the top 100 UMIs are included."
  echo ""
  echo " --threads"
  echo " type: integer"
  echo " default: 1"
  echo " min: 1"
  echo " Number of threads to use for decompressing BAM files."
  echo ""
  echo "Viash built in Computational Requirements:"
  echo " ---cpus=INT"
  echo " Number of CPUs to use"
  echo " ---memory=STRING"
  echo " Amount of memory to use. Examples: 4GB, 3MiB."
  echo ""
  echo "Viash built in Docker:"
  echo " ---setup=STRATEGY"
  echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
  echo " Default: ifneedbepullelsecachedbuild"
  echo " ---dockerfile"
  echo " Print the dockerfile to stdout."
  echo " ---docker_run_args=ARG"
  echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
  echo " ---docker_image_id"
  echo " Print the docker image id to stdout."
  echo " ---debug"
  echo " Enter the docker container for debugging purposes."
  echo ""
  echo "Viash built in Engines:"
  echo " ---engine=ENGINE_ID"
  echo " Specify the engine to use. Options are: docker, native."
  echo " Default: docker"
}
# initialise array
VIASH_POSITIONAL_ARGS=''

View File

@@ -13,12 +13,12 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "string"
name: "--params"
description: "The state to save\n"
name: "--params_yaml"
description: "base64 encoded yaml file containing the state\n"
info: null
required: true
direction: "input"
multiple: true
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
@@ -40,6 +40,9 @@ resources:
description: "Save parameters to a file\n"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -140,8 +143,8 @@ build_info:
engine: "docker|native"
output: "target/executable/utils/save_params"
executable: "target/executable/utils/save_params/save_params"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -151,7 +154,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# save_params save-params
#
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -169,26 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "save_params save-params"
echo ""
echo "Save parameters to a file"
echo ""
echo "Inputs:"
echo " --id"
echo " type: string, required parameter"
echo " The id of the job"
echo ""
echo " --params"
echo " type: string, required parameter, multiple values allowed"
echo " The state to save"
echo ""
echo "Outputs:"
echo " --output"
echo " type: string, required parameter"
echo " The output file"
}
# initialise variables
VIASH_MODE='run'
@@ -473,9 +453,9 @@ RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "pyyaml"
LABEL org.opencontainers.image.description="Companion container for running component utils save_params"
LABEL org.opencontainers.image.created="2025-03-25T15:33:58Z"
LABEL org.opencontainers.image.created="2025-04-01T15:16:03Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
LABEL org.opencontainers.image.version="save-params"
VIASHDOCKER
@@ -590,6 +570,52 @@ fi
# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
  # Print the usage text of save_params to stdout. The quoted heredoc
  # delimiter keeps the text literal, so no expansion or command
  # substitution is performed on the help body.
  cat << 'VIASH_HELP_EOF'
save_params save-params

Save parameters to a file

Inputs:
 --id
 type: string, required parameter
 The id of the job

 --params_yaml
 type: string, required parameter
 base64 encoded yaml file containing the state

Outputs:
 --output
 type: string, required parameter
 The output file

Viash built in Computational Requirements:
 ---cpus=INT
 Number of CPUs to use
 ---memory=STRING
 Amount of memory to use. Examples: 4GB, 3MiB.

Viash built in Docker:
 ---setup=STRATEGY
 Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing.
 Default: ifneedbepullelsecachedbuild
 ---dockerfile
 Print the dockerfile to stdout.
 ---docker_run_args=ARG
 Provide runtime arguments to Docker. See the documentation on `docker run` for more information.
 ---docker_image_id
 Print the docker image id to stdout.
 ---debug
 Enter the docker container for debugging purposes.

Viash built in Engines:
 ---engine=ENGINE_ID
 Specify the engine to use. Options are: docker, native.
 Default: docker
VIASH_HELP_EOF
}
# initialise array
VIASH_POSITIONAL_ARGS=''
@@ -626,21 +652,15 @@ while [[ $# -gt 0 ]]; do
VIASH_PAR_ID=$(ViashRemoveFlags "$1")
shift 1
;;
--params)
if [ -z "$VIASH_PAR_PARAMS" ]; then
VIASH_PAR_PARAMS="$2"
else
VIASH_PAR_PARAMS="$VIASH_PAR_PARAMS;""$2"
fi
[ $# -lt 2 ] && ViashError Not enough arguments passed to --params. Use "--help" to get more information on the parameters. && exit 1
--params_yaml)
[ -n "$VIASH_PAR_PARAMS_YAML" ] && ViashError Bad arguments for option \'--params_yaml\': \'$VIASH_PAR_PARAMS_YAML\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_PARAMS_YAML="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --params_yaml. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--params=*)
if [ -z "$VIASH_PAR_PARAMS" ]; then
VIASH_PAR_PARAMS=$(ViashRemoveFlags "$1")
else
VIASH_PAR_PARAMS="$VIASH_PAR_PARAMS;"$(ViashRemoveFlags "$1")
fi
--params_yaml=*)
[ -n "$VIASH_PAR_PARAMS_YAML" ] && ViashError Bad arguments for option \'--params_yaml=*\': \'$VIASH_PAR_PARAMS_YAML\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_PARAMS_YAML=$(ViashRemoveFlags "$1")
shift 1
;;
--output)
@@ -830,8 +850,8 @@ if [ -z ${VIASH_PAR_ID+x} ]; then
ViashError '--id' is a required argument. Use "--help" to get more information on the parameters.
exit 1
fi
if [ -z ${VIASH_PAR_PARAMS+x} ]; then
ViashError '--params' is a required argument. Use "--help" to get more information on the parameters.
if [ -z ${VIASH_PAR_PARAMS_YAML+x} ]; then
ViashError '--params_yaml' is a required argument. Use "--help" to get more information on the parameters.
exit 1
fi
if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
@@ -1018,12 +1038,13 @@ trap interrupt INT SIGINT
cat > "\$tempscript" << 'VIASHMAIN'
import re
import yaml
import base64
## VIASH START
# The following code has been auto-generated by Viash.
par = {
'id': $( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "r'${VIASH_PAR_ID//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'params': $( if [ ! -z ${VIASH_PAR_PARAMS+x} ]; then echo "r'${VIASH_PAR_PARAMS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ),
'params_yaml': $( if [ ! -z ${VIASH_PAR_PARAMS_YAML+x} ]; then echo "r'${VIASH_PAR_PARAMS_YAML//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi )
}
meta = {
@@ -1056,27 +1077,41 @@ class Dumper(yaml.Dumper):
def increase_indent(self, flow=False, indentless=False):
return super(Dumper, self).increase_indent(flow, False)
params = {}
for param in par['params']:
param = param.replace('\$id', par['id'])
key, value = param.split('=')
def decode_params_yaml(encoded_yaml):
# Step 1: Decode from Base64
yaml_bytes = base64.b64decode(encoded_yaml)
array_match = re.match(r'(.+)\\[(\\d+)\\]\$', key)
if array_match:
base_key = array_match.group(1)
index = int(array_match.group(2))
if base_key not in params:
params[base_key] = []
while len(params[base_key]) <= index:
params[base_key].append(None)
params[base_key][index] = value
else:
params[key] = value
# Step 2: Convert bytes to string
yaml_string = yaml_bytes.decode('utf-8')
# Step 3: Strip the Java path tags so that plain YAML remains.
# Empty path objects (tag followed by {}) have to be rewritten first: the
# generic pattern below matches them as well (capturing the braces), which
# would leave an empty mapping in the output instead of an empty string.
yaml_string = yaml_string.replace('!!sun.nio.fs.UnixPath {}', '""')
# Find pattern: !!sun.nio.fs.UnixPath /path/to/file
pattern = r'!!sun\\.nio\\.fs\\.UnixPath\\s+([^\\n]+)'
# Replace the tagged value with the bare path string (captured group)
yaml_string = re.sub(pattern, r'\\1', yaml_string)
# Step 4: Parse YAML
yaml_data = yaml.safe_load(yaml_string)
return yaml_data
def replace_id(value, sample_id):
    """Substitute the id placeholder with sample_id throughout value.

    Strings are substituted directly. Lists and mappings are traversed
    recursively (mapping keys are left untouched). Any other value is
    returned unchanged.
    """
    if isinstance(value, str):
        return value.replace('\$id', sample_id)
    if isinstance(value, list):
        return [replace_id(item, sample_id) for item in value]
    if isinstance(value, dict):
        # Nested mappings may contain templated strings too; without this
        # branch they would be written to the output file unsubstituted.
        return {key: replace_id(item, sample_id) for key, item in value.items()}
    return value
# Echo the raw (still encoded) parameter blob to stdout.
print(par['params_yaml'])

# Decode the state, then substitute the id placeholder in every top-level value.
decoded_state = decode_params_yaml(par['params_yaml'])
params = {name: replace_id(entry, par["id"]) for name, entry in decoded_state.items()}

# Write the resulting mapping as block-style YAML to the requested output file.
with open(par["output"], 'w') as f:
    yaml.dump(params, f, default_flow_style=False, Dumper=Dumper)
VIASHMAIN

View File

@@ -94,6 +94,9 @@ test_resources:
path: "mapping_dir"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -182,11 +185,13 @@ engines:
bioc:
- "Seurat"
bioc_force_install: false
warnings_as_errors: true
test_setup:
- type: "r"
cran:
- "testthat"
bioc_force_install: false
warnings_as_errors: true
entrypoint: []
cmd: null
- type: "native"
@@ -197,8 +202,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/eset/create_eset"
executable: "target/nextflow/eset/create_eset/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -208,7 +213,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// create_eset save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  // Validation is skipped entirely for stub runs.
  if (workflow.stubRun) {
    return
  }

  // Every argument declared as a required output must be present and non-null.
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (par in requiredOutputs) {
    def name = par.plainName
    assert outputs.containsKey(name) && outputs.get(name) != null :
      "Error in module '${key}' id '${id}': required output argument '${name}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
def publishFiles(Map args) {
  // Builds a workflow that hands the files referenced in each event's state
  // to publishFilesProc. args.key is mandatory and is combined with the
  // event id to form the prefix passed to collectInputOutputPaths.
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { event ->
          def eventId = event[0]
          def eventState = event[1]

          // transpose the collected paths into two parallel lists:
          // the input files and the target output filenames
          def pathColumns = collectInputOutputPaths(eventState, eventId + "." + key_).transpose()

          [eventId, pathColumns[0], pathColumns[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Copies every staged input file to its target output filename; the resulting
// files are declared as outputs and published (copied) below getPublishDir().
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// id: event id (used as the task tag)
// inputFiles: file(s) to publish, staged under "_inputfile?/*"
// outputFiles: target filename(s), paired with inputFiles by position
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Normalise both arguments to lists, pair them up by position, and emit a
// "create parent directory" + "cp -r" command pair for each (input, output)
// pair whose paths differ.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  // Builds a workflow that copies the file outputs declared in the component
  // config to the filenames requested in the original state.
  //   args.config: component config; its allArguments determine which outputs are considered (required)
  //   args.key:    key used to instantiate the filename templates (defaults to config.name)
  // Each input event is expected to be a tuple [id, state, origState].
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath: ..., outputFilename: ...] maps, where
          // - inputPath is a List[Path]
          // - outputFilename is a List[String]
          // - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's an
                // optional argument for which the component did
                // not generate any output OR multiple channels were emitted
                // and the output was just not added to using the channel
                // that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it should be stored
                // in the state as-is, but is not something that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)

                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  // A single (non-list) value has to be wrapped first: a bare Path is
                  // itself iterable, so indexing it directly would walk over its name
                  // elements instead of treating it as one file.
                  def values = value instanceof List ? value : [value]
                  def inputPaths_ = []
                  def outputFilenames_ = []
                  values.eachWithIndex { val, ix ->
                    inputPaths_ << (val instanceof File ? val.toPath() : val)
                    outputFilenames_ << filename.replace("*", ix.toString())
                  }
                  return [[inputPath: inputPaths_, outputFilename: outputFilenames_]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1753,33 +1910,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted using groupTuple's 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2944,6 +3175,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3048,7 +3283,8 @@ meta = [
"bioc" : [
"Seurat"
],
"bioc_force_install" : false
"bioc_force_install" : false,
"warnings_as_errors" : true
}
],
"test_setup" : [
@@ -3057,7 +3293,8 @@ meta = [
"cran" : [
"testthat"
],
"bioc_force_install" : false
"bioc_force_install" : false,
"warnings_as_errors" : true
}
]
},
@@ -3071,8 +3308,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/eset/create_eset",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3087,7 +3324,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -70,6 +70,9 @@ test_resources:
path: "test_annotation.gtf"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -176,8 +179,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/eset/create_fdata"
executable: "target/nextflow/eset/create_fdata/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -187,7 +190,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// create_fdata save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Asserts that every argument in config.allArguments that is a required
// output is present in `outputs` with a non-null value.
//   outputs: map of output argument name -> value
//   config:  component config; only `allArguments` is read
//   id, key: event id and module key, used in the error message only
// The check is skipped when workflow.stubRun is set.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  // nothing is validated during a stub run
  if (workflow.stubRun) {
    return
  }
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Builds a workflow that turns each [id, state, ...] event into
// [id, inputFiles, outputFilenames] and hands it to publishFilesProc.
// The returned workflow emits its input channel unchanged.
//   args.key: module key (required); used to build the "<id>.<key>" prefix
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { event ->
          def eventId = event[0]
          def eventState = event[1]
          // transpose the collected paths and use entry 0 as the source files
          // and entry 1 as the target filenames
          def pathColumns = collectInputOutputPaths(eventState, eventId + "." + key_).transpose()
          [eventId, pathColumns[0], pathColumns[1]]
        }
        | publishFilesProc
    emit: input_ch
  }

  return publishFilesWf
}
// Process that copies each staged input file to its target output filename and
// declares the target filenames as the process output.
// Input tuple:  [id, inputFiles, outputFiles]
// Output tuple: [id, <paths named by outputFiles>]
process publishFilesProc {
// todo: check publishpath?
// publish into the directory returned by getPublishDir(), using mode "copy"
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// inputFiles are staged with the pattern "_inputfile?/*"; outputFiles is passed as a plain value
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Pair every input file with its output filename (single values are wrapped in
// a list first) and generate the shell commands for each pair.
// NOTE(review): paths are embedded between single quotes in the generated commands;
// a path containing a single quote would presumably break them - confirm this cannot occur.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
// create the parent directory of the target if it does not exist, then copy recursively
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
// the generated commands are inserted into the script one per line
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// this assumes that the state contains no other values other than those specified in the config
// Builds a workflow that, for each [id, state, origState, ...] event, works out which
// files in the state have to be copied to which filenames and passes
// [id, inputPaths, outputFilenames] to publishFilesProc. The filenames are derived from
// the output arguments in the config and the filename templates in origState.
// The returned workflow emits its input channel unchanged.
//   args.config: component config (required); its `allArguments` are read
//   args.key:    module key; config.name is used when it is not provided
def publishFilesByConfig(Map args) {
def config = args.get("config")
assert config != null : "publishFilesByConfig: config must be specified"
def key_ = args.get("key", config.name)
assert key_ != null : "publishFilesByConfig: key must be specified"
workflow publishFilesSimpleWf {
take: input_ch
main:
input_ch
| map { tup ->
def id_ = tup[0]
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
// the processed state is a list of maps with the entries inputPath and outputFilename, where
// - inputPath is a list of source files (java.io.File values are converted with toPath())
// - outputFilename is a List[String] of target filenames
// - (inputPath, outputFilename) are the files that will be copied from src to dest
// The 'multiple: true' branch also stores a 'key' entry, which is not read further down.
def processedState =
config.allArguments
.findAll { it.direction == "output" }
.collectMany { par ->
def plainName_ = par.plainName
// if the state does not contain the key, it's an
// optional argument for which the component did
// not generate any output OR multiple channels were emitted
// and this output was not part of the event from the channel
// that is now being processed
if (!state_.containsKey(plainName_)) {
return []
}
def value = state_[plainName_]
// if the parameter is not a file, there is nothing to copy
// from the source path to the dest path, so it contributes
// an entry with empty lists
if (par.type != "file") {
return [[inputPath: [], outputFilename: []]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
// that it should not be returned as a state
if (!origState_.containsKey(plainName_)) {
return []
}
def filenameTemplate = origState_[plainName_]
// if the parameter is multiple: true and the template is a list,
// use the first element as the template
if (par.multiple && filenameTemplate instanceof List) {
filenameTemplate = filenameTemplate[0]
}
// instantiate the template by substituting $id, ${id}, $key and ${key}
// NOTE(review): id_ and key_ are passed as regex replacement strings; a '$' or '\'
// in them would presumably be interpreted by replaceAll - confirm IDs cannot contain these.
def filename = filenameTemplate
.replaceAll('\\$id', id_)
.replaceAll('\\$\\{id\\}', id_)
.replaceAll('\\$key', key_)
.replaceAll('\\$\\{key\\}', key_)
if (par.multiple) {
// if the parameter is multiple: true, the filename
// should contain a wildcard '*' that is replaced with
// the index of the file
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
// one [inputPath, outputFilename] map per file in the value
def outputPerFile = value.withIndex().collect{ val, ix ->
def filename_ix = filename.replace("*", ix.toString())
def inputPath = val instanceof File ? val.toPath() : val
[inputPath: inputPath, outputFilename: filename_ix]
}
// turn the list of maps into a single map of lists
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
} else {
// NOTE(review): value_ is not used anywhere below
def value_ = java.nio.file.Paths.get(filename)
def inputPath = value instanceof File ? value.toPath() : value
return [[inputPath: [inputPath], outputFilename: [filename]]]
}
}
// flatten the per-argument lists into one list of sources and one list of targets
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
[id_, inputPaths, outputFilenames]
}
| publishFilesProc
emit: input_ch
}
return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1753,33 +1910,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted using groupTuple's 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2911,6 +3142,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3043,8 +3278,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/eset/create_fdata",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3059,7 +3294,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -84,6 +84,9 @@ test_resources:
path: "starLogs.txt"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -190,8 +193,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/eset/create_pdata"
executable: "target/nextflow/eset/create_pdata/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -201,7 +204,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// create_pdata save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Asserts that every argument in config.allArguments that is a required
// output is present in `outputs` with a non-null value.
//   outputs: map of output argument name -> value
//   config:  component config; only `allArguments` is read
//   id, key: event id and module key, used in the error message only
// The check is skipped when workflow.stubRun is set.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  // nothing is validated during a stub run
  if (workflow.stubRun) {
    return
  }
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Builds a workflow that turns each [id, state, ...] event into
// [id, inputFiles, outputFilenames] and hands it to publishFilesProc.
// The returned workflow emits its input channel unchanged.
//   args.key: module key (required); used to build the "<id>.<key>" prefix
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { event ->
          def eventId = event[0]
          def eventState = event[1]
          // transpose the collected paths and use entry 0 as the source files
          // and entry 1 as the target filenames
          def pathColumns = collectInputOutputPaths(eventState, eventId + "." + key_).transpose()
          [eventId, pathColumns[0], pathColumns[1]]
        }
        | publishFilesProc
    emit: input_ch
  }

  return publishFilesWf
}
// Process that copies each staged input file to its target output filename and
// declares the target filenames as the process output.
// Input tuple:  [id, inputFiles, outputFiles]
// Output tuple: [id, <paths named by outputFiles>]
process publishFilesProc {
// todo: check publishpath?
// publish into the directory returned by getPublishDir(), using mode "copy"
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// inputFiles are staged with the pattern "_inputfile?/*"; outputFiles is passed as a plain value
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Pair every input file with its output filename (single values are wrapped in
// a list first) and generate the shell commands for each pair.
// NOTE(review): paths are embedded between single quotes in the generated commands;
// a path containing a single quote would presumably break them - confirm this cannot occur.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
// create the parent directory of the target if it does not exist, then copy recursively
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
// the generated commands are inserted into the script one per line
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// this assumes that the state contains no other values other than those specified in the config
// Builds a workflow that, for each [id, state, origState, ...] event, works out which
// files in the state have to be copied to which filenames and passes
// [id, inputPaths, outputFilenames] to publishFilesProc. The filenames are derived from
// the output arguments in the config and the filename templates in origState.
// The returned workflow emits its input channel unchanged.
//   args.config: component config (required); its `allArguments` are read
//   args.key:    module key; config.name is used when it is not provided
def publishFilesByConfig(Map args) {
def config = args.get("config")
assert config != null : "publishFilesByConfig: config must be specified"
def key_ = args.get("key", config.name)
assert key_ != null : "publishFilesByConfig: key must be specified"
workflow publishFilesSimpleWf {
take: input_ch
main:
input_ch
| map { tup ->
def id_ = tup[0]
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
// the processed state is a list of maps with the entries inputPath and outputFilename, where
// - inputPath is a list of source files (java.io.File values are converted with toPath())
// - outputFilename is a List[String] of target filenames
// - (inputPath, outputFilename) are the files that will be copied from src to dest
// The 'multiple: true' branch also stores a 'key' entry, which is not read further down.
def processedState =
config.allArguments
.findAll { it.direction == "output" }
.collectMany { par ->
def plainName_ = par.plainName
// if the state does not contain the key, it's an
// optional argument for which the component did
// not generate any output OR multiple channels were emitted
// and this output was not part of the event from the channel
// that is now being processed
if (!state_.containsKey(plainName_)) {
return []
}
def value = state_[plainName_]
// if the parameter is not a file, there is nothing to copy
// from the source path to the dest path, so it contributes
// an entry with empty lists
if (par.type != "file") {
return [[inputPath: [], outputFilename: []]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
// that it should not be returned as a state
if (!origState_.containsKey(plainName_)) {
return []
}
def filenameTemplate = origState_[plainName_]
// if the parameter is multiple: true and the template is a list,
// use the first element as the template
if (par.multiple && filenameTemplate instanceof List) {
filenameTemplate = filenameTemplate[0]
}
// instantiate the template by substituting $id, ${id}, $key and ${key}
// NOTE(review): id_ and key_ are passed as regex replacement strings; a '$' or '\'
// in them would presumably be interpreted by replaceAll - confirm IDs cannot contain these.
def filename = filenameTemplate
.replaceAll('\\$id', id_)
.replaceAll('\\$\\{id\\}', id_)
.replaceAll('\\$key', key_)
.replaceAll('\\$\\{key\\}', key_)
if (par.multiple) {
// if the parameter is multiple: true, the filename
// should contain a wildcard '*' that is replaced with
// the index of the file
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
// one [inputPath, outputFilename] map per file in the value
def outputPerFile = value.withIndex().collect{ val, ix ->
def filename_ix = filename.replace("*", ix.toString())
def inputPath = val instanceof File ? val.toPath() : val
[inputPath: inputPath, outputFilename: filename_ix]
}
// turn the list of maps into a single map of lists
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
} else {
// NOTE(review): value_ is not used anywhere below
def value_ = java.nio.file.Paths.get(filename)
def inputPath = value instanceof File ? value.toPath() : value
return [[inputPath: [inputPath], outputFilename: [filename]]]
}
}
// flatten the per-argument lists into one list of sources and one list of targets
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
[id_, inputPaths, outputFilenames]
}
| publishFilesProc
emit: input_ch
}
return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1753,33 +1910,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into 1 channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted, for which groupTuple has a 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2925,6 +3156,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3057,8 +3292,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/eset/create_pdata",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3073,7 +3308,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -50,6 +50,9 @@ resources:
description: "This component test the ExpressionSet object as output by the main pipeline."
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -137,6 +140,7 @@ engines:
bioc:
- "Biobase"
bioc_force_install: false
warnings_as_errors: true
entrypoint: []
cmd: null
- type: "native"
@@ -147,8 +151,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/integration_test_components/htrnaseq/check_eset"
executable: "target/nextflow/integration_test_components/htrnaseq/check_eset/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -158,7 +162,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// check_eset save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Verify that every required output argument declared in the config has a
// non-null entry in `outputs`.
//
//   outputs: map of argument plainName -> value to validate
//   config:  component config; its `allArguments` list is scanned
//   id, key: only used to build the assertion message
//
// Fails with an assertion error on the first required output that is missing.
// Nothing is checked when the workflow is executed as a stub run.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  def requiredOutputArgs = config.allArguments.findAll { candidate ->
    candidate.direction == "output" && candidate.required
  }
  for (arg in requiredOutputArgs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Create a workflow that hands the files found in a state to publishFilesProc.
//
// Arguments (passed as a Map):
//   - key: name of the module; combined with the event id as "<id>.<key>" and
//     passed to collectInputOutputPaths() (required)
//
// The returned workflow takes events whose first two elements are [id, state]
// and passes [id, inputFiles, outputFilenames] to publishFilesProc. It emits
// the unmodified input channel.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1]

          // the input files and the target output filenames
          // NOTE(review): collectInputOutputPaths is defined elsewhere; from its use here it
          // presumably returns a list of [inputFile, outputFilename] pairs -- confirm.
          def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()

          // Transposing an empty list yields an empty list, in which case indexing
          // it would produce nulls. Fall back to empty lists instead, which is also
          // what publishFilesByConfig passes on when there is nothing to publish.
          def inputFiles_ = inputoutputFilenames_.size() > 0 ? inputoutputFilenames_[0] : []
          def outputFilenames_ = inputoutputFilenames_.size() > 1 ? inputoutputFilenames_[1] : []

          [id_, inputFiles_, outputFilenames_]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Process that copies a set of input files to their target output filenames
// inside the task directory and declares those targets as outputs, so that the
// publishDir directive below copies them to the directory returned by getPublishDir().
//
// Input tuple:  [id, inputFiles, outputFiles]
//   - id:          used as the task tag and passed through to the output
//   - inputFiles:  file(s) to copy, staged using the "_inputfile?/*" pattern
//   - outputFiles: target filename(s), matched to inputFiles by position
// Output tuple: [id, <the files named in outputFiles>]
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Build the shell commands: pair every input file with its output filename
// (a single value is wrapped in a list first) and, for every pair whose
// paths differ, create the target's parent directory if needed and copy
// the input to the target with `cp -r`.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Build a workflow that publishes the output files declared in a component config.
//
// Arguments (passed as a Map):
//   - config: the component config; its `allArguments` are scanned for
//     arguments with direction "output" (required)
//   - key: value substituted for `$key` / `${key}` in the filename templates
//     (defaults to config.name)
//
// The returned workflow takes events of the form [id, state, origState] and
// passes [id, inputPaths, outputFilenames] to publishFilesProc. It emits the
// unmodified input channel.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  // String.replaceAll() interprets '$' and '\' in the replacement string as
  // group references / escapes. Quote the substituted values so that ids or
  // keys containing those characters are inserted literally.
  def quoteReplacement_ = { v -> java.util.regex.Matcher.quoteReplacement(v.toString()) }

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          def idReplacement_ = quoteReplacement_(id_)
          def keyReplacement_ = quoteReplacement_(key_)

          // the processed state is a list of maps with the entries
          // - inputPath: a List[Path]
          // - outputFilename: a List[String]
          // where (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's either an
                // optional argument for which the component did not
                // generate any output, OR multiple channels were emitted
                // and this output was not emitted on the channel
                // that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it is not something that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                // from the first list element
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', idReplacement_)
                  .replaceAll('\\$\\{id\\}', idReplacement_)
                  .replaceAll('\\$key', keyReplacement_)
                  .replaceAll('\\$\\{key\\}', keyReplacement_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // turn the list of per-file maps into one map of lists
                  def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
                    [key, outputPerFile.collect{dic -> dic[key]}]
                  }
                  return [[key: plainName_] + transposedOutputs]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into one list of sources and one of targets
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1726,8 +1885,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1740,7 +1897,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1752,33 +1909,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into 1 channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted, for which groupTuple has a 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2879,6 +3110,10 @@ meta = [
],
"description" : "This component test the ExpressionSet object as output by the main pipeline.",
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -2982,7 +3217,8 @@ meta = [
"bioc" : [
"Biobase"
],
"bioc_force_install" : false
"bioc_force_install" : false,
"warnings_as_errors" : true
}
]
},
@@ -2996,8 +3232,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/integration_test_components/htrnaseq/check_eset",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3012,7 +3248,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -57,6 +57,9 @@ resources:
description: "This component test the cutadapt output from the well_demultiplex subworkflow."
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -157,8 +160,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output"
executable: "target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -168,7 +171,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// check_cutadapt_output save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Verify that every argument declared in the config as a required output
// has a non-null entry in `outputs`. The check is skipped on stub runs.
//   outputs: map from plain argument name to the emitted value
//   config:  component config; only `config.allArguments` is read
//   id, key: only used to build the error message
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Create a workflow that hands the files found in each event's state to
// publishFilesProc. `args.key` is mandatory; it is combined with the event id
// ("<id>.<key>") and passed to collectInputOutputPaths.
// The returned workflow emits its input channel unchanged.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          // collectInputOutputPaths yields [input, output] pairs; transposing
          // turns them into one list of inputs and one list of outputs
          def pathColumns = collectInputOutputPaths(tup[1], id_ + "." + key_).transpose()
          [id_, pathColumns[0], pathColumns[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Process that copies a set of staged input files to their target output
// filenames and declares those targets as outputs, so that the publishDir
// directive copies them into the directory returned by getPublishDir().
//   id:          event id, also used as the task tag
//   inputFiles:  source file(s), staged under "_inputfile?/"
//   outputFiles: target filename(s), paired positionally with inputFiles
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Normalise both arguments to lists, pair them up positionally and generate,
// for every (infile, outfile) pair, a "create parent dir if missing" command
// followed by a recursive copy.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// this assumes that the state contains no other values other than those specified in the config
// Create a workflow that publishes the output files of a component, using the
// component config to decide which state entries are files and the original
// (user supplied) state to decide where each file must be written.
//   args.config: component config (required); `allArguments` and `name` are read
//   args.key:    module key, defaults to config.name
// Input events are [id, state, origState, ...]; the returned workflow emits its
// input channel unchanged.
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // String.replaceAll interprets '$' and '\' in the replacement string
          // as group references / escapes, so the id and key must be quoted
          // before being substituted into the filename template.
          def idReplacement_ = java.util.regex.Matcher.quoteReplacement(id_ as String)
          def keyReplacement_ = java.util.regex.Matcher.quoteReplacement(key_ as String)

          // the processed state is a list of [inputPath, outputFilename] maps, where
          // - inputPath is a List[Path]
          // - outputFilename is a List[String]
          // - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's either an
                // optional argument for which the component did not
                // generate any output, OR multiple channels were emitted
                // and this output was not part of the event from the
                // channel that is being processed now
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, there is nothing that
                // needs to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', idReplacement_)
                  .replaceAll('\\$\\{id\\}', idReplacement_)
                  .replaceAll('\\$key', keyReplacement_)
                  .replaceAll('\\$\\{key\\}', keyReplacement_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ item, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = item instanceof File ? item.toPath() : item
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  return [[
                    inputPath: outputPerFile.collect{ it.inputPath },
                    outputFilename: outputPerFile.collect{ it.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1726,8 +1885,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1740,7 +1897,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1752,33 +1909,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel was emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted, which is done with groupTuple's 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2886,6 +3117,10 @@ meta = [
],
"description" : "This component test the cutadapt output from the well_demultiplex subworkflow.",
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3008,8 +3243,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3024,7 +3259,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -5,18 +5,8 @@ argument_groups:
- name: "Input arguments"
arguments:
- type: "file"
name: "--input_r1"
description: "Directory to write R1 fastq data to"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--input_r2"
description: "Directory to write R2 fastq data to"
name: "--input"
description: "Directory to write fastq data to"
info: null
must_exist: true
create_parent: true
@@ -47,6 +37,9 @@ resources:
description: "Publish the fastq files per well"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -142,8 +135,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/io/publish_fastqs"
executable: "target/nextflow/io/publish_fastqs/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -153,7 +146,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// publish_fastqs save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Verify that every argument declared in the config as a required output
// has a non-null entry in `outputs`. The check is skipped on stub runs.
//   outputs: map from plain argument name to the emitted value
//   config:  component config; only `config.allArguments` is read
//   id, key: only used to build the error message
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Create a workflow that hands the files found in each event's state to
// publishFilesProc. `args.key` is mandatory; it is combined with the event id
// ("<id>.<key>") and passed to collectInputOutputPaths.
// The returned workflow emits its input channel unchanged.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          // collectInputOutputPaths yields [input, output] pairs; transposing
          // turns them into one list of inputs and one list of outputs
          def pathColumns = collectInputOutputPaths(tup[1], id_ + "." + key_).transpose()
          [id_, pathColumns[0], pathColumns[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Process that copies a set of staged input files to their target output
// filenames and declares those targets as outputs, so that the publishDir
// directive copies them into the directory returned by getPublishDir().
//   id:          event id, also used as the task tag
//   inputFiles:  source file(s), staged under "_inputfile?/"
//   outputFiles: target filename(s), paired positionally with inputFiles
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Normalise both arguments to lists, pair them up positionally and generate,
// for every (infile, outfile) pair, a "create parent dir if missing" command
// followed by a recursive copy.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// this assumes that the state contains no other values other than those specified in the config
// Create a workflow that publishes the output files of a component, using the
// component config to decide which state entries are files and the original
// (user supplied) state to decide where each file must be written.
//   args.config: component config (required); `allArguments` and `name` are read
//   args.key:    module key, defaults to config.name
// Input events are [id, state, origState, ...]; the returned workflow emits its
// input channel unchanged.
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // String.replaceAll interprets '$' and '\' in the replacement string
          // as group references / escapes, so the id and key must be quoted
          // before being substituted into the filename template.
          def idReplacement_ = java.util.regex.Matcher.quoteReplacement(id_ as String)
          def keyReplacement_ = java.util.regex.Matcher.quoteReplacement(key_ as String)

          // the processed state is a list of [inputPath, outputFilename] maps, where
          // - inputPath is a List[Path]
          // - outputFilename is a List[String]
          // - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's either an
                // optional argument for which the component did not
                // generate any output, OR multiple channels were emitted
                // and this output was not part of the event from the
                // channel that is being processed now
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, there is nothing that
                // needs to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', idReplacement_)
                  .replaceAll('\\$\\{id\\}', idReplacement_)
                  .replaceAll('\\$key', keyReplacement_)
                  .replaceAll('\\$\\{key\\}', keyReplacement_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ item, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = item instanceof File ? item.toPath() : item
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  return [[
                    inputPath: outputPerFile.collect{ it.inputPath },
                    outputFilename: outputPerFile.collect{ it.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; also cover the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted; groupTuple has a 'sort' argument for this. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2813,19 +3044,8 @@ meta = [
"arguments" : [
{
"type" : "file",
"name" : "--input_r1",
"description" : "Directory to write R1 fastq data to",
"must_exist" : true,
"create_parent" : true,
"required" : true,
"direction" : "input",
"multiple" : true,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--input_r2",
"description" : "Directory to write R2 fastq data to",
"name" : "--input",
"description" : "Directory to write fastq data to",
"must_exist" : true,
"create_parent" : true,
"required" : true,
@@ -2868,6 +3088,10 @@ meta = [
],
"description" : "Publish the fastq files per well",
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -2982,8 +3206,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/io/publish_fastqs",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -2998,7 +3222,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [
@@ -3036,8 +3260,7 @@ tempscript=".viash_script.sh"
cat > "$tempscript" << VIASHMAIN
## VIASH START
# The following code has been auto-generated by Viash.
$( if [ ! -z ${VIASH_PAR_INPUT_R1+x} ]; then echo "${VIASH_PAR_INPUT_R1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_r1='&'#" ; else echo "# par_input_r1="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "${VIASH_PAR_INPUT_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_r2='&'#" ; else echo "# par_input_r2="; fi )
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
@@ -3069,14 +3292,9 @@ mkdir -p "\\$par_output" && echo "\\$par_output created"
echo
echo "Copying files..."
IFS=";" read -ra input_r1 <<<\\$par_input_r1
IFS=";" read -ra input_r2 <<<\\$par_input_r2
IFS=";" read -ra input <<<\\$par_input
for i in "\\${input_r1[@]}"; do
cp -rL "\\$i" "\\$par_output/"
done
for i in "\\${input_r2[@]}"; do
for i in "\\${input[@]}"; do
cp -rL "\\$i" "\\$par_output/"
done
VIASHMAIN

View File

@@ -14,21 +14,11 @@
"properties": {
"input_r1": {
"input": {
"type":
"string",
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R1 fastq data to",
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R1 fastq data to"
}
,
"input_r2": {
"type":
"string",
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R2 fastq data to",
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R2 fastq data to"
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write fastq data to",
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write fastq data to"
}

View File

@@ -91,6 +91,9 @@ resources:
description: "Publish the results"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -186,8 +189,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/io/publish_results"
executable: "target/nextflow/io/publish_results/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -197,7 +200,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// publish_results save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Assert that every required output argument declared in the config is present
// (and non-null) in `outputs`.
//
// outputs: map of output argument name -> value
// config:  component config; only `config.allArguments` is read here
// id, key: event id and module key, used only in the assertion message
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  // Skip the check entirely during a stub run.
  if (workflow.stubRun) {
    return
  }
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Create a sub-workflow that hands the files referenced by each event's state
// to publishFilesProc.
//
// args.key (required): module key; combined with the event id as "<id>.<key>"
//                      and passed to collectInputOutputPaths.
// Returns the workflow object `publishFilesWf`, which emits its input channel unchanged.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def eventId = tup[0]
          def eventState = tup[1]
          // presumably collectInputOutputPaths yields [inputFile, outputFilename] pairs,
          // so after transposing index 0 holds the inputs and index 1 the outputs
          // -- verify against the helper
          def pathsBySide = collectInputOutputPaths(eventState, eventId + "." + key_).transpose()
          [eventId, pathsBySide[0], pathsBySide[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Process that copies each staged input file to its requested output filename
// inside the task directory; the declared outputs fall under the `publishDir`
// directive below (mode "copy").
//
// Input tuple:  [id, inputFiles, outputFiles]
//   - inputFiles:  one path or a list of paths, staged as "_inputfile?/*"
//   - outputFiles: one target filename or a list of them, paired with inputFiles by position
// Output tuple: [id, outputFiles as paths]
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Wrap single values in a list, pair inputs with outputs by position, and
// build the shell commands needed for every (infile, outfile) pair.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
// first make sure the parent directory of the destination exists,
// then copy recursively (the source may be a directory)
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Create a sub-workflow that works out, per event, which output files must be
// copied to which filenames and hands them to publishFilesProc.
//
// args.config (required): component config; `config.allArguments` is read
// args.key (optional):    module key, defaults to `config.name`
// Input events:  [id, state, origState]
// Returns the workflow object `publishFilesSimpleWf`, which emits its input channel unchanged.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
def config = args.get("config")
assert config != null : "publishFilesByConfig: config must be specified"
def key_ = args.get("key", config.name)
assert key_ != null : "publishFilesByConfig: key must be specified"
workflow publishFilesSimpleWf {
take: input_ch
main:
input_ch
| map { tup ->
def id_ = tup[0]
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
// the processed state is a list of maps with the entries
// - inputPath: a List of source paths (File values are converted to Path)
// - outputFilename: a List[String] of target filenames
// - (inputPath, outputFilename) are the files that will be copied from src to dest
// Entries for `multiple: true` arguments additionally carry a `key`,
// which is not read further down in this closure.
def processedState =
config.allArguments
.findAll { it.direction == "output" }
.collectMany { par ->
def plainName_ = par.plainName
// if the state does not contain the key, it's an
// optional argument for which the component did
// not generate any output OR multiple channels were emitted
// and the output was just not added to using the channel
// that is now being parsed
if (!state_.containsKey(plainName_)) {
return []
}
def value = state_[plainName_]
// if the parameter is not a file, there is nothing to copy
// from the source path to the dest path, so it contributes
// an entry with empty lists
if (par.type != "file") {
return [[inputPath: [], outputFilename: []]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
// that it should not be returned as a state
if (!origState_.containsKey(plainName_)) {
return []
}
def filenameTemplate = origState_[plainName_]
// if the parameter is multiple: true, fetch the template
if (par.multiple && filenameTemplate instanceof List) {
filenameTemplate = filenameTemplate[0]
}
// instantiate the template
def filename = filenameTemplate
.replaceAll('\\$id', id_)
.replaceAll('\\$\\{id\\}', id_)
.replaceAll('\\$key', key_)
.replaceAll('\\$\\{key\\}', key_)
if (par.multiple) {
// if the parameter is multiple: true, the filename
// should contain a wildcard '*' that is replaced with
// the index of the file
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
def outputPerFile = value.withIndex().collect{ val, ix ->
def filename_ix = filename.replace("*", ix.toString())
def inputPath = val instanceof File ? val.toPath() : val
[inputPath: inputPath, outputFilename: filename_ix]
}
// turn the list of per-file maps into one map of lists
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
} else {
// NOTE(review): value_ is not used in this branch
def value_ = java.nio.file.Paths.get(filename)
def inputPath = value instanceof File ? value.toPath() : value
return [[inputPath: [inputPath], outputFilename: [filename]]]
}
}
// flatten the per-argument lists into one list of sources and one list of targets
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
[id_, inputPaths, outputFilenames]
}
| publishFilesProc
emit: input_ch
}
return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; also cover the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted; groupTuple has a 'sort' argument for this. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2917,6 +3148,10 @@ meta = [
],
"description" : "Publish the results",
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3031,8 +3266,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/io/publish_results",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3047,7 +3282,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -164,6 +164,9 @@ test_resources:
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -278,8 +281,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/parallel_map"
executable: "target/nextflow/parallel_map/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -289,7 +292,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// parallel_map save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Assert that every required output argument declared in the config is
// present (and non-null) in `outputs`.
//
//   outputs: map of output argument name -> value
//   config:  component config; `allArguments` entries are read for
//            `direction`, `required` and `plainName`
//   id:      event id, only used in the error message
//   key:     module key, only used in the error message
//
// Nothing is checked during a stub run.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  def requiredOutputArgs = config.allArguments.findAll { candidate ->
    candidate.direction == "output" && candidate.required
  }
  for (arg in requiredOutputArgs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Build a workflow that hands the files referenced by each event's state to
// publishFilesProc.
//
//   args.key: required; combined with the event id as "<id>.<key>" and passed
//             to collectInputOutputPaths.
//
// The returned workflow takes a channel of tuples whose first two elements
// are [id, state] and emits that input channel unchanged.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def eventId = tup[0]
          def eventState = tup[1]

          // pairs of (input file, target output filename), turned into two
          // parallel lists: all inputs first, all target filenames second
          def pathPairs = collectInputOutputPaths(eventState, eventId + "." + key_)
          def pathColumns = pathPairs.transpose()

          [eventId, pathColumns[0], pathColumns[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Copy each staged input file to its target filename inside the task
// directory; the copies are declared as outputs and published (mode "copy")
// to the directory returned by getPublishDir().
//
// Input tuple:
//   id          - event id, used as the task tag
//   inputFiles  - file(s) to publish, each staged under its own "_inputfile?/" directory
//   outputFiles - target filename(s), paired with inputFiles by position
process publishFilesProc {
  // todo: check publishpath?
  publishDir path: "${getPublishDir()}/", mode: "copy"
  tag "$id"
  input:
    tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
  output:
    tuple val(id), path{outputFiles}
  script:
  // Quote a value for POSIX sh: wrap it in single quotes and encode every
  // embedded single quote as '\'' so that such file names cannot terminate
  // the quoting and break (or inject into) the generated commands.
  def shQuote = { s -> "'" + s.toString().replace("'", "'\\''") + "'" }
  def copyCommands = [
    inputFiles instanceof List ? inputFiles : [inputFiles],
    outputFiles instanceof List ? outputFiles : [outputFiles]
  ]
    .transpose()
    .collectMany{infile, outfile ->
      if (infile.toString() != outfile.toString()) {
        def src = shQuote(infile)
        def dest = shQuote(outfile)
        [
          // create the destination's parent directory when it does not exist yet
          "[ -d \"\$(dirname ${dest})\" ] || mkdir -p \"\$(dirname ${dest})\"",
          "cp -r ${src} ${dest}"
        ]
      } else {
        // no need to copy if infile is the same as outfile
        []
      }
    }
  """
  echo "Copying output files to destination folder"
  ${copyCommands.join("\n ")}
  """
}
// Build a workflow that copies the file outputs of a component to the
// filenames obtained by instantiating the templates in the original state.
//
//   args.config: required component config; `allArguments` entries are read
//                for `direction`, `type`, `multiple` and `plainName`.
//   args.key:    value substituted for `$key` / `${key}` in the templates
//                (defaults to `config.name`).
//
// The returned workflow takes a channel of [id, state, origState] tuples and
// emits that input channel unchanged.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath, outputFilename] maps, where
          // - inputPath is a List[Path]
          // - outputFilename is a List[String]
          // - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it is either an
                // optional argument for which the component did not
                // generate any output, OR multiple channels were emitted
                // and this output was not part of the event from the
                // channel that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]

                // if the parameter is not a file, there is nothing that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return []
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)

                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  return [[
                    inputPath: outputPerFile.collect{ entry -> entry.inputPath },
                    outputFilename: outputPerFile.collect{ entry -> entry.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into two parallel lists
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1753,33 +1910,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted. groupTuple has a 'sort' argument for this, which can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -3005,6 +3236,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3143,8 +3378,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/parallel_map",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3159,7 +3394,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [
@@ -3331,7 +3566,8 @@ for barcode_index in "\\${!barcodes[@]}"; do
fi
done
echo "Did not find FASTQ files files for well \\${well_id}! "\\\\
"Make sure that the input files have the correct file name format."
"Make sure that the input files have the correct file name format."\\\\
"Input files: \\${input_r1[@]}"
exit 1
done

View File

@@ -75,6 +75,9 @@ test_resources:
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -184,12 +187,14 @@ engines:
- "install.packages(\"oaStyle\", repos = c(rdepot = \"https://repos.openanalytics.eu/repo/public\"\
, getOption(\"repos\")))"
bioc_force_install: false
warnings_as_errors: true
test_setup:
- type: "r"
packages:
- "testthat"
- "R.utils"
bioc_force_install: false
warnings_as_errors: true
entrypoint: []
cmd: null
- type: "native"
@@ -200,8 +205,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/report/create_report"
executable: "target/nextflow/report/create_report/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -211,7 +216,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// create_report save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Assert that every required output argument declared in the config is
// present (and non-null) in `outputs`.
//
//   outputs: map of output argument name -> value
//   config:  component config; `allArguments` entries are read for
//            `direction`, `required` and `plainName`
//   id:      event id, only used in the error message
//   key:     module key, only used in the error message
//
// Nothing is checked during a stub run.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  def requiredOutputArgs = config.allArguments.findAll { candidate ->
    candidate.direction == "output" && candidate.required
  }
  for (arg in requiredOutputArgs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Build a workflow that hands the files referenced by each event's state to
// publishFilesProc.
//
//   args.key: required; combined with the event id as "<id>.<key>" and passed
//             to collectInputOutputPaths.
//
// The returned workflow takes a channel of tuples whose first two elements
// are [id, state] and emits that input channel unchanged.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def eventId = tup[0]
          def eventState = tup[1]

          // pairs of (input file, target output filename), turned into two
          // parallel lists: all inputs first, all target filenames second
          def pathPairs = collectInputOutputPaths(eventState, eventId + "." + key_)
          def pathColumns = pathPairs.transpose()

          [eventId, pathColumns[0], pathColumns[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Copy each staged input file to its target filename inside the task
// directory; the copies are declared as outputs and published (mode "copy")
// to the directory returned by getPublishDir().
//
// Input tuple:
//   id          - event id, used as the task tag
//   inputFiles  - file(s) to publish, each staged under its own "_inputfile?/" directory
//   outputFiles - target filename(s), paired with inputFiles by position
process publishFilesProc {
  // todo: check publishpath?
  publishDir path: "${getPublishDir()}/", mode: "copy"
  tag "$id"
  input:
    tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
  output:
    tuple val(id), path{outputFiles}
  script:
  // Quote a value for POSIX sh: wrap it in single quotes and encode every
  // embedded single quote as '\'' so that such file names cannot terminate
  // the quoting and break (or inject into) the generated commands.
  def shQuote = { s -> "'" + s.toString().replace("'", "'\\''") + "'" }
  def copyCommands = [
    inputFiles instanceof List ? inputFiles : [inputFiles],
    outputFiles instanceof List ? outputFiles : [outputFiles]
  ]
    .transpose()
    .collectMany{infile, outfile ->
      if (infile.toString() != outfile.toString()) {
        def src = shQuote(infile)
        def dest = shQuote(outfile)
        [
          // create the destination's parent directory when it does not exist yet
          "[ -d \"\$(dirname ${dest})\" ] || mkdir -p \"\$(dirname ${dest})\"",
          "cp -r ${src} ${dest}"
        ]
      } else {
        // no need to copy if infile is the same as outfile
        []
      }
    }
  """
  echo "Copying output files to destination folder"
  ${copyCommands.join("\n ")}
  """
}
// Build a workflow that copies the file outputs of a component to the
// filenames obtained by instantiating the templates in the original state.
//
//   args.config: required component config; `allArguments` entries are read
//                for `direction`, `type`, `multiple` and `plainName`.
//   args.key:    value substituted for `$key` / `${key}` in the templates
//                (defaults to `config.name`).
//
// The returned workflow takes a channel of [id, state, origState] tuples and
// emits that input channel unchanged.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath, outputFilename] maps, where
          // - inputPath is a List[Path]
          // - outputFilename is a List[String]
          // - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it is either an
                // optional argument for which the component did not
                // generate any output, OR multiple channels were emitted
                // and this output was not part of the event from the
                // channel that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]

                // if the parameter is not a file, there is nothing that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return []
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)

                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  return [[
                    inputPath: outputPerFile.collect{ entry -> entry.inputPath },
                    outputFilename: outputPerFile.collect{ entry -> entry.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into two parallel lists
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1753,33 +1910,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into 1 channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted using groupTuple's 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2924,6 +3155,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3052,7 +3287,8 @@ meta = [
"script" : [
"install.packages(\\"oaStyle\\", repos = c(rdepot = \\"https://repos.openanalytics.eu/repo/public\\", getOption(\\"repos\\")))"
],
"bioc_force_install" : false
"bioc_force_install" : false,
"warnings_as_errors" : true
}
],
"test_setup" : [
@@ -3062,7 +3298,8 @@ meta = [
"testthat",
"R.utils"
],
"bioc_force_install" : false
"bioc_force_install" : false,
"warnings_as_errors" : true
}
]
},
@@ -3076,8 +3313,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/report/create_report",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3092,7 +3329,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -91,6 +91,9 @@ test_resources:
path: "test_data"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -197,8 +200,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/stats/combine_star_logs"
executable: "target/nextflow/stats/combine_star_logs/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -208,7 +211,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// combine_star_logs save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
/**
 * Assert that every required output argument declared in the config is present
 * (and non-null) in the given outputs map.
 *
 * The check is skipped entirely when the workflow is executed as a stub run.
 *
 * @param outputs map of output argument name to emitted value
 * @param config  component config; its `allArguments` entries are inspected for
 *                `direction`, `required` and `plainName`
 * @param id      event id, only used in the error message
 * @param key     module key, only used in the error message
 */
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  def requiredOutputArgs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputArgs) {
    // the asserted expression is kept as-is because it is echoed in the failure message
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
/**
 * Build a workflow that publishes all files found in the state of each event.
 *
 * Incoming events are expected to be tuples whose first element is the id and
 * whose second element is the state. The (source file, target filename) pairs
 * are obtained from collectInputOutputPaths() using "<id>.<key>" as prefix and
 * are handed to publishFilesProc. The input channel is emitted unchanged.
 *
 * @param args map that must contain a non-null "key" entry
 * @return the constructed workflow
 */
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          // after transposing: element 0 holds the input files, element 1 the target output filenames
          def pathPairs_ = collectInputOutputPaths(tup[1], id_ + "." + key_).transpose()
          [id_, pathPairs_[0], pathPairs_[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Process that copies a set of staged input files to their target output
// filenames, so that they are published (mode "copy") under getPublishDir().
//
// Input tuple:
//   - id:          event id, also used as the task tag
//   - inputFiles:  one or more source paths to publish
//   - outputFiles: target filename(s), paired positionally with inputFiles
// Output tuple:
//   - id and the paths listed in outputFiles
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// NOTE(review): the "_inputfile?/*" stage pattern presumably places every input
// in its own numbered directory to avoid name clashes; confirm against the
// Nextflow `stageAs` documentation.
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Normalise both sides to lists (a single value is wrapped), pair them up
// positionally, and generate two shell commands per pair: create the parent
// directory of the target if it does not exist yet, then copy recursively.
// Pairs whose source and target render to the same string yield no commands.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Build a workflow that publishes the output files of a component, using the
// component config to decide which state entries are files and the original
// (user-provided) state to derive the target filenames.
//
// Arguments:
//   - config: the component config (required); its `allArguments` are inspected
//   - key:    the module key used when instantiating filename templates
//             (defaults to config.name)
//
// Incoming events must be tuples [id, state, origState]. The input channel is
// emitted unchanged.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // String.replaceAll() interprets '$' and '\' in the *replacement* string
          // as group references / escapes. Ids and keys are arbitrary user text,
          // so they must be quoted to be inserted literally into the filename.
          def idReplacement_ = java.util.regex.Matcher.quoteReplacement(id_.toString())
          def keyReplacement_ = java.util.regex.Matcher.quoteReplacement(key_.toString())

          // the processed state is a list of [inputPath, outputFilename] maps, where
          // - inputPath is a List[Path]
          // - outputFilename is a List[String]
          // - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's an
                // optional argument for which the component did
                // not generate any output OR multiple channels were emitted
                // and the output was just not added to using the channel
                // that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it should be stored
                // in the state as-is, but is not something that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', idReplacement_)
                  .replaceAll('\\$\\{id\\}', idReplacement_)
                  .replaceAll('\\$key', keyReplacement_)
                  .replaceAll('\\$\\{key\\}', keyReplacement_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    [
                      inputPath: val instanceof File ? val.toPath() : val,
                      outputFilename: filename.replace("*", ix.toString())
                    ]
                  }
                  return [[
                    inputPath: outputPerFile.collect{ it.inputPath },
                    outputFilename: outputPerFile.collect{ it.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1726,8 +1885,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1740,7 +1897,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1752,33 +1909,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into 1 channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted using groupTuple's 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2927,6 +3158,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3059,8 +3294,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/stats/combine_star_logs",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3075,7 +3310,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -75,6 +75,9 @@ test_resources:
is_executable: true
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -181,8 +184,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/stats/generate_pool_statistics"
executable: "target/nextflow/stats/generate_pool_statistics/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -192,7 +195,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// generate_pool_statistics save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Verify that every required output argument declared in the config is present
// (and not null) in `outputs`.
//
// - outputs: map of output argument name to value
// - config:  component config; `allArguments` entries are read for
//            `direction`, `required` and `plainName`
// - id, key: only used to build the assertion message
//
// Fails with an assertion error on the first missing required output.
// No check is performed during a stub run.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  for (arg in config.allArguments) {
    // only required output arguments are of interest
    if (arg.direction != "output" || !arg.required) {
      continue
    }
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Build a workflow that publishes the files found in each event's state.
//
// - args.key: required; combined with the event id as "<id>.<key>" and passed
//             to collectInputOutputPaths
//
// The returned workflow takes a channel of [id, state, ...] tuples, sends
// [id, inputFiles, outputFilenames] to publishFilesProc and emits the input
// channel unchanged.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def eventId = tup[0]
          // list of (input file, output filename) pairs for this state
          def pathPairs = collectInputOutputPaths(tup[1], eventId + "." + key_)
          // turn the list of pairs into [all input files, all output filenames]
          def columns = pathPairs.transpose()
          [eventId, columns[0], columns[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Process that copies a set of files to their target filenames so that
// Nextflow publishes them (mode "copy") under the directory returned by
// getPublishDir().
//
// Input tuple:
// - id:          event id, also used as the task tag
// - inputFiles:  source file(s), staged using the "_inputfile?/*" pattern
// - outputFiles: target filename(s), matched to inputFiles by position
//
// Output tuple: the id and the files located at the outputFiles paths.
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Both inputs are normalised to lists (either may be a single value), paired
// by position, and turned into shell commands: create the parent directory of
// the target if it does not exist yet, then copy recursively.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Build a workflow that publishes the file outputs of a module under the
// filenames requested in the original (input) state.
//
// Arguments (passed as a Map):
// - config: required; the component config. Its `allArguments` entries are
//           read for `direction`, `plainName`, `type` and `multiple`.
// - key:    name substituted for `$key` / `${key}` in filename templates;
//           defaults to `config.name`.
//
// The returned workflow takes a channel of [id, state, origState] tuples,
// sends [id, inputPaths, outputFilenames] to publishFilesProc and emits the
// input channel unchanged.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath: ..., outputFilename: ...] maps,
          // at most one per output argument, where
          //   - inputPath is a list of the source files
          //   - outputFilename is a list of target filenames (String), same length as inputPath
          //   - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName

                // if the state does not contain the key, it's either an
                // optional argument for which the component did not generate
                // any output, OR multiple channels were emitted and this
                // output was not part of the channel that is being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]

                // if the parameter is not a file, there is nothing
                // to copy from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }

                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]

                // if the parameter is multiple: true, the template may be
                // wrapped in a list; fetch the template itself
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }

                // instantiate the template
                // NOTE(review): id_ and key_ are used as regex *replacement*
                // strings here, so a '$' or '\' inside them is interpreted by
                // replaceAll instead of being copied literally. Left as-is
                // because the state yaml filenames are derived with the same
                // substitution and both must stay in sync.
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)

                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ item, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = item instanceof File ? item.toPath() : item
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  return [[
                    inputPath: outputPerFile.collect{ it.inputPath },
                    outputFilename: outputPerFile.collect{ it.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into one list of sources and one of targets
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1753,33 +1910,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted using groupTuple's 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2911,6 +3142,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3043,8 +3278,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/stats/generate_pool_statistics",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3059,7 +3294,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -146,6 +146,9 @@ test_resources:
path: "empty.sam"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -263,8 +266,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/stats/generate_well_statistics"
executable: "target/nextflow/stats/generate_well_statistics/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -274,7 +277,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// generate_well_statistics save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Verify that every required output argument declared in the config is present
// (and not null) in `outputs`.
//
// - outputs: map of output argument name to value
// - config:  component config; `allArguments` entries are read for
//            `direction`, `required` and `plainName`
// - id, key: only used to build the assertion message
//
// Fails with an assertion error on the first missing required output.
// No check is performed during a stub run.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  for (arg in config.allArguments) {
    // only required output arguments are of interest
    if (arg.direction != "output" || !arg.required) {
      continue
    }
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Build a workflow that publishes the files found in each event's state.
//
// - args.key: required; combined with the event id as "<id>.<key>" and passed
//             to collectInputOutputPaths
//
// The returned workflow takes a channel of [id, state, ...] tuples, sends
// [id, inputFiles, outputFilenames] to publishFilesProc and emits the input
// channel unchanged.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def eventId = tup[0]
          // list of (input file, output filename) pairs for this state
          def pathPairs = collectInputOutputPaths(tup[1], eventId + "." + key_)
          // turn the list of pairs into [all input files, all output filenames]
          def columns = pathPairs.transpose()
          [eventId, columns[0], columns[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Process that copies a set of files to their target filenames so that
// Nextflow publishes them (mode "copy") under the directory returned by
// getPublishDir().
//
// Input tuple:
// - id:          event id, also used as the task tag
// - inputFiles:  source file(s), staged using the "_inputfile?/*" pattern
// - outputFiles: target filename(s), matched to inputFiles by position
//
// Output tuple: the id and the files located at the outputFiles paths.
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Both inputs are normalised to lists (either may be a single value), paired
// by position, and turned into shell commands: create the parent directory of
// the target if it does not exist yet, then copy recursively.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Build a workflow that publishes the file outputs of a module under the
// filenames requested in the original (input) state.
//
// Arguments (passed as a Map):
// - config: required; the component config. Its `allArguments` entries are
//           read for `direction`, `plainName`, `type` and `multiple`.
// - key:    name substituted for `$key` / `${key}` in filename templates;
//           defaults to `config.name`.
//
// The returned workflow takes a channel of [id, state, origState] tuples,
// sends [id, inputPaths, outputFilenames] to publishFilesProc and emits the
// input channel unchanged.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath: ..., outputFilename: ...] maps,
          // at most one per output argument, where
          //   - inputPath is a list of the source files
          //   - outputFilename is a list of target filenames (String), same length as inputPath
          //   - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName

                // if the state does not contain the key, it's either an
                // optional argument for which the component did not generate
                // any output, OR multiple channels were emitted and this
                // output was not part of the channel that is being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]

                // if the parameter is not a file, there is nothing
                // to copy from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }

                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]

                // if the parameter is multiple: true, the template may be
                // wrapped in a list; fetch the template itself
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }

                // instantiate the template
                // NOTE(review): id_ and key_ are used as regex *replacement*
                // strings here, so a '$' or '\' inside them is interpreted by
                // replaceAll instead of being copied literally. Left as-is
                // because the state yaml filenames are derived with the same
                // substitution and both must stay in sync.
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)

                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ item, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = item instanceof File ? item.toPath() : item
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  return [[
                    inputPath: outputPerFile.collect{ it.inputPath },
                    outputFilename: outputPerFile.collect{ it.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into one list of sources and one of targets
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1753,33 +1910,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted, for which groupTuple has a 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2992,6 +3223,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3138,8 +3373,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/stats/generate_well_statistics",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3154,7 +3389,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -0,0 +1,193 @@
name: "concatRuns"
namespace: "utils"
version: "save-params"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--input_r1"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--input_r2"
info: null
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--sample_id"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_r1"
description: "Path to read 1 fastq/fasta file"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--output_r2"
description: "Path to read 2 fastq/fasta file"
info: null
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: true
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Concatenate well FASTQ files from different runs in order to increase\
\ sequencing depth.\n"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
dependencies:
- name: "concat_text"
repository:
type: "vsh"
repo: "craftbox"
tag: "v0.1.0"
repositories:
- type: "vsh"
name: "cb"
repo: "craftbox"
tag: "v0.1.0"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
- type: "native"
id: "native"
build_info:
config: "src/utils/concatRuns/config.vsh.yaml"
runner: "nextflow"
engine: "native|native"
output: "target/nextflow/utils/concatRuns"
executable: "target/nextflow/utils/concatRuns/main.nf"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
dependencies:
- "target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/concat_text"
package_config:
name: "htrnaseq"
version: "save-params"
description: "High-throughput pipeline [WIP]\n"
info:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'save-params'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
manifest {
name = 'utils/concatRuns'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'save-params'
description = 'Concatenate well FASTQ files from different runs in order to increase sequencing depth.\n'
}
process.container = 'nextflow/bash:latest'
// detect tempdir
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
profiles {
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
process{
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}
includeConfig("nextflow_labels.config")

View File

@@ -0,0 +1,108 @@
executor {
$k8s {
submitRateLimit = '10sec'
pollInterval = '1 sec'
}
}
process {
container = 'nextflow/bash:latest'
// default resources
memory = { 8.Gb * task.attempt }
cpus = 8
maxForks = 36
// Retry for exit codes that have something to do with memory issues
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
maxMemory = 192.GB
// Resource labels
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 8 }
withLabel: midcpu { cpus = 16 }
withLabel: highcpu { cpus = 32 }
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 32.GB * task.attempt ) } }
}
profiles {
// detect tempdir
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
docker {
docker.fixOwnership = true
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
local {
// This config is for local processing.
process {
withName: ".*parallel_map_process" {
maxForks = 1
}
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 8 }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
}
}
}
// Cap a requested amount of memory at `process.maxMemory`.
//
// Parameters:
//   to_compare - the requested amount of memory (the callers in this file pass
//                expressions such as `8.GB * task.attempt`).
// Returns:
//   - `to_compare` unchanged when `process.maxMemory` is not set, or when the
//     request does not exceed it;
//   - `process.maxMemory` when the request exceeds it, or when the current
//     attempt equals `process.maxRetries` (i.e. the last retry gets the maximum).
// Any exception raised while evaluating the limits is reported on stdout and
// terminates the JVM with exit status 1.
def get_memory(to_compare) {
  // No upper bound configured: hand the request back untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // NOTE(review): `task` is not a parameter of this function; presumably it is
    // resolved from the context in which the memory closure is evaluated — confirm.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // The request is larger than the allowed maximum: cap it.
      // (This branch used to return an undefined `max_memory` variable, which
      // raised an exception and ended the run through the catch block below.)
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,112 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "concatRuns",
"description": "Concatenate well FASTQ files from different runs in order to increase sequencing depth.\n",
"type": "object",
"definitions": {
"arguments" : {
"title": "Arguments",
"type": "object",
"description": "No description",
"properties": {
"input_r1": {
"type":
"string",
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. ",
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. "
}
,
"input_r2": {
"type":
"string",
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. ",
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. "
}
,
"sample_id": {
"type":
"string",
"description": "Type: `string`, required. ",
"help_text": "Type: `string`, required. "
}
,
"output_r1": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.output_r1_*.output_r1_*`, multiple_sep: `\";\"`. Path to read 1 fastq/fasta file",
"help_text": "Type: List of `file`, default: `$id.$key.output_r1_*.output_r1_*`, multiple_sep: `\";\"`. Path to read 1 fastq/fasta file"
,
"default":"$id.$key.output_r1_*.output_r1_*"
}
,
"output_r2": {
"type":
"string",
"description": "Type: List of `file`, default: `$id.$key.output_r2_*.output_r2_*`, multiple_sep: `\";\"`. Path to read 2 fastq/fasta file",
"help_text": "Type: List of `file`, default: `$id.$key.output_r2_*.output_r2_*`, multiple_sep: `\";\"`. Path to read 2 fastq/fasta file"
,
"default":"$id.$key.output_r2_*.output_r2_*"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -83,6 +83,9 @@ resources:
description: "List the contents of a directory and parse contained fastq files"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -164,8 +167,8 @@ build_info:
engine: "native|native"
output: "target/nextflow/utils/listInputDir"
executable: "target/nextflow/utils/listInputDir/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -175,7 +178,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// listInputDir save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
/**
 * Assert that every required output argument declared in the config has a
 * non-null entry in `outputs`.
 *
 * @param outputs the output state to verify, keyed by plain argument name
 * @param config  the component config; only `config.allArguments` is read
 * @param id      the event id, used in the assertion message
 * @param key     the module key, used in the assertion message
 */
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  // the check is skipped entirely for stub runs
  if (workflow.stubRun) {
    return
  }
  def requiredOutputArgs = config.allArguments.findAll { candidate ->
    candidate.direction == "output" && candidate.required
  }
  for (arg in requiredOutputArgs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Build and return a workflow (`publishFilesWf`) that hands the files found in
// each event's state to `publishFilesProc`.
//
// args:
//   key - (required) used together with the event id as the "<id>.<key>" prefix
//         passed to collectInputOutputPaths.
// Input channel: tuples whose first two elements are [id, state].
// Emits: the input channel, unchanged.
def publishFiles(Map args) {
def key_ = args.get("key")
assert key_ != null : "publishFiles: key must be specified"
workflow publishFilesWf {
take: input_ch
main:
input_ch
| map { tup ->
def id_ = tup[0]
def state_ = tup[1]
// the input files and the target output filenames
// (presumably collectInputOutputPaths returns [inputPath, outputFilename] pairs;
// transposing turns them into two parallel lists — confirm against its definition)
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
// tuple layout expected by publishFilesProc: [id, inputFiles, outputFiles]
[id_, inputFiles_, outputFilenames_]
}
| publishFilesProc
// the original events are emitted, not the output of publishFilesProc
emit: input_ch
}
return publishFilesWf
}
// Process that copies a list of input files to their target output filenames and
// publishes the copies (publishDir mode "copy") under the directory returned by
// getPublishDir().
//
// input:  [id, inputFiles, outputFiles] where inputFiles and outputFiles are
//         paired by position (a single value is treated as a one-element list).
// output: [id, <paths named by outputFiles>]
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// NOTE(review): the "_inputfile?/*" staging pattern presumably gives every input
// its own staging directory so equally named files do not collide — confirm
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// build the shell commands: for every (infile, outfile) pair, create the parent
// directory of outfile if needed and copy infile to outfile
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Build and return a workflow (`publishFilesSimpleWf`) that publishes the file
// outputs of a component, naming them after the filename templates found in the
// original state.
//
// args:
//   config - (required) the component config; its `allArguments` are inspected
//   key    - the module key, defaults to `config.name`
// Input channel: tuples [id, state, origState].
// Emits: the input channel, unchanged.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
def config = args.get("config")
assert config != null : "publishFilesByConfig: config must be specified"
def key_ = args.get("key", config.name)
assert key_ != null : "publishFilesByConfig: key must be specified"
workflow publishFilesSimpleWf {
take: input_ch
main:
input_ch
| map { tup ->
def id_ = tup[0]
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
// the processed state is a list of maps with the entries inputPath and outputFilename
// (the branch for multiple-file arguments additionally adds a 'key' entry), where
// - inputPath is a list of the source files
// - outputFilename is a List[String]
// - (inputPath, outputFilename) are the files that will be copied from src to dest
def processedState =
config.allArguments
.findAll { it.direction == "output" }
.collectMany { par ->
def plainName_ = par.plainName
// if the state does not contain the key, it is either an
// optional argument for which the component did not
// generate any output, OR multiple channels were emitted
// and this output was not part of the event from the channel
// that is now being parsed
if (!state_.containsKey(plainName_)) {
return []
}
def value = state_[plainName_]
// if the parameter is not a file, there is nothing that needs
// to be copied from the source path to the dest path, so an
// entry with empty lists is returned
if (par.type != "file") {
return [[inputPath: [], outputFilename: []]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
// that it should not be returned as a state
if (!origState_.containsKey(plainName_)) {
return []
}
def filenameTemplate = origState_[plainName_]
// if the parameter is multiple: true and the template is a list, use its first element as the template
if (par.multiple && filenameTemplate instanceof List) {
filenameTemplate = filenameTemplate[0]
}
// instantiate the template: substitute $id / ${id} and $key / ${key}
def filename = filenameTemplate
.replaceAll('\\$id', id_)
.replaceAll('\\$\\{id\\}', id_)
.replaceAll('\\$key', key_)
.replaceAll('\\$\\{key\\}', key_)
if (par.multiple) {
// if the parameter is multiple: true, the filename
// should contain a wildcard '*' that is replaced with
// the index of the file
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
def outputPerFile = value.withIndex().collect{ val, ix ->
def filename_ix = filename.replace("*", ix.toString())
def inputPath = val instanceof File ? val.toPath() : val
[inputPath: inputPath, outputFilename: filename_ix]
}
// turn the list of per-file maps into one map of parallel lists
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
} else {
// NOTE(review): value_ is not used anywhere below in this branch
def value_ = java.nio.file.Paths.get(filename)
def inputPath = value instanceof File ? value.toPath() : value
return [[inputPath: [inputPath], outputFilename: [filename]]]
}
}
// flatten the per-argument lists into the two parallel lists expected by publishFilesProc
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
[id_, inputPaths, outputFilenames]
}
| publishFilesProc
// the original events are emitted, not the output of publishFilesProc
emit: input_ch
}
return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the states need to be
// sorted using groupTuple's 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2907,6 +3138,10 @@ meta = [
],
"description" : "List the contents of a directory and parse contained fastq files",
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3003,8 +3238,8 @@ meta = [
"runner" : "nextflow",
"engine" : "native|native",
"output" : "target/nextflow/utils/listInputDir",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3019,7 +3254,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [

View File

@@ -13,12 +13,12 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "string"
name: "--params"
description: "The state to save\n"
name: "--params_yaml"
description: "base64 encoded yaml file containing the state\n"
info: null
required: true
direction: "input"
multiple: true
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
@@ -40,6 +40,9 @@ resources:
description: "Save parameters to a file\n"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -140,8 +143,8 @@ build_info:
engine: "docker|native"
output: "target/nextflow/utils/save_params"
executable: "target/nextflow/utils/save_params/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -151,7 +154,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// save_params save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
/**
 * Verify that every required output argument declared in the component
 * config is present (and not null) in the given output state.
 *
 * The check is skipped entirely when the workflow is executed as a stub run.
 *
 * @param outputs the (merged) output state, keyed by plain argument name
 * @param config  the component config; only `config.allArguments` is read
 * @param id      the event id, used in the error message
 * @param key     the module key, used in the error message
 */
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  // stub runs are not expected to produce outputs, so there is nothing to check
  if (workflow.stubRun) {
    return
  }

  // only required arguments with direction "output" are of interest
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }

  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
/**
 * Create a sub-workflow that publishes all files found in the state of each event.
 *
 * Expects events shaped as [id, state, ...]. For every event the files in the
 * state are paired with a target filename by `collectInputOutputPaths` (using
 * the prefix "<id>.<key>") and handed to `publishFilesProc`.
 * The sub-workflow emits its input channel unchanged.
 *
 * @param args map of arguments; `key` is mandatory
 * @return the sub-workflow
 */
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def eventId = tup[0]
          def eventState = tup[1]

          // list of [inputFile, outputFilename] pairs for every file in the state
          def pathPairs = collectInputOutputPaths(eventState, eventId + "." + key_)

          // turn the list of pairs into [allInputFiles, allOutputFilenames]
          def columns = pathPairs.transpose()

          [eventId, columns[0], columns[1]]
        }
        | publishFilesProc
    emit: input_ch
  }

  return publishFilesWf
}
// Process that copies a set of staged input files to their target output
// filenames inside the task directory and declares those targets as outputs,
// so that the `publishDir` directive copies them to the publish directory.
//
// Inputs:
//   id          - event id, only used to tag the task
//   inputFiles  - the file(s) to publish; may be a single path or a list
//   outputFiles - the target filename(s), in the same order as inputFiles
process publishFilesProc {
// todo: check publishpath?
// declared outputs are copied (not linked) into the directory returned by getPublishDir()
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// NOTE(review): the "_inputfile?/*" staging pattern presumably places every input
// in its own numbered directory to avoid name clashes -- confirm against Nextflow docs
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Pair every input file with its target filename (a single value is wrapped in
// a list first) and generate the shell commands that perform the copy.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
// create the parent directory of the target if needed, then copy recursively
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
// the generated commands are joined into the body of the task script
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
/**
 * Create a sub-workflow that publishes the output files of a component, using
 * the component config to decide which state entries are files and the
 * original (user supplied) state to decide where they should be written.
 *
 * This assumes that the state contains no values other than those specified
 * in the config.
 *
 * Expects events shaped as [id, new_state, orig_state]:
 *   - new_state:  e.g. [output: new File("myoutput.h5ad"), k: 10]
 *   - orig_state: e.g. [output: '$id.$key.foo.h5ad'] (filename templates)
 *
 * For every output argument of type "file" that is present in both states, the
 * filename template is instantiated ($id / ${id} / $key / ${key} are replaced)
 * and the (input path, output filename) pair is passed on to `publishFilesProc`.
 * Arguments with `multiple: true` must have a '*' wildcard in their template,
 * which is replaced by the index of each file.
 * The sub-workflow emits its input channel unchanged.
 *
 * @param args map of arguments; `config` is mandatory, `key` defaults to `config.name`
 * @return the sub-workflow
 */
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath, outputFilename] maps, where
          //   - inputPath is a List[Path]
          //   - outputFilename is a List[String]
          //   - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName

                // if the state does not contain the key, it is either an
                // optional argument for which the component did not generate
                // any output, OR multiple channels were emitted and this
                // output was not part of the channel that is being parsed now
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]

                // if the parameter is not a file, it is stored in the state
                // as-is; there is nothing to copy from source to destination
                if (par.type != "file") {
                  return []
                }

                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]

                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }

                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)

                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"

                  // A component may emit a single file for a 'multiple' argument.
                  // java.nio.file.Path is itself Iterable (over its name elements),
                  // so a lone path must be wrapped before iterating; otherwise
                  // every path component would be treated as a separate file.
                  def isScalar = value instanceof java.nio.file.Path ||
                    value instanceof File ||
                    value instanceof CharSequence
                  def values = isScalar ? [value] : value

                  def outputPerFile = values.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  return [[
                    inputPath: outputPerFile.collect{ it.inputPath },
                    outputFilename: outputPerFile.collect{ it.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into one list of sources and one of targets
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the states need to be
// sorted using groupTuple's 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2822,11 +3053,11 @@ meta = [
},
{
"type" : "string",
"name" : "--params",
"description" : "The state to save\n",
"name" : "--params_yaml",
"description" : "base64 encoded yaml file containing the state\n",
"required" : true,
"direction" : "input",
"multiple" : true,
"multiple" : false,
"multiple_sep" : ";"
}
]
@@ -2860,6 +3091,10 @@ meta = [
],
"description" : "Save parameters to a file\n",
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -2982,8 +3217,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "target/nextflow/utils/save_params",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -2998,7 +3233,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [
@@ -3036,12 +3271,13 @@ tempscript=".viash_script.sh"
cat > "$tempscript" << VIASHMAIN
import re
import yaml
import base64
## VIASH START
# The following code has been auto-generated by Viash.
par = {
'id': $( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "r'${VIASH_PAR_ID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'params': $( if [ ! -z ${VIASH_PAR_PARAMS+x} ]; then echo "r'${VIASH_PAR_PARAMS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ),
'params_yaml': $( if [ ! -z ${VIASH_PAR_PARAMS_YAML+x} ]; then echo "r'${VIASH_PAR_PARAMS_YAML//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi )
}
meta = {
@@ -3074,27 +3310,41 @@ class Dumper(yaml.Dumper):
def increase_indent(self, flow=False, indentless=False):
return super(Dumper, self).increase_indent(flow, False)
params = {}
for param in par['params']:
param = param.replace('\\$id', par['id'])
key, value = param.split('=')
def decode_params_yaml(encoded_yaml):
# Step 1: Decode from Base64
yaml_bytes = base64.b64decode(encoded_yaml)
array_match = re.match(r'(.+)\\\\[(\\\\d+)\\\\]\\$', key)
if array_match:
base_key = array_match.group(1)
index = int(array_match.group(2))
if base_key not in params:
params[base_key] = []
while len(params[base_key]) <= index:
params[base_key].append(None)
params[base_key][index] = value
else:
params[key] = value
# Step 2: Convert bytes to string
yaml_string = yaml_bytes.decode('utf-8')
# Step 3: Extract pattern for Java path objects
# Find pattern: !!sun.nio.fs.UnixPath /path/to/file
pattern = r'!!sun\\\\.nio\\\\.fs\\\\.UnixPath\\\\s+([^\\\\n]+)'
# Replace with the actual path string (captured group)
yaml_string = re.sub(pattern, r'\\\\1', yaml_string)
# Handle any remaining empty UnixPath objects
yaml_string = yaml_string.replace('!!sun.nio.fs.UnixPath {}', '""')
# Step 4: Parse YAML
yaml_data = yaml.safe_load(yaml_string)
return yaml_data
def replace_id(value, sample_id):
if isinstance(value, str):
return value.replace('\\$id', sample_id)
elif isinstance(value, list):
return [replace_id(item, sample_id) for item in value]
return value
print(par['params_yaml'])
params = decode_params_yaml(par['params_yaml'])
for key, value in params.items():
params[key] = replace_id(value, par["id"])
with open(par["output"], 'w') as f:
yaml.dump(params, f, default_flow_style=False, Dumper=Dumper)
VIASHMAIN

View File

@@ -24,11 +24,11 @@
,
"params": {
"params_yaml": {
"type":
"string",
"description": "Type: List of `string`, required, multiple_sep: `\";\"`. The state to save\n",
"help_text": "Type: List of `string`, required, multiple_sep: `\";\"`. The state to save\n"
"description": "Type: `string`, required. base64 encoded yaml file containing the state\n",
"help_text": "Type: `string`, required. base64 encoded yaml file containing the state\n"
}

View File

@@ -20,9 +20,9 @@ argument_groups:
arguments:
- type: "file"
name: "--input_r1"
description: "Forward reads in FASTQ format. Multiple files can be provided which\
\ will\nbe demultiplexed separately before joining the results for each individual\
\ well.\n"
description: "Forward reads in FASTQ format. Multiple files corresponding to different\
\ lanes can be provided which will\nbe demultiplexed separately before joining\
\ the results for each individual well.\n"
info: null
must_exist: true
create_parent: true
@@ -32,9 +32,9 @@ argument_groups:
multiple_sep: ";"
- type: "file"
name: "--input_r2"
description: "Reverse reads in FASTQ format. Multiple files can be provided which\
\ will\nbe demultiplexed separately before joining the results for each individual\
\ well.\n"
description: "Reverse reads in FASTQ format. Multiple files corresponding to different\
\ lanes can be provided which will\nbe demultiplexed separately before joining\
\ the results for each individual well.\n"
info: null
must_exist: true
create_parent: true
@@ -80,26 +80,25 @@ argument_groups:
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--sample_id"
description: "Sample ID for the provided input files. If not provided, the value\
\ of --id\nwill be used. Input files will allways be demultiplexed separately,\n\
but the FASTQs for wells with matching sample IDs will be concatenated before\
\ mapping.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output arguments"
arguments:
- type: "file"
name: "--fastq_output_r1"
description: "List of demultiplexed fastq files"
name: "--fastq_output"
description: "Directory containing output fastq files"
info: null
default:
- "fastq/*_R1_001.fastq"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--fastq_output_r2"
description: "List of demultiplexed fastq files"
info: null
default:
- "fastq/*_R2_001.fastq"
- "fastq/*"
must_exist: true
create_parent: true
required: true
@@ -203,6 +202,9 @@ test_resources:
entrypoint: "test_wf2"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -237,6 +239,9 @@ dependencies:
- name: "report/create_report"
repository:
type: "local"
- name: "utils/concatRuns"
repository:
type: "local"
- name: "utils/save_params"
repository:
type: "local"
@@ -325,8 +330,8 @@ build_info:
engine: "native|native"
output: "target/nextflow/workflows/htrnaseq"
executable: "target/nextflow/workflows/htrnaseq/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
dependencies:
- "target/nextflow/stats/combine_star_logs"
@@ -339,6 +344,7 @@ build_info:
- "target/nextflow/eset/create_fdata"
- "target/nextflow/eset/create_pdata"
- "target/nextflow/report/create_report"
- "target/nextflow/utils/concatRuns"
- "target/nextflow/utils/save_params"
package_config:
name: "htrnaseq"
@@ -348,7 +354,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// htrnaseq save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Assert that every required output argument declared in the config has a
// non-null entry in `outputs`. Nothing is checked during a stub run.
//
// outputs: map of output argument name -> value
// config:  component config; `config.allArguments` is inspected
// id, key: only used to build the assertion message
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Build a sub-workflow that hands the files referenced by each event's state
// to publishFilesProc.
//
// args: must contain "key"; it is combined with the event id as "<id>.<key>"
//       when the input/output paths are collected.
// Returns the `publishFilesWf` workflow. That workflow emits its input channel
// unchanged; the output of publishFilesProc is not used.
def publishFiles(Map args) {
def key_ = args.get("key")
assert key_ != null : "publishFiles: key must be specified"
workflow publishFilesWf {
take: input_ch
main:
input_ch
| map { tup ->
// only the first two elements of the event are read: [id, state]
def id_ = tup[0]
def state_ = tup[1]
// the input files and the target output filenames
// NOTE(review): collectInputOutputPaths is defined elsewhere; presumably it returns
// [inputFile, outputFilename] pairs that transpose() splits into two parallel lists - confirm
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
[id_, inputFiles_, outputFilenames_]
}
| publishFilesProc
emit: input_ch
}
return publishFilesWf
}
// Process that copies each input file to its target output filename and declares
// the targets as outputs, published in "copy" mode under "${getPublishDir()}/".
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// inputFiles uses the stageAs pattern "_inputfile?/*"; outputFiles holds the target filenames
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// wrap single values in a list, pair inputs with outputs positionally,
// and generate the shell commands for every pair that needs copying
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
// create the parent directory of the target if it does not exist, then copy recursively
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Build a sub-workflow that publishes the output files of a component, using the
// component config to decide which state entries are output files.
//
// This assumes that the state contains no other values than those specified in the config.
//
// args:
//   - config: (required) component config; `config.allArguments` is consulted
//   - key:    (optional) defaults to `config.name`
//
// Each input event is read as [id, state, origState]. The returned workflow emits
// its input channel unchanged; the output of publishFilesProc is not used.
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // processedState is a list of maps, one per output argument that is kept, where
          // - inputPath is a list of source files (java.io.File values are converted to Path)
          // - outputFilename is a list of target filenames (Strings)
          // - (inputPath, outputFilename) are the files that will be copied from src to dest
          // - maps for `multiple: true` arguments additionally carry the argument name as `key`
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it is either an optional
                // argument for which the component did not generate any output,
                // OR multiple channels were emitted and this output was not
                // added to the channel that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it is not something that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // turn the list of per-file maps into one map of parallel lists
                  def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
                    [key, outputPerFile.collect{dic -> dic[key]}]
                  }
                  return [[key: plainName_] + transposedOutputs]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into one list of sources and one list of targets
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1726,8 +1885,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1740,7 +1897,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1752,33 +1909,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted, for which groupTuple has a 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2840,7 +3071,7 @@ meta = [
{
"type" : "file",
"name" : "--input_r1",
"description" : "Forward reads in FASTQ format. Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n",
"description" : "Forward reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n",
"must_exist" : true,
"create_parent" : true,
"required" : true,
@@ -2851,7 +3082,7 @@ meta = [
{
"type" : "file",
"name" : "--input_r2",
"description" : "Reverse reads in FASTQ format. Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n",
"description" : "Reverse reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n",
"must_exist" : true,
"create_parent" : true,
"required" : true,
@@ -2901,6 +3132,15 @@ meta = [
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--sample_id",
"description" : "Sample ID for the provided input files. If not provided, the value of --id\nwill be used. Input files will allways be demultiplexed separately,\nbut the FASTQs for wells with matching sample IDs will be concatenated before mapping.\n",
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
@@ -2909,24 +3149,10 @@ meta = [
"arguments" : [
{
"type" : "file",
"name" : "--fastq_output_r1",
"description" : "List of demultiplexed fastq files",
"name" : "--fastq_output",
"description" : "Directory containing output fastq files",
"default" : [
"fastq/*_R1_001.fastq"
],
"must_exist" : true,
"create_parent" : true,
"required" : true,
"direction" : "output",
"multiple" : true,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--fastq_output_r2",
"description" : "List of demultiplexed fastq files",
"default" : [
"fastq/*_R2_001.fastq"
"fastq/*"
],
"must_exist" : true,
"create_parent" : true,
@@ -3058,6 +3284,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3124,6 +3354,12 @@ meta = [
"type" : "local"
}
},
{
"name" : "utils/concatRuns",
"repository" : {
"type" : "local"
}
},
{
"name" : "utils/save_params",
"repository" : {
@@ -3234,8 +3470,8 @@ meta = [
"runner" : "nextflow",
"engine" : "native|native",
"output" : "target/nextflow/workflows/htrnaseq",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3250,7 +3486,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [
@@ -3289,70 +3525,154 @@ include { create_eset } from "${meta.resources_dir}/../../../nextflow/eset/creat
include { create_fdata } from "${meta.resources_dir}/../../../nextflow/eset/create_fdata/main.nf"
include { create_pdata } from "${meta.resources_dir}/../../../nextflow/eset/create_pdata/main.nf"
include { create_report } from "${meta.resources_dir}/../../../nextflow/report/create_report/main.nf"
include { concatRuns } from "${meta.resources_dir}/../../../nextflow/utils/concatRuns/main.nf"
include { save_params } from "${meta.resources_dir}/../../../nextflow/utils/save_params/main.nf"
// inner workflow
// user-provided Nextflow code
workflow run_wf {
take:
input_ch
raw_ch
main:
input_ch = raw_ch
// Use the event ID as the default for the sample ID
| map {id, state ->
def sample_id = state.sample_id ?: id
def newState = state + ["sample_id": sample_id, "run_id": id]
return [id, newState]
}
// The featureData only has one requirement: the genome annotation.
// It can be generated straight away.
// It can be generated straight away. Most of the time, there is one shared
// annotation for all of the inputs and the fData should only be calculated once.
// The state is manipulated in such a way that there is one event created per unique
// input annotation file. In turn, the featureData file can be joined into the original input
// channel which allows it to be shared across events if required.
f_data_ch = input_ch
| save_params.run(
fromState: {id, state ->
// Convert state to list of key=value parameters
def params_list = []
// Add each parameter as key=value
state.each { key, value ->
if (value != null) {
// Handle different types of values
if (value instanceof Collection) {
// For collections, add multiple entries with array notation
value.eachWithIndex { item, index ->
params_list.add("${key}[${index}]=${item}")
}
} else {
// For simple values, just add key=value
params_list.add("${key}=${value}")
}
}
fromState: {id, state ->
// Define the function before using it
def convertPaths
convertPaths = { value ->
if (value instanceof java.nio.file.Path)
return value.toUriString()
else if (value instanceof Collection)
return value.collect { convertPaths(it) }
else
return value
}
// Apply conversion to all state values
def convertedState = state.collectEntries { k, v -> [(k): convertPaths(v)] }
def yaml = new org.yaml.snakeyaml.Yaml()
def yamlString = yaml.dump(convertedState)
def encodedYaml = yamlString.bytes.encodeBase64().toString()
return [
"id": id,
"params": params_list,
"params_yaml": encodedYaml,
"output": "${id}_parameters.yaml"
]
},
toState: ["parameters": "output"]
)
| toSortedList()
| flatMap {ids_and_states ->
def annotation_files = ids_and_states.inject([:]){ old_state, id_and_state ->
def (id, state) = id_and_state
def annotation_file = state.annotation
def new_state = old_state + [(annotation_file): (old_state.getOrDefault(annotation_file, []) + [id])]
return new_state
}
def file_names = annotation_files.keySet().collect{it.name}
assert (file_names.toSet().size() == file_names.size()):
"Please make sure that the annotation files have unique file names."
def new_states = annotation_files.collect{annotation_file, value ->
def new_state = [annotation_file.name , ["annotation": annotation_file, "event_ids": value]]
return new_state
}
return new_states
}
| create_fdata.run(
directives: [label: ["lowmem", "lowcpu"]],
fromState: [
"gtf": "annotation",
"output": "f_data"
],
toState: {id, result, state -> ["f_data": result.output]}
toState: ["f_data": "output"]
)
| flatMap {_, state ->
def new_states = state.event_ids.collect{event_id ->
[event_id, ["f_data": state.f_data]]
}
return new_states
}
// Perform mapping of each well.
mapping_ch = input_ch
demultiplex_ch = input_ch
| well_demultiplex.run(
fromState: [
"input_r1": "input_r1",
"input_r2": "input_r2",
"barcodesFasta": "barcodesFasta",
],
toState: [
"input_r1": "output_r1",
"input_r2": "output_r2",
]
toState: {id, result, state ->
def all_fastq = result.output_r1 + result.output_r2
def output_dir = all_fastq.collect{it.parent}.unique()
assert output_dir.size() == 1: "Expected output from well demultiplexing to reside into one directory."
def new_state = state + [
"input_r1": result.output_r1,
"input_r2": result.output_r2,
"fastq_output_directory": output_dir[0],
]
return new_state
}
)
fastq_output_directory_ch = demultiplex_ch
| map {id, state ->
def new_event = [state.sample_id, state]
return new_event
}
| groupTuple(by: 0, sort: "hash")
| map {id, states ->
def fastq_output_dirs = states.collect{it.fastq_output_directory}
def new_state = ["fastq_output_directory": fastq_output_dirs]
def new_event = [id, new_state]
return [id, new_state]
}
concat_samples_ch = demultiplex_ch.join(f_data_ch)
| map {id, demutliplex_state, f_data_state ->
def newState = demutliplex_state + ["f_data": f_data_state["f_data"]]
[id, newState]
}
| concatRuns.run(
fromState: [
"input_r1": "input_r1",
"input_r2": "input_r2",
"sample_id": "sample_id",
],
toState: {id, result, state ->
def state_overwite = [
"input_r1": result.output_r1,
"input_r2": result.output_r2,
"_meta": ["join_id": state.run_id]
]
return state + state_overwite
}
)
pool_ch = concat_samples_ch.join(fastq_output_directory_ch)
| map {id, demux_state, fastq_output_directory_state ->
def new_state = demux_state + fastq_output_directory_state
return [id, new_state]
}
| parallel_map.run(
directives: ["label": ["highmem", "lowcpu"]],
fromState: {id, state ->
@@ -3369,9 +3689,6 @@ workflow run_wf {
"star_output": "output",
]
)
// From the mapped wells, create statistics based on the BAM files.
pool_ch = mapping_ch
// Split the events from 1 event per pool into events per well
// and add extra metadata about the wells to the state.
| well_metadata.run(
@@ -3425,10 +3742,10 @@ workflow run_wf {
// Gather the keys from all states. For some state items,
// we need to gather all the different items from across the states
def barcodes = states.collect{it.barcode}
assert barcodes.clone().unique().size() == barcodes.size(), \
assert barcodes.clone().unique().size() == barcodes.size(): \
"Error when gathering information for pool ${id}, barcodes are not unique!"
def well_ids = states.collect{it.well_id}
assert well_ids.clone().unique().size() == well_ids.size(), \
assert well_ids.clone().unique().size() == well_ids.size(): \
"Error when gathering information for pool ${id}, well IDs are not unique!"
def custom_state = [
"input_r1": states.collect{it.input_r1},
@@ -3449,7 +3766,7 @@ workflow run_wf {
// All other state should have a unique value
def old_state_items = other_state_keys.inject([:]){ old_state, argument_name ->
argument_values = states.collect{it.get(argument_name)}.unique()
assert argument_values.size() == 1, "Arguments should be the same across modalities. Please report this \
assert argument_values.size() == 1: "Arguments should be the same across modalities. Please report this \
as a bug. Argument name: $argument_name, \
argument value: $argument_values"
def argument_value
@@ -3492,7 +3809,7 @@ workflow run_wf {
]
)
p_data_ch = star_logs_ch.join(pool_statistics_ch, remainder: true)
eset_ch = star_logs_ch.join(pool_statistics_ch, remainder: true)
| map {id, star_logs_state, pool_statistics_state ->
def newState = star_logs_state + ["nrReadsNrGenesPerChromPool": pool_statistics_state.nrReadsNrGenesPerChromPool]
return [id, newState]
@@ -3506,12 +3823,6 @@ workflow run_wf {
],
toState: ["p_data": "output"],
)
eset_ch = p_data_ch.join(f_data_ch, remainder: true)
| map {id, p_data_state, f_data_state ->
def newState = p_data_state + ["f_data": f_data_state["f_data"]]
[id, newState]
}
| create_eset.run(
directives: [label: ["lowmem", "lowcpu"]],
fromState: [
@@ -3553,13 +3864,14 @@ workflow run_wf {
output_ch = eset_ch.join(report_channel)
| map {id, state_eset, state_report ->
def new_state = state_eset + ["html_report": state_report.html_report]
def new_state = state_eset + [
"html_report": state_report.html_report,
]
[id, new_state]
}
| setState([
"star_output": "star_output",
"fastq_output_r1": "input_r1",
"fastq_output_r2": "input_r2",
"star_output": "star_output",
"fastq_output": "fastq_output_directory",
"star_output": "star_output",
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChromPool",
"star_qc_metrics": "star_qc_metrics",
@@ -3567,7 +3879,8 @@ workflow run_wf {
"f_data": "f_data",
"p_data": "p_data",
"html_report": "html_report",
"parameters": "parameters"
"parameters": "parameters",
"_meta": "_meta",
])

View File

@@ -18,7 +18,7 @@
"type":
"string",
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Forward reads in FASTQ format",
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Forward reads in FASTQ format. Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n"
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Forward reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n"
}
@@ -28,7 +28,7 @@
"type":
"string",
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Reverse reads in FASTQ format",
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Reverse reads in FASTQ format. Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n"
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Reverse reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n"
}
@@ -74,6 +74,16 @@
}
,
"sample_id": {
"type":
"string",
"description": "Type: `string`. Sample ID for the provided input files",
"help_text": "Type: `string`. Sample ID for the provided input files. If not provided, the value of --id\nwill be used. Input files will allways be demultiplexed separately,\nbut the FASTQs for wells with matching sample IDs will be concatenated before mapping.\n"
}
}
},
@@ -85,24 +95,13 @@
"properties": {
"fastq_output_r1": {
"fastq_output": {
"type":
"string",
"description": "Type: List of `file`, required, default: `$id.$key.fastq_output_r1_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files",
"help_text": "Type: List of `file`, required, default: `$id.$key.fastq_output_r1_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files"
"description": "Type: List of `file`, required, default: `$id.$key.fastq_output_*.fastq_output_*`, multiple_sep: `\";\"`. Directory containing output fastq files",
"help_text": "Type: List of `file`, required, default: `$id.$key.fastq_output_*.fastq_output_*`, multiple_sep: `\";\"`. Directory containing output fastq files"
,
"default":"$id.$key.fastq_output_r1_*.fastq"
}
,
"fastq_output_r2": {
"type":
"string",
"description": "Type: List of `file`, required, default: `$id.$key.fastq_output_r2_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files",
"help_text": "Type: List of `file`, required, default: `$id.$key.fastq_output_r2_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files"
,
"default":"$id.$key.fastq_output_r2_*.fastq"
"default":"$id.$key.fastq_output_*.fastq_output_*"
}

View File

@@ -12,7 +12,7 @@ argument_groups:
create_parent: true
required: true
direction: "input"
multiple: true
multiple: false
multiple_sep: ";"
- type: "file"
name: "--barcodesFasta"
@@ -123,6 +123,9 @@ resources:
description: "Runner for HT RNA-seq pipeline"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -217,8 +220,8 @@ build_info:
engine: "native|native"
output: "target/nextflow/workflows/runner"
executable: "target/nextflow/workflows/runner/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
dependencies:
- "target/nextflow/utils/listInputDir"
@@ -233,7 +236,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// runner save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Asserts that every argument of the component config that is both an output
// and required has a non-null entry in `outputs`.
//   outputs: map of output argument name -> value
//   config:  component config; its `allArguments` list is inspected
//   id, key: event id and module key, only used in the error message
// The check is skipped entirely when the workflow is executed as a stub run.
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (workflow.stubRun) {
    return
  }
  config.allArguments
    .findAll { it.direction == "output" && it.required }
    .each { arg ->
      assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
        "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
    }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Builds a sub-workflow that hands the files referenced in each event's state
// to publishFilesProc.
//   args.key: required; combined with the event id ("<id>.<key>") and passed to
//             collectInputOutputPaths together with the state.
// Each incoming event is read as [id, state, ...]; only the first two elements
// are used. The returned workflow emits its input channel unchanged.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { event ->
          def id_ = event[0]
          def state_ = event[1]
          // split the collected paths into the input files and the target output filenames
          def (inputFiles_, outputFilenames_) =
            collectInputOutputPaths(state_, id_ + "." + key_).transpose()
          [id_, inputFiles_, outputFilenames_]
        }
        | publishFilesProc
    emit: input_ch
  }

  return publishFilesWf
}
// Process that copies every staged input file to its requested output filename
// inside the task directory; the resulting files are declared as outputs and
// published (copied) to the directory returned by getPublishDir().
process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
// id:          event id, also used as the task tag
// inputFiles:  source file(s), staged under "_inputfile?/" directories
//              (presumably to keep identically named inputs apart -- TODO confirm)
// outputFiles: target filename(s), paired one-to-one with inputFiles
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Wrap single values in a list so both sides can be paired up, then generate
// the shell commands for every (infile, outfile) pair.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
// create the parent directory of the target when it does not exist yet
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
// -r: the source may be a directory
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Builds a sub-workflow that hands the output files of a component to
// publishFilesProc, using the component config to decide which state entries
// are files and what their target filenames are.
//   args.config: required; its `allArguments` list is inspected.
//   args.key:    substituted for $key / ${key} in filename templates
//                (defaults to config.name).
// Each incoming event is read as [id, state, origState]. The returned workflow
// emits its input channel unchanged.
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath: ..., outputFilename: ...] maps, where
          // - inputPath is a List[Path]
          // - outputFilename is a List[String]
          // - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's an
                // optional argument for which the component did
                // not generate any output OR multiple channels were emitted
                // and the output was just not added to the channel
                // that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it should be stored
                // in the state as-is, but is not something that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // turn the list of per-file maps into one map of parallel lists
                  def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
                    [key, outputPerFile.collect{dic -> dic[key]}]
                  }
                  return [transposedOutputs]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into the two parallel lists expected by publishFilesProc
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }

  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1749,33 +1906,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted; groupTuple has a 'sort' argument for this. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2819,7 +3050,7 @@ meta = [
"create_parent" : true,
"required" : true,
"direction" : "input",
"multiple" : true,
"multiple" : false,
"multiple_sep" : ";"
},
{
@@ -2959,6 +3190,10 @@ meta = [
],
"description" : "Runner for HT RNA-seq pipeline",
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3081,8 +3316,8 @@ meta = [
"runner" : "nextflow",
"engine" : "native|native",
"output" : "target/nextflow/workflows/runner",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3097,7 +3332,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [
@@ -3143,19 +3378,13 @@ workflow run_wf {
input_ch
main:
output_ch = input_ch
// Multiple runs can be provided, and the reads for these runs will
// be concatenated. Here, we gather the FASTQ files from each input directory first.
| flatMap {id, state ->
// Create an input event per input directory
def new_state = state.input.withIndex().collect{input_dir, id_index ->
def state_item = state + ["input": input_dir, "index": id_index, "run_id": id]
return ["${id}_${id_index}".toString(), state_item]
}
return new_state
}
htrnaseq_ch = input_ch
// List the FASTQ files per input directory
// Be careful: an event per lane is created!
| map {id, state ->
def new_state = state + ["run_id": id]
return [id, new_state]
}
| listInputDir.run(
fromState: [
"input": "input",
@@ -3173,13 +3402,11 @@ workflow run_wf {
// there might be multiple FASTQs for a single sample that correspond to the
// lanes. So the fastq files must be gathered across lanes and input folders
// in order to create an input lists for R1 and R2.
| map {id, state -> [state.sample_id, state]}
| groupTuple(by: 0, sort: { state1, state2 ->
if (state1.index == state2.index) {
return state1.lane <=> state2.lane
}
return state1.index <=> state2.index
})
// The ID of the event here is important! It determines the name of the output
// folders for the FASTQ files and these folders are published as-is later.
// The folder where the FASTQ files are stored in should be named after the run ID.
| map {id, state -> ["${state.sample_id}/${state.run_id}".toString(), state]}
| groupTuple(by: 0, sort: "hash")
| map {id, states ->
def new_r1 = states.collect{it.r1_output}
def new_r2 = states.collect{it.r2_output}
@@ -3188,7 +3415,7 @@ workflow run_wf {
// TODO: this can be asserted.
def new_state = states[0] + [
"r1": new_r1,
"r2": new_r2
"r2": new_r2,
]
return [id, new_state]
}
@@ -3197,8 +3424,7 @@ workflow run_wf {
f_data: 'fData/$id.txt',
p_data: 'pData/$id.txt',
star_output: 'star_output/$id/*',
fastq_output_r1: 'fastq/*_R1_001.fastq',
fastq_output_r2: 'fastq/*_R1_001.fastq',
fastq_output: 'fastq/*',
eset: 'esets/$id.rds',
nrReadsNrGenesPerChrom: 'nrReadsNrGenesPerChrom/$id.txt',
star_qc_metrics: 'starLogs/$id.txt',
@@ -3211,32 +3437,32 @@ workflow run_wf {
genomeDir: "genomeDir",
annotation: "annotation",
umi_length: "umi_length",
sample_id: "sample_id",
],
toState: { id, result, state -> state + result }
)
// The HT-RNAseq workflow outputs multiple events, one per 'pool' (usually a plate)
// but for publishing the results, this is not handy because we want to use the $id
// variable as a pointer to the target data.
//
// So, we should combine everything together
//
// project_id / experiment_id / date_workflow
// project_id / experiment_id / "data_processed" / date_workflow
grouped_ch = htrnaseq_ch
| toSortedList
| map{ vs ->
def all_fastqs
[
vs[0][1].run_id, // The original ID
[
star_output: reduce_paths(vs.collect{ it[1].star_output }.flatten()),
fastq_output_r1: reduce_paths(vs.collect{ it[1].fastq_output_r1 }.flatten(), 1),
fastq_output_r2: reduce_paths(vs.collect{ it[1].fastq_output_r2 }.flatten(), 1),
nrReadsNrGenesPerChrom: reduce_paths(vs.collect{ it[1].nrReadsNrGenesPerChrom }),
star_qc_metrics: reduce_paths(vs.collect{ it[1].star_qc_metrics }),
eset: reduce_paths(vs.collect{ it[1].eset }),
f_data: reduce_paths(vs.collect{ it[1].f_data }),
p_data: reduce_paths(vs.collect{ it[1].p_data }),
fastq_output: vs.collect{ it[1].fastq_output }.flatten().unique(),
html_report: vs.collect{ it[1].html_report }[0], // The report is for all pools
plain_output: vs.collect{ it[1].plain_output }[0],
project_id: vs.collect{ it[1].project_id }[0],
@@ -3245,12 +3471,13 @@ workflow run_wf {
]
}
results_publish_ch = grouped_ch
| publish_results.run(
fromState: { id, state ->
def project = (state.plain_output) ? id : "${state.project_id}"
def experiment = (state.plain_output) ? id : "${state.experiment_id}"
def id0 = "${project}/${experiment}"
def id1 = (state.plain_output) ? id : "${id0}/${date}"
def id1 = (state.plain_output) ? id : "${id0}/data_processed/${date}"
def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}"
if (id == id2) {
@@ -3281,14 +3508,24 @@ workflow run_wf {
]
)
fastq_publish_ch = grouped_ch
| flatMap{id, state ->
def new_states = state.fastq_output.collect{fastq_dir ->
def new_id = fastq_dir.name // The folder name corresponds to the run
def fastq_files = fastq_dir.listFiles()
def new_state = [
"fastq_output": fastq_files
]
return [new_id, new_state]
}
return new_states
}
| publish_fastqs.run(
fromState: { id, state ->
def id0 = "${id}"
def id1 = (state.plain_output) ? id : "${id0}/${date}"
def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}"
println(state.plain_output)
if (id == id2) {
println("Publising fastqs to ${params.fastq_publish_dir}")
} else {
@@ -3296,8 +3533,7 @@ workflow run_wf {
}
[
input_r1: state.fastq_output_r1,
input_r2: state.fastq_output_r2,
input: state.fastq_output,
output: "${id2}",
]
},
@@ -3312,7 +3548,7 @@ workflow run_wf {
)
emit:
output_ch
grouped_ch
| map{ id, state -> [ id, [ _meta: [ join_id: state.run_id ] ] ] }
}

View File

@@ -17,8 +17,8 @@
"input": {
"type":
"string",
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`",
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`"
"description": "Type: `file`, required. Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`",
"help_text": "Type: `file`, required. Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`"
}

View File

@@ -109,6 +109,9 @@ test_resources:
entrypoint: "test_wf2"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -210,8 +213,8 @@ build_info:
engine: "native|native"
output: "target/nextflow/workflows/well_demultiplex"
executable: "target/nextflow/workflows/well_demultiplex/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
dependencies:
- "target/dependencies/vsh/vsh/biobox/v0.3.0/nextflow/cutadapt"
@@ -224,7 +227,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// well_demultiplex save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Verify that every required output argument declared in the config is present
// (and not null) in `outputs`. The check is skipped entirely during a stub run.
//
// outputs: map of output argument name -> value
// config:  component config; `config.allArguments` lists the argument definitions
// id, key: only used to build the assertion message
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  // nothing to verify when running in stub mode
  if (workflow.stubRun) {
    return
  }
  // only required arguments with direction 'output' are checked
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Create a sub-workflow that hands the files referenced in each event's state
// to publishFilesProc. The incoming channel is emitted unchanged.
//
// args.key (required) is combined with the event id ("<id>.<key>") and passed
// to collectInputOutputPaths as its second argument.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def (id_, state_) = tup

          // presumably a list of [inputFile, outputFilename] pairs -- see collectInputOutputPaths
          def pathPairs_ = collectInputOutputPaths(state_, id_ + "." + key_)

          // split the pairs into a list of inputs and a list of outputs
          def columns_ = pathPairs_.transpose()

          [id_, columns_[0], columns_[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Process that copies each staged input file to its target output filename
// inside the task directory and declares the targets as outputs, so that the
// publishDir directive (mode "copy") places them in the publish directory.
//
// Input tuple:
//   id          - event id, also used as the task tag
//   inputFiles  - file(s) to copy; staged using the "_inputfile?/*" pattern
//                 (see Nextflow's `stageAs` documentation for the '?' wildcard)
//   outputFiles - target filename(s), paired with inputFiles by position
// Output tuple: the id and the target paths.
process publishFilesProc {
// todo: check publishpath?
// publish directory is provided by getPublishDir(), defined elsewhere in this file
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Wrap single values in a List so both sides can be transposed into
// (infile, outfile) pairs; each pair yields a "create parent dir" command
// followed by a recursive copy.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Create a sub-workflow that copies the output files of a component to the
// filenames requested in the original (templated) state, driven by the output
// arguments declared in the component config. The incoming channel is emitted
// unchanged.
//
// Arguments:
//   - config (required): component config; only arguments in `config.allArguments`
//     with direction "output" are considered.
//   - key: substituted for `$key` / `${key}` in the filename templates
//     (defaults to `config.name`).
//
// Expected events: [id, state, origState], where `state` holds the produced
// values and `origState` holds the filename templates.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath: ..., outputFilename: ...] maps, where
          // - inputPath is a List[Path]
          // - outputFilename is a List[String]
          // - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it is either an
                // optional argument for which the component did not
                // generate any output, OR multiple channels were emitted
                // and this output was not part of the event from the
                // channel that is being processed now
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]

                // if the parameter is not a file, there is nothing that
                // needs to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)

                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // turn the list of per-file maps into one map of lists
                  return [[
                    inputPath: outputPerFile.collect{ it.inputPath },
                    outputFilename: outputPerFile.collect{ it.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into one list of sources and one list of destinations
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1753,33 +1910,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the states need to be
// sorted, for which groupTuple provides a 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (that is not the 'id'). The comparator
// closure that is used below expects the input to be List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2955,6 +3186,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3083,8 +3318,8 @@ meta = [
"runner" : "nextflow",
"engine" : "native|native",
"output" : "target/nextflow/workflows/well_demultiplex",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3099,7 +3334,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [
@@ -3195,6 +3430,8 @@ workflow run_wf {
output: new_output,
error_rate: 0.10,
demultiplex_mode: "single",
output_r1: state.output_r1,
output_r2: state.output_r2,
]
},
toState: { id, result, state ->

View File

@@ -127,6 +127,9 @@ resources:
dest: "nextflow_labels.config"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
requirements:
commands:
- "ps"
@@ -208,8 +211,8 @@ build_info:
engine: "native|native"
output: "target/nextflow/workflows/well_metadata"
executable: "target/nextflow/workflows/well_metadata/main.nf"
viash_version: "0.9.0"
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
viash_version: "0.9.2"
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
@@ -219,7 +222,7 @@ package_config:
test_resources:
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
dest: "resources_test"
viash_version: "0.9.0"
viash_version: "0.9.2"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// well_metadata save-params
//
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
Map _processInputValues(Map inputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.required) {
if (arg.required && arg.direction == "input") {
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
}
@@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
@@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
return outputs
}
// Verify that every required output argument declared in the config is present
// (and not null) in `outputs`. The check is skipped entirely during a stub run.
//
// outputs: map of output argument name -> value
// config:  component config; `config.allArguments` lists the argument definitions
// id, key: only used to build the assertion message
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  // nothing to verify when running in stub mode
  if (workflow.stubRun) {
    return
  }
  // only required arguments with direction 'output' are checked
  def requiredOutputs = config.allArguments.findAll { it.direction == "output" && it.required }
  for (arg in requiredOutputs) {
    assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
      "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
class IDChecker {
final def items = [] as Set
@@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) {
}
return joinStatesWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
// Create a sub-workflow that hands the files referenced in each event's state
// to publishFilesProc. The incoming channel is emitted unchanged.
//
// args.key (required) is combined with the event id ("<id>.<key>") and passed
// to collectInputOutputPaths as its second argument.
def publishFiles(Map args) {
  def key_ = args.get("key")
  assert key_ != null : "publishFiles: key must be specified"

  workflow publishFilesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def (id_, state_) = tup

          // presumably a list of [inputFile, outputFilename] pairs -- see collectInputOutputPaths
          def pathPairs_ = collectInputOutputPaths(state_, id_ + "." + key_)

          // split the pairs into a list of inputs and a list of outputs
          def columns_ = pathPairs_.transpose()

          [id_, columns_[0], columns_[1]]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesWf
}
// Process that copies each staged input file to its target output filename
// inside the task directory and declares the targets as outputs, so that the
// publishDir directive (mode "copy") places them in the publish directory.
//
// Input tuple:
//   id          - event id, also used as the task tag
//   inputFiles  - file(s) to copy; staged using the "_inputfile?/*" pattern
//                 (see Nextflow's `stageAs` documentation for the '?' wildcard)
//   outputFiles - target filename(s), paired with inputFiles by position
// Output tuple: the id and the target paths.
process publishFilesProc {
// todo: check publishpath?
// publish directory is provided by getPublishDir(), defined elsewhere in this file
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
// Wrap single values in a List so both sides can be transposed into
// (infile, outfile) pairs; each pair yields a "create parent dir" command
// followed by a recursive copy.
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}
// Create a sub-workflow that copies the output files of a component to the
// filenames requested in the original (templated) state, driven by the output
// arguments declared in the component config. The incoming channel is emitted
// unchanged.
//
// Arguments:
//   - config (required): component config; only arguments in `config.allArguments`
//     with direction "output" are considered.
//   - key: substituted for `$key` / `${key}` in the filename templates
//     (defaults to `config.name`).
//
// Expected events: [id, state, origState], where `state` holds the produced
// values and `origState` holds the filename templates.
//
// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // the processed state is a list of [inputPath: ..., outputFilename: ...] maps, where
          // - inputPath is a List[Path]
          // - outputFilename is a List[String]
          // - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it is either an
                // optional argument for which the component did not
                // generate any output, OR multiple channels were emitted
                // and this output was not part of the event from the
                // channel that is being processed now
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]

                // if the parameter is not a file, there is nothing that
                // needs to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)

                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // turn the list of per-file maps into one map of lists
                  return [[
                    inputPath: outputPerFile.collect{ it.inputPath },
                    outputFilename: outputPerFile.collect{ it.outputFilename }
                  ]]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          // flatten the per-argument lists into one list of sources and one list of destinations
          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
def collectFiles(obj) {
if (obj instanceof java.io.File || obj instanceof Path) {
@@ -1726,8 +1885,6 @@ def publishStates(Map args) {
// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]
def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
@@ -1740,7 +1897,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -1752,33 +1909,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}
@@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
@@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
@@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
return value_
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
@@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}
// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
@@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) {
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
def key_ = workflowArgs["key"]
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
workflow workflowInstance {
take: input_
@@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
def chInitialOutput = chArgsWithDefaults
def chInitialOutputMulti = chArgsWithDefaults
| _debug(workflowArgs, "processed")
// run workflow
| innerWorkflowFactory(workflowArgs)
// check output tuple
| map { id_, output_ ->
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
// This number is used to sort the events later when the events are gathered from across the channels.
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
def newChannel = channel
| map {tuple ->
assert tuple instanceof List :
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
def newEvent = [channelIndex] + tuple
return newEvent
}
return newChannel
}
// Put the events into one channel; this also covers the case where only one channel is emitted
def chInitialOutput = chInitialOutputList.size() > 1 ? \
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
chInitialOutputListWithIndexedEvents[0]
def chInitialOutputProcessed = chInitialOutput
| map { tuple ->
def channelId = tuple[0]
def id_ = tuple[1]
def output_ = tuple[2]
// see if output map contains metadata
def meta_ =
@@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
output_ = output_.findAll{k, v -> k != "_meta"}
// check value types
output_ = _processOutputValues(output_, meta.config, id_, key_)
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
output_ = output_.values()[0]
}
[join_id, id_, output_]
[join_id, channelId, id_, output_]
}
// | view{"chInitialOutput: ${it.take(3)}"}
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
// output tuple format: [join_id, channel_id, id, new_state, ...]
| map{ tup ->
def new_state = workflowArgs.toState(tup.drop(2).take(3))
tup.take(3) + [new_state] + tup.drop(5)
}
if (workflowArgs.auto.publish == "state") {
def chPublishFiles = chPublishWithPreviousState
// input tuple format: [join_id, channel_id, id, new_state, ...]
// output tuple format: [join_id, channel_id, id, new_state]
| map{ tup ->
tup.take(4)
}
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(2).take(3)
}
| publishFilesByConfig(key: key_, config: meta.config)
}
// Join the state from the events that were emitted from different channels
def chJoined = chInitialOutputProcessed
| map {tuple ->
def join_id = tuple[0]
def channel_id = tuple[1]
def id = tuple[2]
def other = tuple.drop(3)
// Below, groupTuple is used to join the events. To make sure resuming a workflow
// keeps working, the output state must be deterministic. This means the state needs to be
// sorted, for which groupTuple has a 'sort' argument. This argument can be set to 'hash',
// but hashing the state when it is large can be problematic in terms of performance.
// Therefore, a custom comparator function is provided. We add the channel ID to the
// states so that we can use the channel ID to sort the items.
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
// A comparator that is provided to groupTuple's 'sort' argument is applied
// to all elements of the event tuple (except the 'id'). The comparator
// closure that is used below expects its input to be a List. So the join_id and
// channel_id must also be wrapped in a list.
[[join_id], [channel_id], id] + stateWithChannelID
}
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
| map {join_ids, _, id, statesWithChannelID ->
// Remove the channel IDs from the states
def states = statesWithChannelID.collect{it[1]}
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
def newJoinIdUnique = newJoinId[0]
// Merge the states from the different channels
def newState = states.inject([:]){ old_state, state_to_add ->
return old_state + state_to_add.collectEntries{k, v ->
if (!multipleArgs.contains(k)) {
// if the key is not a multiple argument, we expect only one value
if (old_state.containsKey(k)) {
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
}
[k, v]
} else {
// if the key is a multiple argument, append the different values into one list
def prevValue = old_state.getOrDefault(k, [])
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
[k, prevValueAsList + v]
}
}
}
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
// simplify output if need be
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
newState = newState.values()[0]
}
return [newJoinIdUnique, id, newState]
}
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
// input tuple format: [join_id, id, output, prev_state, ...]
// output tuple format: [join_id, id, new_state, ...]
| map{ tup ->
@@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
}
if (workflowArgs.auto.publish == "state") {
def chPublish = chNewState
def chPublishStates = chNewState
// input tuple format: [join_id, id, new_state, ...]
// output tuple format: [join_id, id, new_state]
| map{ tup ->
tup.take(3)
}
safeJoin(chPublish, chArgsWithDefaults, key_)
safeJoin(chPublishStates, chArgsWithDefaults, key_)
// input tuple format: [join_id, id, new_state, orig_state, ...]
// output tuple format: [id, new_state, orig_state]
| map { tup ->
tup.drop(1).take(3)
}
}
| publishStatesByConfig(key: key_, config: meta.config)
}
// remove join_id and meta
chReturn = chNewState
| map { tup ->
// input tuple format: [join_id, id, new_state, ...]
@@ -2967,6 +3198,10 @@ meta = [
}
],
"status" : "enabled",
"scope" : {
"image" : "public",
"target" : "public"
},
"requirements" : {
"commands" : [
"ps"
@@ -3063,8 +3298,8 @@ meta = [
"runner" : "nextflow",
"engine" : "native|native",
"output" : "target/nextflow/workflows/well_metadata",
"viash_version" : "0.9.0",
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
"viash_version" : "0.9.2",
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3079,7 +3314,7 @@ meta = [
}
]
},
"viash_version" : "0.9.0",
"viash_version" : "0.9.2",
"source" : "src",
"target" : "target",
"config_mods" : [