Build branch save-params with version save-params (05ac6a3)
Build pipeline: viash-hub.htrnaseq.save-params-h88fh
Source commit: 05ac6a3d24
Source message: updated filepathpath parsing
This commit is contained in:
54
CHANGELOG.md
54
CHANGELOG.md
@@ -1,3 +1,57 @@
|
||||
# htrnaseq v0.7.0
|
||||
|
||||
## Breaking changes
|
||||
|
||||
The `runner` and `htrnaseq` workflow now output FASTQ files corresponding to the barcodes per input ID (per sequencing run).
|
||||
Previously, when multiple input folders or multiple input FASTQ files were provided
|
||||
(for the `runner` and `htrnaseq` workflows respectively), the demultiplexed FASTQ files for these inputs were concatenated
|
||||
and provided as output. For the `htrnaseq` workflow, reads can still be combined by using a newly added `sampleID` argument.
|
||||
This means that two lists of FASTQ files can be provided for a single sample, and by assigning the same `sampleID`,
|
||||
these reads will be joined. For example, with other arguments are left out for brevity:
|
||||
|
||||
```yaml
|
||||
- id: sample1_run1
|
||||
input_r1: [sample_1_L001_1_R1.fastq, sample_1_L002_1_R1.fastq]
|
||||
input_r2: [sample_1_L001_1_R2.fastq, sample_1_L002_1_R2.fastq]
|
||||
sampleID: "sample_1"
|
||||
- id: sample1_run2
|
||||
input_r1: [sample_1_L001_1_R1.fastq, sample_1_L002_1_R1.fastq]
|
||||
input_r2: [sample_1_L001_1_R2.fastq, sample_1_L002_1_R2.fastq]
|
||||
sampleID: "sample_1"
|
||||
- id: sample_2
|
||||
input_r1: [sample_2_L001_1_R1.fastq, sample_2_L002_1_R1.fastq]
|
||||
input_r2: [sample_2_L001_1_R2.fastq, sample_2_L002_1_R2.fastq]
|
||||
```
|
||||
|
||||
For the runner, concatenation of data across samples is automatically inferred. Previously, multiple IDs (events) could be
|
||||
provided which were processed in parallel. This is no longer possible, as providing multiple will cause the matching
|
||||
samples for these runs to be concatenated.
|
||||
|
||||
|
||||
For example, the following old parameter yaml
|
||||
```yaml
|
||||
- id: run1
|
||||
input: ["run_folder_1/", run_folder_2/]
|
||||
```
|
||||
should now be provided as:
|
||||
```yaml
|
||||
- id: run1
|
||||
input: "run_folder_1/"
|
||||
- id: run2
|
||||
input: run_folder_2/
|
||||
```
|
||||
|
||||
## Minor changes
|
||||
|
||||
* Updated viash to `0.9.2` (PR #49)
|
||||
|
||||
# htrnaseq v0.6.0
|
||||
|
||||
## Breaking changes
|
||||
|
||||
* `runner`: a subdirectory `data_processed` is now added to the output structure, in between
|
||||
the experiment ID and the directory with the workflow date and version (PR #45).
|
||||
|
||||
# htrnaseq v0.5.5
|
||||
|
||||
## New functionality
|
||||
|
||||
@@ -7,7 +7,7 @@ links:
|
||||
issue_tracker: https://github.com/viash-hub/htrnaseq/issues
|
||||
repository: https://github.com/viash-hub/htrnaseq
|
||||
|
||||
viash_version: 0.9.0
|
||||
viash_version: 0.9.2
|
||||
|
||||
info:
|
||||
test_resources:
|
||||
|
||||
@@ -8,13 +8,8 @@ mkdir -p "$par_output" && echo "$par_output created"
|
||||
|
||||
echo
|
||||
echo "Copying files..."
|
||||
IFS=";" read -ra input_r1 <<<$par_input_r1
|
||||
IFS=";" read -ra input_r2 <<<$par_input_r2
|
||||
IFS=";" read -ra input <<<$par_input
|
||||
|
||||
for i in "${input_r1[@]}"; do
|
||||
for i in "${input[@]}"; do
|
||||
cp -rL "$i" "$par_output/"
|
||||
done
|
||||
|
||||
for i in "${input_r2[@]}"; do
|
||||
cp -rL "$i" "$par_output/"
|
||||
done
|
||||
done
|
||||
@@ -4,13 +4,8 @@ description: "Publish the fastq files per well"
|
||||
argument_groups:
|
||||
- name: Input arguments
|
||||
arguments:
|
||||
- name: --input_r1
|
||||
description: Directory to write R1 fastq data to
|
||||
type: file
|
||||
multiple: true
|
||||
required: true
|
||||
- name: --input_r2
|
||||
description: Directory to write R2 fastq data to
|
||||
- name: --input
|
||||
description: Directory to write fastq data to
|
||||
type: file
|
||||
multiple: true
|
||||
required: true
|
||||
|
||||
@@ -114,7 +114,8 @@ for barcode_index in "${!barcodes[@]}"; do
|
||||
fi
|
||||
done
|
||||
echo "Did not find FASTQ files files for well ${well_id}! "\
|
||||
"Make sure that the input files have the correct file name format."
|
||||
"Make sure that the input files have the correct file name format."\
|
||||
"Input files: ${input_r1[@]}"
|
||||
exit 1
|
||||
done
|
||||
|
||||
|
||||
43
src/utils/concatRuns/config.vsh.yaml
Normal file
43
src/utils/concatRuns/config.vsh.yaml
Normal file
@@ -0,0 +1,43 @@
|
||||
name: concatRuns
|
||||
namespace: utils
|
||||
description: |
|
||||
Concatenate well FASTQ files from different runs in order to increase sequencing depth.
|
||||
arguments:
|
||||
- name: "--input_r1"
|
||||
type: file
|
||||
required: true
|
||||
multiple: true
|
||||
- name: "--input_r2"
|
||||
type: file
|
||||
required: true
|
||||
multiple: true
|
||||
- name: "--sample_id"
|
||||
type: string
|
||||
required: true
|
||||
- name: "--output_r1"
|
||||
type: file
|
||||
multiple: true
|
||||
description: Path to read 1 fastq/fasta file
|
||||
direction: output
|
||||
- name: "--output_r2"
|
||||
type: file
|
||||
multiple: true
|
||||
description: Path to read 2 fastq/fasta file
|
||||
direction: output
|
||||
resources:
|
||||
- type: nextflow_script
|
||||
path: main.nf
|
||||
entrypoint: run_wf
|
||||
dependencies:
|
||||
- name: concat_text
|
||||
repository: cb
|
||||
repositories:
|
||||
- name: cb
|
||||
type: vsh
|
||||
repo: craftbox
|
||||
tag: v0.1.0
|
||||
runners:
|
||||
- type: nextflow
|
||||
|
||||
engines:
|
||||
- type: native
|
||||
128
src/utils/concatRuns/main.nf
Normal file
128
src/utils/concatRuns/main.nf
Normal file
@@ -0,0 +1,128 @@
|
||||
workflow run_wf {
|
||||
|
||||
take:
|
||||
input_ch
|
||||
|
||||
main:
|
||||
// Count the number of input events per sample
|
||||
// Results from events with the same sample ID need to be concatenated.
|
||||
event_counts_ch = input_ch
|
||||
| map {id, state ->
|
||||
def new_state = state + ["event_id": id]
|
||||
def new_event = [state.sample_id, new_state]
|
||||
return new_event
|
||||
}
|
||||
| groupTuple(by: 0)
|
||||
| flatMap { id, states ->
|
||||
def orig_event_ids = states.collect{it.event_id}
|
||||
def new_events = orig_event_ids.collect{ orig_event_id ->
|
||||
[orig_event_id, ["n_events": states.size()]]
|
||||
}
|
||||
return new_events
|
||||
}
|
||||
|
||||
|
||||
// The number of events per sample needs is passed number to `groupTuple()`
|
||||
// so that it can emit the sample as soon as it is ready. This makes sure
|
||||
// that the samples are processed asynchronously.
|
||||
output_ch = input_ch.join(event_counts_ch)
|
||||
| flatMap {id, state_demultiplex, state_event_counts ->
|
||||
assert state_demultiplex.input_r1.size() == state_demultiplex.input_r2.size(),
|
||||
"Expected output from well demultiplexing to contain equal amount or forward and reverse FASTQ files."
|
||||
def new_states = [state_demultiplex.input_r1, state_demultiplex.input_r2].transpose().collect{ fastq_files ->
|
||||
def (r1_file, r2_file) = fastq_files
|
||||
def regex = ~/^(\w+)_R[12]{1}_001\.fastq(\.gz)?$/
|
||||
def parsed_file_name = r1_file.name =~ regex
|
||||
def parsed_file_name_r2 = r2_file.name =~ regex
|
||||
def well_id = parsed_file_name[0][1]
|
||||
def well_id_r2 = parsed_file_name_r2[0][1]
|
||||
|
||||
assert (well_id.length() != 0) && (well_id == well_id_r2)
|
||||
def new_state = state_demultiplex + [
|
||||
"input_r1": r1_file,
|
||||
"input_r2": r2_file,
|
||||
"event_id": id,
|
||||
]
|
||||
def group_settings = groupKey("${state_demultiplex.sample_id}_${well_id}", state_event_counts.n_events)
|
||||
return [group_settings, new_state]
|
||||
|
||||
}
|
||||
return new_states
|
||||
}
|
||||
| groupTuple(by: 0, sort: "hash", remainder: true)
|
||||
| map {group_settings, sample_states ->
|
||||
def input_r1 = sample_states.collect{it.input_r1}.flatten()
|
||||
def input_r2 = sample_states.collect{it.input_r2}.flatten()
|
||||
def event_ids = sample_states.collect{it.event_id}
|
||||
def sample_id_list = sample_states.collect{it.sample_id}.unique()
|
||||
assert sample_id_list.size() == 1
|
||||
def sample_id = sample_id_list[0]
|
||||
assert input_r1.size() == input_r2.size()
|
||||
|
||||
def new_state = [
|
||||
"input_r1": input_r1,
|
||||
"input_r2": input_r2,
|
||||
"event_id": event_ids,
|
||||
"sample_id": sample_id,
|
||||
]
|
||||
return [group_settings.target, new_state]
|
||||
}
|
||||
| concat_text.run(
|
||||
directives: [label: ["lowmem", "lowcpu"]],
|
||||
key: "concat_samples_r1",
|
||||
runIf: {id, state -> state.input_r1.size() > 1},
|
||||
fromState: { id, state ->
|
||||
def output_file_name = state.input_r1[0].name
|
||||
[
|
||||
input: state.input_r1,
|
||||
gzip_output: false,
|
||||
output: output_file_name
|
||||
]
|
||||
},
|
||||
toState: { id, result, state ->
|
||||
def newState = state + [ input_r1: [ result.output ] ]
|
||||
return newState
|
||||
}
|
||||
)
|
||||
| concat_text.run(
|
||||
directives: [label: ["lowmem", "lowcpu"]],
|
||||
key: "concat_samples_r2",
|
||||
runIf: {id, state -> state.input_r2.size() > 1},
|
||||
fromState: { id, state ->
|
||||
def output_file_name = state.input_r2[0].name
|
||||
[
|
||||
input: state.input_r2,
|
||||
gzip_output: false,
|
||||
output: output_file_name
|
||||
]
|
||||
},
|
||||
toState: { id, result, state ->
|
||||
def newState = state + [ input_r2: [ result.output ] ]
|
||||
return newState
|
||||
}
|
||||
)
|
||||
| map {id, state ->
|
||||
def new_state = [state.sample_id, state]
|
||||
return new_state
|
||||
}
|
||||
| groupTuple(by: 0, sort: 'hash')
|
||||
| map {id, states ->
|
||||
def new_state = [
|
||||
"input_r1": states.collect{it.input_r1}.flatten(),
|
||||
"input_r2": states.collect{it.input_r2}.flatten(),
|
||||
"_meta": ["join_id": states[0].event_id[0]]
|
||||
]
|
||||
return [id, new_state]
|
||||
}
|
||||
| setState(
|
||||
[
|
||||
"output_r1": "input_r1",
|
||||
"output_r2": "input_r2",
|
||||
"_meta": "_meta"
|
||||
]
|
||||
)
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
}
|
||||
@@ -11,11 +11,10 @@ argument_groups:
|
||||
The id of the job
|
||||
type: string
|
||||
required: true
|
||||
- name: "--params"
|
||||
- name: "--params_yaml"
|
||||
description: |
|
||||
The state to save
|
||||
base64 encoded yaml file containing the state
|
||||
type: string
|
||||
multiple: true
|
||||
required: true
|
||||
|
||||
- name: Outputs
|
||||
|
||||
@@ -1,26 +1,12 @@
|
||||
import re
|
||||
import yaml
|
||||
import base64
|
||||
|
||||
## VIASH START
|
||||
par = {
|
||||
"id": "sample_one",
|
||||
"params": ['workflow_id=sample_one',
|
||||
'umi_length=10',
|
||||
'fastq_output_r1[0]=fastq/*_R1_001.fastq',
|
||||
'fastq_output_r2[0]=fastq/*_R2_001.fastq',
|
||||
'star_output[0]=star.$id/*',
|
||||
'nrReadsNrGenesPerChrom=nrReadsNrGenesPerChrom.$id.txt',
|
||||
'star_qc_metrics=starLogs.$id.txt',
|
||||
'eset=eset.$id.rds',
|
||||
'f_data=fData.$id.tsv',
|
||||
'p_data=pData.$id.tsv',
|
||||
'html_report=report.$id.html',
|
||||
'input_r1[0]=/home/jakubmajercik/Data_Intuitive/htrnaseq/v1/100k/SRR14730301/VH02001612_S9_R1_001.fastq',
|
||||
'input_r1[1]=/home/jakubmajercik/Data_Intuitive/htrnaseq/v1/100k/SRR14730301/VH02001612_S9_R2_001.fastq',
|
||||
'barcodesFasta=/home/jakubmajercik/Data_Intuitive/htrnaseq/v1/360-wells-with-ids.fasta',
|
||||
'genomeDir=/home/jakubmajercik/Data_Intuitive/htrnaseq/v1/genomeDir/gencode.v41.star.sparse',
|
||||
'annotation=/home/jakubmajercik/Data_Intuitive/htrnaseq/v1/genomeDir/gencode.v41.annotation.gtf.gz'],
|
||||
"output": 'params_out.yaml'
|
||||
"params_yaml": "cGFyYW1zX3lhbWw6IHt9Cg==",
|
||||
"output": "output.yaml"
|
||||
}
|
||||
## VIASH END
|
||||
|
||||
@@ -28,26 +14,41 @@ class Dumper(yaml.Dumper):
|
||||
def increase_indent(self, flow=False, indentless=False):
|
||||
return super(Dumper, self).increase_indent(flow, False)
|
||||
|
||||
params = {}
|
||||
|
||||
for param in par['params']:
|
||||
param = param.replace('$id', par['id'])
|
||||
key, value = param.split('=')
|
||||
def decode_params_yaml(encoded_yaml):
|
||||
# Step 1: Decode from Base64
|
||||
yaml_bytes = base64.b64decode(encoded_yaml)
|
||||
|
||||
array_match = re.match(r'(.+)\[(\d+)\]$', key)
|
||||
if array_match:
|
||||
base_key = array_match.group(1)
|
||||
index = int(array_match.group(2))
|
||||
|
||||
if base_key not in params:
|
||||
params[base_key] = []
|
||||
|
||||
while len(params[base_key]) <= index:
|
||||
params[base_key].append(None)
|
||||
|
||||
params[base_key][index] = value
|
||||
else:
|
||||
params[key] = value
|
||||
|
||||
# Step 2: Convert bytes to string
|
||||
yaml_string = yaml_bytes.decode('utf-8')
|
||||
|
||||
# Step 3: Extract pattern for Java path objects
|
||||
# Find pattern: !!sun.nio.fs.UnixPath /path/to/file
|
||||
pattern = r'!!sun\.nio\.fs\.UnixPath\s+([^\n]+)'
|
||||
|
||||
# Replace with the actual path string (captured group)
|
||||
yaml_string = re.sub(pattern, r'\1', yaml_string)
|
||||
|
||||
# Handle any remaining empty UnixPath objects
|
||||
yaml_string = yaml_string.replace('!!sun.nio.fs.UnixPath {}', '""')
|
||||
|
||||
# Step 4: Parse YAML
|
||||
yaml_data = yaml.safe_load(yaml_string)
|
||||
|
||||
return yaml_data
|
||||
|
||||
def replace_id(value, sample_id):
|
||||
if isinstance(value, str):
|
||||
return value.replace('$id', sample_id)
|
||||
elif isinstance(value, list):
|
||||
return [replace_id(item, sample_id) for item in value]
|
||||
return value
|
||||
|
||||
print(par['params_yaml'])
|
||||
|
||||
params = decode_params_yaml(par['params_yaml'])
|
||||
for key, value in params.items():
|
||||
params[key] = replace_id(value, par["id"])
|
||||
|
||||
with open(par["output"], 'w') as f:
|
||||
yaml.dump(params, f, default_flow_style=False, Dumper=Dumper)
|
||||
yaml.dump(params, f, default_flow_style=False, Dumper=Dumper)
|
||||
|
||||
|
||||
0
src/utils/save_params/test.py
Normal file
0
src/utils/save_params/test.py
Normal file
@@ -8,14 +8,14 @@ argument_groups:
|
||||
arguments:
|
||||
- name: --input_r1
|
||||
description: |
|
||||
Forward reads in FASTQ format. Multiple files can be provided which will
|
||||
Forward reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will
|
||||
be demultiplexed separately before joining the results for each individual well.
|
||||
type: file
|
||||
required: true
|
||||
multiple: true
|
||||
- name: --input_r2
|
||||
description: |
|
||||
Reverse reads in FASTQ format. Multiple files can be provided which will
|
||||
Reverse reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will
|
||||
be demultiplexed separately before joining the results for each individual well.
|
||||
type: file
|
||||
required: true
|
||||
@@ -35,22 +35,22 @@ argument_groups:
|
||||
- name: --annotation
|
||||
type: file
|
||||
required: true
|
||||
- name: --sample_id
|
||||
type: string
|
||||
required: false
|
||||
description: |
|
||||
Sample ID for the provided input files. If not provided, the value of --id
|
||||
will be used. Input files will allways be demultiplexed separately,
|
||||
but the FASTQs for wells with matching sample IDs will be concatenated before mapping.
|
||||
- name: Output arguments
|
||||
arguments:
|
||||
- name: --fastq_output_r1
|
||||
description: List of demultiplexed fastq files
|
||||
- name: "--fastq_output"
|
||||
description: "Directory containing output fastq files"
|
||||
type: file
|
||||
direction: output
|
||||
multiple: true
|
||||
required: true
|
||||
default: "fastq/*_R1_001.fastq"
|
||||
- name: --fastq_output_r2
|
||||
description: List of demultiplexed fastq files
|
||||
type: file
|
||||
default: "fastq/*"
|
||||
direction: output
|
||||
multiple: true
|
||||
required: true
|
||||
default: "fastq/*_R2_001.fastq"
|
||||
- name: --star_output
|
||||
description: Output from mapping with STAR
|
||||
type: file
|
||||
@@ -120,6 +120,8 @@ dependencies:
|
||||
repository: local
|
||||
- name: report/create_report
|
||||
repository: local
|
||||
- name: utils/concatRuns
|
||||
repository: local
|
||||
- name: utils/save_params
|
||||
repository: local
|
||||
repositories:
|
||||
|
||||
@@ -1,63 +1,146 @@
|
||||
workflow run_wf {
|
||||
take:
|
||||
input_ch
|
||||
raw_ch
|
||||
|
||||
main:
|
||||
input_ch = raw_ch
|
||||
// Use the event ID as the default for the sample ID
|
||||
| map {id, state ->
|
||||
def sample_id = state.sample_id ?: id
|
||||
def newState = state + ["sample_id": sample_id, "run_id": id]
|
||||
return [id, newState]
|
||||
}
|
||||
|
||||
// The featureData only has one requirement: the genome annotation.
|
||||
// It can be generated straight away.
|
||||
// It can be generated straight away. Most of the time, there is one shared
|
||||
// annotation for all of the inputs and the fData should only be calculated once.
|
||||
// The state is manpulated in such a way that there is one event created per unique
|
||||
// input annotation file. In turn, the featureData file can joined into the original input
|
||||
// channel which allows it to be shared across events if required.
|
||||
f_data_ch = input_ch
|
||||
|
||||
| save_params.run(
|
||||
fromState: {id, state ->
|
||||
// Convert state to list of key=value parameters
|
||||
def params_list = []
|
||||
|
||||
// Add each parameter as key=value
|
||||
state.each { key, value ->
|
||||
if (value != null) {
|
||||
// Handle different types of values
|
||||
if (value instanceof Collection) {
|
||||
// For collections, add multiple entries with array notation
|
||||
value.eachWithIndex { item, index ->
|
||||
params_list.add("${key}[${index}]=${item}")
|
||||
}
|
||||
} else {
|
||||
// For simple values, just add key=value
|
||||
params_list.add("${key}=${value}")
|
||||
}
|
||||
}
|
||||
fromState: {id, state ->
|
||||
// Define the function before using it
|
||||
def convertPaths
|
||||
convertPaths = { value ->
|
||||
if (value instanceof java.nio.file.Path)
|
||||
return value.toUriString()
|
||||
else if (value instanceof Collection)
|
||||
return value.collect { convertPaths(it) }
|
||||
else
|
||||
return value
|
||||
}
|
||||
|
||||
// Apply conversion to all state values
|
||||
def convertedState = state.collectEntries { k, v -> [(k): convertPaths(v)] }
|
||||
|
||||
def yaml = new org.yaml.snakeyaml.Yaml()
|
||||
def yamlString = yaml.dump(convertedState)
|
||||
def encodedYaml = yamlString.bytes.encodeBase64().toString()
|
||||
|
||||
return [
|
||||
"id": id,
|
||||
"params": params_list,
|
||||
"params_yaml": encodedYaml,
|
||||
"output": "${id}_parameters.yaml"
|
||||
]
|
||||
},
|
||||
toState: ["parameters": "output"]
|
||||
)
|
||||
|
||||
| toSortedList()
|
||||
| flatMap {ids_and_states ->
|
||||
def annotation_files = ids_and_states.inject([:]){ old_state, id_and_state ->
|
||||
def (id, state) = id_and_state
|
||||
def annotation_file = state.annotation
|
||||
def new_state = old_state + [(annotation_file): (old_state.getOrDefault(annotation_file, []) + [id])]
|
||||
return new_state
|
||||
}
|
||||
def file_names = annotation_files.keySet().collect{it.name}
|
||||
assert (file_names.toSet().size() == file_names.size()):
|
||||
"Please make sure that the annotation files have unique file names."
|
||||
def new_states = annotation_files.collect{annotation_file, value ->
|
||||
def new_state = [annotation_file.name , ["annotation": annotation_file, "event_ids": value]]
|
||||
return new_state
|
||||
}
|
||||
return new_states
|
||||
}
|
||||
| create_fdata.run(
|
||||
directives: [label: ["lowmem", "lowcpu"]],
|
||||
fromState: [
|
||||
"gtf": "annotation",
|
||||
"output": "f_data"
|
||||
],
|
||||
toState: {id, result, state -> ["f_data": result.output]}
|
||||
toState: ["f_data": "output"]
|
||||
)
|
||||
| flatMap {_, state ->
|
||||
def new_states = state.event_ids.collect{event_id ->
|
||||
[event_id, ["f_data": state.f_data]]
|
||||
}
|
||||
return new_states
|
||||
}
|
||||
|
||||
// Perform mapping of each well.
|
||||
mapping_ch = input_ch
|
||||
demultiplex_ch = input_ch
|
||||
| well_demultiplex.run(
|
||||
fromState: [
|
||||
"input_r1": "input_r1",
|
||||
"input_r2": "input_r2",
|
||||
"barcodesFasta": "barcodesFasta",
|
||||
],
|
||||
toState: [
|
||||
"input_r1": "output_r1",
|
||||
"input_r2": "output_r2",
|
||||
]
|
||||
toState: {id, result, state ->
|
||||
def all_fastq = result.output_r1 + result.output_r2
|
||||
def output_dir = all_fastq.collect{it.parent}.unique()
|
||||
assert output_dir.size() == 1: "Expected output from well demultiplexing to reside into one directory."
|
||||
def new_state = state + [
|
||||
"input_r1": result.output_r1,
|
||||
"input_r2": result.output_r2,
|
||||
"fastq_output_directory": output_dir[0],
|
||||
]
|
||||
return new_state
|
||||
}
|
||||
)
|
||||
|
||||
fastq_output_directory_ch = demultiplex_ch
|
||||
| map {id, state ->
|
||||
def new_event = [state.sample_id, state]
|
||||
return new_event
|
||||
}
|
||||
| groupTuple(by: 0, sort: "hash")
|
||||
| map {id, states ->
|
||||
def fastq_output_dirs = states.collect{it.fastq_output_directory}
|
||||
def new_state = ["fastq_output_directory": fastq_output_dirs]
|
||||
def new_event = [id, new_state]
|
||||
return [id, new_state]
|
||||
}
|
||||
|
||||
|
||||
concat_samples_ch = demultiplex_ch.join(f_data_ch)
|
||||
| map {id, demutliplex_state, f_data_state ->
|
||||
def newState = demutliplex_state + ["f_data": f_data_state["f_data"]]
|
||||
[id, newState]
|
||||
}
|
||||
| concatRuns.run(
|
||||
fromState: [
|
||||
"input_r1": "input_r1",
|
||||
"input_r2": "input_r2",
|
||||
"sample_id": "sample_id",
|
||||
],
|
||||
toState: {id, result, state ->
|
||||
def state_overwite = [
|
||||
"input_r1": result.output_r1,
|
||||
"input_r2": result.output_r2,
|
||||
"_meta": ["join_id": state.run_id]
|
||||
]
|
||||
return state + state_overwite
|
||||
}
|
||||
)
|
||||
|
||||
pool_ch = concat_samples_ch.join(fastq_output_directory_ch)
|
||||
| map {id, demux_state, fastq_output_directory_state ->
|
||||
def new_state = demux_state + fastq_output_directory_state
|
||||
return [id, new_state]
|
||||
}
|
||||
| parallel_map.run(
|
||||
directives: ["label": ["highmem", "lowcpu"]],
|
||||
fromState: {id, state ->
|
||||
@@ -74,9 +157,6 @@ workflow run_wf {
|
||||
"star_output": "output",
|
||||
]
|
||||
)
|
||||
|
||||
// From the mapped wells, create statistics based on the BAM files.
|
||||
pool_ch = mapping_ch
|
||||
// Split the events from 1 event per pool into events per well
|
||||
// and add extra metadata about the wells to the state.
|
||||
| well_metadata.run(
|
||||
@@ -130,10 +210,10 @@ workflow run_wf {
|
||||
// Gather the keys from all states. for some state items,
|
||||
// we need gather all the different items from across the states
|
||||
def barcodes = states.collect{it.barcode}
|
||||
assert barcodes.clone().unique().size() == barcodes.size(), \
|
||||
assert barcodes.clone().unique().size() == barcodes.size(): \
|
||||
"Error when gathering information for pool ${id}, barcodes are not unique!"
|
||||
def well_ids = states.collect{it.well_id}
|
||||
assert well_ids.clone().unique().size() == well_ids.size(), \
|
||||
assert well_ids.clone().unique().size() == well_ids.size(): \
|
||||
"Error when gathering information for pool ${id}, well IDs are not unique!"
|
||||
def custom_state = [
|
||||
"input_r1": states.collect{it.input_r1},
|
||||
@@ -154,7 +234,7 @@ workflow run_wf {
|
||||
// All other state should have a unique value
|
||||
def old_state_items = other_state_keys.inject([:]){ old_state, argument_name ->
|
||||
argument_values = states.collect{it.get(argument_name)}.unique()
|
||||
assert argument_values.size() == 1, "Arguments should be the same across modalities. Please report this \
|
||||
assert argument_values.size() == 1: "Arguments should be the same across modalities. Please report this \
|
||||
as a bug. Argument name: $argument_name, \
|
||||
argument value: $argument_values"
|
||||
def argument_value
|
||||
@@ -197,7 +277,7 @@ workflow run_wf {
|
||||
]
|
||||
)
|
||||
|
||||
p_data_ch = star_logs_ch.join(pool_statistics_ch, remainder: true)
|
||||
eset_ch = star_logs_ch.join(pool_statistics_ch, remainder: true)
|
||||
| map {id, star_logs_state, pool_statistics_state ->
|
||||
def newState = star_logs_state + ["nrReadsNrGenesPerChromPool": pool_statistics_state.nrReadsNrGenesPerChromPool]
|
||||
return [id, newState]
|
||||
@@ -211,12 +291,6 @@ workflow run_wf {
|
||||
],
|
||||
toState: ["p_data": "output"],
|
||||
)
|
||||
|
||||
eset_ch = p_data_ch.join(f_data_ch, remainder: true)
|
||||
| map {id, p_data_state, f_data_state ->
|
||||
def newState = p_data_state + ["f_data": f_data_state["f_data"]]
|
||||
[id, newState]
|
||||
}
|
||||
| create_eset.run(
|
||||
directives: [label: ["lowmem", "lowcpu"]],
|
||||
fromState: [
|
||||
@@ -258,13 +332,14 @@ workflow run_wf {
|
||||
|
||||
output_ch = eset_ch.join(report_channel)
|
||||
| map {id, state_eset, state_report ->
|
||||
def new_state = state_eset + ["html_report": state_report.html_report]
|
||||
def new_state = state_eset + [
|
||||
"html_report": state_report.html_report,
|
||||
]
|
||||
[id, new_state]
|
||||
}
|
||||
| setState([
|
||||
"star_output": "star_output",
|
||||
"fastq_output_r1": "input_r1",
|
||||
"fastq_output_r2": "input_r2",
|
||||
"star_output": "star_output",
|
||||
"fastq_output": "fastq_output_directory",
|
||||
"star_output": "star_output",
|
||||
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChromPool",
|
||||
"star_qc_metrics": "star_qc_metrics",
|
||||
@@ -272,7 +347,8 @@ workflow run_wf {
|
||||
"f_data": "f_data",
|
||||
"p_data": "p_data",
|
||||
"html_report": "html_report",
|
||||
"parameters": "parameters"
|
||||
"parameters": "parameters",
|
||||
"_meta": "_meta",
|
||||
])
|
||||
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ argument_groups:
|
||||
arguments:
|
||||
- name: --input
|
||||
description: Base directory of the form `s3:/<bucket>/Sequencing/<Sequencer>/<RunID>/<demultiplex_dir>`
|
||||
multiple: true
|
||||
type: file
|
||||
required: true
|
||||
- name: --barcodesFasta
|
||||
|
||||
@@ -8,19 +8,13 @@ workflow run_wf {
|
||||
input_ch
|
||||
|
||||
main:
|
||||
output_ch = input_ch
|
||||
// Multiple runs can be provided, and the reads for these runs will
|
||||
// be concatenated. Here, we gather the FASTQ files from each input directory first.
|
||||
| flatMap {id, state ->
|
||||
// Create an input event per input directory
|
||||
def new_state = state.input.withIndex().collect{input_dir, id_index ->
|
||||
def state_item = state + ["input": input_dir, "index": id_index, "run_id": id]
|
||||
return ["${id}_${id_index}".toString(), state_item]
|
||||
}
|
||||
return new_state
|
||||
}
|
||||
htrnaseq_ch = input_ch
|
||||
// List the FASTQ files per input directory
|
||||
// Be careful: an event per lane is created!
|
||||
| map {id, state ->
|
||||
def new_state = state + ["run_id": id]
|
||||
return [id, new_state]
|
||||
}
|
||||
| listInputDir.run(
|
||||
fromState: [
|
||||
"input": "input",
|
||||
@@ -38,13 +32,11 @@ workflow run_wf {
|
||||
// there might be multiple FASTQs for a single sample that correspond to the
|
||||
// lanes. So the fastq files must be gathered across lanes and input folders
|
||||
// in order to create an input lists for R1 and R2.
|
||||
| map {id, state -> [state.sample_id, state]}
|
||||
| groupTuple(by: 0, sort: { state1, state2 ->
|
||||
if (state1.index == state2.index) {
|
||||
return state1.lane <=> state2.lane
|
||||
}
|
||||
return state1.index <=> state2.index
|
||||
})
|
||||
// The ID of the event here is important! It determines the name of the output
|
||||
// folders for the FASTQ files and these folders are published as-is later.
|
||||
// The folder where the FASTQ files are stored in should be named after the run ID.
|
||||
| map {id, state -> ["${state.sample_id}/${state.run_id}".toString(), state]}
|
||||
| groupTuple(by: 0, sort: "hash")
|
||||
| map {id, states ->
|
||||
def new_r1 = states.collect{it.r1_output}
|
||||
def new_r2 = states.collect{it.r2_output}
|
||||
@@ -53,7 +45,7 @@ workflow run_wf {
|
||||
// TODO: this can be asserted.
|
||||
def new_state = states[0] + [
|
||||
"r1": new_r1,
|
||||
"r2": new_r2
|
||||
"r2": new_r2,
|
||||
]
|
||||
return [id, new_state]
|
||||
}
|
||||
@@ -62,8 +54,7 @@ workflow run_wf {
|
||||
f_data: 'fData/$id.txt',
|
||||
p_data: 'pData/$id.txt',
|
||||
star_output: 'star_output/$id/*',
|
||||
fastq_output_r1: 'fastq/*_R1_001.fastq',
|
||||
fastq_output_r2: 'fastq/*_R1_001.fastq',
|
||||
fastq_output: 'fastq/*',
|
||||
eset: 'esets/$id.rds',
|
||||
nrReadsNrGenesPerChrom: 'nrReadsNrGenesPerChrom/$id.txt',
|
||||
star_qc_metrics: 'starLogs/$id.txt',
|
||||
@@ -76,32 +67,32 @@ workflow run_wf {
|
||||
genomeDir: "genomeDir",
|
||||
annotation: "annotation",
|
||||
umi_length: "umi_length",
|
||||
sample_id: "sample_id",
|
||||
],
|
||||
toState: { id, result, state -> state + result }
|
||||
)
|
||||
|
||||
// The HT-RNAseq workflow outputs multiple events, one per 'pool' (usually a plate)
|
||||
// but for publishing the results, this is not handy because we want to use the $id
|
||||
// variable as a pointer to the target data.
|
||||
//
|
||||
// So, we should combine everything together
|
||||
//
|
||||
// project_id / experiment_id / date_workflow
|
||||
|
||||
// project_id / experiment_id / "data_processed" / date_workflow
|
||||
grouped_ch = htrnaseq_ch
|
||||
| toSortedList
|
||||
|
||||
| map{ vs ->
|
||||
def all_fastqs
|
||||
[
|
||||
vs[0][1].run_id, // The original ID
|
||||
[
|
||||
star_output: reduce_paths(vs.collect{ it[1].star_output }.flatten()),
|
||||
fastq_output_r1: reduce_paths(vs.collect{ it[1].fastq_output_r1 }.flatten(), 1),
|
||||
fastq_output_r2: reduce_paths(vs.collect{ it[1].fastq_output_r2 }.flatten(), 1),
|
||||
nrReadsNrGenesPerChrom: reduce_paths(vs.collect{ it[1].nrReadsNrGenesPerChrom }),
|
||||
star_qc_metrics: reduce_paths(vs.collect{ it[1].star_qc_metrics }),
|
||||
eset: reduce_paths(vs.collect{ it[1].eset }),
|
||||
f_data: reduce_paths(vs.collect{ it[1].f_data }),
|
||||
p_data: reduce_paths(vs.collect{ it[1].p_data }),
|
||||
fastq_output: vs.collect{ it[1].fastq_output }.flatten().unique(),
|
||||
html_report: vs.collect{ it[1].html_report }[0], // The report is for all pools
|
||||
plain_output: vs.collect{ it[1].plain_output }[0],
|
||||
project_id: vs.collect{ it[1].project_id }[0],
|
||||
@@ -110,12 +101,13 @@ workflow run_wf {
|
||||
]
|
||||
}
|
||||
|
||||
results_publish_ch = grouped_ch
|
||||
| publish_results.run(
|
||||
fromState: { id, state ->
|
||||
def project = (state.plain_output) ? id : "${state.project_id}"
|
||||
def experiment = (state.plain_output) ? id : "${state.experiment_id}"
|
||||
def id0 = "${project}/${experiment}"
|
||||
def id1 = (state.plain_output) ? id : "${id0}/${date}"
|
||||
def id1 = (state.plain_output) ? id : "${id0}/data_processed/${date}"
|
||||
def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}"
|
||||
|
||||
if (id == id2) {
|
||||
@@ -146,14 +138,24 @@ workflow run_wf {
|
||||
]
|
||||
)
|
||||
|
||||
fastq_publish_ch = grouped_ch
|
||||
| flatMap{id, state ->
|
||||
def new_states = state.fastq_output.collect{fastq_dir ->
|
||||
def new_id = fastq_dir.name // The folder name corresponds to the run
|
||||
def fastq_files = fastq_dir.listFiles()
|
||||
def new_state = [
|
||||
"fastq_output": fastq_files
|
||||
]
|
||||
return [new_id, new_state]
|
||||
}
|
||||
return new_states
|
||||
}
|
||||
| publish_fastqs.run(
|
||||
fromState: { id, state ->
|
||||
def id0 = "${id}"
|
||||
def id1 = (state.plain_output) ? id : "${id0}/${date}"
|
||||
def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}"
|
||||
|
||||
println(state.plain_output)
|
||||
|
||||
if (id == id2) {
|
||||
println("Publising fastqs to ${params.fastq_publish_dir}")
|
||||
} else {
|
||||
@@ -161,8 +163,7 @@ workflow run_wf {
|
||||
}
|
||||
|
||||
[
|
||||
input_r1: state.fastq_output_r1,
|
||||
input_r2: state.fastq_output_r2,
|
||||
input: state.fastq_output,
|
||||
output: "${id2}",
|
||||
]
|
||||
},
|
||||
@@ -177,7 +178,7 @@ workflow run_wf {
|
||||
)
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
grouped_ch
|
||||
| map{ id, state -> [ id, [ _meta: [ join_id: state.run_id ] ] ] }
|
||||
}
|
||||
|
||||
|
||||
@@ -60,6 +60,8 @@ workflow run_wf {
|
||||
output: new_output,
|
||||
error_rate: 0.10,
|
||||
demultiplex_mode: "single",
|
||||
output_r1: state.output_r1,
|
||||
output_r2: state.output_r2,
|
||||
]
|
||||
},
|
||||
toState: { id, result, state ->
|
||||
|
||||
@@ -94,6 +94,9 @@ test_resources:
|
||||
path: "mapping_dir"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -182,11 +185,13 @@ engines:
|
||||
bioc:
|
||||
- "Seurat"
|
||||
bioc_force_install: false
|
||||
warnings_as_errors: true
|
||||
test_setup:
|
||||
- type: "r"
|
||||
cran:
|
||||
- "testthat"
|
||||
bioc_force_install: false
|
||||
warnings_as_errors: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
@@ -197,8 +202,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/eset/create_eset"
|
||||
executable: "target/executable/eset/create_eset/create_eset"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -208,7 +213,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# create_eset save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -173,27 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "create_eset save-params"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --pDataFile"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo ""
|
||||
echo " --fDataFile"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo ""
|
||||
echo " --mappingDir"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --poolName"
|
||||
echo " type: string, required parameter"
|
||||
echo ""
|
||||
echo " --output"
|
||||
echo " type: file, required parameter, output, file must exist"
|
||||
echo " default: eset.\$id.rds"
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -470,16 +449,16 @@ function ViashDockerfile {
|
||||
cat << 'VIASHDOCKER'
|
||||
FROM rocker/r2u:24.04
|
||||
ENTRYPOINT []
|
||||
RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
|
||||
Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
|
||||
Rscript -e 'if (!requireNamespace("Seurat", quietly = TRUE)) BiocManager::install("Seurat")' && \
|
||||
Rscript -e 'remotes::install_cran(c("data.table", "nlcv"), repos = "https://cran.rstudio.com")'
|
||||
RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
|
||||
Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
|
||||
Rscript -e 'options(warn = 2); if (!requireNamespace("Seurat", quietly = TRUE)) BiocManager::install("Seurat")' && \
|
||||
Rscript -e 'options(warn = 2); remotes::install_cran(c("data.table", "nlcv"), repos = "https://cran.rstudio.com")'
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component eset create_eset"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:57Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:02Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -594,6 +573,53 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "create_eset save-params"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --pDataFile"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo ""
|
||||
echo " --fDataFile"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo ""
|
||||
echo " --mappingDir"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --poolName"
|
||||
echo " type: string, required parameter"
|
||||
echo ""
|
||||
echo " --output"
|
||||
echo " type: file, required parameter, output, file must exist"
|
||||
echo " default: eset.\$id.rds"
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -70,6 +70,9 @@ test_resources:
|
||||
path: "test_annotation.gtf"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -176,8 +179,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/eset/create_fdata"
|
||||
executable: "target/executable/eset/create_fdata/create_fdata"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -187,7 +190,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# create_fdata save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -173,26 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "create_fdata save-params"
|
||||
echo ""
|
||||
echo "Create a fdata file"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --gtf"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Genome annotation file in GTF format."
|
||||
echo ""
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: fData.\$id.txt"
|
||||
echo " Tab-delimited text file containing information about the 'gene' or"
|
||||
echo " 'transcript'"
|
||||
echo " entries from the input GTF file. The 'transcript' entries are used in"
|
||||
echo " case the source"
|
||||
echo " of the GTF was 'refGene' or 'ncbiRefSeq'."
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -478,9 +458,9 @@ RUN pip install --upgrade pip && \
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component eset create_fdata"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:57Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:02Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -595,6 +575,52 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "create_fdata save-params"
|
||||
echo ""
|
||||
echo "Create a fdata file"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --gtf"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Genome annotation file in GTF format."
|
||||
echo ""
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: fData.\$id.txt"
|
||||
echo " Tab-delimited text file containing information about the 'gene' or"
|
||||
echo " 'transcript'"
|
||||
echo " entries from the input GTF file. The 'transcript' entries are used in"
|
||||
echo " case the source"
|
||||
echo " of the GTF was 'refGene' or 'ncbiRefSeq'."
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -84,6 +84,9 @@ test_resources:
|
||||
path: "starLogs.txt"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -190,8 +193,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/eset/create_pdata"
|
||||
executable: "target/executable/eset/create_pdata/create_pdata"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -201,7 +204,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# create_pdata save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -173,36 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "create_pdata save-params"
|
||||
echo ""
|
||||
echo "Create a pdata file by combining the mapping statistics"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --star_stats_file"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Tab-delimited text file containing statistics (per column) that were"
|
||||
echo " generated"
|
||||
echo " from the STAR log files (Log.final.out, Summary.csv,"
|
||||
echo " ReadsPerGene.out.tab)."
|
||||
echo " Each entry (row) in the file describes the values for one well"
|
||||
echo " (barcode)."
|
||||
echo ""
|
||||
echo " --nrReadsNrGenesPerChromPool"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Pivot table in tsv format of the combined nrReadsNrGenesPerChrom files"
|
||||
echo " from STAR."
|
||||
echo " Describes per chromosome (as columns) the number of reads, as well as"
|
||||
echo " the total number"
|
||||
echo " of reads per cell barcode and the percentage of nuclear, ERCC and"
|
||||
echo " mitochondrial"
|
||||
echo " reads."
|
||||
echo ""
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: pData.\$id.txt"
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -488,9 +458,9 @@ RUN pip install --upgrade pip && \
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component eset create_pdata"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:57Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:03Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -605,6 +575,62 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "create_pdata save-params"
|
||||
echo ""
|
||||
echo "Create a pdata file by combining the mapping statistics"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --star_stats_file"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Tab-delimited text file containing statistics (per column) that were"
|
||||
echo " generated"
|
||||
echo " from the STAR log files (Log.final.out, Summary.csv,"
|
||||
echo " ReadsPerGene.out.tab)."
|
||||
echo " Each entry (row) in the file describes the values for one well"
|
||||
echo " (barcode)."
|
||||
echo ""
|
||||
echo " --nrReadsNrGenesPerChromPool"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Pivot table in tsv format of the combined nrReadsNrGenesPerChrom files"
|
||||
echo " from STAR."
|
||||
echo " Describes per chromosome (as columns) the number of reads, as well as"
|
||||
echo " the total number"
|
||||
echo " of reads per cell barcode and the percentage of nuclear, ERCC and"
|
||||
echo " mitochondrial"
|
||||
echo " reads."
|
||||
echo ""
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: pData.\$id.txt"
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -50,6 +50,9 @@ resources:
|
||||
description: "This component test the ExpressionSet object as output by the main pipeline."
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -137,6 +140,7 @@ engines:
|
||||
bioc:
|
||||
- "Biobase"
|
||||
bioc_force_install: false
|
||||
warnings_as_errors: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
@@ -147,8 +151,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/integration_test_components/htrnaseq/check_eset"
|
||||
executable: "target/executable/integration_test_components/htrnaseq/check_eset/check_eset"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -158,7 +162,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# check_eset save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -172,21 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "check_eset save-params"
|
||||
echo ""
|
||||
echo "This component test the ExpressionSet object as output by the main pipeline."
|
||||
echo ""
|
||||
echo "Inputs:"
|
||||
echo " --eset"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " example: eset.rds"
|
||||
echo " Path to an ExpressionSet object."
|
||||
echo ""
|
||||
echo " --star_output"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -463,16 +448,16 @@ function ViashDockerfile {
|
||||
cat << 'VIASHDOCKER'
|
||||
FROM bioconductor/bioconductor_docker:3.19
|
||||
ENTRYPOINT []
|
||||
RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
|
||||
Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
|
||||
Rscript -e 'if (!requireNamespace("Biobase", quietly = TRUE)) BiocManager::install("Biobase")' && \
|
||||
Rscript -e 'remotes::install_cran(c("bit64"), repos = "https://cran.rstudio.com")'
|
||||
RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
|
||||
Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
|
||||
Rscript -e 'options(warn = 2); if (!requireNamespace("Biobase", quietly = TRUE)) BiocManager::install("Biobase")' && \
|
||||
Rscript -e 'options(warn = 2); remotes::install_cran(c("bit64"), repos = "https://cran.rstudio.com")'
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component integration_test_components/htrnaseq check_eset"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:56Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:01Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -587,6 +572,47 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "check_eset save-params"
|
||||
echo ""
|
||||
echo "This component test the ExpressionSet object as output by the main pipeline."
|
||||
echo ""
|
||||
echo "Inputs:"
|
||||
echo " --eset"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " example: eset.rds"
|
||||
echo " Path to an ExpressionSet object."
|
||||
echo ""
|
||||
echo " --star_output"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -57,6 +57,9 @@ resources:
|
||||
description: "This component test the cutadapt output from the well_demultiplex subworkflow."
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -157,8 +160,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output"
|
||||
executable: "target/executable/integration_test_components/well_demultiplexing/check_cutadapt_output/check_cutadapt_output"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -168,7 +171,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# check_cutadapt_output save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -172,25 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "check_cutadapt_output save-params"
|
||||
echo ""
|
||||
echo "This component test the cutadapt output from the well_demultiplex subworkflow."
|
||||
echo ""
|
||||
echo "Inputs:"
|
||||
echo " --fastq_r1"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Path to the forward reads to test."
|
||||
echo ""
|
||||
echo " --fastq_r2"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Path to the reverse reads to test."
|
||||
echo ""
|
||||
echo " --ids"
|
||||
echo " type: string, required parameter, multiple values allowed"
|
||||
echo " Well IDs for the corresponding fastq input"
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -476,9 +457,9 @@ RUN pip install --upgrade pip && \
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component integration_test_components/well_demultiplexing check_cutadapt_output"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:58Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:03Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -593,6 +574,51 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "check_cutadapt_output save-params"
|
||||
echo ""
|
||||
echo "This component test the cutadapt output from the well_demultiplex subworkflow."
|
||||
echo ""
|
||||
echo "Inputs:"
|
||||
echo " --fastq_r1"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Path to the forward reads to test."
|
||||
echo ""
|
||||
echo " --fastq_r2"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Path to the reverse reads to test."
|
||||
echo ""
|
||||
echo " --ids"
|
||||
echo " type: string, required parameter, multiple values allowed"
|
||||
echo " Well IDs for the corresponding fastq input"
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -5,18 +5,8 @@ argument_groups:
|
||||
- name: "Input arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input_r1"
|
||||
description: "Directory to write R1 fastq data to"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--input_r2"
|
||||
description: "Directory to write R2 fastq data to"
|
||||
name: "--input"
|
||||
description: "Directory to write fastq data to"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
@@ -47,6 +37,9 @@ resources:
|
||||
description: "Publish the fastq files per well"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -142,8 +135,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/io/publish_fastqs"
|
||||
executable: "target/executable/io/publish_fastqs/publish_fastqs"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -153,7 +146,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# publish_fastqs save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -169,26 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "publish_fastqs save-params"
|
||||
echo ""
|
||||
echo "Publish the fastq files per well"
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --input_r1"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Directory to write R1 fastq data to"
|
||||
echo ""
|
||||
echo " --input_r2"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Directory to write R2 fastq data to"
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: \$id"
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -470,9 +450,9 @@ RUN apt-get update && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
LABEL org.opencontainers.image.description="Companion container for running component io publish_fastqs"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:56Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:01Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -587,6 +567,48 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "publish_fastqs save-params"
|
||||
echo ""
|
||||
echo "Publish the fastq files per well"
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --input"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Directory to write fastq data to"
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: \$id"
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
@@ -612,37 +634,20 @@ while [[ $# -gt 0 ]]; do
|
||||
echo "publish_fastqs save-params"
|
||||
exit
|
||||
;;
|
||||
--input_r1)
|
||||
if [ -z "$VIASH_PAR_INPUT_R1" ]; then
|
||||
VIASH_PAR_INPUT_R1="$2"
|
||||
--input)
|
||||
if [ -z "$VIASH_PAR_INPUT" ]; then
|
||||
VIASH_PAR_INPUT="$2"
|
||||
else
|
||||
VIASH_PAR_INPUT_R1="$VIASH_PAR_INPUT_R1;""$2"
|
||||
VIASH_PAR_INPUT="$VIASH_PAR_INPUT;""$2"
|
||||
fi
|
||||
[ $# -lt 2 ] && ViashError Not enough arguments passed to --input_r1. Use "--help" to get more information on the parameters. && exit 1
|
||||
[ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1
|
||||
shift 2
|
||||
;;
|
||||
--input_r1=*)
|
||||
if [ -z "$VIASH_PAR_INPUT_R1" ]; then
|
||||
VIASH_PAR_INPUT_R1=$(ViashRemoveFlags "$1")
|
||||
--input=*)
|
||||
if [ -z "$VIASH_PAR_INPUT" ]; then
|
||||
VIASH_PAR_INPUT=$(ViashRemoveFlags "$1")
|
||||
else
|
||||
VIASH_PAR_INPUT_R1="$VIASH_PAR_INPUT_R1;"$(ViashRemoveFlags "$1")
|
||||
fi
|
||||
shift 1
|
||||
;;
|
||||
--input_r2)
|
||||
if [ -z "$VIASH_PAR_INPUT_R2" ]; then
|
||||
VIASH_PAR_INPUT_R2="$2"
|
||||
else
|
||||
VIASH_PAR_INPUT_R2="$VIASH_PAR_INPUT_R2;""$2"
|
||||
fi
|
||||
[ $# -lt 2 ] && ViashError Not enough arguments passed to --input_r2. Use "--help" to get more information on the parameters. && exit 1
|
||||
shift 2
|
||||
;;
|
||||
--input_r2=*)
|
||||
if [ -z "$VIASH_PAR_INPUT_R2" ]; then
|
||||
VIASH_PAR_INPUT_R2=$(ViashRemoveFlags "$1")
|
||||
else
|
||||
VIASH_PAR_INPUT_R2="$VIASH_PAR_INPUT_R2;"$(ViashRemoveFlags "$1")
|
||||
VIASH_PAR_INPUT="$VIASH_PAR_INPUT;"$(ViashRemoveFlags "$1")
|
||||
fi
|
||||
shift 1
|
||||
;;
|
||||
@@ -829,12 +834,8 @@ fi
|
||||
|
||||
|
||||
# check whether required parameters exist
|
||||
if [ -z ${VIASH_PAR_INPUT_R1+x} ]; then
|
||||
ViashError '--input_r1' is a required argument. Use "--help" to get more information on the parameters.
|
||||
exit 1
|
||||
fi
|
||||
if [ -z ${VIASH_PAR_INPUT_R2+x} ]; then
|
||||
ViashError '--input_r2' is a required argument. Use "--help" to get more information on the parameters.
|
||||
if [ -z ${VIASH_PAR_INPUT+x} ]; then
|
||||
ViashError '--input' is a required argument. Use "--help" to get more information on the parameters.
|
||||
exit 1
|
||||
fi
|
||||
if [ -z ${VIASH_META_NAME+x} ]; then
|
||||
@@ -868,22 +869,10 @@ if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
|
||||
fi
|
||||
|
||||
# check whether required files exist
|
||||
if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then
|
||||
if [ ! -z "$VIASH_PAR_INPUT" ]; then
|
||||
IFS=';'
|
||||
set -f
|
||||
for file in $VIASH_PAR_INPUT_R1; do
|
||||
unset IFS
|
||||
if [ ! -e "$file" ]; then
|
||||
ViashError "Input file '$file' does not exist."
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
set +f
|
||||
fi
|
||||
if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then
|
||||
IFS=';'
|
||||
set -f
|
||||
for file in $VIASH_PAR_INPUT_R2; do
|
||||
for file in $VIASH_PAR_INPUT; do
|
||||
unset IFS
|
||||
if [ ! -e "$file" ]; then
|
||||
ViashError "Input file '$file' does not exist."
|
||||
@@ -984,27 +973,16 @@ fi
|
||||
if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
|
||||
# detect volumes from file arguments
|
||||
VIASH_CHOWN_VARS=()
|
||||
if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then
|
||||
VIASH_TEST_INPUT_R1=()
|
||||
if [ ! -z "$VIASH_PAR_INPUT" ]; then
|
||||
VIASH_TEST_INPUT=()
|
||||
IFS=';'
|
||||
for var in $VIASH_PAR_INPUT_R1; do
|
||||
for var in $VIASH_PAR_INPUT; do
|
||||
unset IFS
|
||||
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" )
|
||||
var=$(ViashDockerAutodetectMount "$var")
|
||||
VIASH_TEST_INPUT_R1+=( "$var" )
|
||||
VIASH_TEST_INPUT+=( "$var" )
|
||||
done
|
||||
VIASH_PAR_INPUT_R1=$(IFS=';' ; echo "${VIASH_TEST_INPUT_R1[*]}")
|
||||
fi
|
||||
if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then
|
||||
VIASH_TEST_INPUT_R2=()
|
||||
IFS=';'
|
||||
for var in $VIASH_PAR_INPUT_R2; do
|
||||
unset IFS
|
||||
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" )
|
||||
var=$(ViashDockerAutodetectMount "$var")
|
||||
VIASH_TEST_INPUT_R2+=( "$var" )
|
||||
done
|
||||
VIASH_PAR_INPUT_R2=$(IFS=';' ; echo "${VIASH_TEST_INPUT_R2[*]}")
|
||||
VIASH_PAR_INPUT=$(IFS=';' ; echo "${VIASH_TEST_INPUT[*]}")
|
||||
fi
|
||||
if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
|
||||
VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" )
|
||||
@@ -1080,8 +1058,7 @@ trap interrupt INT SIGINT
|
||||
cat > "\$tempscript" << 'VIASHMAIN'
|
||||
## VIASH START
|
||||
# The following code has been auto-generated by Viash.
|
||||
$( if [ ! -z ${VIASH_PAR_INPUT_R1+x} ]; then echo "${VIASH_PAR_INPUT_R1}" | sed "s#'#'\"'\"'#g;s#.*#par_input_r1='&'#" ; else echo "# par_input_r1="; fi )
|
||||
$( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "${VIASH_PAR_INPUT_R2}" | sed "s#'#'\"'\"'#g;s#.*#par_input_r2='&'#" ; else echo "# par_input_r2="; fi )
|
||||
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
|
||||
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\"'\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
|
||||
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
||||
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\"'\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
||||
@@ -1113,14 +1090,9 @@ mkdir -p "\$par_output" && echo "\$par_output created"
|
||||
|
||||
echo
|
||||
echo "Copying files..."
|
||||
IFS=";" read -ra input_r1 <<<\$par_input_r1
|
||||
IFS=";" read -ra input_r2 <<<\$par_input_r2
|
||||
IFS=";" read -ra input <<<\$par_input
|
||||
|
||||
for i in "\${input_r1[@]}"; do
|
||||
cp -rL "\$i" "\$par_output/"
|
||||
done
|
||||
|
||||
for i in "\${input_r2[@]}"; do
|
||||
for i in "\${input[@]}"; do
|
||||
cp -rL "\$i" "\$par_output/"
|
||||
done
|
||||
VIASHMAIN
|
||||
@@ -1133,31 +1105,18 @@ VIASHEOF
|
||||
if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
|
||||
# strip viash automount from file paths
|
||||
|
||||
if [ ! -z "$VIASH_PAR_INPUT_R1" ]; then
|
||||
unset VIASH_TEST_INPUT_R1
|
||||
if [ ! -z "$VIASH_PAR_INPUT" ]; then
|
||||
unset VIASH_TEST_INPUT
|
||||
IFS=';'
|
||||
for var in $VIASH_PAR_INPUT_R1; do
|
||||
for var in $VIASH_PAR_INPUT; do
|
||||
unset IFS
|
||||
if [ -z "$VIASH_TEST_INPUT_R1" ]; then
|
||||
VIASH_TEST_INPUT_R1="$(ViashDockerStripAutomount "$var")"
|
||||
if [ -z "$VIASH_TEST_INPUT" ]; then
|
||||
VIASH_TEST_INPUT="$(ViashDockerStripAutomount "$var")"
|
||||
else
|
||||
VIASH_TEST_INPUT_R1="$VIASH_TEST_INPUT_R1;""$(ViashDockerStripAutomount "$var")"
|
||||
VIASH_TEST_INPUT="$VIASH_TEST_INPUT;""$(ViashDockerStripAutomount "$var")"
|
||||
fi
|
||||
done
|
||||
VIASH_PAR_INPUT_R1="$VIASH_TEST_INPUT_R1"
|
||||
fi
|
||||
if [ ! -z "$VIASH_PAR_INPUT_R2" ]; then
|
||||
unset VIASH_TEST_INPUT_R2
|
||||
IFS=';'
|
||||
for var in $VIASH_PAR_INPUT_R2; do
|
||||
unset IFS
|
||||
if [ -z "$VIASH_TEST_INPUT_R2" ]; then
|
||||
VIASH_TEST_INPUT_R2="$(ViashDockerStripAutomount "$var")"
|
||||
else
|
||||
VIASH_TEST_INPUT_R2="$VIASH_TEST_INPUT_R2;""$(ViashDockerStripAutomount "$var")"
|
||||
fi
|
||||
done
|
||||
VIASH_PAR_INPUT_R2="$VIASH_TEST_INPUT_R2"
|
||||
VIASH_PAR_INPUT="$VIASH_TEST_INPUT"
|
||||
fi
|
||||
if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
|
||||
VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT")
|
||||
|
||||
@@ -91,6 +91,9 @@ resources:
|
||||
description: "Publish the results"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -186,8 +189,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/io/publish_results"
|
||||
executable: "target/executable/io/publish_results/publish_results"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -197,7 +200,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# publish_results save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -169,40 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "publish_results save-params"
|
||||
echo ""
|
||||
echo "Publish the results"
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --star_output"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Output from mapping with STAR"
|
||||
echo ""
|
||||
echo " --nrReadsNrGenesPerChrom"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --star_qc_metrics"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --eset"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --f_data"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --p_data"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --html_report"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: \$id"
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -484,9 +450,9 @@ RUN apt-get update && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
LABEL org.opencontainers.image.description="Companion container for running component io publish_results"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:57Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:02Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -601,6 +567,66 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "publish_results save-params"
|
||||
echo ""
|
||||
echo "Publish the results"
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --star_output"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Output from mapping with STAR"
|
||||
echo ""
|
||||
echo " --nrReadsNrGenesPerChrom"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --star_qc_metrics"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --eset"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --f_data"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --p_data"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --html_report"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: \$id"
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -164,6 +164,9 @@ test_resources:
|
||||
is_executable: true
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -278,8 +281,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/parallel_map"
|
||||
executable: "target/executable/parallel_map/parallel_map"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -289,7 +292,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# parallel_map save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -173,85 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "parallel_map save-params"
|
||||
echo ""
|
||||
echo "Map wells in batch, using STAR"
|
||||
echo "Spliced Transcripts Alignment to a Reference (C) Alexander Dobin"
|
||||
echo "https://github.com/alexdobin/STAR"
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --input_r1"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Input FASTQ files for the forward reads. All FASTQ file names must start"
|
||||
echo " with the prefix '{well_id}_R1', where"
|
||||
echo " 'well_id' can be found as the sequence identifier in the barcodes FASTA"
|
||||
echo " file (see 'barcodesFasta' argument)."
|
||||
echo " For each FASTQ file, a matching FASTQ file for the reverse reads must be"
|
||||
echo " provided to the 'input_r2' argument,"
|
||||
echo " meaning that their 'well_id' prefix must match. The number of items"
|
||||
echo " provided for 'input_r1' must be equal"
|
||||
echo " to the number of items for 'input_r2'."
|
||||
echo ""
|
||||
echo " --input_r2"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Input FASTQ files for the reverse reads. All FASTQ file names must start"
|
||||
echo " with the prefix '{well_id}_R2', where"
|
||||
echo " 'well_id' can be found as the sequence identifier in the barcodes FASTA"
|
||||
echo " file (see 'barcodesFasta' argument)."
|
||||
echo " For each FASTQ file, a matching FASTQ file for the reverse reads must be"
|
||||
echo " provided to the 'input_r1' argument,"
|
||||
echo " meaning that their 'well_id' prefix must match. The number of items"
|
||||
echo " provided for 'input_r1' must be equal"
|
||||
echo " to the number of items for 'input_r2'."
|
||||
echo ""
|
||||
echo " --genomeDir"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Reference genome to match to. Can be generated from genomic FASTA"
|
||||
echo " sequences and a genome annotation"
|
||||
echo " by using STAR with '--runMode genomeGenerate'."
|
||||
echo ""
|
||||
echo " --barcodesFasta"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " FASTA file where each entry specifies a unique barcode sequence present"
|
||||
echo " at the start of the forward input reads"
|
||||
echo " (input_r1). The IDs of each barcode (the start of the FASTA headers up"
|
||||
echo " until the first whitespace character) must"
|
||||
echo " match with the start of one input FASTQ pair."
|
||||
echo ""
|
||||
echo "Barcode arguments:"
|
||||
echo " --umiLength"
|
||||
echo " type: integer, required parameter"
|
||||
echo " Length of the Unique Molecular Identifiers (UMI). The UMI are expected"
|
||||
echo " to be located after the barcodes in the"
|
||||
echo " forwards reads."
|
||||
echo ""
|
||||
echo " --limitBAMsortRAM"
|
||||
echo " type: string"
|
||||
echo " default: 10000000000"
|
||||
echo ""
|
||||
echo "Runtime arguments:"
|
||||
echo " --runThreadN"
|
||||
echo " type: integer"
|
||||
echo " default: 1"
|
||||
echo " Number of threads to use for a single STAR execution."
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output"
|
||||
echo " type: file, required parameter, multiple values allowed, output, file"
|
||||
echo "must exist"
|
||||
echo " default: ./*"
|
||||
echo " A list of output folders which are the result of using STAR to map each"
|
||||
echo " input FASTQ pair STAR to the reference genome."
|
||||
echo " The order of the items DO NOT match with the order of the entries in the"
|
||||
echo " barcodes FASTA file or the input FASTQ pairs."
|
||||
echo ""
|
||||
echo " --joblog"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: execution_log.txt"
|
||||
echo " Where to store the log file listing all the jobs."
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -540,9 +461,9 @@ ENV STAR_BINARY=STAR
|
||||
COPY STAR /usr/local/bin/$STAR_BINARY
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont, Toni Verbeiren"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component parallel_map"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:58Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:03Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -657,6 +578,111 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "parallel_map save-params"
|
||||
echo ""
|
||||
echo "Map wells in batch, using STAR"
|
||||
echo "Spliced Transcripts Alignment to a Reference (C) Alexander Dobin"
|
||||
echo "https://github.com/alexdobin/STAR"
|
||||
echo ""
|
||||
echo "Input arguments:"
|
||||
echo " --input_r1"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Input FASTQ files for the forward reads. All FASTQ file names must start"
|
||||
echo " with the prefix '{well_id}_R1', where"
|
||||
echo " 'well_id' can be found as the sequence identifier in the barcodes FASTA"
|
||||
echo " file (see 'barcodesFasta' argument)."
|
||||
echo " For each FASTQ file, a matching FASTQ file for the reverse reads must be"
|
||||
echo " provided to the 'input_r2' argument,"
|
||||
echo " meaning that their 'well_id' prefix must match. The number of items"
|
||||
echo " provided for 'input_r1' must be equal"
|
||||
echo " to the number of items for 'input_r2'."
|
||||
echo ""
|
||||
echo " --input_r2"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Input FASTQ files for the reverse reads. All FASTQ file names must start"
|
||||
echo " with the prefix '{well_id}_R2', where"
|
||||
echo " 'well_id' can be found as the sequence identifier in the barcodes FASTA"
|
||||
echo " file (see 'barcodesFasta' argument)."
|
||||
echo " For each FASTQ file, a matching FASTQ file for the reverse reads must be"
|
||||
echo " provided to the 'input_r1' argument,"
|
||||
echo " meaning that their 'well_id' prefix must match. The number of items"
|
||||
echo " provided for 'input_r1' must be equal"
|
||||
echo " to the number of items for 'input_r2'."
|
||||
echo ""
|
||||
echo " --genomeDir"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " Reference genome to match to. Can be generated from genomic FASTA"
|
||||
echo " sequences and a genome annotation"
|
||||
echo " by using STAR with '--runMode genomeGenerate'."
|
||||
echo ""
|
||||
echo " --barcodesFasta"
|
||||
echo " type: file, required parameter, file must exist"
|
||||
echo " FASTA file where each entry specifies a unique barcode sequence present"
|
||||
echo " at the start of the forward input reads"
|
||||
echo " (input_r1). The IDs of each barcode (the start of the FASTA headers up"
|
||||
echo " until the first whitespace character) must"
|
||||
echo " match with the start of one input FASTQ pair."
|
||||
echo ""
|
||||
echo "Barcode arguments:"
|
||||
echo " --umiLength"
|
||||
echo " type: integer, required parameter"
|
||||
echo " Length of the Unique Molecular Identifiers (UMI). The UMI are expected"
|
||||
echo " to be located after the barcodes in the"
|
||||
echo " forwards reads."
|
||||
echo ""
|
||||
echo " --limitBAMsortRAM"
|
||||
echo " type: string"
|
||||
echo " default: 10000000000"
|
||||
echo ""
|
||||
echo "Runtime arguments:"
|
||||
echo " --runThreadN"
|
||||
echo " type: integer"
|
||||
echo " default: 1"
|
||||
echo " Number of threads to use for a single STAR execution."
|
||||
echo ""
|
||||
echo "Output arguments:"
|
||||
echo " --output"
|
||||
echo " type: file, required parameter, multiple values allowed, output, file"
|
||||
echo "must exist"
|
||||
echo " default: ./*"
|
||||
echo " A list of output folders which are the result of using STAR to map each"
|
||||
echo " input FASTQ pair STAR to the reference genome."
|
||||
echo " The order of the items DO NOT match with the order of the entries in the"
|
||||
echo " barcodes FASTA file or the input FASTQ pairs."
|
||||
echo ""
|
||||
echo " --joblog"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: execution_log.txt"
|
||||
echo " Where to store the log file listing all the jobs."
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
@@ -1421,7 +1447,8 @@ for barcode_index in "\${!barcodes[@]}"; do
|
||||
fi
|
||||
done
|
||||
echo "Did not find FASTQ files files for well \${well_id}! "\\
|
||||
"Make sure that the input files have the correct file name format."
|
||||
"Make sure that the input files have the correct file name format."\\
|
||||
"Input files: \${input_r1[@]}"
|
||||
exit 1
|
||||
done
|
||||
|
||||
|
||||
@@ -75,6 +75,9 @@ test_resources:
|
||||
path: "test_data"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -184,12 +187,14 @@ engines:
|
||||
- "install.packages(\"oaStyle\", repos = c(rdepot = \"https://repos.openanalytics.eu/repo/public\"\
|
||||
, getOption(\"repos\")))"
|
||||
bioc_force_install: false
|
||||
warnings_as_errors: true
|
||||
test_setup:
|
||||
- type: "r"
|
||||
packages:
|
||||
- "testthat"
|
||||
- "R.utils"
|
||||
bioc_force_install: false
|
||||
warnings_as_errors: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
@@ -200,8 +205,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/report/create_report"
|
||||
executable: "target/executable/report/create_report/create_report"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -211,7 +216,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# create_report save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -173,20 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "create_report save-params"
|
||||
echo ""
|
||||
echo "Create a basic QC report in HTML format based on a number of esets."
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --eset"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --output_report"
|
||||
echo " type: file, required parameter, output, file must exist"
|
||||
echo " example: report.html"
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -467,18 +453,18 @@ RUN apt-get update && \
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y procps pandoc && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN Rscript -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
|
||||
Rscript -e 'if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
|
||||
Rscript -e 'if (!requireNamespace("Biobase", quietly = TRUE)) BiocManager::install("Biobase")' && \
|
||||
Rscript -e 'if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) BiocManager::install("ComplexHeatmap")' && \
|
||||
Rscript -e 'remotes::install_cran(c("ggplot2", "knitr", "gridExtra", "RColorBrewer", "processx", "whisker", "rmarkdown", "bookdown", "data.table", "platetools", "htmltools", "DT", "logger", "bit64"), repos = "https://cran.rstudio.com")' && \
|
||||
Rscript -e 'install.packages("oaStyle", repos = c(rdepot = "https://repos.openanalytics.eu/repo/public", getOption("repos")))'
|
||||
RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \
|
||||
Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")' && \
|
||||
Rscript -e 'options(warn = 2); if (!requireNamespace("Biobase", quietly = TRUE)) BiocManager::install("Biobase")' && \
|
||||
Rscript -e 'options(warn = 2); if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) BiocManager::install("ComplexHeatmap")' && \
|
||||
Rscript -e 'options(warn = 2); remotes::install_cran(c("ggplot2", "knitr", "gridExtra", "RColorBrewer", "processx", "whisker", "rmarkdown", "bookdown", "data.table", "platetools", "htmltools", "DT", "logger", "bit64"), repos = "https://cran.rstudio.com")' && \
|
||||
Rscript -e 'options(warn = 2); install.packages("oaStyle", repos = c(rdepot = "https://repos.openanalytics.eu/repo/public", getOption("repos")))'
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component report create_report"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:58Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:03Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -593,6 +579,46 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "create_report save-params"
|
||||
echo ""
|
||||
echo "Create a basic QC report in HTML format based on a number of esets."
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --eset"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo ""
|
||||
echo " --output_report"
|
||||
echo " type: file, required parameter, output, file must exist"
|
||||
echo " example: report.html"
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -91,6 +91,9 @@ test_resources:
|
||||
path: "test_data"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -197,8 +200,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/stats/combine_star_logs"
|
||||
executable: "target/executable/stats/combine_star_logs/combine_star_logs"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -208,7 +211,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# combine_star_logs save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -172,39 +172,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "combine_star_logs save-params"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --barcodes"
|
||||
echo " type: string, required parameter, multiple values allowed"
|
||||
echo " Barcodes responding to the respective log files."
|
||||
echo ""
|
||||
echo " --star_logs"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " example: Log.final.out"
|
||||
echo " Paths to the STAR log files (most frequently called Log.final.out)"
|
||||
echo ""
|
||||
echo " --gene_summary_logs"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " example: Summary.txt"
|
||||
echo " Paths to the Summary.csv files from the STAR Solo output. Can be found"
|
||||
echo " in"
|
||||
echo " the 'Solo.out/Gene' folder relative to the root of the STAR output"
|
||||
echo " directory."
|
||||
echo ""
|
||||
echo " --reads_per_gene_logs"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Paths to the 'ReadsPerGene.out.tab' files as output by STAR."
|
||||
echo ""
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: starLogs.txt"
|
||||
echo " Tab-delimited file describing for each barcode (as the rows), the"
|
||||
echo " metrics (as columns)"
|
||||
echo " gathered from the different input files."
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -490,9 +457,9 @@ RUN pip install --upgrade pip && \
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component stats combine_star_logs"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:56Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:01Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -607,6 +574,65 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "combine_star_logs save-params"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --barcodes"
|
||||
echo " type: string, required parameter, multiple values allowed"
|
||||
echo " Barcodes responding to the respective log files."
|
||||
echo ""
|
||||
echo " --star_logs"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " example: Log.final.out"
|
||||
echo " Paths to the STAR log files (most frequently called Log.final.out)"
|
||||
echo ""
|
||||
echo " --gene_summary_logs"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " example: Summary.txt"
|
||||
echo " Paths to the Summary.csv files from the STAR Solo output. Can be found"
|
||||
echo " in"
|
||||
echo " the 'Solo.out/Gene' folder relative to the root of the STAR output"
|
||||
echo " directory."
|
||||
echo ""
|
||||
echo " --reads_per_gene_logs"
|
||||
echo " type: file, required parameter, multiple values allowed, file must exist"
|
||||
echo " Paths to the 'ReadsPerGene.out.tab' files as output by STAR."
|
||||
echo ""
|
||||
echo " --output"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: starLogs.txt"
|
||||
echo " Tab-delimited file describing for each barcode (as the rows), the"
|
||||
echo " metrics (as columns)"
|
||||
echo " gathered from the different input files."
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -75,6 +75,9 @@ test_resources:
|
||||
is_executable: true
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -181,8 +184,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/stats/generate_pool_statistics"
|
||||
executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -192,7 +195,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# generate_pool_statistics save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -173,31 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "generate_pool_statistics save-params"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --nrReadsNrGenesPerChrom"
|
||||
echo " type: file, multiple values allowed, file must exist"
|
||||
echo " default: processedBamFile_well1.tsv;processedBamfile_well2.tsv"
|
||||
echo " Path to an output file that contains a .tsv formatted table describing"
|
||||
echo " per chromosome the number of reads that were mapped to that chromosome"
|
||||
echo " (NumberOfReads"
|
||||
echo " column) and the number of genes on that chromosome that had at least one"
|
||||
echo " read mapped to it (NumberOfGenes)."
|
||||
echo ""
|
||||
echo " --nrReadsNrGenesPerChromPool"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " example: nrReadsNrGenesPerChrom.txt"
|
||||
echo " Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom"
|
||||
echo " files. Describes"
|
||||
echo " per chromosome (as columns) the number of reads, as well as the total"
|
||||
echo " number"
|
||||
echo " of reads per cell barcode and the percentage of nuclear, ERCC and"
|
||||
echo " mitochondrial"
|
||||
echo " reads."
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -483,9 +458,9 @@ RUN pip install --upgrade pip && \
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component stats generate_pool_statistics"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:56Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:02Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -600,6 +575,57 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "generate_pool_statistics save-params"
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --nrReadsNrGenesPerChrom"
|
||||
echo " type: file, multiple values allowed, file must exist"
|
||||
echo " default: processedBamFile_well1.tsv;processedBamfile_well2.tsv"
|
||||
echo " Path to an output file that contains a .tsv formatted table describing"
|
||||
echo " per chromosome the number of reads that were mapped to that chromosome"
|
||||
echo " (NumberOfReads"
|
||||
echo " column) and the number of genes on that chromosome that had at least one"
|
||||
echo " read mapped to it (NumberOfGenes)."
|
||||
echo ""
|
||||
echo " --nrReadsNrGenesPerChromPool"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " example: nrReadsNrGenesPerChrom.txt"
|
||||
echo " Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom"
|
||||
echo " files. Describes"
|
||||
echo " per chromosome (as columns) the number of reads, as well as the total"
|
||||
echo " number"
|
||||
echo " of reads per cell barcode and the percentage of nuclear, ERCC and"
|
||||
echo " mitochondrial"
|
||||
echo " reads."
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -146,6 +146,9 @@ test_resources:
|
||||
path: "empty.sam"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -263,8 +266,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/stats/generate_well_statistics"
|
||||
executable: "target/executable/stats/generate_well_statistics/generate_well_statistics"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -274,7 +277,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# generate_well_statistics save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -173,65 +173,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "generate_well_statistics save-params"
|
||||
echo ""
|
||||
echo "Generate summary statistics from BAM files generated by STAR solo."
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --input"
|
||||
echo " type: file, file must exist"
|
||||
echo " example: input.bam"
|
||||
echo " The .bam file as returned by the mapping tool STAR."
|
||||
echo ""
|
||||
echo " --barcode"
|
||||
echo " type: string, required parameter"
|
||||
echo " The barcode for the well that is being processed. Is only used to add a"
|
||||
echo " metadata"
|
||||
echo " column to all output files."
|
||||
echo ""
|
||||
echo " --well_id"
|
||||
echo " type: string, required parameter"
|
||||
echo " ID of this well. Only used to add a metadata column to the output files."
|
||||
echo ""
|
||||
echo " --processedBAMFile"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: processedBamFile.txt"
|
||||
echo " Path to a .tsv file listing, per read in the BAM file,"
|
||||
echo " the value for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the"
|
||||
echo " chromsome to which the read was mapped to."
|
||||
echo ""
|
||||
echo " --nrReadsNrGenesPerChrom"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: nrReadsNrGenesPerChrom.txt"
|
||||
echo " Path to an output file that contains a .tsv formatted table describing"
|
||||
echo " per chromosome the number of reads that were mapped to that chromosome"
|
||||
echo " (NumberOfReads"
|
||||
echo " column) and the number of genes on that chromosome that had at least one"
|
||||
echo " read mapped to it (NumberOfGenes)."
|
||||
echo ""
|
||||
echo " --nrReadsNrUMIsPerCB"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: nrReadsNrUMIsPerCB.txt"
|
||||
echo " Path to an output file that contains a .tsv formatted table describing"
|
||||
echo " per barcode the number of UMI's (nrUMIs) and the total number of reads"
|
||||
echo " (NumberOfReads)."
|
||||
echo ""
|
||||
echo " --umiFreqTop"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: umiFreqTop100.txt"
|
||||
echo " Path to an output file that contains a .tsv formatted table describing"
|
||||
echo " per UMI (column UB) the frequency at which they occur in the reads"
|
||||
echo " (column"
|
||||
echo " N). Only the top 100 UMIs are included."
|
||||
echo ""
|
||||
echo " --threads"
|
||||
echo " type: integer"
|
||||
echo " default: 1"
|
||||
echo " min: 1"
|
||||
echo " Number of threads to use for decompressing BAM files."
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -520,9 +461,9 @@ RUN pip install --upgrade pip && \
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component stats generate_well_statistics"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:56Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:01Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -637,6 +578,91 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "generate_well_statistics save-params"
|
||||
echo ""
|
||||
echo "Generate summary statistics from BAM files generated by STAR solo."
|
||||
echo ""
|
||||
echo "Arguments:"
|
||||
echo " --input"
|
||||
echo " type: file, file must exist"
|
||||
echo " example: input.bam"
|
||||
echo " The .bam file as returned by the mapping tool STAR."
|
||||
echo ""
|
||||
echo " --barcode"
|
||||
echo " type: string, required parameter"
|
||||
echo " The barcode for the well that is being processed. Is only used to add a"
|
||||
echo " metadata"
|
||||
echo " column to all output files."
|
||||
echo ""
|
||||
echo " --well_id"
|
||||
echo " type: string, required parameter"
|
||||
echo " ID of this well. Only used to add a metadata column to the output files."
|
||||
echo ""
|
||||
echo " --processedBAMFile"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: processedBamFile.txt"
|
||||
echo " Path to a .tsv file listing, per read in the BAM file,"
|
||||
echo " the value for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the"
|
||||
echo " chromsome to which the read was mapped to."
|
||||
echo ""
|
||||
echo " --nrReadsNrGenesPerChrom"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: nrReadsNrGenesPerChrom.txt"
|
||||
echo " Path to an output file that contains a .tsv formatted table describing"
|
||||
echo " per chromosome the number of reads that were mapped to that chromosome"
|
||||
echo " (NumberOfReads"
|
||||
echo " column) and the number of genes on that chromosome that had at least one"
|
||||
echo " read mapped to it (NumberOfGenes)."
|
||||
echo ""
|
||||
echo " --nrReadsNrUMIsPerCB"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: nrReadsNrUMIsPerCB.txt"
|
||||
echo " Path to an output file that contains a .tsv formatted table describing"
|
||||
echo " per barcode the number of UMI's (nrUMIs) and the total number of reads"
|
||||
echo " (NumberOfReads)."
|
||||
echo ""
|
||||
echo " --umiFreqTop"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: umiFreqTop100.txt"
|
||||
echo " Path to an output file that contains a .tsv formatted table describing"
|
||||
echo " per UMI (column UB) the frequency at which they occur in the reads"
|
||||
echo " (column"
|
||||
echo " N). Only the top 100 UMIs are included."
|
||||
echo ""
|
||||
echo " --threads"
|
||||
echo " type: integer"
|
||||
echo " default: 1"
|
||||
echo " min: 1"
|
||||
echo " Number of threads to use for decompressing BAM files."
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
|
||||
@@ -13,12 +13,12 @@ argument_groups:
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--params"
|
||||
description: "The state to save\n"
|
||||
name: "--params_yaml"
|
||||
description: "base64 encoded yaml file containing the state\n"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Outputs"
|
||||
arguments:
|
||||
@@ -40,6 +40,9 @@ resources:
|
||||
description: "Save parameters to a file\n"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -140,8 +143,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/executable/utils/save_params"
|
||||
executable: "target/executable/utils/save_params/save_params"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -151,7 +154,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# save_params save-params
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -169,26 +169,6 @@ VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
|
||||
VIASH_META_TEMP_DIR="$VIASH_TEMP"
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "save_params save-params"
|
||||
echo ""
|
||||
echo "Save parameters to a file"
|
||||
echo ""
|
||||
echo "Inputs:"
|
||||
echo " --id"
|
||||
echo " type: string, required parameter"
|
||||
echo " The id of the job"
|
||||
echo ""
|
||||
echo " --params"
|
||||
echo " type: string, required parameter, multiple values allowed"
|
||||
echo " The state to save"
|
||||
echo ""
|
||||
echo "Outputs:"
|
||||
echo " --output"
|
||||
echo " type: string, required parameter"
|
||||
echo " The output file"
|
||||
}
|
||||
|
||||
# initialise variables
|
||||
VIASH_MODE='run'
|
||||
@@ -473,9 +453,9 @@ RUN pip install --upgrade pip && \
|
||||
pip install --upgrade --no-cache-dir "pyyaml"
|
||||
|
||||
LABEL org.opencontainers.image.description="Companion container for running component utils save_params"
|
||||
LABEL org.opencontainers.image.created="2025-03-25T15:33:58Z"
|
||||
LABEL org.opencontainers.image.created="2025-04-01T15:16:03Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="1561d769c65192a820053a565654dee8cbe55588"
|
||||
LABEL org.opencontainers.image.revision="05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
LABEL org.opencontainers.image.version="save-params"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -590,6 +570,52 @@ fi
|
||||
# initialise docker variables
|
||||
VIASH_DOCKER_RUN_ARGS=(-i --rm)
|
||||
|
||||
|
||||
# ViashHelp: Display helpful explanation about this executable
|
||||
function ViashHelp {
|
||||
echo "save_params save-params"
|
||||
echo ""
|
||||
echo "Save parameters to a file"
|
||||
echo ""
|
||||
echo "Inputs:"
|
||||
echo " --id"
|
||||
echo " type: string, required parameter"
|
||||
echo " The id of the job"
|
||||
echo ""
|
||||
echo " --params_yaml"
|
||||
echo " type: string, required parameter"
|
||||
echo " base64 encoded yaml file containing the state"
|
||||
echo ""
|
||||
echo "Outputs:"
|
||||
echo " --output"
|
||||
echo " type: string, required parameter"
|
||||
echo " The output file"
|
||||
echo ""
|
||||
echo "Viash built in Computational Requirements:"
|
||||
echo " ---cpus=INT"
|
||||
echo " Number of CPUs to use"
|
||||
echo " ---memory=STRING"
|
||||
echo " Amount of memory to use. Examples: 4GB, 3MiB."
|
||||
echo ""
|
||||
echo "Viash built in Docker:"
|
||||
echo " ---setup=STRATEGY"
|
||||
echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
|
||||
echo " Default: ifneedbepullelsecachedbuild"
|
||||
echo " ---dockerfile"
|
||||
echo " Print the dockerfile to stdout."
|
||||
echo " ---docker_run_args=ARG"
|
||||
echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
|
||||
echo " ---docker_image_id"
|
||||
echo " Print the docker image id to stdout."
|
||||
echo " ---debug"
|
||||
echo " Enter the docker container for debugging purposes."
|
||||
echo ""
|
||||
echo "Viash built in Engines:"
|
||||
echo " ---engine=ENGINE_ID"
|
||||
echo " Specify the engine to use. Options are: docker, native."
|
||||
echo " Default: docker"
|
||||
}
|
||||
|
||||
# initialise array
|
||||
VIASH_POSITIONAL_ARGS=''
|
||||
|
||||
@@ -626,21 +652,15 @@ while [[ $# -gt 0 ]]; do
|
||||
VIASH_PAR_ID=$(ViashRemoveFlags "$1")
|
||||
shift 1
|
||||
;;
|
||||
--params)
|
||||
if [ -z "$VIASH_PAR_PARAMS" ]; then
|
||||
VIASH_PAR_PARAMS="$2"
|
||||
else
|
||||
VIASH_PAR_PARAMS="$VIASH_PAR_PARAMS;""$2"
|
||||
fi
|
||||
[ $# -lt 2 ] && ViashError Not enough arguments passed to --params. Use "--help" to get more information on the parameters. && exit 1
|
||||
--params_yaml)
|
||||
[ -n "$VIASH_PAR_PARAMS_YAML" ] && ViashError Bad arguments for option \'--params_yaml\': \'$VIASH_PAR_PARAMS_YAML\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
|
||||
VIASH_PAR_PARAMS_YAML="$2"
|
||||
[ $# -lt 2 ] && ViashError Not enough arguments passed to --params_yaml. Use "--help" to get more information on the parameters. && exit 1
|
||||
shift 2
|
||||
;;
|
||||
--params=*)
|
||||
if [ -z "$VIASH_PAR_PARAMS" ]; then
|
||||
VIASH_PAR_PARAMS=$(ViashRemoveFlags "$1")
|
||||
else
|
||||
VIASH_PAR_PARAMS="$VIASH_PAR_PARAMS;"$(ViashRemoveFlags "$1")
|
||||
fi
|
||||
--params_yaml=*)
|
||||
[ -n "$VIASH_PAR_PARAMS_YAML" ] && ViashError Bad arguments for option \'--params_yaml=*\': \'$VIASH_PAR_PARAMS_YAML\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
|
||||
VIASH_PAR_PARAMS_YAML=$(ViashRemoveFlags "$1")
|
||||
shift 1
|
||||
;;
|
||||
--output)
|
||||
@@ -830,8 +850,8 @@ if [ -z ${VIASH_PAR_ID+x} ]; then
|
||||
ViashError '--id' is a required argument. Use "--help" to get more information on the parameters.
|
||||
exit 1
|
||||
fi
|
||||
if [ -z ${VIASH_PAR_PARAMS+x} ]; then
|
||||
ViashError '--params' is a required argument. Use "--help" to get more information on the parameters.
|
||||
if [ -z ${VIASH_PAR_PARAMS_YAML+x} ]; then
|
||||
ViashError '--params_yaml' is a required argument. Use "--help" to get more information on the parameters.
|
||||
exit 1
|
||||
fi
|
||||
if [ -z ${VIASH_PAR_OUTPUT+x} ]; then
|
||||
@@ -1018,12 +1038,13 @@ trap interrupt INT SIGINT
|
||||
cat > "\$tempscript" << 'VIASHMAIN'
|
||||
import re
|
||||
import yaml
|
||||
import base64
|
||||
|
||||
## VIASH START
|
||||
# The following code has been auto-generated by Viash.
|
||||
par = {
|
||||
'id': $( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "r'${VIASH_PAR_ID//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
||||
'params': $( if [ ! -z ${VIASH_PAR_PARAMS+x} ]; then echo "r'${VIASH_PAR_PARAMS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ),
|
||||
'params_yaml': $( if [ ! -z ${VIASH_PAR_PARAMS_YAML+x} ]; then echo "r'${VIASH_PAR_PARAMS_YAML//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
||||
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi )
|
||||
}
|
||||
meta = {
|
||||
@@ -1056,27 +1077,41 @@ class Dumper(yaml.Dumper):
|
||||
def increase_indent(self, flow=False, indentless=False):
|
||||
return super(Dumper, self).increase_indent(flow, False)
|
||||
|
||||
params = {}
|
||||
|
||||
for param in par['params']:
|
||||
param = param.replace('\$id', par['id'])
|
||||
key, value = param.split('=')
|
||||
def decode_params_yaml(encoded_yaml):
|
||||
# Step 1: Decode from Base64
|
||||
yaml_bytes = base64.b64decode(encoded_yaml)
|
||||
|
||||
array_match = re.match(r'(.+)\\[(\\d+)\\]\$', key)
|
||||
if array_match:
|
||||
base_key = array_match.group(1)
|
||||
index = int(array_match.group(2))
|
||||
|
||||
if base_key not in params:
|
||||
params[base_key] = []
|
||||
|
||||
while len(params[base_key]) <= index:
|
||||
params[base_key].append(None)
|
||||
|
||||
params[base_key][index] = value
|
||||
else:
|
||||
params[key] = value
|
||||
|
||||
# Step 2: Convert bytes to string
|
||||
yaml_string = yaml_bytes.decode('utf-8')
|
||||
|
||||
# Step 3: Extract pattern for Java path objects
|
||||
# Find pattern: !!sun.nio.fs.UnixPath /path/to/file
|
||||
pattern = r'!!sun\\.nio\\.fs\\.UnixPath\\s+([^\\n]+)'
|
||||
|
||||
# Replace with the actual path string (captured group)
|
||||
yaml_string = re.sub(pattern, r'\\1', yaml_string)
|
||||
|
||||
# Handle any remaining empty UnixPath objects
|
||||
yaml_string = yaml_string.replace('!!sun.nio.fs.UnixPath {}', '""')
|
||||
|
||||
# Step 4: Parse YAML
|
||||
yaml_data = yaml.safe_load(yaml_string)
|
||||
|
||||
return yaml_data
|
||||
|
||||
def replace_id(value, sample_id):
|
||||
if isinstance(value, str):
|
||||
return value.replace('\$id', sample_id)
|
||||
elif isinstance(value, list):
|
||||
return [replace_id(item, sample_id) for item in value]
|
||||
return value
|
||||
|
||||
print(par['params_yaml'])
|
||||
|
||||
params = decode_params_yaml(par['params_yaml'])
|
||||
for key, value in params.items():
|
||||
params[key] = replace_id(value, par["id"])
|
||||
|
||||
with open(par["output"], 'w') as f:
|
||||
yaml.dump(params, f, default_flow_style=False, Dumper=Dumper)
|
||||
VIASHMAIN
|
||||
|
||||
@@ -94,6 +94,9 @@ test_resources:
|
||||
path: "mapping_dir"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -182,11 +185,13 @@ engines:
|
||||
bioc:
|
||||
- "Seurat"
|
||||
bioc_force_install: false
|
||||
warnings_as_errors: true
|
||||
test_setup:
|
||||
- type: "r"
|
||||
cran:
|
||||
- "testthat"
|
||||
bioc_force_install: false
|
||||
warnings_as_errors: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
@@ -197,8 +202,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/eset/create_eset"
|
||||
executable: "target/nextflow/eset/create_eset/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -208,7 +213,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// create_eset save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1753,33 +1910,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2944,6 +3175,10 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3048,7 +3283,8 @@ meta = [
|
||||
"bioc" : [
|
||||
"Seurat"
|
||||
],
|
||||
"bioc_force_install" : false
|
||||
"bioc_force_install" : false,
|
||||
"warnings_as_errors" : true
|
||||
}
|
||||
],
|
||||
"test_setup" : [
|
||||
@@ -3057,7 +3293,8 @@ meta = [
|
||||
"cran" : [
|
||||
"testthat"
|
||||
],
|
||||
"bioc_force_install" : false
|
||||
"bioc_force_install" : false,
|
||||
"warnings_as_errors" : true
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -3071,8 +3308,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/eset/create_eset",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3087,7 +3324,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -70,6 +70,9 @@ test_resources:
|
||||
path: "test_annotation.gtf"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -176,8 +179,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/eset/create_fdata"
|
||||
executable: "target/nextflow/eset/create_fdata/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -187,7 +190,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// create_fdata save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1753,33 +1910,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2911,6 +3142,10 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3043,8 +3278,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/eset/create_fdata",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3059,7 +3294,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -84,6 +84,9 @@ test_resources:
|
||||
path: "starLogs.txt"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -190,8 +193,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/eset/create_pdata"
|
||||
executable: "target/nextflow/eset/create_pdata/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -201,7 +204,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// create_pdata save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1753,33 +1910,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2925,6 +3156,10 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3057,8 +3292,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/eset/create_pdata",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3073,7 +3308,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -50,6 +50,9 @@ resources:
|
||||
description: "This component test the ExpressionSet object as output by the main pipeline."
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -137,6 +140,7 @@ engines:
|
||||
bioc:
|
||||
- "Biobase"
|
||||
bioc_force_install: false
|
||||
warnings_as_errors: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
@@ -147,8 +151,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/integration_test_components/htrnaseq/check_eset"
|
||||
executable: "target/nextflow/integration_test_components/htrnaseq/check_eset/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -158,7 +162,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// check_eset save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1726,8 +1885,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1740,7 +1897,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1752,33 +1909,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2879,6 +3110,10 @@ meta = [
|
||||
],
|
||||
"description" : "This component test the ExpressionSet object as output by the main pipeline.",
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -2982,7 +3217,8 @@ meta = [
|
||||
"bioc" : [
|
||||
"Biobase"
|
||||
],
|
||||
"bioc_force_install" : false
|
||||
"bioc_force_install" : false,
|
||||
"warnings_as_errors" : true
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -2996,8 +3232,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/integration_test_components/htrnaseq/check_eset",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3012,7 +3248,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -57,6 +57,9 @@ resources:
|
||||
description: "This component test the cutadapt output from the well_demultiplex subworkflow."
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -157,8 +160,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output"
|
||||
executable: "target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -168,7 +171,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// check_cutadapt_output save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1726,8 +1885,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1740,7 +1897,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1752,33 +1909,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2886,6 +3117,10 @@ meta = [
|
||||
],
|
||||
"description" : "This component test the cutadapt output from the well_demultiplex subworkflow.",
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3008,8 +3243,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/integration_test_components/well_demultiplexing/check_cutadapt_output",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3024,7 +3259,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -5,18 +5,8 @@ argument_groups:
|
||||
- name: "Input arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input_r1"
|
||||
description: "Directory to write R1 fastq data to"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--input_r2"
|
||||
description: "Directory to write R2 fastq data to"
|
||||
name: "--input"
|
||||
description: "Directory to write fastq data to"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
@@ -47,6 +37,9 @@ resources:
|
||||
description: "Publish the fastq files per well"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -142,8 +135,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/io/publish_fastqs"
|
||||
executable: "target/nextflow/io/publish_fastqs/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -153,7 +146,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// publish_fastqs save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1749,33 +1906,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2813,19 +3044,8 @@ meta = [
|
||||
"arguments" : [
|
||||
{
|
||||
"type" : "file",
|
||||
"name" : "--input_r1",
|
||||
"description" : "Directory to write R1 fastq data to",
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
"direction" : "input",
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "file",
|
||||
"name" : "--input_r2",
|
||||
"description" : "Directory to write R2 fastq data to",
|
||||
"name" : "--input",
|
||||
"description" : "Directory to write fastq data to",
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
@@ -2868,6 +3088,10 @@ meta = [
|
||||
],
|
||||
"description" : "Publish the fastq files per well",
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -2982,8 +3206,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/io/publish_fastqs",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -2998,7 +3222,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3036,8 +3260,7 @@ tempscript=".viash_script.sh"
|
||||
cat > "$tempscript" << VIASHMAIN
|
||||
## VIASH START
|
||||
# The following code has been auto-generated by Viash.
|
||||
$( if [ ! -z ${VIASH_PAR_INPUT_R1+x} ]; then echo "${VIASH_PAR_INPUT_R1}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_r1='&'#" ; else echo "# par_input_r1="; fi )
|
||||
$( if [ ! -z ${VIASH_PAR_INPUT_R2+x} ]; then echo "${VIASH_PAR_INPUT_R2}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input_r2='&'#" ; else echo "# par_input_r2="; fi )
|
||||
$( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "${VIASH_PAR_INPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_input='&'#" ; else echo "# par_input="; fi )
|
||||
$( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "${VIASH_PAR_OUTPUT}" | sed "s#'#'\\"'\\"'#g;s#.*#par_output='&'#" ; else echo "# par_output="; fi )
|
||||
$( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "${VIASH_META_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_name='&'#" ; else echo "# meta_name="; fi )
|
||||
$( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "${VIASH_META_FUNCTIONALITY_NAME}" | sed "s#'#'\\"'\\"'#g;s#.*#meta_functionality_name='&'#" ; else echo "# meta_functionality_name="; fi )
|
||||
@@ -3069,14 +3292,9 @@ mkdir -p "\\$par_output" && echo "\\$par_output created"
|
||||
|
||||
echo
|
||||
echo "Copying files..."
|
||||
IFS=";" read -ra input_r1 <<<\\$par_input_r1
|
||||
IFS=";" read -ra input_r2 <<<\\$par_input_r2
|
||||
IFS=";" read -ra input <<<\\$par_input
|
||||
|
||||
for i in "\\${input_r1[@]}"; do
|
||||
cp -rL "\\$i" "\\$par_output/"
|
||||
done
|
||||
|
||||
for i in "\\${input_r2[@]}"; do
|
||||
for i in "\\${input[@]}"; do
|
||||
cp -rL "\\$i" "\\$par_output/"
|
||||
done
|
||||
VIASHMAIN
|
||||
|
||||
@@ -14,21 +14,11 @@
|
||||
"properties": {
|
||||
|
||||
|
||||
"input_r1": {
|
||||
"input": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R1 fastq data to",
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R1 fastq data to"
|
||||
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"input_r2": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R2 fastq data to",
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write R2 fastq data to"
|
||||
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write fastq data to",
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Directory to write fastq data to"
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -91,6 +91,9 @@ resources:
|
||||
description: "Publish the results"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -186,8 +189,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/io/publish_results"
|
||||
executable: "target/nextflow/io/publish_results/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -197,7 +200,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// publish_results save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1749,33 +1906,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2917,6 +3148,10 @@ meta = [
|
||||
],
|
||||
"description" : "Publish the results",
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3031,8 +3266,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/io/publish_results",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3047,7 +3282,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -164,6 +164,9 @@ test_resources:
|
||||
is_executable: true
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -278,8 +281,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/parallel_map"
|
||||
executable: "target/nextflow/parallel_map/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -289,7 +292,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// parallel_map save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1753,33 +1910,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -3005,6 +3236,10 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3143,8 +3378,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/parallel_map",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3159,7 +3394,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3331,7 +3566,8 @@ for barcode_index in "\\${!barcodes[@]}"; do
|
||||
fi
|
||||
done
|
||||
echo "Did not find FASTQ files files for well \\${well_id}! "\\\\
|
||||
"Make sure that the input files have the correct file name format."
|
||||
"Make sure that the input files have the correct file name format."\\\\
|
||||
"Input files: \\${input_r1[@]}"
|
||||
exit 1
|
||||
done
|
||||
|
||||
|
||||
@@ -75,6 +75,9 @@ test_resources:
|
||||
path: "test_data"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -184,12 +187,14 @@ engines:
|
||||
- "install.packages(\"oaStyle\", repos = c(rdepot = \"https://repos.openanalytics.eu/repo/public\"\
|
||||
, getOption(\"repos\")))"
|
||||
bioc_force_install: false
|
||||
warnings_as_errors: true
|
||||
test_setup:
|
||||
- type: "r"
|
||||
packages:
|
||||
- "testthat"
|
||||
- "R.utils"
|
||||
bioc_force_install: false
|
||||
warnings_as_errors: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
@@ -200,8 +205,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/report/create_report"
|
||||
executable: "target/nextflow/report/create_report/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -211,7 +216,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// create_report save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1753,33 +1910,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2924,6 +3155,10 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3052,7 +3287,8 @@ meta = [
|
||||
"script" : [
|
||||
"install.packages(\\"oaStyle\\", repos = c(rdepot = \\"https://repos.openanalytics.eu/repo/public\\", getOption(\\"repos\\")))"
|
||||
],
|
||||
"bioc_force_install" : false
|
||||
"bioc_force_install" : false,
|
||||
"warnings_as_errors" : true
|
||||
}
|
||||
],
|
||||
"test_setup" : [
|
||||
@@ -3062,7 +3298,8 @@ meta = [
|
||||
"testthat",
|
||||
"R.utils"
|
||||
],
|
||||
"bioc_force_install" : false
|
||||
"bioc_force_install" : false,
|
||||
"warnings_as_errors" : true
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -3076,8 +3313,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/report/create_report",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3092,7 +3329,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -91,6 +91,9 @@ test_resources:
|
||||
path: "test_data"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -197,8 +200,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/stats/combine_star_logs"
|
||||
executable: "target/nextflow/stats/combine_star_logs/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -208,7 +211,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// combine_star_logs save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1726,8 +1885,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1740,7 +1897,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1752,33 +1909,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2927,6 +3158,10 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3059,8 +3294,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/stats/combine_star_logs",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3075,7 +3310,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -75,6 +75,9 @@ test_resources:
|
||||
is_executable: true
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -181,8 +184,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/stats/generate_pool_statistics"
|
||||
executable: "target/nextflow/stats/generate_pool_statistics/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -192,7 +195,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// generate_pool_statistics save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1753,33 +1910,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2911,6 +3142,10 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3043,8 +3278,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/stats/generate_pool_statistics",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3059,7 +3294,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -146,6 +146,9 @@ test_resources:
|
||||
path: "empty.sam"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -263,8 +266,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/stats/generate_well_statistics"
|
||||
executable: "target/nextflow/stats/generate_well_statistics/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -274,7 +277,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// generate_well_statistics save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1753,33 +1910,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2992,6 +3223,10 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3138,8 +3373,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/stats/generate_well_statistics",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3154,7 +3389,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
193
target/nextflow/utils/concatRuns/.config.vsh.yaml
Normal file
193
target/nextflow/utils/concatRuns/.config.vsh.yaml
Normal file
@@ -0,0 +1,193 @@
|
||||
name: "concatRuns"
|
||||
namespace: "utils"
|
||||
version: "save-params"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input_r1"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--input_r2"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--sample_id"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_r1"
|
||||
description: "Path to read 1 fastq/fasta file"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_r2"
|
||||
description: "Path to read 2 fastq/fasta file"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "nextflow_script"
|
||||
path: "main.nf"
|
||||
is_executable: true
|
||||
entrypoint: "run_wf"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Concatenate well FASTQ files from different runs in order to increase\
|
||||
\ sequencing depth.\n"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
dependencies:
|
||||
- name: "concat_text"
|
||||
repository:
|
||||
type: "vsh"
|
||||
repo: "craftbox"
|
||||
tag: "v0.1.0"
|
||||
repositories:
|
||||
- type: "vsh"
|
||||
name: "cb"
|
||||
repo: "craftbox"
|
||||
tag: "v0.1.0"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "native"
|
||||
id: "native"
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/utils/concatRuns/config.vsh.yaml"
|
||||
runner: "nextflow"
|
||||
engine: "native|native"
|
||||
output: "target/nextflow/utils/concatRuns"
|
||||
executable: "target/nextflow/utils/concatRuns/main.nf"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
dependencies:
|
||||
- "target/dependencies/vsh/vsh/craftbox/v0.1.0/nextflow/concat_text"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "save-params"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'save-params'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
3541
target/nextflow/utils/concatRuns/main.nf
Normal file
3541
target/nextflow/utils/concatRuns/main.nf
Normal file
File diff suppressed because it is too large
Load Diff
125
target/nextflow/utils/concatRuns/nextflow.config
Normal file
125
target/nextflow/utils/concatRuns/nextflow.config
Normal file
@@ -0,0 +1,125 @@
|
||||
manifest {
|
||||
name = 'utils/concatRuns'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'save-params'
|
||||
description = 'Concatenate well FASTQ files from different runs in order to increase sequencing depth.\n'
|
||||
}
|
||||
|
||||
process.container = 'nextflow/bash:latest'
|
||||
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
profiles {
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
docker {
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
singularity {
|
||||
singularity.enabled = true
|
||||
singularity.autoMounts = true
|
||||
docker.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
podman {
|
||||
podman.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
shifter {
|
||||
shifter.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
charliecloud {
|
||||
charliecloud.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
}
|
||||
}
|
||||
|
||||
process{
|
||||
withLabel: mem1gb { memory = 1000000000.B }
|
||||
withLabel: mem2gb { memory = 2000000000.B }
|
||||
withLabel: mem5gb { memory = 5000000000.B }
|
||||
withLabel: mem10gb { memory = 10000000000.B }
|
||||
withLabel: mem20gb { memory = 20000000000.B }
|
||||
withLabel: mem50gb { memory = 50000000000.B }
|
||||
withLabel: mem100gb { memory = 100000000000.B }
|
||||
withLabel: mem200gb { memory = 200000000000.B }
|
||||
withLabel: mem500gb { memory = 500000000000.B }
|
||||
withLabel: mem1tb { memory = 1000000000000.B }
|
||||
withLabel: mem2tb { memory = 2000000000000.B }
|
||||
withLabel: mem5tb { memory = 5000000000000.B }
|
||||
withLabel: mem10tb { memory = 10000000000000.B }
|
||||
withLabel: mem20tb { memory = 20000000000000.B }
|
||||
withLabel: mem50tb { memory = 50000000000000.B }
|
||||
withLabel: mem100tb { memory = 100000000000000.B }
|
||||
withLabel: mem200tb { memory = 200000000000000.B }
|
||||
withLabel: mem500tb { memory = 500000000000000.B }
|
||||
withLabel: mem1gib { memory = 1073741824.B }
|
||||
withLabel: mem2gib { memory = 2147483648.B }
|
||||
withLabel: mem4gib { memory = 4294967296.B }
|
||||
withLabel: mem8gib { memory = 8589934592.B }
|
||||
withLabel: mem16gib { memory = 17179869184.B }
|
||||
withLabel: mem32gib { memory = 34359738368.B }
|
||||
withLabel: mem64gib { memory = 68719476736.B }
|
||||
withLabel: mem128gib { memory = 137438953472.B }
|
||||
withLabel: mem256gib { memory = 274877906944.B }
|
||||
withLabel: mem512gib { memory = 549755813888.B }
|
||||
withLabel: mem1tib { memory = 1099511627776.B }
|
||||
withLabel: mem2tib { memory = 2199023255552.B }
|
||||
withLabel: mem4tib { memory = 4398046511104.B }
|
||||
withLabel: mem8tib { memory = 8796093022208.B }
|
||||
withLabel: mem16tib { memory = 17592186044416.B }
|
||||
withLabel: mem32tib { memory = 35184372088832.B }
|
||||
withLabel: mem64tib { memory = 70368744177664.B }
|
||||
withLabel: mem128tib { memory = 140737488355328.B }
|
||||
withLabel: mem256tib { memory = 281474976710656.B }
|
||||
withLabel: mem512tib { memory = 562949953421312.B }
|
||||
withLabel: cpu1 { cpus = 1 }
|
||||
withLabel: cpu2 { cpus = 2 }
|
||||
withLabel: cpu5 { cpus = 5 }
|
||||
withLabel: cpu10 { cpus = 10 }
|
||||
withLabel: cpu20 { cpus = 20 }
|
||||
withLabel: cpu50 { cpus = 50 }
|
||||
withLabel: cpu100 { cpus = 100 }
|
||||
withLabel: cpu200 { cpus = 200 }
|
||||
withLabel: cpu500 { cpus = 500 }
|
||||
withLabel: cpu1000 { cpus = 1000 }
|
||||
}
|
||||
|
||||
includeConfig("nextflow_labels.config")
|
||||
108
target/nextflow/utils/concatRuns/nextflow_labels.config
Normal file
108
target/nextflow/utils/concatRuns/nextflow_labels.config
Normal file
@@ -0,0 +1,108 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 32.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
}
|
||||
|
||||
try {
|
||||
if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
|
||||
return process.maxMemory
|
||||
}
|
||||
else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
|
||||
return max_memory as nextflow.util.MemoryUnit
|
||||
}
|
||||
else {
|
||||
return to_compare
|
||||
}
|
||||
} catch (all) {
|
||||
println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
|
||||
System.exit(1)
|
||||
}
|
||||
}
|
||||
112
target/nextflow/utils/concatRuns/nextflow_schema.json
Normal file
112
target/nextflow/utils/concatRuns/nextflow_schema.json
Normal file
@@ -0,0 +1,112 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema",
|
||||
"title": "concatRuns",
|
||||
"description": "Concatenate well FASTQ files from different runs in order to increase sequencing depth.\n",
|
||||
"type": "object",
|
||||
"definitions": {
|
||||
|
||||
|
||||
|
||||
"arguments" : {
|
||||
"title": "Arguments",
|
||||
"type": "object",
|
||||
"description": "No description",
|
||||
"properties": {
|
||||
|
||||
|
||||
"input_r1": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. ",
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. "
|
||||
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"input_r2": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. ",
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. "
|
||||
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"sample_id": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `string`, required. ",
|
||||
"help_text": "Type: `string`, required. "
|
||||
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"output_r1": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, default: `$id.$key.output_r1_*.output_r1_*`, multiple_sep: `\";\"`. Path to read 1 fastq/fasta file",
|
||||
"help_text": "Type: List of `file`, default: `$id.$key.output_r1_*.output_r1_*`, multiple_sep: `\";\"`. Path to read 1 fastq/fasta file"
|
||||
,
|
||||
"default":"$id.$key.output_r1_*.output_r1_*"
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"output_r2": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, default: `$id.$key.output_r2_*.output_r2_*`, multiple_sep: `\";\"`. Path to read 2 fastq/fasta file",
|
||||
"help_text": "Type: List of `file`, default: `$id.$key.output_r2_*.output_r2_*`, multiple_sep: `\";\"`. Path to read 2 fastq/fasta file"
|
||||
,
|
||||
"default":"$id.$key.output_r2_*.output_r2_*"
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
"nextflow input-output arguments" : {
|
||||
"title": "Nextflow input-output arguments",
|
||||
"type": "object",
|
||||
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
||||
"properties": {
|
||||
|
||||
|
||||
"publish_dir": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
||||
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
||||
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"param_list": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
||||
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
||||
"hidden": true
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
},
|
||||
"allOf": [
|
||||
|
||||
{
|
||||
"$ref": "#/definitions/arguments"
|
||||
},
|
||||
|
||||
{
|
||||
"$ref": "#/definitions/nextflow input-output arguments"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -83,6 +83,9 @@ resources:
|
||||
description: "List the contents of a directory and parse contained fastq files"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -164,8 +167,8 @@ build_info:
|
||||
engine: "native|native"
|
||||
output: "target/nextflow/utils/listInputDir"
|
||||
executable: "target/nextflow/utils/listInputDir/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -175,7 +178,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// listInputDir save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1749,33 +1906,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2907,6 +3138,10 @@ meta = [
|
||||
],
|
||||
"description" : "List the contents of a directory and parse contained fastq files",
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3003,8 +3238,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/utils/listInputDir",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3019,7 +3254,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -13,12 +13,12 @@ argument_groups:
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--params"
|
||||
description: "The state to save\n"
|
||||
name: "--params_yaml"
|
||||
description: "base64 encoded yaml file containing the state\n"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Outputs"
|
||||
arguments:
|
||||
@@ -40,6 +40,9 @@ resources:
|
||||
description: "Save parameters to a file\n"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -140,8 +143,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/utils/save_params"
|
||||
executable: "target/nextflow/utils/save_params/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -151,7 +154,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// save_params save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1749,33 +1906,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2822,11 +3053,11 @@ meta = [
|
||||
},
|
||||
{
|
||||
"type" : "string",
|
||||
"name" : "--params",
|
||||
"description" : "The state to save\n",
|
||||
"name" : "--params_yaml",
|
||||
"description" : "base64 encoded yaml file containing the state\n",
|
||||
"required" : true,
|
||||
"direction" : "input",
|
||||
"multiple" : true,
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
}
|
||||
]
|
||||
@@ -2860,6 +3091,10 @@ meta = [
|
||||
],
|
||||
"description" : "Save parameters to a file\n",
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -2982,8 +3217,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/utils/save_params",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -2998,7 +3233,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3036,12 +3271,13 @@ tempscript=".viash_script.sh"
|
||||
cat > "$tempscript" << VIASHMAIN
|
||||
import re
|
||||
import yaml
|
||||
import base64
|
||||
|
||||
## VIASH START
|
||||
# The following code has been auto-generated by Viash.
|
||||
par = {
|
||||
'id': $( if [ ! -z ${VIASH_PAR_ID+x} ]; then echo "r'${VIASH_PAR_ID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'params': $( if [ ! -z ${VIASH_PAR_PARAMS+x} ]; then echo "r'${VIASH_PAR_PARAMS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ),
|
||||
'params_yaml': $( if [ ! -z ${VIASH_PAR_PARAMS_YAML+x} ]; then echo "r'${VIASH_PAR_PARAMS_YAML//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi )
|
||||
}
|
||||
meta = {
|
||||
@@ -3074,27 +3310,41 @@ class Dumper(yaml.Dumper):
|
||||
def increase_indent(self, flow=False, indentless=False):
|
||||
return super(Dumper, self).increase_indent(flow, False)
|
||||
|
||||
params = {}
|
||||
|
||||
for param in par['params']:
|
||||
param = param.replace('\\$id', par['id'])
|
||||
key, value = param.split('=')
|
||||
def decode_params_yaml(encoded_yaml):
|
||||
# Step 1: Decode from Base64
|
||||
yaml_bytes = base64.b64decode(encoded_yaml)
|
||||
|
||||
array_match = re.match(r'(.+)\\\\[(\\\\d+)\\\\]\\$', key)
|
||||
if array_match:
|
||||
base_key = array_match.group(1)
|
||||
index = int(array_match.group(2))
|
||||
|
||||
if base_key not in params:
|
||||
params[base_key] = []
|
||||
|
||||
while len(params[base_key]) <= index:
|
||||
params[base_key].append(None)
|
||||
|
||||
params[base_key][index] = value
|
||||
else:
|
||||
params[key] = value
|
||||
|
||||
# Step 2: Convert bytes to string
|
||||
yaml_string = yaml_bytes.decode('utf-8')
|
||||
|
||||
# Step 3: Extract pattern for Java path objects
|
||||
# Find pattern: !!sun.nio.fs.UnixPath /path/to/file
|
||||
pattern = r'!!sun\\\\.nio\\\\.fs\\\\.UnixPath\\\\s+([^\\\\n]+)'
|
||||
|
||||
# Replace with the actual path string (captured group)
|
||||
yaml_string = re.sub(pattern, r'\\\\1', yaml_string)
|
||||
|
||||
# Handle any remaining empty UnixPath objects
|
||||
yaml_string = yaml_string.replace('!!sun.nio.fs.UnixPath {}', '""')
|
||||
|
||||
# Step 4: Parse YAML
|
||||
yaml_data = yaml.safe_load(yaml_string)
|
||||
|
||||
return yaml_data
|
||||
|
||||
def replace_id(value, sample_id):
|
||||
if isinstance(value, str):
|
||||
return value.replace('\\$id', sample_id)
|
||||
elif isinstance(value, list):
|
||||
return [replace_id(item, sample_id) for item in value]
|
||||
return value
|
||||
|
||||
print(par['params_yaml'])
|
||||
|
||||
params = decode_params_yaml(par['params_yaml'])
|
||||
for key, value in params.items():
|
||||
params[key] = replace_id(value, par["id"])
|
||||
|
||||
with open(par["output"], 'w') as f:
|
||||
yaml.dump(params, f, default_flow_style=False, Dumper=Dumper)
|
||||
VIASHMAIN
|
||||
|
||||
@@ -24,11 +24,11 @@
|
||||
|
||||
|
||||
,
|
||||
"params": {
|
||||
"params_yaml": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `string`, required, multiple_sep: `\";\"`. The state to save\n",
|
||||
"help_text": "Type: List of `string`, required, multiple_sep: `\";\"`. The state to save\n"
|
||||
"description": "Type: `string`, required. base64 encoded yaml file containing the state\n",
|
||||
"help_text": "Type: `string`, required. base64 encoded yaml file containing the state\n"
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -20,9 +20,9 @@ argument_groups:
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input_r1"
|
||||
description: "Forward reads in FASTQ format. Multiple files can be provided which\
|
||||
\ will\nbe demultiplexed separately before joining the results for each individual\
|
||||
\ well.\n"
|
||||
description: "Forward reads in FASTQ format. Multiple files corresponding to different\
|
||||
\ lanes can be provided which will\nbe demultiplexed separately before joining\
|
||||
\ the results for each individual well.\n"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
@@ -32,9 +32,9 @@ argument_groups:
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--input_r2"
|
||||
description: "Reverse reads in FASTQ format. Multiple files can be provided which\
|
||||
\ will\nbe demultiplexed separately before joining the results for each individual\
|
||||
\ well.\n"
|
||||
description: "Reverse reads in FASTQ format. Multiple files corresponding to different\
|
||||
\ lanes can be provided which will\nbe demultiplexed separately before joining\
|
||||
\ the results for each individual well.\n"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
@@ -80,26 +80,25 @@ argument_groups:
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--sample_id"
|
||||
description: "Sample ID for the provided input files. If not provided, the value\
|
||||
\ of --id\nwill be used. Input files will allways be demultiplexed separately,\n\
|
||||
but the FASTQs for wells with matching sample IDs will be concatenated before\
|
||||
\ mapping.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Output arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--fastq_output_r1"
|
||||
description: "List of demultiplexed fastq files"
|
||||
name: "--fastq_output"
|
||||
description: "Directory containing output fastq files"
|
||||
info: null
|
||||
default:
|
||||
- "fastq/*_R1_001.fastq"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--fastq_output_r2"
|
||||
description: "List of demultiplexed fastq files"
|
||||
info: null
|
||||
default:
|
||||
- "fastq/*_R2_001.fastq"
|
||||
- "fastq/*"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
@@ -203,6 +202,9 @@ test_resources:
|
||||
entrypoint: "test_wf2"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -237,6 +239,9 @@ dependencies:
|
||||
- name: "report/create_report"
|
||||
repository:
|
||||
type: "local"
|
||||
- name: "utils/concatRuns"
|
||||
repository:
|
||||
type: "local"
|
||||
- name: "utils/save_params"
|
||||
repository:
|
||||
type: "local"
|
||||
@@ -325,8 +330,8 @@ build_info:
|
||||
engine: "native|native"
|
||||
output: "target/nextflow/workflows/htrnaseq"
|
||||
executable: "target/nextflow/workflows/htrnaseq/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
dependencies:
|
||||
- "target/nextflow/stats/combine_star_logs"
|
||||
@@ -339,6 +344,7 @@ build_info:
|
||||
- "target/nextflow/eset/create_fdata"
|
||||
- "target/nextflow/eset/create_pdata"
|
||||
- "target/nextflow/report/create_report"
|
||||
- "target/nextflow/utils/concatRuns"
|
||||
- "target/nextflow/utils/save_params"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -348,7 +354,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// htrnaseq save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1726,8 +1885,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1740,7 +1897,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1752,33 +1909,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2840,7 +3071,7 @@ meta = [
|
||||
{
|
||||
"type" : "file",
|
||||
"name" : "--input_r1",
|
||||
"description" : "Forward reads in FASTQ format. Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n",
|
||||
"description" : "Forward reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n",
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
@@ -2851,7 +3082,7 @@ meta = [
|
||||
{
|
||||
"type" : "file",
|
||||
"name" : "--input_r2",
|
||||
"description" : "Reverse reads in FASTQ format. Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n",
|
||||
"description" : "Reverse reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n",
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
@@ -2901,6 +3132,15 @@ meta = [
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "string",
|
||||
"name" : "--sample_id",
|
||||
"description" : "Sample ID for the provided input files. If not provided, the value of --id\nwill be used. Input files will allways be demultiplexed separately,\nbut the FASTQs for wells with matching sample IDs will be concatenated before mapping.\n",
|
||||
"required" : false,
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -2909,24 +3149,10 @@ meta = [
|
||||
"arguments" : [
|
||||
{
|
||||
"type" : "file",
|
||||
"name" : "--fastq_output_r1",
|
||||
"description" : "List of demultiplexed fastq files",
|
||||
"name" : "--fastq_output",
|
||||
"description" : "Directory containing output fastq files",
|
||||
"default" : [
|
||||
"fastq/*_R1_001.fastq"
|
||||
],
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
"direction" : "output",
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "file",
|
||||
"name" : "--fastq_output_r2",
|
||||
"description" : "List of demultiplexed fastq files",
|
||||
"default" : [
|
||||
"fastq/*_R2_001.fastq"
|
||||
"fastq/*"
|
||||
],
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
@@ -3058,6 +3284,10 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3124,6 +3354,12 @@ meta = [
|
||||
"type" : "local"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name" : "utils/concatRuns",
|
||||
"repository" : {
|
||||
"type" : "local"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name" : "utils/save_params",
|
||||
"repository" : {
|
||||
@@ -3234,8 +3470,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/workflows/htrnaseq",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3250,7 +3486,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3289,70 +3525,154 @@ include { create_eset } from "${meta.resources_dir}/../../../nextflow/eset/creat
|
||||
include { create_fdata } from "${meta.resources_dir}/../../../nextflow/eset/create_fdata/main.nf"
|
||||
include { create_pdata } from "${meta.resources_dir}/../../../nextflow/eset/create_pdata/main.nf"
|
||||
include { create_report } from "${meta.resources_dir}/../../../nextflow/report/create_report/main.nf"
|
||||
include { concatRuns } from "${meta.resources_dir}/../../../nextflow/utils/concatRuns/main.nf"
|
||||
include { save_params } from "${meta.resources_dir}/../../../nextflow/utils/save_params/main.nf"
|
||||
|
||||
// inner workflow
|
||||
// user-provided Nextflow code
|
||||
workflow run_wf {
|
||||
take:
|
||||
input_ch
|
||||
raw_ch
|
||||
|
||||
main:
|
||||
input_ch = raw_ch
|
||||
// Use the event ID as the default for the sample ID
|
||||
| map {id, state ->
|
||||
def sample_id = state.sample_id ?: id
|
||||
def newState = state + ["sample_id": sample_id, "run_id": id]
|
||||
return [id, newState]
|
||||
}
|
||||
|
||||
// The featureData only has one requirement: the genome annotation.
|
||||
// It can be generated straight away.
|
||||
// It can be generated straight away. Most of the time, there is one shared
|
||||
// annotation for all of the inputs and the fData should only be calculated once.
|
||||
// The state is manpulated in such a way that there is one event created per unique
|
||||
// input annotation file. In turn, the featureData file can joined into the original input
|
||||
// channel which allows it to be shared across events if required.
|
||||
f_data_ch = input_ch
|
||||
|
||||
| save_params.run(
|
||||
fromState: {id, state ->
|
||||
// Convert state to list of key=value parameters
|
||||
def params_list = []
|
||||
|
||||
// Add each parameter as key=value
|
||||
state.each { key, value ->
|
||||
if (value != null) {
|
||||
// Handle different types of values
|
||||
if (value instanceof Collection) {
|
||||
// For collections, add multiple entries with array notation
|
||||
value.eachWithIndex { item, index ->
|
||||
params_list.add("${key}[${index}]=${item}")
|
||||
}
|
||||
} else {
|
||||
// For simple values, just add key=value
|
||||
params_list.add("${key}=${value}")
|
||||
}
|
||||
}
|
||||
fromState: {id, state ->
|
||||
// Define the function before using it
|
||||
def convertPaths
|
||||
convertPaths = { value ->
|
||||
if (value instanceof java.nio.file.Path)
|
||||
return value.toUriString()
|
||||
else if (value instanceof Collection)
|
||||
return value.collect { convertPaths(it) }
|
||||
else
|
||||
return value
|
||||
}
|
||||
|
||||
// Apply conversion to all state values
|
||||
def convertedState = state.collectEntries { k, v -> [(k): convertPaths(v)] }
|
||||
|
||||
def yaml = new org.yaml.snakeyaml.Yaml()
|
||||
def yamlString = yaml.dump(convertedState)
|
||||
def encodedYaml = yamlString.bytes.encodeBase64().toString()
|
||||
|
||||
return [
|
||||
"id": id,
|
||||
"params": params_list,
|
||||
"params_yaml": encodedYaml,
|
||||
"output": "${id}_parameters.yaml"
|
||||
]
|
||||
},
|
||||
toState: ["parameters": "output"]
|
||||
)
|
||||
|
||||
| toSortedList()
|
||||
| flatMap {ids_and_states ->
|
||||
def annotation_files = ids_and_states.inject([:]){ old_state, id_and_state ->
|
||||
def (id, state) = id_and_state
|
||||
def annotation_file = state.annotation
|
||||
def new_state = old_state + [(annotation_file): (old_state.getOrDefault(annotation_file, []) + [id])]
|
||||
return new_state
|
||||
}
|
||||
def file_names = annotation_files.keySet().collect{it.name}
|
||||
assert (file_names.toSet().size() == file_names.size()):
|
||||
"Please make sure that the annotation files have unique file names."
|
||||
def new_states = annotation_files.collect{annotation_file, value ->
|
||||
def new_state = [annotation_file.name , ["annotation": annotation_file, "event_ids": value]]
|
||||
return new_state
|
||||
}
|
||||
return new_states
|
||||
}
|
||||
| create_fdata.run(
|
||||
directives: [label: ["lowmem", "lowcpu"]],
|
||||
fromState: [
|
||||
"gtf": "annotation",
|
||||
"output": "f_data"
|
||||
],
|
||||
toState: {id, result, state -> ["f_data": result.output]}
|
||||
toState: ["f_data": "output"]
|
||||
)
|
||||
| flatMap {_, state ->
|
||||
def new_states = state.event_ids.collect{event_id ->
|
||||
[event_id, ["f_data": state.f_data]]
|
||||
}
|
||||
return new_states
|
||||
}
|
||||
|
||||
// Perform mapping of each well.
|
||||
mapping_ch = input_ch
|
||||
demultiplex_ch = input_ch
|
||||
| well_demultiplex.run(
|
||||
fromState: [
|
||||
"input_r1": "input_r1",
|
||||
"input_r2": "input_r2",
|
||||
"barcodesFasta": "barcodesFasta",
|
||||
],
|
||||
toState: [
|
||||
"input_r1": "output_r1",
|
||||
"input_r2": "output_r2",
|
||||
]
|
||||
toState: {id, result, state ->
|
||||
def all_fastq = result.output_r1 + result.output_r2
|
||||
def output_dir = all_fastq.collect{it.parent}.unique()
|
||||
assert output_dir.size() == 1: "Expected output from well demultiplexing to reside into one directory."
|
||||
def new_state = state + [
|
||||
"input_r1": result.output_r1,
|
||||
"input_r2": result.output_r2,
|
||||
"fastq_output_directory": output_dir[0],
|
||||
]
|
||||
return new_state
|
||||
}
|
||||
)
|
||||
|
||||
fastq_output_directory_ch = demultiplex_ch
|
||||
| map {id, state ->
|
||||
def new_event = [state.sample_id, state]
|
||||
return new_event
|
||||
}
|
||||
| groupTuple(by: 0, sort: "hash")
|
||||
| map {id, states ->
|
||||
def fastq_output_dirs = states.collect{it.fastq_output_directory}
|
||||
def new_state = ["fastq_output_directory": fastq_output_dirs]
|
||||
def new_event = [id, new_state]
|
||||
return [id, new_state]
|
||||
}
|
||||
|
||||
|
||||
concat_samples_ch = demultiplex_ch.join(f_data_ch)
|
||||
| map {id, demutliplex_state, f_data_state ->
|
||||
def newState = demutliplex_state + ["f_data": f_data_state["f_data"]]
|
||||
[id, newState]
|
||||
}
|
||||
| concatRuns.run(
|
||||
fromState: [
|
||||
"input_r1": "input_r1",
|
||||
"input_r2": "input_r2",
|
||||
"sample_id": "sample_id",
|
||||
],
|
||||
toState: {id, result, state ->
|
||||
def state_overwite = [
|
||||
"input_r1": result.output_r1,
|
||||
"input_r2": result.output_r2,
|
||||
"_meta": ["join_id": state.run_id]
|
||||
]
|
||||
return state + state_overwite
|
||||
}
|
||||
)
|
||||
|
||||
pool_ch = concat_samples_ch.join(fastq_output_directory_ch)
|
||||
| map {id, demux_state, fastq_output_directory_state ->
|
||||
def new_state = demux_state + fastq_output_directory_state
|
||||
return [id, new_state]
|
||||
}
|
||||
| parallel_map.run(
|
||||
directives: ["label": ["highmem", "lowcpu"]],
|
||||
fromState: {id, state ->
|
||||
@@ -3369,9 +3689,6 @@ workflow run_wf {
|
||||
"star_output": "output",
|
||||
]
|
||||
)
|
||||
|
||||
// From the mapped wells, create statistics based on the BAM files.
|
||||
pool_ch = mapping_ch
|
||||
// Split the events from 1 event per pool into events per well
|
||||
// and add extra metadata about the wells to the state.
|
||||
| well_metadata.run(
|
||||
@@ -3425,10 +3742,10 @@ workflow run_wf {
|
||||
// Gather the keys from all states. for some state items,
|
||||
// we need gather all the different items from across the states
|
||||
def barcodes = states.collect{it.barcode}
|
||||
assert barcodes.clone().unique().size() == barcodes.size(), \
|
||||
assert barcodes.clone().unique().size() == barcodes.size(): \
|
||||
"Error when gathering information for pool ${id}, barcodes are not unique!"
|
||||
def well_ids = states.collect{it.well_id}
|
||||
assert well_ids.clone().unique().size() == well_ids.size(), \
|
||||
assert well_ids.clone().unique().size() == well_ids.size(): \
|
||||
"Error when gathering information for pool ${id}, well IDs are not unique!"
|
||||
def custom_state = [
|
||||
"input_r1": states.collect{it.input_r1},
|
||||
@@ -3449,7 +3766,7 @@ workflow run_wf {
|
||||
// All other state should have a unique value
|
||||
def old_state_items = other_state_keys.inject([:]){ old_state, argument_name ->
|
||||
argument_values = states.collect{it.get(argument_name)}.unique()
|
||||
assert argument_values.size() == 1, "Arguments should be the same across modalities. Please report this \
|
||||
assert argument_values.size() == 1: "Arguments should be the same across modalities. Please report this \
|
||||
as a bug. Argument name: $argument_name, \
|
||||
argument value: $argument_values"
|
||||
def argument_value
|
||||
@@ -3492,7 +3809,7 @@ workflow run_wf {
|
||||
]
|
||||
)
|
||||
|
||||
p_data_ch = star_logs_ch.join(pool_statistics_ch, remainder: true)
|
||||
eset_ch = star_logs_ch.join(pool_statistics_ch, remainder: true)
|
||||
| map {id, star_logs_state, pool_statistics_state ->
|
||||
def newState = star_logs_state + ["nrReadsNrGenesPerChromPool": pool_statistics_state.nrReadsNrGenesPerChromPool]
|
||||
return [id, newState]
|
||||
@@ -3506,12 +3823,6 @@ workflow run_wf {
|
||||
],
|
||||
toState: ["p_data": "output"],
|
||||
)
|
||||
|
||||
eset_ch = p_data_ch.join(f_data_ch, remainder: true)
|
||||
| map {id, p_data_state, f_data_state ->
|
||||
def newState = p_data_state + ["f_data": f_data_state["f_data"]]
|
||||
[id, newState]
|
||||
}
|
||||
| create_eset.run(
|
||||
directives: [label: ["lowmem", "lowcpu"]],
|
||||
fromState: [
|
||||
@@ -3553,13 +3864,14 @@ workflow run_wf {
|
||||
|
||||
output_ch = eset_ch.join(report_channel)
|
||||
| map {id, state_eset, state_report ->
|
||||
def new_state = state_eset + ["html_report": state_report.html_report]
|
||||
def new_state = state_eset + [
|
||||
"html_report": state_report.html_report,
|
||||
]
|
||||
[id, new_state]
|
||||
}
|
||||
| setState([
|
||||
"star_output": "star_output",
|
||||
"fastq_output_r1": "input_r1",
|
||||
"fastq_output_r2": "input_r2",
|
||||
"star_output": "star_output",
|
||||
"fastq_output": "fastq_output_directory",
|
||||
"star_output": "star_output",
|
||||
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChromPool",
|
||||
"star_qc_metrics": "star_qc_metrics",
|
||||
@@ -3567,7 +3879,8 @@ workflow run_wf {
|
||||
"f_data": "f_data",
|
||||
"p_data": "p_data",
|
||||
"html_report": "html_report",
|
||||
"parameters": "parameters"
|
||||
"parameters": "parameters",
|
||||
"_meta": "_meta",
|
||||
])
|
||||
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Forward reads in FASTQ format",
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Forward reads in FASTQ format. Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n"
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Forward reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n"
|
||||
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Reverse reads in FASTQ format",
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Reverse reads in FASTQ format. Multiple files can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n"
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Reverse reads in FASTQ format. Multiple files corresponding to different lanes can be provided which will\nbe demultiplexed separately before joining the results for each individual well.\n"
|
||||
|
||||
}
|
||||
|
||||
@@ -74,6 +74,16 @@
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"sample_id": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `string`. Sample ID for the provided input files",
|
||||
"help_text": "Type: `string`. Sample ID for the provided input files. If not provided, the value of --id\nwill be used. Input files will allways be demultiplexed separately,\nbut the FASTQs for wells with matching sample IDs will be concatenated before mapping.\n"
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
@@ -85,24 +95,13 @@
|
||||
"properties": {
|
||||
|
||||
|
||||
"fastq_output_r1": {
|
||||
"fastq_output": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, required, default: `$id.$key.fastq_output_r1_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files",
|
||||
"help_text": "Type: List of `file`, required, default: `$id.$key.fastq_output_r1_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files"
|
||||
"description": "Type: List of `file`, required, default: `$id.$key.fastq_output_*.fastq_output_*`, multiple_sep: `\";\"`. Directory containing output fastq files",
|
||||
"help_text": "Type: List of `file`, required, default: `$id.$key.fastq_output_*.fastq_output_*`, multiple_sep: `\";\"`. Directory containing output fastq files"
|
||||
,
|
||||
"default":"$id.$key.fastq_output_r1_*.fastq"
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"fastq_output_r2": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, required, default: `$id.$key.fastq_output_r2_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files",
|
||||
"help_text": "Type: List of `file`, required, default: `$id.$key.fastq_output_r2_*.fastq`, multiple_sep: `\";\"`. List of demultiplexed fastq files"
|
||||
,
|
||||
"default":"$id.$key.fastq_output_r2_*.fastq"
|
||||
"default":"$id.$key.fastq_output_*.fastq_output_*"
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ argument_groups:
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--barcodesFasta"
|
||||
@@ -123,6 +123,9 @@ resources:
|
||||
description: "Runner for HT RNA-seq pipeline"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -217,8 +220,8 @@ build_info:
|
||||
engine: "native|native"
|
||||
output: "target/nextflow/workflows/runner"
|
||||
executable: "target/nextflow/workflows/runner/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
dependencies:
|
||||
- "target/nextflow/utils/listInputDir"
|
||||
@@ -233,7 +236,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// runner save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -173,7 +173,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -192,15 +192,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -213,6 +206,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1666,6 +1669,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1723,8 +1882,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1737,7 +1894,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1749,33 +1906,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1806,13 +1947,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1829,7 +1967,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1860,13 +1998,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1874,18 +2008,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2559,7 +2692,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2716,12 +2850,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2734,19 +2892,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2755,23 +2988,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2819,7 +3050,7 @@ meta = [
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
"direction" : "input",
|
||||
"multiple" : true,
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
@@ -2959,6 +3190,10 @@ meta = [
|
||||
],
|
||||
"description" : "Runner for HT RNA-seq pipeline",
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3081,8 +3316,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/workflows/runner",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3097,7 +3332,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3143,19 +3378,13 @@ workflow run_wf {
|
||||
input_ch
|
||||
|
||||
main:
|
||||
output_ch = input_ch
|
||||
// Multiple runs can be provided, and the reads for these runs will
|
||||
// be concatenated. Here, we gather the FASTQ files from each input directory first.
|
||||
| flatMap {id, state ->
|
||||
// Create an input event per input directory
|
||||
def new_state = state.input.withIndex().collect{input_dir, id_index ->
|
||||
def state_item = state + ["input": input_dir, "index": id_index, "run_id": id]
|
||||
return ["${id}_${id_index}".toString(), state_item]
|
||||
}
|
||||
return new_state
|
||||
}
|
||||
htrnaseq_ch = input_ch
|
||||
// List the FASTQ files per input directory
|
||||
// Be careful: an event per lane is created!
|
||||
| map {id, state ->
|
||||
def new_state = state + ["run_id": id]
|
||||
return [id, new_state]
|
||||
}
|
||||
| listInputDir.run(
|
||||
fromState: [
|
||||
"input": "input",
|
||||
@@ -3173,13 +3402,11 @@ workflow run_wf {
|
||||
// there might be multiple FASTQs for a single sample that correspond to the
|
||||
// lanes. So the fastq files must be gathered across lanes and input folders
|
||||
// in order to create an input lists for R1 and R2.
|
||||
| map {id, state -> [state.sample_id, state]}
|
||||
| groupTuple(by: 0, sort: { state1, state2 ->
|
||||
if (state1.index == state2.index) {
|
||||
return state1.lane <=> state2.lane
|
||||
}
|
||||
return state1.index <=> state2.index
|
||||
})
|
||||
// The ID of the event here is important! It determines the name of the output
|
||||
// folders for the FASTQ files and these folders are published as-is later.
|
||||
// The folder where the FASTQ files are stored in should be named after the run ID.
|
||||
| map {id, state -> ["${state.sample_id}/${state.run_id}".toString(), state]}
|
||||
| groupTuple(by: 0, sort: "hash")
|
||||
| map {id, states ->
|
||||
def new_r1 = states.collect{it.r1_output}
|
||||
def new_r2 = states.collect{it.r2_output}
|
||||
@@ -3188,7 +3415,7 @@ workflow run_wf {
|
||||
// TODO: this can be asserted.
|
||||
def new_state = states[0] + [
|
||||
"r1": new_r1,
|
||||
"r2": new_r2
|
||||
"r2": new_r2,
|
||||
]
|
||||
return [id, new_state]
|
||||
}
|
||||
@@ -3197,8 +3424,7 @@ workflow run_wf {
|
||||
f_data: 'fData/$id.txt',
|
||||
p_data: 'pData/$id.txt',
|
||||
star_output: 'star_output/$id/*',
|
||||
fastq_output_r1: 'fastq/*_R1_001.fastq',
|
||||
fastq_output_r2: 'fastq/*_R1_001.fastq',
|
||||
fastq_output: 'fastq/*',
|
||||
eset: 'esets/$id.rds',
|
||||
nrReadsNrGenesPerChrom: 'nrReadsNrGenesPerChrom/$id.txt',
|
||||
star_qc_metrics: 'starLogs/$id.txt',
|
||||
@@ -3211,32 +3437,32 @@ workflow run_wf {
|
||||
genomeDir: "genomeDir",
|
||||
annotation: "annotation",
|
||||
umi_length: "umi_length",
|
||||
sample_id: "sample_id",
|
||||
],
|
||||
toState: { id, result, state -> state + result }
|
||||
)
|
||||
|
||||
// The HT-RNAseq workflow outputs multiple events, one per 'pool' (usually a plate)
|
||||
// but for publishing the results, this is not handy because we want to use the $id
|
||||
// variable as a pointer to the target data.
|
||||
//
|
||||
// So, we should combine everything together
|
||||
//
|
||||
// project_id / experiment_id / date_workflow
|
||||
|
||||
// project_id / experiment_id / "data_processed" / date_workflow
|
||||
grouped_ch = htrnaseq_ch
|
||||
| toSortedList
|
||||
|
||||
| map{ vs ->
|
||||
def all_fastqs
|
||||
[
|
||||
vs[0][1].run_id, // The original ID
|
||||
[
|
||||
star_output: reduce_paths(vs.collect{ it[1].star_output }.flatten()),
|
||||
fastq_output_r1: reduce_paths(vs.collect{ it[1].fastq_output_r1 }.flatten(), 1),
|
||||
fastq_output_r2: reduce_paths(vs.collect{ it[1].fastq_output_r2 }.flatten(), 1),
|
||||
nrReadsNrGenesPerChrom: reduce_paths(vs.collect{ it[1].nrReadsNrGenesPerChrom }),
|
||||
star_qc_metrics: reduce_paths(vs.collect{ it[1].star_qc_metrics }),
|
||||
eset: reduce_paths(vs.collect{ it[1].eset }),
|
||||
f_data: reduce_paths(vs.collect{ it[1].f_data }),
|
||||
p_data: reduce_paths(vs.collect{ it[1].p_data }),
|
||||
fastq_output: vs.collect{ it[1].fastq_output }.flatten().unique(),
|
||||
html_report: vs.collect{ it[1].html_report }[0], // The report is for all pools
|
||||
plain_output: vs.collect{ it[1].plain_output }[0],
|
||||
project_id: vs.collect{ it[1].project_id }[0],
|
||||
@@ -3245,12 +3471,13 @@ workflow run_wf {
|
||||
]
|
||||
}
|
||||
|
||||
results_publish_ch = grouped_ch
|
||||
| publish_results.run(
|
||||
fromState: { id, state ->
|
||||
def project = (state.plain_output) ? id : "${state.project_id}"
|
||||
def experiment = (state.plain_output) ? id : "${state.experiment_id}"
|
||||
def id0 = "${project}/${experiment}"
|
||||
def id1 = (state.plain_output) ? id : "${id0}/${date}"
|
||||
def id1 = (state.plain_output) ? id : "${id0}/data_processed/${date}"
|
||||
def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}"
|
||||
|
||||
if (id == id2) {
|
||||
@@ -3281,14 +3508,24 @@ workflow run_wf {
|
||||
]
|
||||
)
|
||||
|
||||
fastq_publish_ch = grouped_ch
|
||||
| flatMap{id, state ->
|
||||
def new_states = state.fastq_output.collect{fastq_dir ->
|
||||
def new_id = fastq_dir.name // The folder name corresponds to the run
|
||||
def fastq_files = fastq_dir.listFiles()
|
||||
def new_state = [
|
||||
"fastq_output": fastq_files
|
||||
]
|
||||
return [new_id, new_state]
|
||||
}
|
||||
return new_states
|
||||
}
|
||||
| publish_fastqs.run(
|
||||
fromState: { id, state ->
|
||||
def id0 = "${id}"
|
||||
def id1 = (state.plain_output) ? id : "${id0}/${date}"
|
||||
def id2 = (state.plain_output) ? id : "${id1}_htrnaseq_${version}"
|
||||
|
||||
println(state.plain_output)
|
||||
|
||||
if (id == id2) {
|
||||
println("Publising fastqs to ${params.fastq_publish_dir}")
|
||||
} else {
|
||||
@@ -3296,8 +3533,7 @@ workflow run_wf {
|
||||
}
|
||||
|
||||
[
|
||||
input_r1: state.fastq_output_r1,
|
||||
input_r2: state.fastq_output_r2,
|
||||
input: state.fastq_output,
|
||||
output: "${id2}",
|
||||
]
|
||||
},
|
||||
@@ -3312,7 +3548,7 @@ workflow run_wf {
|
||||
)
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
grouped_ch
|
||||
| map{ id, state -> [ id, [ _meta: [ join_id: state.run_id ] ] ] }
|
||||
}
|
||||
|
||||
|
||||
@@ -17,8 +17,8 @@
|
||||
"input": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`",
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`"
|
||||
"description": "Type: `file`, required. Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`",
|
||||
"help_text": "Type: `file`, required. Base directory of the form `s3:/\u003cbucket\u003e/Sequencing/\u003cSequencer\u003e/\u003cRunID\u003e/\u003cdemultiplex_dir\u003e`"
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -109,6 +109,9 @@ test_resources:
|
||||
entrypoint: "test_wf2"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -210,8 +213,8 @@ build_info:
|
||||
engine: "native|native"
|
||||
output: "target/nextflow/workflows/well_demultiplex"
|
||||
executable: "target/nextflow/workflows/well_demultiplex/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
dependencies:
|
||||
- "target/dependencies/vsh/vsh/biobox/v0.3.0/nextflow/cutadapt"
|
||||
@@ -224,7 +227,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// well_demultiplex save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -177,7 +177,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -196,15 +196,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -217,6 +210,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1670,6 +1673,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1727,8 +1886,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1741,7 +1898,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1753,33 +1910,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1810,13 +1951,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1833,7 +1971,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1864,13 +2002,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1878,18 +2012,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2563,7 +2696,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2720,12 +2854,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2738,19 +2896,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2759,23 +2992,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2955,6 +3186,10 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3083,8 +3318,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/workflows/well_demultiplex",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3099,7 +3334,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3195,6 +3430,8 @@ workflow run_wf {
|
||||
output: new_output,
|
||||
error_rate: 0.10,
|
||||
demultiplex_mode: "single",
|
||||
output_r1: state.output_r1,
|
||||
output_r2: state.output_r2,
|
||||
]
|
||||
},
|
||||
toState: { id, result, state ->
|
||||
|
||||
@@ -127,6 +127,9 @@ resources:
|
||||
dest: "nextflow_labels.config"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
@@ -208,8 +211,8 @@ build_info:
|
||||
engine: "native|native"
|
||||
output: "target/nextflow/workflows/well_metadata"
|
||||
executable: "target/nextflow/workflows/well_metadata/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1561d769c65192a820053a565654dee8cbe55588"
|
||||
viash_version: "0.9.2"
|
||||
git_commit: "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
@@ -219,7 +222,7 @@ package_config:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
viash_version: "0.9.2"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// well_metadata save-params
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -176,7 +176,7 @@ def _checkArgumentType(String stage, Map par, Object value, String errorIdentifi
|
||||
Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.required) {
|
||||
if (arg.required && arg.direction == "input") {
|
||||
assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
|
||||
}
|
||||
@@ -195,15 +195,8 @@ Map _processInputValues(Map inputs, Map config, String id, String key) {
|
||||
}
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
|
||||
Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
|
||||
outputs = outputs.collectEntries { name, value ->
|
||||
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
|
||||
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
|
||||
@@ -216,6 +209,16 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
|
||||
return outputs
|
||||
}
|
||||
|
||||
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
|
||||
if (!workflow.stubRun) {
|
||||
config.allArguments.each { arg ->
|
||||
if (arg.direction == "output" && arg.required) {
|
||||
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
|
||||
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
|
||||
class IDChecker {
|
||||
final def items = [] as Set
|
||||
@@ -1669,6 +1672,162 @@ def joinStates(Closure apply_) {
|
||||
}
|
||||
return joinStatesWf
|
||||
}
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf'
|
||||
def publishFiles(Map args) {
|
||||
def key_ = args.get("key")
|
||||
|
||||
assert key_ != null : "publishFiles: key must be specified"
|
||||
|
||||
workflow publishFilesWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1]
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
[id_, inputFiles_, outputFilenames_]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesWf
|
||||
}
|
||||
|
||||
process publishFilesProc {
|
||||
// todo: check publishpath?
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
output:
|
||||
tuple val(id), path{outputFiles}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
// this assumes that the state contains no other values other than those specified in the config
|
||||
def publishFilesByConfig(Map args) {
|
||||
def config = args.get("config")
|
||||
assert config != null : "publishFilesByConfig: config must be specified"
|
||||
|
||||
def key_ = args.get("key", config.name)
|
||||
assert key_ != null : "publishFilesByConfig: key must be specified"
|
||||
|
||||
workflow publishFilesSimpleWf {
|
||||
take: input_ch
|
||||
main:
|
||||
input_ch
|
||||
| map { tup ->
|
||||
def id_ = tup[0]
|
||||
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
|
||||
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']
|
||||
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
.collectMany { par ->
|
||||
def plainName_ = par.plainName
|
||||
// if the state does not contain the key, it's an
|
||||
// optional argument for which the component did
|
||||
// not generate any output OR multiple channels were emitted
|
||||
// and the output was just not added to using the channel
|
||||
// that is now being parsed
|
||||
if (!state_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def value = state_[plainName_]
|
||||
// if the parameter is not a file, it should be stored
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[inputPath: [], outputFilename: []]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
// that it should not be returned as a state
|
||||
if (!origState_.containsKey(plainName_)) {
|
||||
return []
|
||||
}
|
||||
def filenameTemplate = origState_[plainName_]
|
||||
// if the pararameter is multiple: true, fetch the template
|
||||
if (par.multiple && filenameTemplate instanceof List) {
|
||||
filenameTemplate = filenameTemplate[0]
|
||||
}
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
// the index of the file
|
||||
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
|
||||
def outputPerFile = value.withIndex().collect{ val, ix ->
|
||||
def filename_ix = filename.replace("*", ix.toString())
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[inputPath: inputPath, outputFilename: filename_ix]
|
||||
}
|
||||
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[inputPath: [inputPath], outputFilename: [filename]]]
|
||||
}
|
||||
}
|
||||
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
|
||||
[id_, inputPaths, outputFilenames]
|
||||
}
|
||||
| publishFilesProc
|
||||
emit: input_ch
|
||||
}
|
||||
return publishFilesSimpleWf
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf'
|
||||
def collectFiles(obj) {
|
||||
if (obj instanceof java.io.File || obj instanceof Path) {
|
||||
@@ -1726,8 +1885,6 @@ def publishStates(Map args) {
|
||||
|
||||
// the input files and the target output filenames
|
||||
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
|
||||
def inputFiles_ = inputoutputFilenames_[0]
|
||||
def outputFilenames_ = inputoutputFilenames_[1]
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
@@ -1740,7 +1897,7 @@ def publishStates(Map args) {
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -1752,33 +1909,17 @@ process publishStatesProc {
|
||||
publishDir path: "${getPublishDir()}/", mode: "copy"
|
||||
tag "$id"
|
||||
input:
|
||||
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
|
||||
tuple val(id), val(yamlBlob), val(yamlFile)
|
||||
output:
|
||||
tuple val(id), path{[yamlFile] + outputFiles}
|
||||
tuple val(id), path{[yamlFile]}
|
||||
script:
|
||||
def copyCommands = [
|
||||
inputFiles instanceof List ? inputFiles : [inputFiles],
|
||||
outputFiles instanceof List ? outputFiles : [outputFiles]
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
[]
|
||||
}
|
||||
}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
echo '${yamlBlob}' > '${yamlFile}'
|
||||
echo "Copying output files to destination folder"
|
||||
${copyCommands.join("\n ")}
|
||||
"""
|
||||
mkdir -p "\$(dirname '${yamlFile}')"
|
||||
echo "Storing state as yaml"
|
||||
cat > '${yamlFile}' << HERE
|
||||
${yamlBlob}
|
||||
HERE
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
@@ -1809,13 +1950,10 @@ def publishStatesByConfig(Map args) {
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
// the processed state is a list of [key, value] tuples, where
|
||||
// - key is a String
|
||||
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
|
||||
// - inputPath is a List[Path]
|
||||
// - outputFilename is a List[String]
|
||||
// - (key, value) are the tuples that will be saved to the state.yaml file
|
||||
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
|
||||
def processedState =
|
||||
config.allArguments
|
||||
.findAll { it.direction == "output" }
|
||||
@@ -1832,7 +1970,7 @@ def publishStatesByConfig(Map args) {
|
||||
// in the state as-is, but is not something that needs
|
||||
// to be copied from the source path to the dest path
|
||||
if (par.type != "file") {
|
||||
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
|
||||
return [[key: plainName_, value: value]]
|
||||
}
|
||||
// if the orig state does not contain this filename,
|
||||
// it's an optional argument for which the user specified
|
||||
@@ -1863,13 +2001,9 @@ def publishStatesByConfig(Map args) {
|
||||
if (yamlDir != null) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = val instanceof File ? val.toPath() : val
|
||||
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
|
||||
return value_
|
||||
}
|
||||
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
|
||||
[key, outputPerFile.collect{dic -> dic[key]}]
|
||||
}
|
||||
return [[key: plainName_] + transposedOutputs]
|
||||
return [["key": plainName_, "value": outputPerFile]]
|
||||
} else {
|
||||
def value_ = java.nio.file.Paths.get(filename)
|
||||
// if id contains a slash
|
||||
@@ -1877,18 +2011,17 @@ def publishStatesByConfig(Map args) {
|
||||
value_ = yamlDir.relativize(value_)
|
||||
}
|
||||
def inputPath = value instanceof File ? value.toPath() : value
|
||||
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
|
||||
return [["key": plainName_, value: value_]]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def updatedState_ = processedState.collectEntries{[it.key, it.value]}
|
||||
def inputPaths = processedState.collectMany{it.inputPath}
|
||||
def outputFilenames = processedState.collectMany{it.outputFilename}
|
||||
|
||||
// convert state to yaml blob
|
||||
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)
|
||||
|
||||
[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
|
||||
[id_, yamlBlob_, yamlFilename]
|
||||
}
|
||||
| publishStatesProc
|
||||
emit: input_ch
|
||||
@@ -2562,7 +2695,8 @@ def _debug(workflowArgs, debugKey) {
|
||||
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
|
||||
def key_ = workflowArgs["key"]
|
||||
|
||||
def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}
|
||||
|
||||
workflow workflowInstance {
|
||||
take: input_
|
||||
|
||||
@@ -2719,12 +2853,36 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
// TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
|
||||
def chInitialOutput = chArgsWithDefaults
|
||||
def chInitialOutputMulti = chArgsWithDefaults
|
||||
| _debug(workflowArgs, "processed")
|
||||
// run workflow
|
||||
| innerWorkflowFactory(workflowArgs)
|
||||
// check output tuple
|
||||
| map { id_, output_ ->
|
||||
def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
|
||||
assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
|
||||
// Add a channel ID to the events, which designates the channel the event was emitted from as a running number
|
||||
// This number is used to sort the events later when the events are gathered from across the channels.
|
||||
def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
|
||||
def newChannel = channel
|
||||
| map {tuple ->
|
||||
assert tuple instanceof List :
|
||||
"Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
|
||||
" Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
|
||||
" Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
|
||||
|
||||
def newEvent = [channelIndex] + tuple
|
||||
return newEvent
|
||||
}
|
||||
return newChannel
|
||||
}
|
||||
// Put the events into 1 channel, cover case where there is only one channel is emitted
|
||||
def chInitialOutput = chInitialOutputList.size() > 1 ? \
|
||||
chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
|
||||
chInitialOutputListWithIndexedEvents[0]
|
||||
def chInitialOutputProcessed = chInitialOutput
|
||||
| map { tuple ->
|
||||
def channelId = tuple[0]
|
||||
def id_ = tuple[1]
|
||||
def output_ = tuple[2]
|
||||
|
||||
// see if output map contains metadata
|
||||
def meta_ =
|
||||
@@ -2737,19 +2895,94 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
output_ = output_.findAll{k, v -> k != "_meta"}
|
||||
|
||||
// check value types
|
||||
output_ = _processOutputValues(output_, meta.config, id_, key_)
|
||||
output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && output_.size() == 1) {
|
||||
output_ = output_.values()[0]
|
||||
}
|
||||
|
||||
[join_id, id_, output_]
|
||||
[join_id, channelId, id_, output_]
|
||||
}
|
||||
// | view{"chInitialOutput: ${it.take(3)}"}
|
||||
|
||||
// join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, channel_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
def new_state = workflowArgs.toState(tup.drop(2).take(3))
|
||||
tup.take(3) + [new_state] + tup.drop(5)
|
||||
}
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublishFiles = chPublishWithPreviousState
|
||||
// input tuple format: [join_id, channel_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, channel_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(4)
|
||||
}
|
||||
|
||||
safeJoin(chPublishFiles, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(2).take(3)
|
||||
}
|
||||
| publishFilesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
// Join the state from the events that were emitted from different channels
|
||||
def chJoined = chInitialOutputProcessed
|
||||
| map {tuple ->
|
||||
def join_id = tuple[0]
|
||||
def channel_id = tuple[1]
|
||||
def id = tuple[2]
|
||||
def other = tuple.drop(3)
|
||||
// Below, groupTuple is used to join the events. To make sure resuming a workflow
|
||||
// keeps working, the output state must be deterministic. This means the state needs to be
|
||||
// sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
|
||||
// but hashing the state when it is large can be problematic in terms of performance.
|
||||
// Therefore, a custom comparator function is provided. We add the channel ID to the
|
||||
// states so that we can use the channel ID to sort the items.
|
||||
def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
|
||||
// A comparator that is provided to groupTuple's 'sort' argument is applied
|
||||
// to all elements of the event tuple (that is not the 'id'). The comparator
|
||||
// closure that is used below expects the input to be List. So the join_id and
|
||||
// channel_id must also be wrapped in a list.
|
||||
[[join_id], [channel_id], id] + stateWithChannelID
|
||||
}
|
||||
| groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
|
||||
| map {join_ids, _, id, statesWithChannelID ->
|
||||
// Remove the channel IDs from the states
|
||||
def states = statesWithChannelID.collect{it[1]}
|
||||
def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
|
||||
assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
|
||||
def newJoinIdUnique = newJoinId[0]
|
||||
|
||||
// Merge the states from the different channels
|
||||
def newState = states.inject([:]){ old_state, state_to_add ->
|
||||
return old_state + state_to_add.collectEntries{k, v ->
|
||||
if (!multipleArgs.contains(k)) {
|
||||
// if the key is not a multiple argument, we expect only one value
|
||||
if (old_state.containsKey(k)) {
|
||||
assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
|
||||
}
|
||||
[k, v]
|
||||
} else {
|
||||
// if the key is a multiple argument, append the different values into one list
|
||||
def prevValue = old_state.getOrDefault(k, [])
|
||||
def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
|
||||
[k, prevValueAsList + v]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_checkAllRequiredOuputsPresent(newState, meta.config, id, key_)
|
||||
|
||||
// simplify output if need be
|
||||
if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
|
||||
newState = newState.values()[0]
|
||||
}
|
||||
|
||||
return [newJoinIdUnique, id, newState]
|
||||
}
|
||||
|
||||
// join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
|
||||
def chNewState = safeJoin(chInitialOutput, chRunFiltered, key_)
|
||||
def chNewState = safeJoin(chJoined, chRunFiltered, key_)
|
||||
// input tuple format: [join_id, id, output, prev_state, ...]
|
||||
// output tuple format: [join_id, id, new_state, ...]
|
||||
| map{ tup ->
|
||||
@@ -2758,23 +2991,21 @@ def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
|
||||
}
|
||||
|
||||
if (workflowArgs.auto.publish == "state") {
|
||||
def chPublish = chNewState
|
||||
def chPublishStates = chNewState
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
// output tuple format: [join_id, id, new_state]
|
||||
| map{ tup ->
|
||||
tup.take(3)
|
||||
}
|
||||
|
||||
safeJoin(chPublish, chArgsWithDefaults, key_)
|
||||
safeJoin(chPublishStates, chArgsWithDefaults, key_)
|
||||
// input tuple format: [join_id, id, new_state, orig_state, ...]
|
||||
// output tuple format: [id, new_state, orig_state]
|
||||
| map { tup ->
|
||||
tup.drop(1).take(3)
|
||||
}
|
||||
}
|
||||
| publishStatesByConfig(key: key_, config: meta.config)
|
||||
}
|
||||
|
||||
// remove join_id and meta
|
||||
chReturn = chNewState
|
||||
| map { tup ->
|
||||
// input tuple format: [join_id, id, new_state, ...]
|
||||
@@ -2967,6 +3198,10 @@ meta = [
|
||||
}
|
||||
],
|
||||
"status" : "enabled",
|
||||
"scope" : {
|
||||
"image" : "public",
|
||||
"target" : "public"
|
||||
},
|
||||
"requirements" : {
|
||||
"commands" : [
|
||||
"ps"
|
||||
@@ -3063,8 +3298,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/workflows/well_metadata",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "1561d769c65192a820053a565654dee8cbe55588",
|
||||
"viash_version" : "0.9.2",
|
||||
"git_commit" : "05ac6a3d2432d2da9a410388f33c45a78fd0cb1a",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3079,7 +3314,7 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"viash_version" : "0.9.2",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
|
||||
Reference in New Issue
Block a user