Build branch main with version main (65dd41d)
Build pipeline: viash-hub.htrnaseq.main-vhms8
Source commit: 65dd41d8b1
Source message: Optimize spawning of processes
This commit is contained in:
@@ -8,7 +8,7 @@ argument_groups:
|
||||
name: "--barcodes"
|
||||
description: "Barcodes responding to the respective log files.\n"
|
||||
info: null
|
||||
required: false
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
@@ -20,7 +20,7 @@ argument_groups:
|
||||
- "Log.final.out"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
@@ -34,7 +34,7 @@ argument_groups:
|
||||
- "Summary.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
@@ -44,7 +44,7 @@ argument_groups:
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
@@ -182,15 +182,18 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/stats/combine_star_logs"
|
||||
executable: "target/nextflow/stats/combine_star_logs/main.nf"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
|
||||
git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info: null
|
||||
viash_version: "0.9.0-RC7"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
// combine_star_logs main
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0-RC7 and is thus a
|
||||
// derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from
|
||||
// Data Intuitive.
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
// The component may contain files which fall under a different license. The
|
||||
// authors of this component should specify the license in the header of such
|
||||
@@ -1728,7 +1728,9 @@ def publishStates(Map args) {
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
|
||||
// TODO: do the pathnames in state_ match up with the outputFilenames_?
|
||||
|
||||
@@ -1799,7 +1801,9 @@ def publishStatesByConfig(Map args) {
|
||||
def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml'
|
||||
def yamlFilename = yamlTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
@@ -1841,7 +1845,9 @@ def publishStatesByConfig(Map args) {
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
@@ -2809,7 +2815,7 @@ meta = [
|
||||
"type" : "string",
|
||||
"name" : "--barcodes",
|
||||
"description" : "Barcodes responding to the respective log files.\n",
|
||||
"required" : false,
|
||||
"required" : true,
|
||||
"direction" : "input",
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
@@ -2823,7 +2829,7 @@ meta = [
|
||||
],
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : false,
|
||||
"required" : true,
|
||||
"direction" : "input",
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
@@ -2837,7 +2843,7 @@ meta = [
|
||||
],
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : false,
|
||||
"required" : true,
|
||||
"direction" : "input",
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
@@ -2848,7 +2854,7 @@ meta = [
|
||||
"description" : "Paths to the 'ReadsPerGene.out.tab' files as output by STAR.\n",
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : false,
|
||||
"required" : true,
|
||||
"direction" : "input",
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
@@ -3026,15 +3032,23 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/stats/combine_star_logs",
|
||||
"viash_version" : "0.9.0-RC7",
|
||||
"git_commit" : "cf9797232db1306bfd5696287928cababe317d99",
|
||||
"git_remote" : "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "65dd41d8b1b4a307735c72320c96c0880c75f17f",
|
||||
"git_remote" : "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "htrnaseq",
|
||||
"version" : "main",
|
||||
"description" : "High-throughput pipeline [WIP]\n",
|
||||
"viash_version" : "0.9.0-RC7",
|
||||
"info" : {
|
||||
"test_resources" : [
|
||||
{
|
||||
"path" : "gs://viash-hub-test-data/htrnaseq/v1/",
|
||||
"dest" : "resources_test"
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3072,7 +3086,6 @@ tempscript=".viash_script.sh"
|
||||
cat > "$tempscript" << VIASHMAIN
|
||||
import logging
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from itertools import batched, starmap
|
||||
|
||||
### VIASH START
|
||||
@@ -3119,7 +3132,7 @@ logger.setLevel(logging.DEBUG)
|
||||
def handle_percentages(column_value):
|
||||
# TODO: handle this more gracefully
|
||||
if column_value:
|
||||
return np.float64(column_value.strip('%'))
|
||||
return column_value.strip('%')
|
||||
return column_value
|
||||
|
||||
def star_log_to_dataframe(barcode: str, log_path) -> pd.DataFrame:
|
||||
@@ -3137,7 +3150,7 @@ def summary_to_dataframe(barcode: str, summary_path) -> pd.DataFrame:
|
||||
logger.info("Reading summary log %s for barcode %s", summary_path, barcode)
|
||||
result = pd.read_table(summary_path, sep=",",
|
||||
header=None, names=["Category", "Value"],
|
||||
index_col=0)
|
||||
index_col=0, dtype=pd.StringDtype())
|
||||
logger.info("Read %d row(s) and %d column(s) from summary file at %s",
|
||||
*result.shape, summary_path)
|
||||
return result
|
||||
@@ -3146,9 +3159,14 @@ def summary_to_dataframe(barcode: str, summary_path) -> pd.DataFrame:
|
||||
def reads_per_gene_to_dataframe(barcode, read_per_gene_path) -> pd.DataFrame:
|
||||
logger.info("Reading reads per gene file %s for barcode %s", read_per_gene_path, barcode)
|
||||
result = pd.read_table(read_per_gene_path, skiprows=[0, 1, 2, 3], header=None, sep="\\\\t",
|
||||
dtype={"geneID": pd.StringDtype(),
|
||||
"Unstranded": pd.Int64Dtype(),
|
||||
"posStrand": pd.Int64Dtype(),
|
||||
"negStrand": pd.Int64Dtype()},
|
||||
index_col=0, names=["geneID", "Unstranded", "posStrand", "negStrand"])
|
||||
result = result[["Unstranded"]] # Do not use .loc here because we need a DataFrame, not a Series
|
||||
df = pd.DataFrame({"Value": result.sum()})
|
||||
df = df.rename({"Unstranded": "NumberOfCountedReads"}, errors="raise")
|
||||
df.index.name = "Category"
|
||||
logger.info("Read %d row(s) and %d column(s) from reads per gene file at %s",
|
||||
*df.shape, read_per_gene_path)
|
||||
@@ -3177,12 +3195,16 @@ def star_log_remove_unwanted_entries_and_adjust_format(barcode, df: pd.DataFrame
|
||||
"\\\\n\\\\t".join(to_keep[~to_keep].index.to_list()))
|
||||
result = df.loc[to_keep]
|
||||
|
||||
# Replace % by pect, remove colons, use camel case and remove spaces
|
||||
# You might be tempted to use .title() to capitalize the first letter of every word,
|
||||
# but characters which are already uppercase should stay that way.
|
||||
# (example: NumberOfUMIs and not NumberOfUmis)
|
||||
result.index = result.index.str.replace("%", "pect")\\\\
|
||||
.str.replace(":", "")\\\\
|
||||
.str.replace(r"(?:^|\\\\s).", lambda m:m.group(0).upper(), regex=True)\\\\
|
||||
.str.replace(" ", "")
|
||||
result = result.rename({"UniquelyMappedReadsNumber": "NumberOfMappedReads",
|
||||
"UniquelyMappedReadsPect": "pctMappedReads"}, errors="raise")
|
||||
"UniquelyMappedReadsPect": "PctMappedReads"}, errors="raise")
|
||||
logger.info("Done filtering STAR logs for barcode %s. Result has %d row(s) and %d column(s). "
|
||||
"Found entries:\\\\n\\\\t%s",
|
||||
barcode, *result.shape, "\\\\n\\\\t".join(result.index.to_list()))
|
||||
@@ -3198,13 +3220,9 @@ def summary_remove_unwanted_entries_and_adjust_format(barcode, df: pd.DataFrame)
|
||||
"Reads Mapped to Genome: Unique",
|
||||
"Reads Mapped to Transcriptome: Unique Genes",
|
||||
"Reads in Cells Mapped to Unique Genes",
|
||||
"Mean Reads per Cell",
|
||||
"Median UMI per Cell",
|
||||
"Median Genes per Cell",
|
||||
"Q30 Bases in CB+UMI",
|
||||
"Reads Mapped to Genome: Unique+Multiple",
|
||||
"Reads Mapped to Transcriptome: Unique+Multipe Genes",
|
||||
"Fraction of Reads in Cells",
|
||||
"Median Reads per Cell",
|
||||
"Mean UMI per Cell",
|
||||
"Mean Genes per Cell",
|
||||
@@ -3217,8 +3235,15 @@ def summary_remove_unwanted_entries_and_adjust_format(barcode, df: pd.DataFrame)
|
||||
result = df.loc[to_keep]
|
||||
result.index = result.index.str.replace(r"(?:^|\\\\s).", lambda m:m.group(0).upper(),
|
||||
regex=True).str.replace(" ", "")
|
||||
result = result.rename({"UMIsInCells": "NumberOfUMIs",
|
||||
"TotalGenesDetected": "NumberOfGenes"}, errors="raise")
|
||||
to_rename = {"UMIsInCells": "NumberOfUMIs",
|
||||
"TotalGenesDetected": "NumberOfGenes"}
|
||||
try:
|
||||
result = result.rename(to_rename, errors="raise")
|
||||
except KeyError as e:
|
||||
raise KeyError(f"Tried to rename log entries ({','.join(to_rename)}) in the summary "
|
||||
f"log for barcode {barcode}, but an entry was not found in the file. "
|
||||
"Make sure that you are using the correct version of STAR."
|
||||
f"Available entries: {", ".join(result.index.to_list())}") from e
|
||||
logger.info("Done filtering summary logs for barcode %s. Result has %d row(s) and %d column(s). "
|
||||
"Found entries:\\\\n\\\\t%s",
|
||||
barcode, *result.shape, "\\\\n\\\\t".join(result.index.to_list()))
|
||||
@@ -3267,13 +3292,40 @@ def main(par):
|
||||
all_stats = pd.concat(all_logs_data, axis=1)
|
||||
logger.info("Log statistics were gathered for the following barcodes: %s",
|
||||
", ".join(all_stats.index.to_list()))
|
||||
dtypes = {
|
||||
'NumberOfInputReads': pd.UInt64Dtype(),
|
||||
'NumberOfMappedReads': pd.UInt64Dtype(),
|
||||
'PctMappedReads': pd.Float64Dtype(),
|
||||
'NumberOfReadsMappedToMultipleLoci': pd.UInt64Dtype(),
|
||||
'PectOfReadsMappedToMultipleLoci': pd.Float64Dtype(),
|
||||
'NumberOfReadsMappedToTooManyLoci': pd.UInt64Dtype(),
|
||||
'PectOfReadsMappedToTooManyLoci': pd.Float64Dtype(),
|
||||
'NumberOfReadsUnmappedTooManyMismatches': pd.UInt64Dtype(),
|
||||
'PectOfReadsUnmappedTooManyMismatches': pd.Float64Dtype(),
|
||||
'NumberOfReadsUnmappedTooShort': pd.UInt64Dtype(),
|
||||
'PectOfReadsUnmappedTooShort': pd.Float64Dtype(),
|
||||
'NumberOfReadsUnmappedOther': pd.UInt64Dtype(),
|
||||
'PectOfReadsUnmappedOther': pd.Float64Dtype(),
|
||||
'ReadsWithValidBarcodes': pd.Float64Dtype(),
|
||||
'SequencingSaturation': pd.Float64Dtype(),
|
||||
'Q30BasesInCB+UMI': pd.Float64Dtype(),
|
||||
'ReadsMappedToTranscriptome:Unique+MultipeGenes': pd.Float64Dtype(),
|
||||
'EstimatedNumberOfCells': pd.UInt64Dtype(),
|
||||
'FractionOfReadsInCells': pd.Float64Dtype(),
|
||||
'MeanReadsPerCell': pd.UInt64Dtype(),
|
||||
'NumberOfUMIs': pd.UInt64Dtype(),
|
||||
'NumberOfGenes': pd.UInt64Dtype(),
|
||||
'NumberOfCountedReads': pd.UInt64Dtype(),
|
||||
}
|
||||
all_stats = all_stats.astype(dtypes)
|
||||
# batched() is used here to print a limited number of columns at a time
|
||||
# to make sure that they are all displayed (pandas might limit the view for readability)
|
||||
logger.info("Summary of final output:\\\\n%s\\\\n",
|
||||
"\\\\n".join(repr(all_stats.loc[:,columns].describe())
|
||||
for columns in batched(all_stats.columns, 3)))
|
||||
logger.info("Writing output to %s", par["output"])
|
||||
all_stats.reset_index("WellBC").to_csv(par["output"], sep="\\\\t", header=True, index=False)
|
||||
all_stats.reset_index("WellBC").to_csv(par["output"], sep="\\\\t", header=True,
|
||||
index=False, float_format='%g')
|
||||
logger.info("Finished %s.", meta["name"])
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -3358,7 +3410,11 @@ def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) {
|
||||
val = val.join(par.multiple_sep)
|
||||
}
|
||||
if (par.direction == "output" && par.type == "file") {
|
||||
val = val.replaceAll('\\$id', id).replaceAll('\\$key', key)
|
||||
val = val
|
||||
.replaceAll('\\$id', id)
|
||||
.replaceAll('\\$\\{id\\}', id)
|
||||
.replaceAll('\\$key', key)
|
||||
.replaceAll('\\$\\{key\\}', key)
|
||||
}
|
||||
[parName, val]
|
||||
}
|
||||
@@ -3489,7 +3545,8 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
def createParentStr = meta.config.allArguments
|
||||
.findAll { it.type == "file" && it.direction == "output" && it.create_parent }
|
||||
.collect { par ->
|
||||
"\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
|
||||
def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]"
|
||||
"\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }"
|
||||
}
|
||||
.join("\n")
|
||||
|
||||
@@ -3497,8 +3554,8 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
def inputFileExports = meta.config.allArguments
|
||||
.findAll { it.type == "file" && it.direction.toLowerCase() == "input" }
|
||||
.collect { par ->
|
||||
def viash_par_contents = "(viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName})"
|
||||
"\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}"
|
||||
def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}"
|
||||
"\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}"
|
||||
}
|
||||
|
||||
// NOTE: if using docker, use /tmp instead of tmpDir!
|
||||
@@ -3535,6 +3592,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
def procStr =
|
||||
"""nextflow.enable.dsl=2
|
||||
|
|
||||
|def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") }
|
||||
|process $procKey {$drctvStrs
|
||||
|input:
|
||||
| tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources")
|
||||
@@ -3546,10 +3604,9 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
|$stub
|
||||
|\"\"\"
|
||||
|script:$assertStr
|
||||
|def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') }
|
||||
|def parInject = args
|
||||
| .findAll{key, value -> value != null}
|
||||
| .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""}
|
||||
| .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"}
|
||||
| .join("\\n")
|
||||
|\"\"\"
|
||||
|# meta exports
|
||||
|
||||
@@ -1,26 +1,88 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = null
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
|
||||
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 12 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
|
||||
@@ -17,8 +17,8 @@
|
||||
"barcodes": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `string`, multiple_sep: `\";\"`. Barcodes responding to the respective log files",
|
||||
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. Barcodes responding to the respective log files.\n"
|
||||
"description": "Type: List of `string`, required, multiple_sep: `\";\"`. Barcodes responding to the respective log files",
|
||||
"help_text": "Type: List of `string`, required, multiple_sep: `\";\"`. Barcodes responding to the respective log files.\n"
|
||||
|
||||
}
|
||||
|
||||
@@ -27,8 +27,8 @@
|
||||
"star_logs": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, example: `Log.final.out`, multiple_sep: `\";\"`. Paths to the STAR log files (most frequently called Log",
|
||||
"help_text": "Type: List of `file`, example: `Log.final.out`, multiple_sep: `\";\"`. Paths to the STAR log files (most frequently called Log.final.out)\n"
|
||||
"description": "Type: List of `file`, required, example: `Log.final.out`, multiple_sep: `\";\"`. Paths to the STAR log files (most frequently called Log",
|
||||
"help_text": "Type: List of `file`, required, example: `Log.final.out`, multiple_sep: `\";\"`. Paths to the STAR log files (most frequently called Log.final.out)\n"
|
||||
|
||||
}
|
||||
|
||||
@@ -37,8 +37,8 @@
|
||||
"gene_summary_logs": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, example: `Summary.txt`, multiple_sep: `\";\"`. Paths to the Summary",
|
||||
"help_text": "Type: List of `file`, example: `Summary.txt`, multiple_sep: `\";\"`. Paths to the Summary.csv files from the STAR Solo output. Can be found in\nthe \u0027Solo.out/Gene\u0027 folder relative to the root of the STAR output directory. \n"
|
||||
"description": "Type: List of `file`, required, example: `Summary.txt`, multiple_sep: `\";\"`. Paths to the Summary",
|
||||
"help_text": "Type: List of `file`, required, example: `Summary.txt`, multiple_sep: `\";\"`. Paths to the Summary.csv files from the STAR Solo output. Can be found in\nthe \u0027Solo.out/Gene\u0027 folder relative to the root of the STAR output directory. \n"
|
||||
|
||||
}
|
||||
|
||||
@@ -47,8 +47,8 @@
|
||||
"reads_per_gene_logs": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, multiple_sep: `\";\"`. Paths to the \u0027ReadsPerGene",
|
||||
"help_text": "Type: List of `file`, multiple_sep: `\";\"`. Paths to the \u0027ReadsPerGene.out.tab\u0027 files as output by STAR.\n"
|
||||
"description": "Type: List of `file`, required, multiple_sep: `\";\"`. Paths to the \u0027ReadsPerGene",
|
||||
"help_text": "Type: List of `file`, required, multiple_sep: `\";\"`. Paths to the \u0027ReadsPerGene.out.tab\u0027 files as output by STAR.\n"
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -124,7 +124,7 @@ runners:
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.11-slim"
|
||||
image: "python:3.12-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
@@ -154,15 +154,18 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/stats/generate_pool_statistics"
|
||||
executable: "target/nextflow/stats/generate_pool_statistics/main.nf"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
|
||||
git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info: null
|
||||
viash_version: "0.9.0-RC7"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
// generate_pool_statistics main
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0-RC7 and is thus a
|
||||
// derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from
|
||||
// Data Intuitive.
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
// The component may contain files which fall under a different license. The
|
||||
// authors of this component should specify the license in the header of such
|
||||
@@ -1728,7 +1728,9 @@ def publishStates(Map args) {
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
|
||||
// TODO: do the pathnames in state_ match up with the outputFilenames_?
|
||||
|
||||
@@ -1799,7 +1801,9 @@ def publishStatesByConfig(Map args) {
|
||||
def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml'
|
||||
def yamlFilename = yamlTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
@@ -1841,7 +1845,9 @@ def publishStatesByConfig(Map args) {
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
@@ -2947,7 +2953,7 @@ meta = [
|
||||
{
|
||||
"type" : "docker",
|
||||
"id" : "docker",
|
||||
"image" : "python:3.11-slim",
|
||||
"image" : "python:3.12-slim",
|
||||
"target_registry" : "images.viash-hub.com",
|
||||
"target_tag" : "main",
|
||||
"namespace_separator" : "/",
|
||||
@@ -2989,15 +2995,23 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/stats/generate_pool_statistics",
|
||||
"viash_version" : "0.9.0-RC7",
|
||||
"git_commit" : "cf9797232db1306bfd5696287928cababe317d99",
|
||||
"git_remote" : "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "65dd41d8b1b4a307735c72320c96c0880c75f17f",
|
||||
"git_remote" : "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "htrnaseq",
|
||||
"version" : "main",
|
||||
"description" : "High-throughput pipeline [WIP]\n",
|
||||
"viash_version" : "0.9.0-RC7",
|
||||
"info" : {
|
||||
"test_resources" : [
|
||||
{
|
||||
"path" : "gs://viash-hub-test-data/htrnaseq/v1/",
|
||||
"dest" : "resources_test"
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3075,9 +3089,13 @@ if __name__ == "__main__":
|
||||
nr_reads_nr_genes_wells = []
|
||||
for nr_reads_nr_genes_file in par["nrReadsNrGenesPerChrom"]:
|
||||
nr_reads_nr_genes_wells.append(pd.read_csv(nr_reads_nr_genes_file,
|
||||
header=0, delimiter="\\\\t"))
|
||||
nr_reads_nr_genes_pool = pd.concat(nr_reads_nr_genes_wells, ignore_index=True)
|
||||
total_nr_reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(index="WellBC", columns="Chr",
|
||||
header=0, delimiter="\\\\t",
|
||||
dtype={"WellBC": pd.StringDtype(),
|
||||
"Chr": pd.StringDtype(),
|
||||
"NumberOfReads": pd.UInt64Dtype(),
|
||||
"NumberOfGenes": pd.UInt64Dtype()}))
|
||||
nr_reads_nr_genes_pool = pd.concat(nr_reads_nr_genes_wells, ignore_index=True,)
|
||||
total_nr_reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(index="WellBC", columns="Chr",
|
||||
values=["NumberOfReads"], fill_value=0,
|
||||
aggfunc="sum").droplevel(0, axis=1)
|
||||
total_nr_reads_per_chromosome.columns.name = None
|
||||
@@ -3118,19 +3136,25 @@ if __name__ == "__main__":
|
||||
total_chromosomal_reads = total_nr_reads_per_chromosome.loc[:,matching_chromosomes].sum(axis=1)
|
||||
percentage_chromosomal_reads = round(total_chromosomal_reads / total_sum_of_reads * 100, 2)
|
||||
|
||||
cols_to_add = {
|
||||
"pctChrom": percentage_chromosomal_reads,
|
||||
"pctMT": percentage_mitochondrial_reads,
|
||||
"pctERCC": percentage_ercc_reads,
|
||||
"SumReads": total_sum_of_reads,
|
||||
"NumberOfGenes": total_nr_genes,
|
||||
"NumberOfERCCReads": total_ercc_reads,
|
||||
"NumberOfChromReads": total_chromosomal_reads,
|
||||
"NumberOfMTReads": mitochondrial_reads,
|
||||
}
|
||||
total_nr_reads_per_chromosome = total_nr_reads_per_chromosome.assign(
|
||||
pctChrom=percentage_chromosomal_reads,
|
||||
pctMT=percentage_mitochondrial_reads,
|
||||
pctERCC=percentage_ercc_reads,
|
||||
SumReads=total_sum_of_reads,
|
||||
NumberOfGenes=total_nr_genes,
|
||||
**cols_to_add
|
||||
)
|
||||
|
||||
total_nr_reads_per_chromosome.reset_index(names="WellBC")\\\\
|
||||
.to_csv(par["nrReadsNrGenesPerChromPool"], sep="\\\\t",
|
||||
header=True, index=False,
|
||||
columns=("WellBC",) + tuple(chromosome_names) + \\\\
|
||||
("SumReads", "pctMT", "pctERCC", "pctChrom", "NumberOfGenes"))
|
||||
header=True, index=False, float_format="%g",
|
||||
columns=("WellBC",) + tuple(chromosome_names) + tuple(cols_to_add.keys())
|
||||
)
|
||||
VIASHMAIN
|
||||
python -B "$tempscript"
|
||||
'''
|
||||
@@ -3211,7 +3235,11 @@ def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) {
|
||||
val = val.join(par.multiple_sep)
|
||||
}
|
||||
if (par.direction == "output" && par.type == "file") {
|
||||
val = val.replaceAll('\\$id', id).replaceAll('\\$key', key)
|
||||
val = val
|
||||
.replaceAll('\\$id', id)
|
||||
.replaceAll('\\$\\{id\\}', id)
|
||||
.replaceAll('\\$key', key)
|
||||
.replaceAll('\\$\\{key\\}', key)
|
||||
}
|
||||
[parName, val]
|
||||
}
|
||||
@@ -3342,7 +3370,8 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
def createParentStr = meta.config.allArguments
|
||||
.findAll { it.type == "file" && it.direction == "output" && it.create_parent }
|
||||
.collect { par ->
|
||||
"\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
|
||||
def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]"
|
||||
"\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }"
|
||||
}
|
||||
.join("\n")
|
||||
|
||||
@@ -3350,8 +3379,8 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
def inputFileExports = meta.config.allArguments
|
||||
.findAll { it.type == "file" && it.direction.toLowerCase() == "input" }
|
||||
.collect { par ->
|
||||
def viash_par_contents = "(viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName})"
|
||||
"\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}"
|
||||
def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}"
|
||||
"\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}"
|
||||
}
|
||||
|
||||
// NOTE: if using docker, use /tmp instead of tmpDir!
|
||||
@@ -3388,6 +3417,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
def procStr =
|
||||
"""nextflow.enable.dsl=2
|
||||
|
|
||||
|def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") }
|
||||
|process $procKey {$drctvStrs
|
||||
|input:
|
||||
| tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources")
|
||||
@@ -3399,10 +3429,9 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
|$stub
|
||||
|\"\"\"
|
||||
|script:$assertStr
|
||||
|def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') }
|
||||
|def parInject = args
|
||||
| .findAll{key, value -> value != null}
|
||||
| .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""}
|
||||
| .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"}
|
||||
| .join("\\n")
|
||||
|\"\"\"
|
||||
|# meta exports
|
||||
|
||||
@@ -1,26 +1,88 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = null
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
|
||||
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 12 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
|
||||
@@ -225,15 +225,18 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/stats/generate_well_statistics"
|
||||
executable: "target/nextflow/stats/generate_well_statistics/main.nf"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
|
||||
git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info: null
|
||||
viash_version: "0.9.0-RC7"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/htrnaseq/v1/"
|
||||
dest: "resources_test"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
// generate_well_statistics main
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.0-RC7 and is thus a
|
||||
// derivative work thereof. This software comes with ABSOLUTELY NO WARRANTY from
|
||||
// Data Intuitive.
|
||||
// This wrapper script is auto-generated by viash 0.9.0 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
// The component may contain files which fall under a different license. The
|
||||
// authors of this component should specify the license in the header of such
|
||||
@@ -1728,7 +1728,9 @@ def publishStates(Map args) {
|
||||
|
||||
def yamlFilename = yamlTemplate_
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
|
||||
// TODO: do the pathnames in state_ match up with the outputFilenames_?
|
||||
|
||||
@@ -1799,7 +1801,9 @@ def publishStatesByConfig(Map args) {
|
||||
def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml'
|
||||
def yamlFilename = yamlTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()
|
||||
|
||||
// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
|
||||
@@ -1841,7 +1845,9 @@ def publishStatesByConfig(Map args) {
|
||||
// instantiate the template
|
||||
def filename = filenameTemplate
|
||||
.replaceAll('\\$id', id_)
|
||||
.replaceAll('\\$\\{id\\}', id_)
|
||||
.replaceAll('\\$key', key_)
|
||||
.replaceAll('\\$\\{key\\}', key_)
|
||||
if (par.multiple) {
|
||||
// if the parameter is multiple: true, the filename
|
||||
// should contain a wildcard '*' that is replaced with
|
||||
@@ -3071,15 +3077,23 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/stats/generate_well_statistics",
|
||||
"viash_version" : "0.9.0-RC7",
|
||||
"git_commit" : "cf9797232db1306bfd5696287928cababe317d99",
|
||||
"git_remote" : "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "65dd41d8b1b4a307735c72320c96c0880c75f17f",
|
||||
"git_remote" : "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "htrnaseq",
|
||||
"version" : "main",
|
||||
"description" : "High-throughput pipeline [WIP]\n",
|
||||
"viash_version" : "0.9.0-RC7",
|
||||
"info" : {
|
||||
"test_resources" : [
|
||||
{
|
||||
"path" : "gs://viash-hub-test-data/htrnaseq/v1/",
|
||||
"dest" : "resources_test"
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.0",
|
||||
"source" : "src",
|
||||
"target" : "target",
|
||||
"config_mods" : [
|
||||
@@ -3297,7 +3311,11 @@ def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) {
|
||||
val = val.join(par.multiple_sep)
|
||||
}
|
||||
if (par.direction == "output" && par.type == "file") {
|
||||
val = val.replaceAll('\\$id', id).replaceAll('\\$key', key)
|
||||
val = val
|
||||
.replaceAll('\\$id', id)
|
||||
.replaceAll('\\$\\{id\\}', id)
|
||||
.replaceAll('\\$key', key)
|
||||
.replaceAll('\\$\\{key\\}', key)
|
||||
}
|
||||
[parName, val]
|
||||
}
|
||||
@@ -3428,7 +3446,8 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
def createParentStr = meta.config.allArguments
|
||||
.findAll { it.type == "file" && it.direction == "output" && it.create_parent }
|
||||
.collect { par ->
|
||||
"\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"] : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }"
|
||||
def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]"
|
||||
"\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }"
|
||||
}
|
||||
.join("\n")
|
||||
|
||||
@@ -3436,8 +3455,8 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
def inputFileExports = meta.config.allArguments
|
||||
.findAll { it.type == "file" && it.direction.toLowerCase() == "input" }
|
||||
.collect { par ->
|
||||
def viash_par_contents = "(viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName})"
|
||||
"\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}=\\\"\" + ${viash_par_contents} + \"\\\"\"}"
|
||||
def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}"
|
||||
"\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}"
|
||||
}
|
||||
|
||||
// NOTE: if using docker, use /tmp instead of tmpDir!
|
||||
@@ -3474,6 +3493,7 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
def procStr =
|
||||
"""nextflow.enable.dsl=2
|
||||
|
|
||||
|def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") }
|
||||
|process $procKey {$drctvStrs
|
||||
|input:
|
||||
| tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources")
|
||||
@@ -3485,10 +3505,9 @@ def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) {
|
||||
|$stub
|
||||
|\"\"\"
|
||||
|script:$assertStr
|
||||
|def escapeText = { s -> s.toString().replaceAll('([`"])', '\\\\\\\\\$1') }
|
||||
|def parInject = args
|
||||
| .findAll{key, value -> value != null}
|
||||
| .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}=\\\"\${escapeText(value)}\\\""}
|
||||
| .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"}
|
||||
| .join("\\n")
|
||||
|\"\"\"
|
||||
|# meta exports
|
||||
|
||||
@@ -1,26 +1,88 @@
|
||||
executor {
|
||||
$k8s {
|
||||
submitRateLimit = '10sec'
|
||||
pollInterval = '1 sec'
|
||||
}
|
||||
}
|
||||
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
container = 'nextflow/bash:latest'
|
||||
|
||||
// default resources
|
||||
memory = { 8.Gb * task.attempt }
|
||||
cpus = 8
|
||||
maxForks = 36
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = null
|
||||
maxMemory = 192.GB
|
||||
|
||||
// Resource labels
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 8 }
|
||||
withLabel: midcpu { cpus = 16 }
|
||||
withLabel: highcpu { cpus = 32 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
|
||||
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
|
||||
withLabel: verylowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 16.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 64.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
profiles {
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
docker {
|
||||
docker.fixOwnership = true
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 12 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 20.GB * task.attempt ) } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def get_memory(to_compare) {
|
||||
if (!process.containsKey("maxMemory") || !process.maxMemory) {
|
||||
return to_compare
|
||||
|
||||
Reference in New Issue
Block a user