Build branch main with version main (cf97972)

Build pipeline: viash-hub.htrnaseq.main-6f8s8

Source commit: cf9797232d

Source message: WIP: add component to parse STAR logs (#7)

* WIP: add component to parse STAR logs

* Add tests and logging
This commit is contained in:
CI
2024-09-17 09:02:15 +00:00
parent bde35f120c
commit b8abf8c490
37 changed files with 6473 additions and 40 deletions

View File

@@ -0,0 +1,65 @@
name: combine_star_logs
namespace: "stats"
argument_groups:
- name: "Arguments"
arguments:
- name: "--barcodes"
type: string
multiple: true
description: |
Barcodes corresponding to the respective log files.
- name: "--star_logs"
type: file
multiple: true
description: |
Paths to the STAR log files (most frequently called Log.final.out)
direction: input
example: "Log.final.out"
- name: "--gene_summary_logs"
direction: input
type: file
multiple: true
description: |
Paths to the Summary.csv files from the STAR Solo output. Can be found in
the 'Solo.out/Gene' folder relative to the root of the STAR output directory.
example: "Summary.csv"
- name: "--reads_per_gene_logs"
direction: input
type: file
multiple: true
description: |
Paths to the 'ReadsPerGene.out.tab' files as output by STAR.
- name: "--output"
type: file
direction: output
default: "starLogs.txt"
description: |
Tab-delimited file describing for each barcode (as the rows), the metrics (as columns)
gathered from the different input files.
resources:
- type: python_script
path: script.py
test_resources:
- type: python_script
path: test.py
- path: test_data
engines:
- type: docker
image: python:3.12-slim
setup:
- type: apt
packages:
- procps
- type: python
packages:
- pandas
test_setup:
- type: python
packages:
- viashpy
runners:
- type: executable
- type: nextflow

View File

@@ -0,0 +1,190 @@
import logging
import pandas as pd
import numpy as np
from itertools import batched, starmap
### VIASH START
# Development placeholders used when this script is run directly.
# NOTE(review): presumably viash replaces everything between the
# VIASH START / VIASH END markers with the real component arguments -- confirm.
meta = {
    "name": "combine_star_logs",
}
# Every list holds one entry per barcode; main() rejects lists of differing
# lengths.
par = {
    "star_logs": ["testData/STAR/ACGCCTTCGT/Log.final.out",
                  "testData/STAR/GTCTCGAGTG/Log.final.out"],
    "gene_summary_logs": ["testData/STAR/ACGCCTTCGT/Solo.out/Gene/Summary.csv",
                          "testData/STAR/GTCTCGAGTG/Solo.out/Gene/Summary.csv"],
    "reads_per_gene_logs": ["testData/STAR/ACGCCTTCGT/ReadsPerGene.out.tab",
                            "testData/STAR/GTCTCGAGTG/ReadsPerGene.out.tab"],
    "output": "output.txt",
    "barcodes": ["ACGG", "TTTT"],
}
### VIASH END
# Configure the root logger: attach a stream handler (stderr by default) and
# lower the threshold to DEBUG so that every message is emitted.
logger = logging.getLogger()
console_handler = logging.StreamHandler()
logger.addHandler(console_handler)
logger.setLevel(logging.DEBUG)
def handle_percentages(column_value):
    """
    Converter for the 'Value' column of a STAR log.

    Falsy values (for example an empty string) are handed back untouched.
    Any other value has leading and trailing '%' characters stripped before
    it is parsed into a numpy float.
    """
    # TODO: handle this more gracefully
    if not column_value:
        return column_value
    without_percent_sign = column_value.strip('%')
    return np.float64(without_percent_sign)
def star_log_to_dataframe(barcode: str, log_path) -> pd.DataFrame:
    """
    Load one STAR log (Log.final.out) into a data frame.

    The file is split on a '|' followed by one or more tabs. The part before
    the separator becomes the 'Category' index and the part after it the
    'Value' column, which is passed through handle_percentages. Rows 0, 1
    and 2 of the file are skipped. The barcode is only used for logging.
    """
    logger.info("Reading STAR log %s for barcode '%s'", log_path, barcode)
    read_options = {
        "sep": r"\|\t+",
        "converters": {"Value": handle_percentages},
        "engine": "python",
        "header": None,
        "skip_blank_lines": True,
        "skipinitialspace": True,
        "names": ["Category", "Value"],
        "index_col": 0,
        "skiprows": [0, 1, 2],
    }
    star_metrics = pd.read_table(log_path, **read_options)
    n_rows, n_columns = star_metrics.shape
    logger.info("Read %d row(s) and %d column(s) from STAR logs at %s",
                n_rows, n_columns, log_path)
    return star_metrics
def summary_to_dataframe(barcode: str, summary_path) -> pd.DataFrame:
    """
    Load one comma-separated, header-less summary file into a data frame.

    The first field of each line becomes the 'Category' index, the second
    the 'Value' column. The barcode is only used for logging.
    """
    logger.info("Reading summary log %s for barcode %s", summary_path, barcode)
    summary = pd.read_table(
        summary_path,
        sep=",",
        header=None,
        names=["Category", "Value"],
        index_col=0,
    )
    n_rows, n_columns = summary.shape
    logger.info("Read %d row(s) and %d column(s) from summary file at %s",
                n_rows, n_columns, summary_path)
    return summary
def reads_per_gene_to_dataframe(barcode, read_per_gene_path) -> pd.DataFrame:
    """
    Sum the unstranded counts of a 'ReadsPerGene.out.tab' file.

    The first four rows of the file are skipped and the remaining values of
    the 'Unstranded' column are added up. The result is a data frame with a
    single row labelled 'Unstranded' (index name 'Category') and a single
    'Value' column. The barcode is only used for logging.
    """
    logger.info("Reading reads per gene file %s for barcode %s", read_per_gene_path, barcode)
    column_names = ["geneID", "Unstranded", "posStrand", "negStrand"]
    counts = pd.read_table(read_per_gene_path, sep="\t", header=None,
                           names=column_names, index_col=0,
                           skiprows=[0, 1, 2, 3])
    # Select with a list of columns: this keeps a DataFrame (not a Series),
    # so the sum below is labelled with the column name.
    unstranded_total = counts[["Unstranded"]].sum()
    totals = pd.DataFrame({"Value": unstranded_total})
    totals.index.name = "Category"
    n_rows, n_columns = totals.shape
    logger.info("Read %d row(s) and %d column(s) from reads per gene file at %s",
                n_rows, n_columns, read_per_gene_path)
    return totals
def star_log_remove_unwanted_entries_and_adjust_format(barcode, df: pd.DataFrame) -> pd.DataFrame:
    """
    Filter and relabel the metrics of a single STAR log (Log.final.out).

    The input data frame must carry the metric names in a single-level index.
    Rows are dropped when their label:
      - ends with ':' (section separators such as 'MULTI-MAPPING READS:'
        and 'UNMAPPED READS:');
      - contains one of the keywords 'Mapping speed', 'Average',
        'Number of splices', 'per base', 'chimeric reads' or 'average'.

    The remaining labels are rewritten: '%' becomes 'pect', ':' is removed,
    the first character of every word is upper-cased and all spaces are
    removed. Finally 'UniquelyMappedReadsNumber' and
    'UniquelyMappedReadsPect' are renamed to 'NumberOfMappedReads' and
    'pctMappedReads'; a missing label raises an error.
    """
    n_rows, n_columns = df.shape
    logger.info("Filtering STAR logs for barcode %s. Starting with %d row(s) and %d column(s)",
                barcode, n_rows, n_columns)
    labels = df.index.to_series()
    # Section separators (rows like 'MULTI-MAPPING READS:', 'UNIQUE READS:')
    is_section_header = labels.str.endswith(":")
    # Metrics that are not needed downstream
    regex_columns_to_remove = "Mapping speed|Average|Number of splices|per base|chimeric reads|average"
    is_unwanted_metric = labels.str.contains(regex_columns_to_remove, regex=True)
    to_keep = ~is_section_header & ~is_unwanted_metric
    removed_labels = labels[~to_keep].to_list()
    logger.info("Removed the following log entries for barcode '%s':\n\t%s",
                barcode,
                "\n\t".join(removed_labels))
    result = df.loc[to_keep]

    def capitalize_word_start(match):
        # The match covers an optional whitespace character plus the first
        # character of a word; upper-casing it leaves the whitespace intact.
        return match.group(0).upper()

    new_labels = result.index.str.replace("%", "pect")
    new_labels = new_labels.str.replace(":", "")
    new_labels = new_labels.str.replace(r"(?:^|\s).", capitalize_word_start, regex=True)
    new_labels = new_labels.str.replace(" ", "")
    result.index = new_labels
    renames = {
        "UniquelyMappedReadsNumber": "NumberOfMappedReads",
        "UniquelyMappedReadsPect": "pctMappedReads",
    }
    result = result.rename(renames, errors="raise")
    n_rows, n_columns = result.shape
    logger.info("Done filtering STAR logs for barcode %s. Result has %d row(s) and %d column(s). "
                "Found entries:\n\t%s",
                barcode, n_rows, n_columns, "\n\t".join(result.index.to_list()))
    return result
def summary_remove_unwanted_entries_and_adjust_format(barcode, df: pd.DataFrame) -> pd.DataFrame:
    """
    Filter and relabel the metrics of a single summary file.

    Rows whose label is listed in `columns_to_remove` are dropped. The
    remaining labels get the first character of every word upper-cased and
    all spaces removed. Finally 'UMIsInCells' and 'TotalGenesDetected' are
    renamed to 'NumberOfUMIs' and 'NumberOfGenes'; a missing label raises
    an error. The barcode is only used for logging.
    """
    n_rows, n_columns = df.shape
    logger.info("Filtering and formatting summary logs for barcode %s. "
                "Starting with %d row(s) and %d column(s)", barcode, n_rows, n_columns)
    # Labels are matched literally against the contents of the summary file.
    columns_to_remove = (
        "Number of Reads",
        "Q30 Bases in RNA read",
        "Reads Mapped to Genome: Unique",
        "Reads Mapped to Transcriptome: Unique Genes",
        "Reads in Cells Mapped to Unique Genes",
        "Mean Reads per Cell",
        "Median UMI per Cell",
        "Median Genes per Cell",
        "Q30 Bases in CB+UMI",
        "Reads Mapped to Genome: Unique+Multiple",
        "Reads Mapped to Transcriptome: Unique+Multipe Genes",
        "Fraction of Reads in Cells",
        "Median Reads per Cell",
        "Mean UMI per Cell",
        "Mean Genes per Cell",
    )
    to_keep = ~df.index.isin(columns_to_remove)
    removed_labels = df.index[~to_keep].to_list()
    logger.info("Removed the following summary entries for barcode '%s':\n\t%s",
                barcode,
                "\n\t".join(removed_labels))
    result = df.loc[to_keep]

    def capitalize_word_start(match):
        # Upper-case the first character of a word (the match may also
        # include the whitespace character that precedes it).
        return match.group(0).upper()

    new_labels = result.index.str.replace(r"(?:^|\s).", capitalize_word_start, regex=True)
    result.index = new_labels.str.replace(" ", "")
    renames = {
        "UMIsInCells": "NumberOfUMIs",
        "TotalGenesDetected": "NumberOfGenes",
    }
    result = result.rename(renames, errors="raise")
    n_rows, n_columns = result.shape
    logger.info("Done filtering summary logs for barcode %s. Result has %d row(s) and %d column(s). "
                "Found entries:\n\t%s",
                barcode, n_rows, n_columns, "\n\t".join(result.index.to_list()))
    return result
def join_dfs(df_list, barcodes) -> pd.DataFrame:
    """
    Combine per-barcode metric frames into one table with a row per barcode.

    Args:
        df_list: data frames with the metrics in an index named 'Category'
            and a single value column, one frame per barcode.
        barcodes: the barcode belonging to each entry of `df_list`, in the
            same order.

    Returns:
        A data frame indexed by barcode (index name 'WellBC') with one
        column per metric.

    Raises:
        ValueError: when a barcode occurs more than once. Barcodes are used
            as dictionary keys below, so a repeated barcode would otherwise
            silently discard all but the last of its data frames.
    """
    barcodes = list(barcodes)
    seen, duplicates = set(), []
    for barcode in barcodes:
        if barcode in seen and barcode not in duplicates:
            duplicates.append(barcode)
        seen.add(barcode)
    if duplicates:
        raise ValueError("Expected the barcodes to be unique, but found duplicates: %s."
                         % ", ".join(map(str, duplicates)))
    # Combine the dataframes together and add the barcodes as a level to the dataframe
    # in order to make a 2-level index (first level the barcodes and second level the metrics).
    result = pd.concat(dict(zip(barcodes, df_list)), names=["WellBC"])
    # Pivot the table by moving the metrics to the columns. They are added as an extra
    # column level, so the value-column level that was already there can just be dropped.
    result = result.unstack(level="Category").droplevel(0, axis="columns")
    return result
def main(par):
    """
    Gather the metrics from all log files and write them as one table.

    For every barcode the STAR log, the summary file and the reads per gene
    file are read, filtered where applicable and joined into a table with
    one row per barcode. The table is written tab-separated to
    par["output"].

    Raises:
        ValueError: when 'star_logs', 'gene_summary_logs',
            'reads_per_gene_logs' and 'barcodes' do not all contain the same
            number of entries.
    """
    logger.info("Component started.")
    # Provide an overview of the parameters in the logs
    parameter_overview = "\n".join(f'\t{name}: {value}' for name, value in par.items())
    logger.info("Parameters:\n%s", parameter_overview.rstrip())

    star_logs = par["star_logs"]
    gene_summary_logs = par["gene_summary_logs"]
    reads_per_gene_logs = par["reads_per_gene_logs"]
    barcodes = par["barcodes"]

    input_counts = tuple(map(len, (star_logs, gene_summary_logs,
                                   reads_per_gene_logs, barcodes)))
    if len(set(input_counts)) != 1:
        raise ValueError("Expected the same number of inputs for 'star_logs' (%d), "
                         "'gene_summary_logs' (%d), 'reads_per_gene_logs' (%d) "
                         "and 'barcodes' (%d)." % input_counts)

    # (reader, optional formatter, input paths) for every kind of log file
    processing_plan = (
        (star_log_to_dataframe, star_log_remove_unwanted_entries_and_adjust_format, star_logs),
        (summary_to_dataframe, summary_remove_unwanted_entries_and_adjust_format, gene_summary_logs),
        (reads_per_gene_to_dataframe, None, reads_per_gene_logs),
    )
    logger.info("Formatting the contents of the log files.")
    tables_per_log_type = []
    for read_log, format_log, paths in processing_plan:
        # All files of one kind are read before any of them is formatted.
        frames = [read_log(barcode, path) for barcode, path in zip(barcodes, paths)]
        if format_log:
            frames = [format_log(barcode, frame) for barcode, frame in zip(barcodes, frames)]
        tables_per_log_type.append(join_dfs(frames, barcodes))

    logger.info("Joining entries across the different logs together.")
    all_stats = pd.concat(tables_per_log_type, axis=1)
    logger.info("Log statistics were gathered for the following barcodes: %s",
                ", ".join(all_stats.index.to_list()))
    # batched() is used here to print a limited amount of columns at a time
    # to make sure that they are all displayed (pandas might limit the view for readability)
    column_summaries = (repr(all_stats.loc[:, columns].describe())
                        for columns in batched(all_stats.columns, 3))
    logger.info("Summary of final output:\n%s\n", "\n".join(column_summaries))

    output_path = par["output"]
    logger.info("Writing output to %s", output_path)
    all_stats.reset_index("WellBC").to_csv(output_path, sep="\t", header=True, index=False)
    logger.info("Finished %s.", meta["name"])
# Only run the component when the file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main(par)

View File

@@ -0,0 +1,115 @@
import pytest
import sys
import re
import pandas as pd
from pathlib import Path
from uuid import uuid4
from subprocess import CalledProcessError
### VIASH START
# Development placeholders that point at the locally built component.
# NOTE(review): presumably viash replaces this section with the real values
# when the test is run through viash -- confirm.
meta = {
    "resources_dir": "./src/stats/combine_star_logs/",
    "executable": "target/executable/stats/combine_star_logs/combine_star_logs",
    "config": "src/stats/combine_star_logs/config.vsh.yaml"
}
### VIASH END
@pytest.fixture
def test_resources_path():
    """Directory 'test_data' inside the resources directory of the test."""
    resources_dir = Path(meta["resources_dir"])
    return resources_dir / "test_data"
@pytest.fixture
def barcode_1_star_log(test_resources_path):
    """Path to the 'Log.final.out' test file of barcode 1."""
    return test_resources_path.joinpath("barcode_1", "Log.final.out")
@pytest.fixture
def barcode_1_reads_per_gene_file(test_resources_path):
    """Path to the 'ReadsPerGene.out.tab' test file of barcode 1."""
    return test_resources_path.joinpath("barcode_1", "ReadsPerGene.out.tab")
@pytest.fixture
def barcode_1_summary(test_resources_path):
    """Path to the 'summary.csv' test file of barcode 1."""
    return test_resources_path.joinpath("barcode_1", "summary.csv")
@pytest.fixture
def barcode_2_star_log(test_resources_path):
    """Path to the 'Log.final.out' test file of barcode 2."""
    return test_resources_path.joinpath("barcode_2", "Log.final.out")
@pytest.fixture
def barcode_2_reads_per_gene_file(test_resources_path):
    """Path to the 'ReadsPerGene.out.tab' test file of barcode 2."""
    return test_resources_path.joinpath("barcode_2", "ReadsPerGene.out.tab")
@pytest.fixture
def barcode_2_summary(test_resources_path):
    """Path to the 'summary.csv' test file of barcode 2."""
    return test_resources_path.joinpath("barcode_2", "summary.csv")
@pytest.fixture
def random_path(tmp_path):
    """
    Factory for randomly named paths inside pytest's temporary directory.

    The returned callable takes an optional extension (without the leading
    dot) and returns a path of the form '<uuid4>[.<extension>]'. Nothing is
    created on disk.
    """
    def wrapper(extension=None):
        suffix = f".{extension}" if extension else ""
        return tmp_path / f"{uuid4()}{suffix}"
    return wrapper
def test_incorrect_number_of_inputs_raises(run_component,
                                           barcode_1_star_log, barcode_2_star_log,
                                           barcode_1_reads_per_gene_file, barcode_2_reads_per_gene_file,
                                           barcode_1_summary, barcode_2_summary,
                                           random_path):
    """
    The component must fail when the inputs differ in length: here only one
    STAR log is passed for two barcodes, summaries and reads per gene files.
    """
    output_path = random_path("txt")
    reads_per_gene_argument = f"{barcode_1_reads_per_gene_file};{barcode_2_reads_per_gene_file}"
    summaries_argument = f"{barcode_1_summary};{barcode_2_summary}"
    arguments = [
        "--barcodes", "foo;bar",
        "--star_logs", f"{barcode_1_star_log}",
        "--reads_per_gene_logs", reads_per_gene_argument,
        "--gene_summary_logs", summaries_argument,
        "--output", output_path,
    ]
    with pytest.raises(CalledProcessError) as err:
        run_component(arguments)
    # Checked after the 'with' block: code following the failing call inside
    # the block would never be executed.
    expected_message = (
        r"ValueError: Expected the same number of inputs for 'star_logs' \(1\), "
        r"'gene_summary_logs' \(2\), 'reads_per_gene_logs' \(2\) and 'barcodes' \(2\)\."
    )
    component_output = err.value.stdout.decode('utf-8')
    assert re.search(expected_message, component_output)
def test_equal_number_of_argument(run_component,
                                  barcode_1_star_log, barcode_2_star_log,
                                  barcode_1_reads_per_gene_file, barcode_2_reads_per_gene_file,
                                  barcode_1_summary, barcode_2_summary,
                                  random_path):
    """
    Run the component on the test data of two barcodes and compare the
    written table (one row per barcode, one column per metric) with the
    expected values.
    """
    output_path = random_path("txt")
    arguments = [
        "--barcodes", "foo;bar",
        "--star_logs", f"{barcode_1_star_log};{barcode_2_star_log}",
        "--reads_per_gene_logs", f"{barcode_1_reads_per_gene_file};{barcode_2_reads_per_gene_file}",
        "--gene_summary_logs", f"{barcode_1_summary};{barcode_2_summary}",
        "--output", output_path,
    ]
    run_component(arguments)

    # Per metric: [value for barcode 'foo', value for barcode 'bar']
    expected_dict = {
        'NumberOfInputReads': [96398.0, 10155.0],
        'NumberOfMappedReads': [70824.0, 7179.0],
        'pctMappedReads': [73.47, 70.69],
        'NumberOfReadsMappedToMultipleLoci': [0.0, 0.0],
        'PectOfReadsMappedToMultipleLoci': [0.0, 0.0],
        'NumberOfReadsMappedToTooManyLoci': [22281.0, 2248.0],
        'PectOfReadsMappedToTooManyLoci': [23.11, 22.14],
        'NumberOfReadsUnmappedTooManyMismatches': [0.0, 0.0],
        'PectOfReadsUnmappedTooManyMismatches': [0.0, 0.0],
        'NumberOfReadsUnmappedTooShort': [2697.0, 553.0],
        'PectOfReadsUnmappedTooShort': [2.8, 5.45],
        'NumberOfReadsUnmappedOther': [596.0, 175.0],
        'PectOfReadsUnmappedOther': [0.62, 1.72],
        'ReadsWithValidBarcodes': [0.999782, 0.999803],
        'SequencingSaturation': [0.0602963, 0.0539344],
        'EstimatedNumberOfCells': [1.0, 1.0],
        'NumberOfUMIs': [50370.0, 4701.0],
        'NumberOfGenes': [8767.0, 2397.0],
        'Unstranded': [17, 15],
    }
    expected_index = pd.Index(["foo", "bar"], name="WellBC")
    expected = pd.DataFrame(expected_dict, index=expected_index)

    assert output_path.is_file()
    contents = pd.read_csv(output_path, sep="\t", index_col=0)
    pd.testing.assert_frame_equal(contents, expected)
# Run pytest on this file when it is executed as a script and use pytest's
# return value as the exit code of the process.
if __name__ == '__main__':
    sys.exit(pytest.main([__file__]))

View File

@@ -0,0 +1,37 @@
Started job on | Jun 26 09:38:11
Started mapping on | Jun 26 09:38:14
Finished on | Jun 26 09:38:23
Mapping speed, Million of reads per hour | 38.56
Number of input reads | 96398
Average input read length | 57
UNIQUE READS:
Uniquely mapped reads number | 70824
Uniquely mapped reads % | 73.47%
Average mapped length | 56.93
Number of splices: Total | 6432
Number of splices: Annotated (sjdb) | 6285
Number of splices: GT/AG | 6331
Number of splices: GC/AG | 33
Number of splices: AT/AC | 2
Number of splices: Non-canonical | 66
Mismatch rate per base, % | 0.61%
Deletion rate per base | 0.01%
Deletion average length | 1.38
Insertion rate per base | 0.00%
Insertion average length | 1.24
MULTI-MAPPING READS:
Number of reads mapped to multiple loci | 0
% of reads mapped to multiple loci | 0.00%
Number of reads mapped to too many loci | 22281
% of reads mapped to too many loci | 23.11%
UNMAPPED READS:
Number of reads unmapped: too many mismatches | 0
% of reads unmapped: too many mismatches | 0.00%
Number of reads unmapped: too short | 2697
% of reads unmapped: too short | 2.80%
Number of reads unmapped: other | 596
% of reads unmapped: other | 0.62%
CHIMERIC READS:
Number of chimeric reads | 0
% of chimeric reads | 0.00%

View File

@@ -0,0 +1,8 @@
N_unmapped 11111 22222 33333
N_multimapping 0 0 0
N_noFeature 44444 55555 66666
N_ambiguous 77777 88888 99999
gene1 2 0 0
gene2 0 0 0
gene3 6 0 6
gene5 9 6 3

View File

@@ -0,0 +1,20 @@
Number of Reads,96398
Reads With Valid Barcodes,0.999782
Sequencing Saturation,0.0602963
Q30 Bases in CB+UMI,0.980096
Q30 Bases in RNA read,0.799904
Reads Mapped to Genome: Unique+Multiple,0.734704
Reads Mapped to Genome: Unique,0.734704
Reads Mapped to Transcriptome: Unique+Multipe Genes,0.60411
Reads Mapped to Transcriptome: Unique Genes,0.556049
Estimated Number of Cells,1
Reads in Cells Mapped to Unique Genes,53602
Fraction of Reads in Cells,1
Mean Reads per Cell,53602
Median Reads per Cell,53602
UMIs in Cells,50370
Mean UMI per Cell,50370
Median UMI per Cell,50370
Mean Genes per Cell,8767
Median Genes per Cell,8767
Total Genes Detected,8767
1 Number of Reads 96398
2 Reads With Valid Barcodes 0.999782
3 Sequencing Saturation 0.0602963
4 Q30 Bases in CB+UMI 0.980096
5 Q30 Bases in RNA read 0.799904
6 Reads Mapped to Genome: Unique+Multiple 0.734704
7 Reads Mapped to Genome: Unique 0.734704
8 Reads Mapped to Transcriptome: Unique+Multipe Genes 0.60411
9 Reads Mapped to Transcriptome: Unique Genes 0.556049
10 Estimated Number of Cells 1
11 Reads in Cells Mapped to Unique Genes 53602
12 Fraction of Reads in Cells 1
13 Mean Reads per Cell 53602
14 Median Reads per Cell 53602
15 UMIs in Cells 50370
16 Mean UMI per Cell 50370
17 Median UMI per Cell 50370
18 Mean Genes per Cell 8767
19 Median Genes per Cell 8767
20 Total Genes Detected 8767

View File

@@ -0,0 +1,37 @@
Started job on | Jun 26 09:38:56
Started mapping on | Jun 26 09:39:00
Finished on | Jun 26 09:39:02
Mapping speed, Million of reads per hour | 18.28
Number of input reads | 10155
Average input read length | 57
UNIQUE READS:
Uniquely mapped reads number | 7179
Uniquely mapped reads % | 70.69%
Average mapped length | 56.36
Number of splices: Total | 526
Number of splices: Annotated (sjdb) | 495
Number of splices: GT/AG | 502
Number of splices: GC/AG | 4
Number of splices: AT/AC | 1
Number of splices: Non-canonical | 19
Mismatch rate per base, % | 0.85%
Deletion rate per base | 0.00%
Deletion average length | 1.09
Insertion rate per base | 0.00%
Insertion average length | 1.07
MULTI-MAPPING READS:
Number of reads mapped to multiple loci | 0
% of reads mapped to multiple loci | 0.00%
Number of reads mapped to too many loci | 2248
% of reads mapped to too many loci | 22.14%
UNMAPPED READS:
Number of reads unmapped: too many mismatches | 0
% of reads unmapped: too many mismatches | 0.00%
Number of reads unmapped: too short | 553
% of reads unmapped: too short | 5.45%
Number of reads unmapped: other | 175
% of reads unmapped: other | 1.72%
CHIMERIC READS:
Number of chimeric reads | 0
% of chimeric reads | 0.00%

View File

@@ -0,0 +1,8 @@
N_unmapped 101010 202020 303030
N_multimapping 0 0 0
N_noFeature 404040 505050 606060
N_ambiguous 707070 808080 909090
gene1 0 0 0
gene2 0 0 0
gene6 5 5 0
gene4 10 2 8

View File

@@ -0,0 +1,20 @@
Number of Reads,10155
Reads With Valid Barcodes,0.999803
Sequencing Saturation,0.0539344
Q30 Bases in CB+UMI,0.984461
Q30 Bases in RNA read,0.786064
Reads Mapped to Genome: Unique+Multiple,0.706942
Reads Mapped to Genome: Unique,0.706942
Reads Mapped to Transcriptome: Unique+Multipe Genes,0.530871
Reads Mapped to Transcriptome: Unique Genes,0.489316
Estimated Number of Cells,1
Reads in Cells Mapped to Unique Genes,4969
Fraction of Reads in Cells,1
Mean Reads per Cell,4969
Median Reads per Cell,4969
UMIs in Cells,4701
Mean UMI per Cell,4701
Median UMI per Cell,4701
Mean Genes per Cell,2397
Median Genes per Cell,2397
Total Genes Detected,2397
1 Number of Reads 10155
2 Reads With Valid Barcodes 0.999803
3 Sequencing Saturation 0.0539344
4 Q30 Bases in CB+UMI 0.984461
5 Q30 Bases in RNA read 0.786064
6 Reads Mapped to Genome: Unique+Multiple 0.706942
7 Reads Mapped to Genome: Unique 0.706942
8 Reads Mapped to Transcriptome: Unique+Multipe Genes 0.530871
9 Reads Mapped to Transcriptome: Unique Genes 0.489316
10 Estimated Number of Cells 1
11 Reads in Cells Mapped to Unique Genes 4969
12 Fraction of Reads in Cells 1
13 Mean Reads per Cell 4969
14 Median Reads per Cell 4969
15 UMIs in Cells 4701
16 Mean UMI per Cell 4701
17 Median UMI per Cell 4701
18 Mean Genes per Cell 2397
19 Median Genes per Cell 2397
20 Total Genes Detected 2397

View File

@@ -236,8 +236,8 @@ build_info:
output: "target/executable/parallel_map"
executable: "target/executable/parallel_map/parallel_map"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"

View File

@@ -516,9 +516,9 @@ RUN wget -O $STAR_TARGET $STAR_SOURCE && \
rm $STAR_TARGET && rm -rf /tmp/STAR_$STAR_VERSION
LABEL org.opencontainers.image.description="Companion container for running component parallel_map"
LABEL org.opencontainers.image.created="2024-08-29T12:31:01Z"
LABEL org.opencontainers.image.created="2024-09-17T08:52:47Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="b98f6367d672368af134843711a46d3b53717187"
LABEL org.opencontainers.image.revision="cf9797232db1306bfd5696287928cababe317d99"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -0,0 +1,214 @@
name: "combine_star_logs"
namespace: "stats"
version: "main"
argument_groups:
- name: "Arguments"
arguments:
- type: "string"
name: "--barcodes"
description: "Barcodes responding to the respective log files.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--star_logs"
description: "Paths to the STAR log files (most frequently called Log.final.out)\n"
info: null
example:
- "Log.final.out"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--gene_summary_logs"
description: "Paths to the Summary.csv files from the STAR Solo output. Can be\
\ found in\nthe 'Solo.out/Gene' folder relative to the root of the STAR output\
\ directory. \n"
info: null
example:
- "Summary.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--reads_per_gene_logs"
description: "Paths to the 'ReadsPerGene.out.tab' files as output by STAR.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--output"
description: "Tab-delimited file describing for each barcode (as the rows), the\
\ metrics (as columns)\ngathered from the different input files. \n"
info: null
default:
- "starLogs.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "pandas"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/stats/combine_star_logs/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/stats/combine_star_logs"
executable: "target/executable/stats/combine_star_logs/combine_star_logs"
viash_version: "0.9.0-RC7"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"
description: "High-throughput pipeline [WIP]\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,43 @@
// Default resource settings and resource labels for all processes.
process {
  // Default resources for components that hardly do any processing.
  // The memory request grows linearly with the attempt number.
  memory = { 2.GB * task.attempt }
  cpus = 1
  // Retry for exit codes that have something to do with memory issues
  // (137 up to and including 140); terminate on any other exit code.
  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
  maxRetries = 3
  // Upper bound used by get_memory(); while this is null, get_memory()
  // returns the requested amount unchanged.
  maxMemory = null
  // Resource labels
  withLabel: singlecpu { cpus = 1 }
  withLabel: lowcpu { cpus = 4 }
  withLabel: midcpu { cpus = 10 }
  withLabel: highcpu { cpus = 20 }
  // The memory labels scale with the attempt number and go through get_memory().
  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
}
/**
 * Limit a memory request to `process.maxMemory`.
 *
 * - When `process.maxMemory` is not set, the request is returned unchanged.
 * - On the last attempt (task.attempt equals `process.maxRetries`),
 *   `process.maxMemory` is returned.
 * - Otherwise the request is returned, capped at `process.maxMemory`.
 *
 * If one of the settings cannot be interpreted, an error message is printed
 * and the run is stopped with exit code 1.
 *
 * @param to_compare the requested amount of memory
 * @return the amount of memory to use
 */
def get_memory(to_compare) {
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) {
      // The request exceeds the limit: cap it. This branch used to return the
      // undefined variable 'max_memory'; the resulting exception ended up in
      // the catch block below and aborted the run instead of capping.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -155,8 +155,8 @@ build_info:
output: "target/executable/stats/generate_pool_statistics"
executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"

View File

@@ -478,9 +478,9 @@ RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "pandas"
LABEL org.opencontainers.image.description="Companion container for running component stats generate_pool_statistics"
LABEL org.opencontainers.image.created="2024-08-29T12:31:02Z"
LABEL org.opencontainers.image.created="2024-09-17T08:52:49Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="b98f6367d672368af134843711a46d3b53717187"
LABEL org.opencontainers.image.revision="cf9797232db1306bfd5696287928cababe317d99"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -226,8 +226,8 @@ build_info:
output: "target/executable/stats/generate_well_statistics"
executable: "target/executable/stats/generate_well_statistics/generate_well_statistics"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"

View File

@@ -511,9 +511,9 @@ RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "pysam" "pandas"
LABEL org.opencontainers.image.description="Companion container for running component stats generate_well_statistics"
LABEL org.opencontainers.image.created="2024-08-29T12:31:02Z"
LABEL org.opencontainers.image.created="2024-09-17T08:52:48Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="b98f6367d672368af134843711a46d3b53717187"
LABEL org.opencontainers.image.revision="cf9797232db1306bfd5696287928cababe317d99"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -236,8 +236,8 @@ build_info:
output: "target/nextflow/parallel_map"
executable: "target/nextflow/parallel_map/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"

View File

@@ -3087,8 +3087,8 @@ meta = [
"engine" : "docker|native",
"output" : "target/nextflow/parallel_map",
"viash_version" : "0.9.0-RC7",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
"git_commit" : "cf9797232db1306bfd5696287928cababe317d99",
"git_remote" : "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
},
"package_config" : {
"name" : "htrnaseq",

View File

@@ -0,0 +1,214 @@
name: "combine_star_logs"
namespace: "stats"
version: "main"
argument_groups:
- name: "Arguments"
arguments:
- type: "string"
name: "--barcodes"
description: "Barcodes responding to the respective log files.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--star_logs"
description: "Paths to the STAR log files (most frequently called Log.final.out)\n"
info: null
example:
- "Log.final.out"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--gene_summary_logs"
description: "Paths to the Summary.csv files from the STAR Solo output. Can be\
\ found in\nthe 'Solo.out/Gene' folder relative to the root of the STAR output\
\ directory. \n"
info: null
example:
- "Summary.txt"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--reads_per_gene_logs"
description: "Paths to the 'ReadsPerGene.out.tab' files as output by STAR.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--output"
description: "Tab-delimited file describing for each barcode (as the rows), the\
\ metrics (as columns)\ngathered from the different input files. \n"
info: null
default:
- "starLogs.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
- type: "file"
path: "test_data"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "pandas"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/stats/combine_star_logs/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/stats/combine_star_logs"
executable: "target/nextflow/stats/combine_star_logs/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"
description: "High-throughput pipeline [WIP]\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,124 @@
// Nextflow manifest for the generated `stats/combine_star_logs` module:
// names the pipeline, its entry script, the Nextflow version constraint
// and the module version.
manifest {
name = 'stats/combine_star_logs'
mainScript = 'main.nf'
// Version constraint string as understood by Nextflow's `nextflowVersion` setting.
nextflowVersion = '!>=20.12.1-edge'
version = 'main'
}
// Container image set on the process scope.
process.container = 'nextflow/bash:latest'
// detect tempdir
// Resolve the temporary directory from the first environment variable that is
// set (checked in the order NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR), falling
// back to '/tmp', and normalise it to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Configuration profiles. Apart from `no_publish` and `mount_temp`, each
// profile enables exactly one container engine and explicitly disables the
// other four.
profiles {
// Turn off publishDir for every process (the selector '.*' matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Point the temp setting of docker, podman and charliecloud at `tempDir`.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Run with Docker only.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Run with Singularity only; autoMounts is switched on as well.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Run with Podman only.
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Run with Shifter only.
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
// Run with Charliecloud only.
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Resource label selectors. Each `withLabel` entry maps a label name to a
// fixed memory size in bytes or a fixed CPU count.
process{
// Decimal memory labels: mem<N>gb / mem<N>tb use powers of 1000.
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Binary memory labels: mem<N>gib / mem<N>tib use powers of 1024.
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU labels: cpu<N> requests N CPUs.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}
// Pull in the additional config file `nextflow_labels.config`.
includeConfig("nextflow_labels.config")

View File

@@ -0,0 +1,43 @@
// Process-scope defaults plus coarse resource labels (singlecpu..highcpu,
// lowmem..veryhighmem). Memory requests grow with `task.attempt`; the labelled
// memory requests are additionally passed through get_memory().
process {
// Default resources for components that hardly do any processing
memory = { 2.GB * task.attempt }
cpus = 1
// Retry for exit codes that have something to do with memory issues
// (exit status 137 through 140 => 'retry', anything else => 'terminate')
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// No memory cap by default; get_memory() returns the request unchanged while this is unset.
maxMemory = null
// Resource labels
withLabel: singlecpu { cpus = 1 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 10 }
withLabel: highcpu { cpus = 20 }
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
}
/**
 * Cap a requested memory amount at `process.maxMemory`.
 *
 * @param to_compare the requested memory; it must support `compareTo` against
 *                   a `nextflow.util.MemoryUnit`.
 * @return `to_compare` when no cap is configured or the request does not
 *         exceed the cap; the cap itself when the request exceeds it, or when
 *         the current attempt equals `process.maxRetries`.
 *
 * Any failure while evaluating the cap (for example a `process.maxMemory`
 * value that cannot be converted to a MemoryUnit) prints a diagnostic and
 * terminates the JVM with exit code 1.
 */
def get_memory(to_compare) {
  // No cap configured: hand the request back untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // When the attempt number equals maxRetries, request the full cap.
    // NOTE(review): `task` is not a parameter of this function; confirm that it
    // resolves when called from the `memory = { ... }` closures, otherwise this
    // lookup throws and ends up in the catch block below.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }

    def memoryCap = process.maxMemory as nextflow.util.MemoryUnit

    // The previous code returned the undefined name `max_memory` here, so every
    // over-cap request aborted the run instead of being clamped. compareTo is
    // also only guaranteed to return a positive value, not exactly 1.
    if (to_compare.compareTo(memoryCap) > 0) {
      return memoryCap
    }
    return to_compare
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,111 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "combine_star_logs",
"description": "No description",
"type": "object",
"definitions": {
"arguments" : {
"title": "Arguments",
"type": "object",
"description": "No description",
"properties": {
"barcodes": {
"type":
"string",
"description": "Type: List of `string`, multiple_sep: `\";\"`. Barcodes responding to the respective log files",
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. Barcodes responding to the respective log files.\n"
}
,
"star_logs": {
"type":
"string",
"description": "Type: List of `file`, example: `Log.final.out`, multiple_sep: `\";\"`. Paths to the STAR log files (most frequently called Log",
"help_text": "Type: List of `file`, example: `Log.final.out`, multiple_sep: `\";\"`. Paths to the STAR log files (most frequently called Log.final.out)\n"
}
,
"gene_summary_logs": {
"type":
"string",
"description": "Type: List of `file`, example: `Summary.txt`, multiple_sep: `\";\"`. Paths to the Summary",
"help_text": "Type: List of `file`, example: `Summary.txt`, multiple_sep: `\";\"`. Paths to the Summary.csv files from the STAR Solo output. Can be found in\nthe \u0027Solo.out/Gene\u0027 folder relative to the root of the STAR output directory. \n"
}
,
"reads_per_gene_logs": {
"type":
"string",
"description": "Type: List of `file`, multiple_sep: `\";\"`. Paths to the \u0027ReadsPerGene",
"help_text": "Type: List of `file`, multiple_sep: `\";\"`. Paths to the \u0027ReadsPerGene.out.tab\u0027 files as output by STAR.\n"
}
,
"output": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.output.txt`. Tab-delimited file describing for each barcode (as the rows), the metrics (as columns)\ngathered from the different input files",
"help_text": "Type: `file`, default: `$id.$key.output.txt`. Tab-delimited file describing for each barcode (as the rows), the metrics (as columns)\ngathered from the different input files. \n"
,
"default": "$id.$key.output.txt"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -155,8 +155,8 @@ build_info:
output: "target/nextflow/stats/generate_pool_statistics"
executable: "target/nextflow/stats/generate_pool_statistics/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"

View File

@@ -2990,8 +2990,8 @@ meta = [
"engine" : "docker|native",
"output" : "target/nextflow/stats/generate_pool_statistics",
"viash_version" : "0.9.0-RC7",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
"git_commit" : "cf9797232db1306bfd5696287928cababe317d99",
"git_remote" : "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
},
"package_config" : {
"name" : "htrnaseq",

View File

@@ -226,8 +226,8 @@ build_info:
output: "target/nextflow/stats/generate_well_statistics"
executable: "target/nextflow/stats/generate_well_statistics/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"

View File

@@ -3072,8 +3072,8 @@ meta = [
"engine" : "docker|native",
"output" : "target/nextflow/stats/generate_well_statistics",
"viash_version" : "0.9.0-RC7",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
"git_commit" : "cf9797232db1306bfd5696287928cababe317d99",
"git_remote" : "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
},
"package_config" : {
"name" : "htrnaseq",

View File

@@ -215,8 +215,8 @@ build_info:
output: "target/nextflow/workflows/htrnaseq"
executable: "target/nextflow/workflows/htrnaseq/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
dependencies:
- "target/nextflow/stats/generate_pool_statistics"
- "target/nextflow/stats/generate_well_statistics"

View File

@@ -3079,8 +3079,8 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/workflows/htrnaseq",
"viash_version" : "0.9.0-RC7",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
"git_commit" : "cf9797232db1306bfd5696287928cababe317d99",
"git_remote" : "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
},
"package_config" : {
"name" : "htrnaseq",

View File

@@ -161,8 +161,8 @@ build_info:
output: "target/nextflow/workflows/parallel_map_wf"
executable: "target/nextflow/workflows/parallel_map_wf/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
dependencies:
- "target/nextflow/parallel_map"
- "target/nextflow/workflows/utils/groupWells"

View File

@@ -2996,8 +2996,8 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/workflows/parallel_map_wf",
"viash_version" : "0.9.0-RC7",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
"git_commit" : "cf9797232db1306bfd5696287928cababe317d99",
"git_remote" : "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
},
"package_config" : {
"name" : "htrnaseq",

View File

@@ -171,8 +171,8 @@ build_info:
output: "target/nextflow/workflows/utils/groupWells"
executable: "target/nextflow/workflows/utils/groupWells/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"

View File

@@ -3007,8 +3007,8 @@ meta = [
"engine" : "native",
"output" : "target/nextflow/workflows/utils/groupWells",
"viash_version" : "0.9.0-RC7",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
"git_commit" : "cf9797232db1306bfd5696287928cababe317d99",
"git_remote" : "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
},
"package_config" : {
"name" : "htrnaseq",

View File

@@ -197,8 +197,8 @@ build_info:
output: "target/nextflow/workflows/well_demultiplex"
executable: "target/nextflow/workflows/well_demultiplex/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
git_commit: "cf9797232db1306bfd5696287928cababe317d99"
git_remote: "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
dependencies:
- "target/dependencies/vsh/vsh/biobox/v0.1.0/nextflow/cutadapt"
package_config:

View File

@@ -3044,8 +3044,8 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/workflows/well_demultiplex",
"viash_version" : "0.9.0-RC7",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
"git_commit" : "cf9797232db1306bfd5696287928cababe317d99",
"git_remote" : "https://x-access-token:ghs_KjB7pWu8DQM3iFulLu7RI06qnt5K8S1A0eaE@github.com/viash-hub/htrnaseq"
},
"package_config" : {
"name" : "htrnaseq",