Build branch main with version main (b98f636)

Build pipeline: viash-hub.htrnaseq.main-48gzk

Source commit: b98f6367d6

Source message: Add BAM statistics calculations on pool and well level (#6)
This commit is contained in:
CI
2024-08-29 12:41:46 +00:00
parent 044a3af7a9
commit bde35f120c
43 changed files with 12072 additions and 43 deletions

6
.gitignore vendored
View File

@@ -4,4 +4,8 @@ testData
# Nextflow related files
.nextflow
.nextflow.log*
work
work
# Python related files
*__pycache__*
.venv

View File

@@ -1,7 +1,7 @@
set -eo pipefail
## VIASH START
meta_executable="target/executable/parallel_map/parallel_map"
meta_executable=$(realpath "target/executable/parallel_map/parallel_map")
## VIASH END
# Some helper functions

View File

@@ -0,0 +1,51 @@
name: generate_pool_statistics
namespace: "stats"
argument_groups:
- name: "Arguments"
arguments:
- name: "--nrReadsNrGenesPerChrom"
type: file
multiple: true
description: |
Paths to the input files (one per well), each containing a .tsv formatted table describing
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads
column) and the number of genes on that chromosome that had at least one
read mapped to it (NumberOfGenes).
direction: input
default: [processedBamFile_well1.tsv, processedBamfile_well2.tsv]
- name: "--nrReadsNrGenesPerChromPool"
direction: output
type: file
multiple: false
description: |
Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom files. Describes
per chromosome (as columns) the number of reads, as well as the total number
of reads per cell barcode and the percentage of nuclear, ERCC and mitochondrial
reads.
example: "nrReadsNrGenesPerChrom.txt"
resources:
- type: python_script
path: script.py
test_resources:
- type: python_script
path: test.py
engines:
- type: docker
image: python:3.11-slim
setup:
- type: apt
packages:
- procps
- type: python
packages:
- pandas
test_setup:
- type: python
packages:
- viashpy
runners:
- type: executable
- type: nextflow

View File

@@ -0,0 +1,75 @@
import pandas as pd
import re
### VIASH START
par = {
"nrReadsNrGenesPerChrom": ["test/nrReadsNrGenesPerChrom_2.txt", "test/nrReadsNrGenesPerChrom.txt"],
"nrReadsNrGenesPerChromPool": "nrReadsNrGenesPerChrom_pool.txt"
}
### VIASH END
# Nuclear (numbered) chromosomes: an optional "chr" prefix followed by digits.
_NUMBERED_CHROMOSOME_REGEX = re.compile(r"^(chr)?\d+")


def read_well_table(path_or_buffer):
    """Read one per-well nrReadsNrGenesPerChrom table (tab separated).

    The ``Chr`` column is always read as text. Without this, a well whose
    reads only hit numerically named chromosomes ("1", "2", ...) would get an
    integer ``Chr`` column, and the pooled table would mix integer and string
    chromosome labels, breaking the pivot and the name matching below.
    """
    return pd.read_csv(path_or_buffer, header=0, delimiter="\t",
                       dtype={"Chr": str})


def summarize_pool(nr_reads_nr_genes_pool):
    """Build the per-well summary table for a pool.

    Args:
        nr_reads_nr_genes_pool: concatenated per-well tables with the columns
            ``WellBC``, ``Chr``, ``NumberOfReads`` and ``NumberOfGenes``.

    Returns:
        A DataFrame indexed by ``WellBC`` holding one read-count column per
        chromosome, followed by ``SumReads``, ``pctMT``, ``pctERCC``,
        ``pctChrom`` and ``NumberOfGenes``.
    """
    reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(
        index="WellBC", columns="Chr", values=["NumberOfReads"],
        fill_value=0, aggfunc="sum").droplevel(0, axis=1)
    reads_per_chromosome.columns.name = None
    chromosome_names = reads_per_chromosome.columns.to_list()

    # Total number of genes from all chromosomes
    total_nr_genes = nr_reads_nr_genes_pool.groupby("WellBC")["NumberOfGenes"].sum()
    # Total counts (irrespective of chromosome)
    total_sum_of_reads = reads_per_chromosome.sum(numeric_only=True, axis=1)

    # Logic to split up chromosomes per type
    numbered_chromosomes = [chr_name for chr_name in chromosome_names
                            if _NUMBERED_CHROMOSOME_REGEX.match(chr_name)]
    # This is logic from the original HT pipeline: only when all of the
    # numbered chromosomes start with "chr", the mitochondrial chromosome
    # is expected to carry the "chr" prefix as well.
    mitochondrial_chr_name = "MT"
    if all(chr_name.startswith("chr") for chr_name in numbered_chromosomes):
        mitochondrial_chr_name = "chrM"

    # Counts for mitochondrial reads (0 when no read mapped to it in any well)
    try:
        mitochondrial_reads = reads_per_chromosome.loc[:, mitochondrial_chr_name]
    except KeyError:
        mitochondrial_reads = 0
    percentage_mitochondrial_reads = round(mitochondrial_reads / total_sum_of_reads * 100, 2)

    # Counts for ERCC reads
    total_ercc_reads = reads_per_chromosome.filter(regex=r"^ERCC").sum(axis=1)
    percentage_ercc_reads = round(total_ercc_reads / total_sum_of_reads * 100, 2)

    # Counts for the numbered (nuclear) chromosomes
    total_chromosomal_reads = reads_per_chromosome.loc[:, numbered_chromosomes].sum(axis=1)
    percentage_chromosomal_reads = round(total_chromosomal_reads / total_sum_of_reads * 100, 2)

    summary = reads_per_chromosome.assign(
        pctChrom=percentage_chromosomal_reads,
        pctMT=percentage_mitochondrial_reads,
        pctERCC=percentage_ercc_reads,
        SumReads=total_sum_of_reads,
        NumberOfGenes=total_nr_genes,
    )
    column_order = chromosome_names + ["SumReads", "pctMT", "pctERCC",
                                       "pctChrom", "NumberOfGenes"]
    return summary.loc[:, column_order]


if __name__ == "__main__":
    #########
    # nrReadsNrGenesPerChrom file
    #########
    nr_reads_nr_genes_wells = [read_well_table(nr_reads_nr_genes_file)
                               for nr_reads_nr_genes_file
                               in par["nrReadsNrGenesPerChrom"]]
    nr_reads_nr_genes_pool = pd.concat(nr_reads_nr_genes_wells, ignore_index=True)
    summarize_pool(nr_reads_nr_genes_pool)\
        .reset_index(names="WellBC")\
        .to_csv(par["nrReadsNrGenesPerChromPool"], sep="\t",
                header=True, index=False)

View File

@@ -0,0 +1,98 @@
from uuid import uuid4
from textwrap import dedent
from io import StringIO
import pandas as pd
import pytest
import sys
### VIASH START
meta = {
"resources_dir": "./src/stats/generate_pool_statistics/",
"executable": "target/executable/stats/generate_pool_statistics/generate_pool_statistics",
"config": "src/stats/generate_pool_statistics/config.vsh.yaml"
}
### VIASH END
@pytest.fixture
def random_path(tmp_path):
    """Return a factory for unique, not yet existing paths inside ``tmp_path``.

    The factory takes an optional file extension, with or without a leading
    dot; exactly one dot separates the random name from the extension.
    """
    def wrapper(extension=None):
        # Strip leading dots so "tsv" and ".tsv" both yield "<uuid>.tsv"
        # instead of "<uuid>..tsv".
        stripped = extension.lstrip(".") if extension else ""
        suffix = f".{stripped}" if stripped else ""
        return tmp_path / f"{uuid4()}{suffix}"
    return wrapper
@pytest.fixture
def random_tsv_path(random_path):
    """Return a factory for unique ``.tsv`` paths in the test's temp directory."""
    def wrapper():
        # random_path adds the separating dot itself, so pass the bare extension.
        return random_path("tsv")
    return wrapper
@pytest.fixture
def simple_input_file_one(random_tsv_path, request):
prefix = request.param
mito_name = f"{prefix}M{'T' if not prefix else ''}"
contents = dedent(
f"""\
WellBC Chr NumberOfReads NumberOfGenes
AGG {prefix}1 2 1
AGG {prefix}2 3 2
AGG {prefix}3 4 2
AGG {mito_name} 4 2
AGG {prefix}X 2 3
AGG ERCC-1 1 1
AGG ERCC-2 1 1
""")
output_file = random_tsv_path()
with output_file.open("w") as open_file:
open_file.write(contents)
return output_file
@pytest.fixture
def simple_input_file_two(random_tsv_path, request):
prefix = request.param
contents = dedent(
f"""\
WellBC Chr NumberOfReads NumberOfGenes
CCC {prefix}2 2 1
CCC {prefix}3 3 2
CCC {prefix}5 4 2
CCC {prefix}1 4 2
CCC {prefix}Y 2 3
CCC {prefix}X 2 3
CCC ERCC-3 1 1
CCC ERCC-2 1 1
""")
output_file = random_tsv_path()
with output_file.open("w") as open_file:
open_file.write(contents)
return output_file
@pytest.mark.parametrize("simple_input_file_one,simple_input_file_two,expected", [("chr", "chr", "chr"), ("", "", "")],
indirect=["simple_input_file_one", "simple_input_file_two"])
def test_generate_pool_statistics_simple(run_component, simple_input_file_one,
simple_input_file_two, random_tsv_path, expected):
output_path = random_tsv_path()
run_component([
"--nrReadsNrGenesPerChrom", simple_input_file_one,
"--nrReadsNrGenesPerChrom", simple_input_file_two,
"--nrReadsNrGenesPerChromPool", output_path
])
mito_name = f"{expected}M{'T' if not expected else ''}"
expected_output = StringIO(dedent(
f"""\
WellBC ERCC-1 ERCC-2 ERCC-3 {expected}1 {expected}2 {expected}3 {expected}5 {mito_name} {expected}X {expected}Y SumReads pctMT pctERCC pctChrom NumberOfGenes
AGG 1 1 0 2 3 4 0 4 2 0 17 23.53 11.76 52.94 12
CCC 0 1 1 4 2 3 4 0 2 2 19 0.0 10.53 68.42 15
"""))
assert output_path.is_file()
contents = pd.read_csv(output_path, sep="\t")
expected_frame = pd.read_csv(expected_output, sep="\t")
pd.testing.assert_frame_equal(contents, expected_frame, check_like=True)
if __name__ == '__main__':
sys.exit(pytest.main([__file__]))

View File

@@ -0,0 +1,92 @@
name: generate_well_statistics
namespace: "stats"
description: Generate summary statistics from BAM files generated by STAR solo.
argument_groups:
- name: "Arguments"
arguments:
- name: "--input"
type: file
description: "The .bam file as returned by the mapping tool STAR."
direction: input
example: "input.bam"
- name: "--barcode"
type: string
description: |
The barcode for the well that is being processed. Is only used to add a metadata
column to all output files.
required: true
- name: "--processedBAMFile"
type: file
description: |
Path to a .tsv file listing, per read in the BAM file,
the value for the "CB", "UB", "GX" and "GN" tag, together with the
chromosome to which the read was mapped.
direction: output
default: processedBamFile.txt
- name: "--nrReadsNrGenesPerChrom"
type: file
description: |
Path to an output file that contains a .tsv formatted table describing
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads
column) and the number of genes on that chromosome that had at least one
read mapped to it (NumberOfGenes).
default: nrReadsNrGenesPerChrom.txt
direction: output
- name: "--nrReadsNrUMIsPerCB"
type: file
description: |
Path to an output file that contains a .tsv formatted table describing
per barcode the number of UMI's (nrUMIs) and the total number of reads (NumberOfReads).
direction: output
default: nrReadsNrUMIsPerCB.txt
- name: "--umiFreqTop"
type: file
description: |
Path to an output file that contains a .tsv formatted table describing
per UMI (column UB) the frequency at which they occur in the reads (column
N). Only the top 100 UMIs are included.
default: umiFreqTop100.txt
direction: output
- name: "--threads"
type: integer
description: |
Number of threads to use for decompressing BAM files.
min: 1
default: 1
resources:
- type: python_script
path: script.py
test_resources:
- type: python_script
path: test.py
- path: test.sam
engines:
- type: docker
image: debian:stable-slim
setup:
- type: docker
env:
- PIP_BREAK_SYSTEM_PACKAGES=1
- HTSLIB_LIBRARY_DIR=/usr/lib/
- HTSLIB_INCLUDE_DIR=/usr/include/
- type: apt
packages:
- python3
- python3-pip
- python3-venv
- python-is-python3
- libhts-dev
- procps
- type: python
packages:
- pysam
- pandas
test_setup:
- type: python
packages:
- viashpy
runners:
- type: executable
- type: nextflow

View File

@@ -0,0 +1,77 @@
import pysam
import pandas as pd
import logging
### VIASH START
par = {
"input": "src/stats/generate_well_statistics/test.sam",
"processedBAMFile": "processedBamFile.txt",
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom.txt",
"nrReadsNrUMIsPerCB": "nrReadsNrUMIsPerCB.txt",
"umiFreqTop": "umiFreqTop.txt",
"threads": 1,
"barcode": "ACGT"
}
### VIASH END
logger = logging.getLogger()
console_handler = logging.StreamHandler()
logger.addHandler(console_handler)
logger.setLevel(logging.DEBUG)

# Alignment tags that are extracted for every read.
TAGS_SELECTION = ("CB", "UB", "GX", "GN")


def read_tags(alignment_file, tags_selection=TAGS_SELECTION):
    """Collect the selected tags of every read into a DataFrame.

    Args:
        alignment_file: iterable of aligned segments; each segment must offer
            ``get_tags()`` and ``reference_name`` (as pysam segments do).
        tags_selection: names of the tags to keep, used as column names.

    Returns:
        A DataFrame with one row per read, indexed by the name of the
        reference the read was mapped to ("*" when there is none). Tags that
        are absent from a read are left empty (NaN).
    """
    records = []
    chromosomes = []
    for aligned_segment in alignment_file:
        tags = dict(aligned_segment.get_tags())
        # Only keep what ends up in the table, not every tag of every read.
        records.append({tag: tags[tag] for tag in tags_selection if tag in tags})
        reference_name = aligned_segment.reference_name
        chromosomes.append("*" if not reference_name else reference_name)
    return pd.DataFrame.from_records(records, index=chromosomes,
                                     columns=tags_selection)


def count_reads_and_genes_per_chromosome(tag_dataframe):
    """Per chromosome: reads that carry a GX tag and distinct GX values."""
    return tag_dataframe.dropna(subset=["GX"]).groupby(level=0).agg(
        NumberOfReads=pd.NamedAgg("GX", aggfunc="size"),
        NumberOfGenes=pd.NamedAgg(column="GX", aggfunc="nunique")
    )


def count_reads_per_umi(tag_dataframe, top=100):
    """Number of reads per UB value (column N), limited to the ``top`` most frequent."""
    nr_read_per_umi = tag_dataframe.groupby('UB').size()\
        .drop("", errors="ignore").sort_values(ascending=False).head(top)
    return nr_read_per_umi.to_frame(name="N")


def count_reads_and_umis_per_barcode(tag_dataframe):
    """Per CB value: number of reads and number of distinct UB values."""
    return tag_dataframe.groupby(by="CB").agg(
        NumberOfReads=pd.NamedAgg("CB", "size"),
        nrUMIs=pd.NamedAgg("UB", "nunique")
    )


if __name__ == "__main__":
    logger.info("Component started.")
    parameters_str = [f'\t{param}: {param_val}\n' for param, param_val in par.items()]
    logger.info("Parameters:\n%s", "".join(parameters_str).rstrip())
    logger.info("Opening '%s'", par["input"])
    # The context manager makes sure the file handle is released again.
    with pysam.AlignmentFile(par["input"], "rb", threads=par["threads"]) as samfile:
        tag_dataframe = read_tags(samfile)
    logger.info("Done reading BAM file. Found %i entries", tag_dataframe.shape[0])
    tag_dataframe.assign(WellBC=par["barcode"])\
        .reset_index(names="Chr")\
        .to_csv(par["processedBAMFile"], sep="\t", na_rep="",
                header=True, index=False,
                columns=("WellBC", "Chr") + TAGS_SELECTION)
    logger.info("Constructing of dataframe done.")

    # Number of genes that had a read mapped to them per chromosome,
    # and the number of reads mapped to those genes per chromosome.
    nr_reads_nr_genes = count_reads_and_genes_per_chromosome(tag_dataframe)
    logger.info("Done calculating number of reads per gene and per chromesome. Writing to %s",
                par['nrReadsNrGenesPerChrom'])
    nr_reads_nr_genes.reset_index(names="Chr").assign(WellBC=par["barcode"])\
        .to_csv(par["nrReadsNrGenesPerChrom"], sep="\t",
                header=True, index=False,
                columns=("WellBC", "Chr", "NumberOfReads", "NumberOfGenes"))

    # Number of reads mapped to the reference, grouped by UMI
    nr_read_per_umi_df = count_reads_per_umi(tag_dataframe)
    logger.info("Done calculating number of mapped reads per UMI, writing to %s", par["umiFreqTop"])
    nr_read_per_umi_df.assign(WellBC=par["barcode"]).reset_index(names="UB")\
        .to_csv(par["umiFreqTop"], header=True, sep="\t",
                index=False, columns=("WellBC", "UB", "N"))

    # Total number of mapped reads and total number of UMIs (not grouped per chromosome)
    nr_reads_and_umi_per_barcode = count_reads_and_umis_per_barcode(tag_dataframe)
    logger.info("Done calculating number of mapped reads and number of UMIs per Cell Barcode, writing to %s",
                par["nrReadsNrUMIsPerCB"])
    nr_reads_and_umi_per_barcode.assign(WellBC=par["barcode"]).reset_index(names="CB")\
        .to_csv(par["nrReadsNrUMIsPerCB"], sep="\t", header=True,
                index=False, columns=("WellBC", "CB", "NumberOfReads", "nrUMIs"))
    logger.info("Finished!")

View File

@@ -0,0 +1,110 @@
import sys
import pytest
import pysam
from uuid import uuid4
from pathlib import Path
from textwrap import dedent
### VIASH START
meta = {
"resources_dir": "./src/stats/generate_well_statistics/",
"executable": "target/executable/stats/generate_well_statistics/generate_well_statistics",
"config": "src/stats/generate_well_statistics/config.vsh.yaml"
}
### VIASH END
def assert_file_content_equals(file_to_check, expected):
    """Assert that the text stored in ``file_to_check`` is exactly ``expected``.

    ``file_to_check`` must offer an ``open`` method (e.g. a ``pathlib.Path``);
    the whole file is read in text mode and compared as one string.
    """
    with file_to_check.open('r') as handle:
        actual = handle.read()
    assert actual == expected
@pytest.fixture
def input_sam_path():
    """Location of the ``test.sam`` file inside the component's resources directory."""
    resources_dir = Path(meta["resources_dir"])
    return resources_dir / "test.sam"
@pytest.fixture
def random_path(tmp_path):
def wrapper(extension=None):
extension = "" if not extension else f".{extension}"
return tmp_path / f"{uuid4()}{extension}"
return wrapper
@pytest.fixture
def random_bam_path(random_path):
    """Return a factory for unique ``.bam`` paths in the test's temp directory."""
    def wrapper():
        # random_path adds the separating dot itself, so pass the bare extension.
        return random_path("bam")
    return wrapper
@pytest.fixture
def sam_to_bam(random_bam_path):
    """Return a converter that writes a SAM file to a fresh BAM file.

    The converter copies the header (via ``template``) and every record of
    ``sam_file`` into a new BAM file and returns the path of that file.
    """
    def wrapper(sam_file):
        out_path = random_bam_path()
        # Both handles are closed by the with-statement; no explicit close needed.
        with pysam.AlignmentFile(sam_file, "r") as infile, \
             pysam.AlignmentFile(out_path, "wb", template=infile) as outfile:
            for s in infile:
                outfile.write(s)
        return out_path
    return wrapper
def test_generate_well_statistics_simple_bam(run_component, input_sam_path, sam_to_bam, random_path):
bam_file = sam_to_bam(input_sam_path)
processed_bam = random_path("tsv")
reads_per_chromosome = random_path("tsv")
nr_reads_nr_umis_per_cb = random_path("tsv")
top_onehundred_umis = random_path("tsv")
run_component([
"--input", bam_file,
"--processedBAMFile", processed_bam,
"--nrReadsNrGenesPerChrom", reads_per_chromosome,
"--nrReadsNrUMIsPerCB", nr_reads_nr_umis_per_cb,
"--umiFreqTop", top_onehundred_umis,
"--barcode", "ACGT"
])
for file_path in (processed_bam, reads_per_chromosome,
nr_reads_nr_umis_per_cb, top_onehundred_umis):
assert file_path.is_file()
expected_processed_bam = \
dedent("""\
WellBC Chr CB UB GX GN
ACGT 1 ACA CGG gene1 gene1
ACGT 1 ACA CGG gene1 gene1
ACGT 2 GGG GTT gene2 gene2
ACGT 2 GGG GTC gene3 gene3
""")
expected_reads_per_chromosome = \
dedent("""\
WellBC Chr NumberOfReads NumberOfGenes
ACGT 1 2 1
ACGT 2 2 2
""")
expected_nr_reads_nr_umis_per_cb = \
dedent("""\
WellBC CB NumberOfReads nrUMIs
ACGT ACA 2 1
ACGT GGG 2 2
""")
expected_top_onehundred_umis = \
dedent("""\
WellBC UB N
ACGT CGG 2
ACGT GTC 1
ACGT GTT 1
""")
assert_file_content_equals(processed_bam, expected_processed_bam)
assert_file_content_equals(reads_per_chromosome, expected_reads_per_chromosome)
assert_file_content_equals(nr_reads_nr_umis_per_cb, expected_nr_reads_nr_umis_per_cb)
assert_file_content_equals(top_onehundred_umis, expected_top_onehundred_umis)
if __name__ == '__main__':
sys.exit(pytest.main([__file__]))

View File

@@ -0,0 +1,7 @@
@HD VN:1.4 SO:coordinate
@SQ SN:1 LN:200
@SQ SN:2 LN:50
test_1 16 1 22 255 1M * 0 0 C I NH:i:1 HI:i:1 nM:i:0 AS:i:47 CR:Z:ACA UR:Z:CGG GX:Z:gene1 GN:Z:gene1 CB:Z:ACA UB:Z:CGG
test_2 16 1 22 255 1M * 0 0 G ! NH:i:1 HI:i:1 nM:i:0 AS:i:47 CR:Z:ACA UR:Z:CGG GX:Z:gene1 GN:Z:gene1 CB:Z:ACA UB:Z:CGG
test_3 0 2 40 255 1M * 0 0 T ! NH:i:1 HI:i:1 nM:i:0 AS:i:47 CR:Z:GGG UR:Z:GTT GX:Z:gene2 GN:Z:gene2 CB:Z:GGG UB:Z:GTT
test_4 0 2 60 255 1M * 0 0 C ! NH:i:1 HI:i:1 nM:i:0 AS:i:47 CR:Z:GGG UR:Z:GTC GX:Z:gene3 GN:Z:gene3 CB:Z:GGG UB:Z:GTC

View File

@@ -40,6 +40,11 @@ argument_groups:
multiple: true
required: true
default: $id/star/*
- name: "--nrReadsNrGenesPerChrom"
type: file
direction: output
required: true
default: "nrReadsNrGenesPerChrom.txt"
resources:
- type: nextflow_script
path: main.nf
@@ -51,6 +56,10 @@ resources:
# entrypoint: test_wf
dependencies:
- name: stats/generate_pool_statistics
repository: local
- name: stats/generate_well_statistics
repository: local
- name: workflows/well_demultiplex
repository: local
- name: workflows/parallel_map_wf

View File

@@ -68,12 +68,47 @@ workflow run_wf {
state + ["star_output": result.output]
},
)
| generate_well_statistics.run(
fromState: { id, state ->
[
"input": state.star_output.resolve('Aligned.sortedByCoord.out.bam'),
"barcode": state.barcode,
]
},
toState: [
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
"nrReadsNrUMIsPerCB": "nrReadsNrUMIsPerCB",
]
)
| map {id, state ->
[state.pool, id, state]
}
| groupTuple(by: 0, sort: "hash")
| map {id, well_ids, states ->
def collected_state = [
"fastq_output_r1": states.collect{it.fastq_output_r1[0]},
"fastq_output_r2": states.collect{it.fastq_output_r2[0]},
"nrReadsNrGenesPerChrom": states.collect{it.nrReadsNrGenesPerChrom},
]
def newState = states[0] + collected_state
[id, newState]
}
| generate_pool_statistics.run(
fromState: [
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
],
toState: {id, result, state ->
state + ["nrReadsNrGenesPerChrom": result.nrReadsNrGenesPerChromPool]
}
)
| niceView()
| setState(["star_output", "fastq_output_r1", "fastq_output_r2", "star_output"])
//| niceView()
//
//| setState( [ "output": "out" ] )
| setState([
"star_output",
"fastq_output_r1",
"fastq_output_r2",
"star_output",
"nrReadsNrGenesPerChrom",
])
emit:
output_ch

View File

@@ -29,7 +29,6 @@ argument_groups:
- name: "--output"
type: file
direction: output
multiple: true
required: true
resources:
- type: nextflow_script

View File

@@ -3,8 +3,7 @@ workflow run_wf {
input_ch
main:
output_ch = input_ch
| map {id, state -> [id, state + ["orig_id": id]]}
pool_ch = input_ch
| groupWells.run(
fromState: { id, state ->
[
@@ -19,7 +18,6 @@ workflow run_wf {
"wells": result.wells,
"input_r1": result.output_r1,
"input_r2": result.output_r2,
"_meta": ["join_id": state.orig_id]
]
}
)
@@ -33,7 +31,7 @@ workflow run_wf {
"pool": state.pool,
"wellBarcodesLength": 10,
"umiLength": 10,
"output": state.output[0],
"output": state.output,
]
},
toState: { id, result, state ->
@@ -43,8 +41,33 @@ workflow run_wf {
},
directives: [label: ["midmem", "midcpu"]]
)
| setState(["output", "_meta"])
| setState(["output"])
input_join_ch = input_ch
  | map {id, state ->
    // Key every well event by its pool so it can be combined with the pool results.
    [state.pool, id, state]
  }

// Pair each well with the outputs of its pool and pick the single STAR output
// directory whose raw/barcodes.tsv holds the barcode of that well.
output_ch = input_join_ch.combine(pool_ch, by: 0)
  | map {pool, well_id, state_well, state_pool ->
    // `def` keeps the variable local to this closure call. Without it the
    // variable lives in the script binding and is shared between all events.
    def well_output = state_pool.output.findAll{star_output_dir ->
      def barcodes_list = []
      def barcode_file_regex = ~/.*\/raw\/barcodes\.tsv$/
      star_output_dir.eachFileRecurse{barcode_file ->
        if (barcode_file =~ barcode_file_regex) {
          assert barcode_file.countLines() == 1, "Expected only one barcode in a single STAR output."
          barcodes_list.add(barcode_file.text.trim())
        }
      }
      assert barcodes_list.size() == 1, "Exactly one file should have matched the barcodes file regex (found: $barcodes_list)."
      return barcodes_list[0] == state_well.barcode
    }
    // The message refers to state_well.barcode: the `barcode` variable used
    // before only existed inside the findAll closure. The check also fails
    // when no output matched, so the message reports the number found.
    assert well_output.size() == 1, "Expected exactly one mapping output for barcode '${state_well.barcode}' (pool '${pool}'), but found ${well_output.size()}."
    [well_id, ["output": well_output[0]]]
  }
emit:
output_ch
}

View File

@@ -236,7 +236,7 @@ build_info:
output: "target/executable/parallel_map"
executable: "target/executable/parallel_map/parallel_map"
viash_version: "0.9.0-RC7"
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"

View File

@@ -516,9 +516,9 @@ RUN wget -O $STAR_TARGET $STAR_SOURCE && \
rm $STAR_TARGET && rm -rf /tmp/STAR_$STAR_VERSION
LABEL org.opencontainers.image.description="Companion container for running component parallel_map"
LABEL org.opencontainers.image.created="2024-08-29T07:58:04Z"
LABEL org.opencontainers.image.created="2024-08-29T12:31:01Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="21831c2104098ecce57aa9b372e49f865296cc48"
LABEL org.opencontainers.image.revision="b98f6367d672368af134843711a46d3b53717187"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -0,0 +1,186 @@
name: "generate_pool_statistics"
namespace: "stats"
version: "main"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--nrReadsNrGenesPerChrom"
description: "Path to an output file that contains a .tsv formatted table describing\n\
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
column) and the number of genes on that chromosome that had at least one\nread\
\ mapped to it (NumberOfGenes).\n"
info: null
default:
- "processedBamFile_well1.tsv"
- "processedBamfile_well2.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrGenesPerChromPool"
description: "Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom\
\ files. Describes\nper chromosome (as columns) the number of reads, as well\
\ as the total number \nof reads per cell barcode and the percentage of nuclear,\
\ ERCC and mitochondrial\nreads.\n"
info: null
example:
- "nrReadsNrGenesPerChrom.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.11-slim"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "pandas"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/stats/generate_pool_statistics/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/stats/generate_pool_statistics"
executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"
description: "High-throughput pipeline [WIP]\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,43 @@
process {
// Default resources for components that hardly do any processing
memory = { 2.GB * task.attempt }
cpus = 1
// Retry for exit codes that have something to do with memory issues
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
maxMemory = null
// Resource labels
withLabel: singlecpu { cpus = 1 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 10 }
withLabel: highcpu { cpus = 20 }
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
}
// Cap a requested amount of memory at process.maxMemory.
//   to_compare: the requested memory (e.g. 4.GB * task.attempt).
// Returns to_compare unchanged when process.maxMemory is not set, otherwise
// at most process.maxMemory. Prints a message and exits when the settings
// cannot be processed.
def get_memory(to_compare) {
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }
  try {
    // NOTE(review): `task` is not a parameter of this function; confirm that it
    // resolves here, otherwise this branch always ends in the catch block below.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Requested more than allowed: fall back to the configured maximum.
      // (This used to reference the undefined name `max_memory`.)
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,257 @@
name: "generate_well_statistics"
namespace: "stats"
version: "main"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--input"
description: "The .bam file as returned by the mapping tool STAR."
info: null
example:
- "input.bam"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--barcode"
description: "The barcode for the well that is being processed. Is only used to\
\ add a metadata\ncolumn to all output files.\n"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--processedBAMFile"
description: "Path to a .tsv file listing, per read in the BAM file,\nthe value\
\ for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromsome\
\ to which the read was mapped to.\n"
info: null
default:
- "processedBamFile.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrGenesPerChrom"
description: "Path to an output file that contains a .tsv formatted table describing\n\
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
column) and the number of genes on that chromosome that had at least one\nread\
\ mapped to it (NumberOfGenes).\n"
info: null
default:
- "nrReadsNrGenesPerChrom.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrUMIsPerCB"
description: "Path to an output file that contains a .tsv formatted table describing\n\
per barcode the number of UMI's (nrUMIs) and the total number of reads (NumberOfReads).\n"
info: null
default:
- "nrReadsNrUMIsPerCB.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--umiFreqTop"
description: "Path to an output file that contains a .tsv formatted table describing\n\
per UMI (column UB) the frequency at which they occur in the reads (column\n\
N). Only the top 100 UMIs are included.\n"
info: null
default:
- "umiFreqTop100.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--threads"
description: "Number of threads to use for decompressing BAM files.\n"
info: null
default:
- 1
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Generate summary statistics from BAM files generated by STAR solo."
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
- type: "file"
path: "test.sam"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "docker"
env:
- "PIP_BREAK_SYSTEM_PACKAGES=1"
- "HTSLIB_LIBRARY_DIR=/usr/lib/"
- "HTSLIB_INCLUDE_DIR=/usr/include/"
- type: "apt"
packages:
- "python3"
- "python3-pip"
- "python3-venv"
- "python-is-python3"
- "libhts-dev"
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "pysam"
- "pandas"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/stats/generate_well_statistics/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/stats/generate_well_statistics"
executable: "target/executable/stats/generate_well_statistics/generate_well_statistics"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"
description: "High-throughput pipeline [WIP]\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,43 @@
// Process-wide defaults plus coarse CPU/memory labels that components can opt into.
process {
// Default resources for components that hardly do any processing
// The default memory request grows with the attempt number (2 GB on attempt 1, 4 GB on attempt 2, ...).
memory = { 2.GB * task.attempt }
cpus = 1
// Retry for exit codes that have something to do with memory issues
// Exit statuses 137 through 140 select 'retry'; every other status selects 'terminate'.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// No memory ceiling by default: get_memory() hands back its argument unchanged while this is null.
maxMemory = null
// Resource labels
withLabel: singlecpu { cpus = 1 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 10 }
withLabel: highcpu { cpus = 20 }
// Memory labels also scale with task.attempt; the request is passed through get_memory() before use.
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
}
/**
 * Cap a requested amount of memory at `process.maxMemory`.
 *
 * @param to_compare the requested memory; it must support `compareTo` against a
 *                   `nextflow.util.MemoryUnit` (the memory labels pass e.g. `4.GB * task.attempt`).
 * @return `to_compare` when no `process.maxMemory` is configured or the request does not
 *         exceed it; otherwise `process.maxMemory`.
 *
 * Any exception raised while evaluating the limits is reported on stdout and the
 * JVM exits with status 1.
 */
def get_memory(to_compare) {
  // No ceiling configured (key absent, null or otherwise falsy): return the request untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }
  try {
    // When the attempt number equals maxRetries, request the ceiling outright.
    // NOTE(review): `task` is not a parameter of this function; confirm it resolves
    // when this is invoked from the label closures.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Request exceeds the ceiling: clamp to the ceiling. compareTo only guarantees a
    // positive result for "greater than", so test the sign rather than equality with 1.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Previously returned the undefined name `max_memory`, which made every
      // over-limit request fall into the catch block and abort the run.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -236,7 +236,7 @@ build_info:
output: "target/nextflow/parallel_map"
executable: "target/nextflow/parallel_map/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"

View File

@@ -3087,7 +3087,7 @@ meta = [
"engine" : "docker|native",
"output" : "target/nextflow/parallel_map",
"viash_version" : "0.9.0-RC7",
"git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {

View File

@@ -0,0 +1,186 @@
name: "generate_pool_statistics"
namespace: "stats"
version: "main"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--nrReadsNrGenesPerChrom"
description: "Path to an output file that contains a .tsv formatted table describing\n\
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
column) and the number of genes on that chromosome that had at least one\nread\
\ mapped to it (NumberOfGenes).\n"
info: null
default:
- "processedBamFile_well1.tsv"
- "processedBamfile_well2.tsv"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrGenesPerChromPool"
description: "Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom\
\ files. Describes\nper chromosome (as columns) the number of reads, as well\
\ as the total number \nof reads per cell barcode and the percentage of nuclear,\
\ ERCC and mitochondrial\nreads.\n"
info: null
example:
- "nrReadsNrGenesPerChrom.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.11-slim"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "pandas"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/stats/generate_pool_statistics/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/stats/generate_pool_statistics"
executable: "target/nextflow/stats/generate_pool_statistics/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"
description: "High-throughput pipeline [WIP]\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,124 @@
// Pipeline manifest for the stats/generate_pool_statistics component.
manifest {
name = 'stats/generate_pool_statistics'
mainScript = 'main.nf'
// Minimum Nextflow version; per the Nextflow manifest docs the leading '!' makes
// a version mismatch stop execution rather than only warn.
nextflowVersion = '!>=20.12.1-edge'
version = 'main'
}
// Default container image applied to processes.
process.container = 'nextflow/bash:latest'
// detect tempdir
// Takes the first of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR that is set and non-empty
// (the Elvis operator skips null and empty strings), falling back to '/tmp'.
// The result is stored as an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Opt-in configuration profiles: publishing, temp-dir mounting and container engine selection.
profiles {
// Disable publishDir for every process (the '.*' selector matches all process names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Point the temp setting of docker, podman and charliecloud at the tempDir value
// computed earlier in this config file.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Each engine profile below enables exactly one container engine and explicitly
// disables the other four, so the profiles do not leave two engines enabled.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Singularity additionally turns on autoMounts.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Fixed-size resource labels. Each label sets one directive to a constant value.
process{
// Decimal memory labels: values are powers of 1000 bytes (1 GB = 1000000000 B).
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Binary memory labels: values are powers of 1024 bytes (1 GiB = 1073741824 B).
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU count labels.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}
// Pull in the additional label configuration shipped alongside this file.
includeConfig("nextflow_labels.config")

View File

@@ -0,0 +1,43 @@
// Process-wide defaults plus coarse CPU/memory labels that components can opt into.
process {
// Default resources for components that hardly do any processing
// The default memory request grows with the attempt number (2 GB on attempt 1, 4 GB on attempt 2, ...).
memory = { 2.GB * task.attempt }
cpus = 1
// Retry for exit codes that have something to do with memory issues
// Exit statuses 137 through 140 select 'retry'; every other status selects 'terminate'.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// No memory ceiling by default: get_memory() hands back its argument unchanged while this is null.
maxMemory = null
// Resource labels
withLabel: singlecpu { cpus = 1 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 10 }
withLabel: highcpu { cpus = 20 }
// Memory labels also scale with task.attempt; the request is passed through get_memory() before use.
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
}
/**
 * Cap a requested amount of memory at `process.maxMemory`.
 *
 * @param to_compare the requested memory; it must support `compareTo` against a
 *                   `nextflow.util.MemoryUnit` (the memory labels pass e.g. `4.GB * task.attempt`).
 * @return `to_compare` when no `process.maxMemory` is configured or the request does not
 *         exceed it; otherwise `process.maxMemory`.
 *
 * Any exception raised while evaluating the limits is reported on stdout and the
 * JVM exits with status 1.
 */
def get_memory(to_compare) {
  // No ceiling configured (key absent, null or otherwise falsy): return the request untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }
  try {
    // When the attempt number equals maxRetries, request the ceiling outright.
    // NOTE(review): `task` is not a parameter of this function; confirm it resolves
    // when this is invoked from the label closures.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Request exceeds the ceiling: clamp to the ceiling. compareTo only guarantees a
    // positive result for "greater than", so test the sign rather than equality with 1.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Previously returned the undefined name `max_memory`, which made every
      // over-limit request fall into the catch block and abort the run.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,82 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "generate_pool_statistics",
"description": "No description",
"type": "object",
"definitions": {
"arguments" : {
"title": "Arguments",
"type": "object",
"description": "No description",
"properties": {
"nrReadsNrGenesPerChrom": {
"type":
"string",
"description": "Type: List of `file`, default: `processedBamFile_well1.tsv;processedBamfile_well2.tsv`, multiple_sep: `\";\"`. Path to an output file that contains a ",
"help_text": "Type: List of `file`, default: `processedBamFile_well1.tsv;processedBamfile_well2.tsv`, multiple_sep: `\";\"`. Path to an output file that contains a .tsv formatted table describing\nper chromosome the number of reads that were mapped to that chromosome (NumberOfReads\ncolumn) and the number of genes on that chromosome that had at least one\nread mapped to it (NumberOfGenes).\n"
,
"default": "processedBamFile_well1.tsv;processedBamfile_well2.tsv"
}
,
"nrReadsNrGenesPerChromPool": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChromPool.txt`, example: `nrReadsNrGenesPerChrom.txt`. Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom files",
"help_text": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChromPool.txt`, example: `nrReadsNrGenesPerChrom.txt`. Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom files. Describes\nper chromosome (as columns) the number of reads, as well as the total number \nof reads per cell barcode and the percentage of nuclear, ERCC and mitochondrial\nreads.\n"
,
"default": "$id.$key.nrReadsNrGenesPerChromPool.txt"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,257 @@
name: "generate_well_statistics"
namespace: "stats"
version: "main"
argument_groups:
- name: "Arguments"
arguments:
- type: "file"
name: "--input"
description: "The .bam file as returned by the mapping tool STAR."
info: null
example:
- "input.bam"
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--barcode"
description: "The barcode for the well that is being processed. Is only used to\
\ add a metadata\ncolumn to all output files.\n"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--processedBAMFile"
description: "Path to a .tsv file listing, per read in the BAM file,\nthe value\
\ for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromosome\
\ to which the read was mapped.\n"
info: null
default:
- "processedBamFile.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrGenesPerChrom"
description: "Path to an output file that contains a .tsv formatted table describing\n\
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
column) and the number of genes on that chromosome that had at least one\nread\
\ mapped to it (NumberOfGenes).\n"
info: null
default:
- "nrReadsNrGenesPerChrom.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrUMIsPerCB"
description: "Path to an output file that contains a .tsv formatted table describing\n\
per barcode the number of UMI's (nrUMIs) and the total number of reads (NumberOfReads).\n"
info: null
default:
- "nrReadsNrUMIsPerCB.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--umiFreqTop"
description: "Path to an output file that contains a .tsv formatted table describing\n\
per UMI (column UB) the frequency at which they occur in the reads (column\n\
N). Only the top 100 UMIs are included.\n"
info: null
default:
- "umiFreqTop100.txt"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--threads"
description: "Number of threads to use for decompressing BAM files.\n"
info: null
default:
- 1
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Generate summary statistics from BAM files generated by STAR solo."
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
- type: "file"
path: "test.sam"
info: null
status: "enabled"
requirements:
commands:
- "ps"
license: "MIT"
links:
repository: "https://github.com/viash-hub/htrnaseq"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "debian:stable-slim"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "docker"
env:
- "PIP_BREAK_SYSTEM_PACKAGES=1"
- "HTSLIB_LIBRARY_DIR=/usr/lib/"
- "HTSLIB_INCLUDE_DIR=/usr/include/"
- type: "apt"
packages:
- "python3"
- "python3-pip"
- "python3-venv"
- "python-is-python3"
- "libhts-dev"
- "procps"
interactive: false
- type: "python"
user: false
packages:
- "pysam"
- "pandas"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/stats/generate_well_statistics/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/stats/generate_well_statistics"
executable: "target/nextflow/stats/generate_well_statistics/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"
description: "High-throughput pipeline [WIP]\n"
info: null
viash_version: "0.9.0-RC7"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
\ dest: 'nextflow_labels.config'}\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "sequence"
- "high-throughput"
- "mapping"
- "counting"
- "pipeline"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/htrnaseq"
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,125 @@
// Pipeline manifest for the stats/generate_well_statistics component.
manifest {
name = 'stats/generate_well_statistics'
mainScript = 'main.nf'
// Minimum Nextflow version; per the Nextflow manifest docs the leading '!' makes
// a version mismatch stop execution rather than only warn.
nextflowVersion = '!>=20.12.1-edge'
version = 'main'
description = 'Generate summary statistics from BAM files generated by STAR solo.'
}
// Default container image applied to processes.
process.container = 'nextflow/bash:latest'
// detect tempdir
// Takes the first of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR that is set and non-empty
// (the Elvis operator skips null and empty strings), falling back to '/tmp'.
// The result is stored as an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Opt-in configuration profiles: publishing, temp-dir mounting and container engine selection.
profiles {
// Disable publishDir for every process (the '.*' selector matches all process names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Point the temp setting of docker, podman and charliecloud at the tempDir value
// computed earlier in this config file.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Each engine profile below enables exactly one container engine and explicitly
// disables the other four, so the profiles do not leave two engines enabled.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Singularity additionally turns on autoMounts.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Fixed-size resource labels. Each label sets one directive to a constant value.
process{
// Decimal memory labels: values are powers of 1000 bytes (1 GB = 1000000000 B).
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Binary memory labels: values are powers of 1024 bytes (1 GiB = 1073741824 B).
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU count labels.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}
// Pull in the additional label configuration shipped alongside this file.
includeConfig("nextflow_labels.config")

View File

@@ -0,0 +1,43 @@
// Process-wide defaults plus coarse CPU/memory labels that components can opt into.
process {
// Default resources for components that hardly do any processing
// The default memory request grows with the attempt number (2 GB on attempt 1, 4 GB on attempt 2, ...).
memory = { 2.GB * task.attempt }
cpus = 1
// Retry for exit codes that have something to do with memory issues
// Exit statuses 137 through 140 select 'retry'; every other status selects 'terminate'.
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// No memory ceiling by default: get_memory() hands back its argument unchanged while this is null.
maxMemory = null
// Resource labels
withLabel: singlecpu { cpus = 1 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 10 }
withLabel: highcpu { cpus = 20 }
// Memory labels also scale with task.attempt; the request is passed through get_memory() before use.
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
}
/**
 * Cap a requested amount of memory at `process.maxMemory`.
 *
 * @param to_compare the requested memory; it must support `compareTo` against a
 *                   `nextflow.util.MemoryUnit` (the memory labels pass e.g. `4.GB * task.attempt`).
 * @return `to_compare` when no `process.maxMemory` is configured or the request does not
 *         exceed it; otherwise `process.maxMemory`.
 *
 * Any exception raised while evaluating the limits is reported on stdout and the
 * JVM exits with status 1.
 */
def get_memory(to_compare) {
  // No ceiling configured (key absent, null or otherwise falsy): return the request untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }
  try {
    // When the attempt number equals maxRetries, request the ceiling outright.
    // NOTE(review): `task` is not a parameter of this function; confirm it resolves
    // when this is invoked from the label closures.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Request exceeds the ceiling: clamp to the ceiling. compareTo only guarantees a
    // positive result for "greater than", so test the sign rather than equality with 1.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Previously returned the undefined name `max_memory`, which made every
      // over-limit request fall into the catch block and abort the run.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,135 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "generate_well_statistics",
"description": "Generate summary statistics from BAM files generated by STAR solo.",
"type": "object",
"definitions": {
"arguments" : {
"title": "Arguments",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type":
"string",
"description": "Type: `file`, example: `input.bam`. The ",
"help_text": "Type: `file`, example: `input.bam`. The .bam file as returned by the mapping tool STAR."
}
,
"barcode": {
"type":
"string",
"description": "Type: `string`, required. The barcode for the well that is being processed",
"help_text": "Type: `string`, required. The barcode for the well that is being processed. Is only used to add a metadata\ncolumn to all output files.\n"
}
,
"processedBAMFile": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.processedBAMFile.txt`. Path to a .tsv file listing the tags and chromosome of each read in the BAM file",
"help_text": "Type: `file`, default: `$id.$key.processedBAMFile.txt`. Path to a .tsv file listing, per read in the BAM file,\nthe value for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromosome to which the read was mapped.\n"
,
"default": "$id.$key.processedBAMFile.txt"
}
,
"nrReadsNrGenesPerChrom": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. Path to an output file that contains a .tsv formatted table of per-chromosome read and gene counts",
"help_text": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. Path to an output file that contains a .tsv formatted table describing\nper chromosome the number of reads that were mapped to that chromosome (NumberOfReads\ncolumn) and the number of genes on that chromosome that had at least one\nread mapped to it (NumberOfGenes).\n"
,
"default": "$id.$key.nrReadsNrGenesPerChrom.txt"
}
,
"nrReadsNrUMIsPerCB": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a .tsv formatted table of per-barcode UMI and read counts",
"help_text": "Type: `file`, default: `$id.$key.nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a .tsv formatted table describing\nper barcode the number of UMI\u0027s (nrUMIs) and the total number of reads (NumberOfReads).\n"
,
"default": "$id.$key.nrReadsNrUMIsPerCB.txt"
}
,
"umiFreqTop": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.umiFreqTop.txt`. Path to an output file that contains a .tsv formatted table of the top 100 UMI frequencies",
"help_text": "Type: `file`, default: `$id.$key.umiFreqTop.txt`. Path to an output file that contains a .tsv formatted table describing\nper UMI (column UB) the frequency at which they occur in the reads (column\nN). Only the top 100 UMIs are included.\n"
,
"default": "$id.$key.umiFreqTop.txt"
}
,
"threads": {
"type":
"integer",
"description": "Type: `integer`, default: `1`. Number of threads to use for decompressing BAM files",
"help_text": "Type: `integer`, default: `1`. Number of threads to use for decompressing BAM files.\n"
,
"default": "1"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}

View File

@@ -80,6 +80,17 @@ argument_groups:
direction: "output"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--nrReadsNrGenesPerChrom"
info: null
default:
- "nrReadsNrGenesPerChrom.txt"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
@@ -94,6 +105,12 @@ requirements:
commands:
- "ps"
dependencies:
- name: "stats/generate_pool_statistics"
repository:
type: "local"
- name: "stats/generate_well_statistics"
repository:
type: "local"
- name: "workflows/well_demultiplex"
repository:
type: "local"
@@ -198,9 +215,11 @@ build_info:
output: "target/nextflow/workflows/htrnaseq"
executable: "target/nextflow/workflows/htrnaseq/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
dependencies:
- "target/nextflow/stats/generate_pool_statistics"
- "target/nextflow/stats/generate_well_statistics"
- "target/nextflow/workflows/well_demultiplex"
- "target/nextflow/workflows/parallel_map_wf"
- "target/nextflow/workflows/utils/groupWells"

View File

@@ -2893,6 +2893,19 @@ meta = [
"direction" : "output",
"multiple" : true,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--nrReadsNrGenesPerChrom",
"default" : [
"nrReadsNrGenesPerChrom.txt"
],
"must_exist" : true,
"create_parent" : true,
"required" : true,
"direction" : "output",
"multiple" : false,
"multiple_sep" : ";"
}
]
}
@@ -2917,6 +2930,18 @@ meta = [
]
},
"dependencies" : [
{
"name" : "stats/generate_pool_statistics",
"repository" : {
"type" : "local"
}
},
{
"name" : "stats/generate_well_statistics",
"repository" : {
"type" : "local"
}
},
{
"name" : "workflows/well_demultiplex",
"repository" : {
@@ -3054,7 +3079,7 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/workflows/htrnaseq",
"viash_version" : "0.9.0-RC7",
"git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3090,6 +3115,8 @@ meta = [
// resolve dependencies dependencies (if any)
meta["root_dir"] = getRootDir()
include { generate_pool_statistics } from "${meta.resources_dir}/../../../nextflow/stats/generate_pool_statistics/main.nf"
include { generate_well_statistics } from "${meta.resources_dir}/../../../nextflow/stats/generate_well_statistics/main.nf"
include { well_demultiplex } from "${meta.resources_dir}/../../../nextflow/workflows/well_demultiplex/main.nf"
include { parallel_map_wf } from "${meta.resources_dir}/../../../nextflow/workflows/parallel_map_wf/main.nf"
include { groupWells } from "${meta.resources_dir}/../../../nextflow/workflows/utils/groupWells/main.nf"
@@ -3167,12 +3194,47 @@ workflow run_wf {
state + ["star_output": result.output]
},
)
| generate_well_statistics.run(
fromState: { id, state ->
[
"input": state.star_output.resolve('Aligned.sortedByCoord.out.bam'),
"barcode": state.barcode,
]
},
toState: [
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
"nrReadsNrUMIsPerCB": "nrReadsNrUMIsPerCB",
]
)
| map {id, state ->
[state.pool, id, state]
}
| groupTuple(by: 0, sort: "hash")
| map {id, well_ids, states ->
def collected_state = [
"fastq_output_r1": states.collect{it.fastq_output_r1[0]},
"fastq_output_r2": states.collect{it.fastq_output_r2[0]},
"nrReadsNrGenesPerChrom": states.collect{it.nrReadsNrGenesPerChrom},
]
def newState = states[0] + collected_state
[id, newState]
}
| generate_pool_statistics.run(
fromState: [
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
],
toState: {id, result, state ->
state + ["nrReadsNrGenesPerChrom": result.nrReadsNrGenesPerChromPool]
}
)
| niceView()
| setState(["star_output", "fastq_output_r1", "fastq_output_r2", "star_output"])
//| niceView()
//
//| setState( [ "output": "out" ] )
| setState([
  // State keys selected for the emitted channel; each key is listed once
  // (the original list repeated "star_output").
  "star_output",
  "fastq_output_r1",
  "fastq_output_r2",
  "nrReadsNrGenesPerChrom",
])
emit:
output_ch

View File

@@ -96,6 +96,17 @@
}
,
"nrReadsNrGenesPerChrom": {
"type":
"string",
"description": "Type: `file`, required, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. ",
"help_text": "Type: `file`, required, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. "
,
"default": "$id.$key.nrReadsNrGenesPerChrom.txt"
}
}
},

View File

@@ -52,7 +52,7 @@ argument_groups:
create_parent: true
required: true
direction: "output"
multiple: true
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
@@ -161,7 +161,7 @@ build_info:
output: "target/nextflow/workflows/parallel_map_wf"
executable: "target/nextflow/workflows/parallel_map_wf/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
dependencies:
- "target/nextflow/parallel_map"

View File

@@ -2858,7 +2858,7 @@ meta = [
"create_parent" : true,
"required" : true,
"direction" : "output",
"multiple" : true,
"multiple" : false,
"multiple_sep" : ";"
}
]
@@ -2996,7 +2996,7 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/workflows/parallel_map_wf",
"viash_version" : "0.9.0-RC7",
"git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {
@@ -3042,8 +3042,7 @@ workflow run_wf {
input_ch
main:
output_ch = input_ch
| map {id, state -> [id, state + ["orig_id": id]]}
pool_ch = input_ch
| groupWells.run(
fromState: { id, state ->
[
@@ -3058,7 +3057,6 @@ workflow run_wf {
"wells": result.wells,
"input_r1": result.output_r1,
"input_r2": result.output_r2,
"_meta": ["join_id": state.orig_id]
]
}
)
@@ -3072,7 +3070,7 @@ workflow run_wf {
"pool": state.pool,
"wellBarcodesLength": 10,
"umiLength": 10,
"output": state.output[0],
"output": state.output,
]
},
toState: { id, result, state ->
@@ -3082,8 +3080,33 @@ workflow run_wf {
},
directives: [label: ["midmem", "midcpu"]]
)
| setState(["output", "_meta"])
| setState(["output"])
input_join_ch = input_ch
| map {id, state ->
[state.pool, id, state]
}
// Pair every well with the one STAR output of its pool that was produced for
// the well's barcode. Events are joined on the pool name (tuple element 0).
output_ch = input_join_ch.combine(pool_ch, by: 0)
  | map {pool, well_id, state_well, state_pool ->
    // Path pattern of the raw barcodes file searched for in each STAR output.
    def barcode_file_regex = ~/.*\/raw\/barcodes\.tsv$/
    // `def` keeps the result local to this closure invocation; without it the
    // variable ends up in the shared script binding and concurrent
    // invocations can overwrite each other's value.
    // NOTE(review): presumably `state_pool.output` is a list of STAR output
    // directories (one per well) -- confirm against the mapping step.
    def well_output = state_pool.output.findAll{star_output_dir ->
      def barcodes_list = []
      star_output_dir.eachFileRecurse{barcode_file ->
        if (barcode_file =~ barcode_file_regex) {
          assert barcode_file.countLines() == 1, "Expected only one barcode in a single STAR output."
          barcodes_list.add(barcode_file.text.trim())
        }
      }
      assert barcodes_list.size() == 1, "Exactly one file should have matched the barcodes file regex (found: $barcodes_list)."
      // Keep this directory only when its single barcode is the well's barcode.
      return barcodes_list[0] == state_well.barcode
    }
    // Report the well's own barcode: the per-directory barcode is local to the
    // closure above and not visible here. The check fails for zero matches as
    // well as for duplicates, so the message states the count that was found.
    assert well_output.size() == 1, "Expected exactly one output from the mapping for barcode '${state_well.barcode}' (found: ${well_output.size()})."
    [well_id, ["output": well_output[0]]]
  }
emit:
output_ch
}

View File

@@ -67,10 +67,10 @@
"output": {
"type":
"string",
"description": "Type: List of `file`, required, default: `$id.$key.output_*.output_*`, multiple_sep: `\";\"`. ",
"help_text": "Type: List of `file`, required, default: `$id.$key.output_*.output_*`, multiple_sep: `\";\"`. "
"description": "Type: `file`, required, default: `$id.$key.output.output`. ",
"help_text": "Type: `file`, required, default: `$id.$key.output.output`. "
,
"default": "$id.$key.output_*.output_*"
"default": "$id.$key.output.output"
}

View File

@@ -171,7 +171,7 @@ build_info:
output: "target/nextflow/workflows/utils/groupWells"
executable: "target/nextflow/workflows/utils/groupWells/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"

View File

@@ -3007,7 +3007,7 @@ meta = [
"engine" : "native",
"output" : "target/nextflow/workflows/utils/groupWells",
"viash_version" : "0.9.0-RC7",
"git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {

View File

@@ -197,7 +197,7 @@ build_info:
output: "target/nextflow/workflows/well_demultiplex"
executable: "target/nextflow/workflows/well_demultiplex/main.nf"
viash_version: "0.9.0-RC7"
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
git_commit: "b98f6367d672368af134843711a46d3b53717187"
git_remote: "https://github.com/viash-hub/htrnaseq"
dependencies:
- "target/dependencies/vsh/vsh/biobox/v0.1.0/nextflow/cutadapt"

View File

@@ -3044,7 +3044,7 @@ meta = [
"engine" : "native|native",
"output" : "target/nextflow/workflows/well_demultiplex",
"viash_version" : "0.9.0-RC7",
"git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
"git_remote" : "https://github.com/viash-hub/htrnaseq"
},
"package_config" : {