Build branch main with version main (b98f636)
Build pipeline: viash-hub.htrnaseq.main-48gzk
Source commit: b98f6367d6
Source message: Add BAM statistics calculations on pool and well level (#6)
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@@ -4,4 +4,8 @@ testData
|
||||
# Nextflow related files
|
||||
.nextflow
|
||||
.nextflow.log*
|
||||
work
|
||||
work
|
||||
|
||||
# Python related files
|
||||
*__pycache__*
|
||||
.venv
|
||||
@@ -1,7 +1,7 @@
|
||||
set -eo pipefail
|
||||
|
||||
## VIASH START
|
||||
meta_executable="target/executable/parallel_map/parallel_map"
|
||||
meta_executable=$(realpath "target/executable/parallel_map/parallel_map")
|
||||
## VIASH END
|
||||
|
||||
# Some helper functions
|
||||
|
||||
51
src/stats/generate_pool_statistics/config.vsh.yaml
Normal file
51
src/stats/generate_pool_statistics/config.vsh.yaml
Normal file
@@ -0,0 +1,51 @@
|
||||
name: generate_pool_statistics
|
||||
namespace: "stats"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- name: "--nrReadsNrGenesPerChrom"
|
||||
type: file
|
||||
multiple: true
|
||||
description: |
|
||||
Paths to the per-well input files, each containing a .tsv formatted table describing
|
||||
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads
|
||||
column) and the number of genes on that chromosome that had at least one
|
||||
read mapped to it (NumberOfGenes).
|
||||
direction: input
|
||||
default: [processedBamFile_well1.tsv, processedBamfile_well2.tsv]
|
||||
- name: "--nrReadsNrGenesPerChromPool"
|
||||
direction: output
|
||||
type: file
|
||||
multiple: false
|
||||
description: |
|
||||
Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom files. Describes
|
||||
per chromosome (as columns) the number of reads, as well as the total number
|
||||
of reads per cell barcode and the percentage of nuclear, ERCC and mitochondrial
|
||||
reads.
|
||||
example: "nrReadsNrGenesPerChrom.txt"
|
||||
|
||||
resources:
|
||||
- type: python_script
|
||||
path: script.py
|
||||
|
||||
test_resources:
|
||||
- type: python_script
|
||||
path: test.py
|
||||
|
||||
engines:
|
||||
- type: docker
|
||||
image: python:3.11-slim
|
||||
setup:
|
||||
- type: apt
|
||||
packages:
|
||||
- procps
|
||||
- type: python
|
||||
packages:
|
||||
- pandas
|
||||
test_setup:
|
||||
- type: python
|
||||
packages:
|
||||
- viashpy
|
||||
runners:
|
||||
- type: executable
|
||||
- type: nextflow
|
||||
75
src/stats/generate_pool_statistics/script.py
Normal file
75
src/stats/generate_pool_statistics/script.py
Normal file
@@ -0,0 +1,75 @@
|
||||
import pandas as pd
import re

### VIASH START
par = {
    "nrReadsNrGenesPerChrom": ["test/nrReadsNrGenesPerChrom_2.txt", "test/nrReadsNrGenesPerChrom.txt"],
    "nrReadsNrGenesPerChromPool": "nrReadsNrGenesPerChrom_pool.txt"
}

### VIASH END

# Chromosome names made of an optional "chr" prefix followed by a digit are
# counted as nuclear chromosomes (X, Y, MT and ERCC spike-ins do not match).
NUCLEAR_CHROMOSOME_REGEX = re.compile(r"^(chr)?\d+")

# Summary columns appended after the per-chromosome read counts, in output order.
SUMMARY_COLUMNS = ["SumReads", "pctMT", "pctERCC", "pctChrom", "NumberOfGenes"]


def read_well_tables(paths):
    """Read the per-well tables and stack them into one long table.

    Args:
        paths: iterable of paths to tab-separated files with the columns
            WellBC, Chr, NumberOfReads and NumberOfGenes.

    Returns:
        A single DataFrame holding the rows of all input files.

    Raises:
        ValueError: when no input path is provided.
    """
    paths = list(paths)
    if not paths:
        raise ValueError("At least one nrReadsNrGenesPerChrom file is required.")
    # Force the label columns to strings: a file that only lists numeric
    # chromosome names ("1", "2", ...) would otherwise be parsed as integers,
    # which breaks the regex/startswith based chromosome classification.
    well_tables = [
        pd.read_csv(path, header=0, delimiter="\t",
                    dtype={"WellBC": str, "Chr": str})
        for path in paths
    ]
    return pd.concat(well_tables, ignore_index=True)


def calculate_pool_statistics(nr_reads_nr_genes_pool):
    """Pivot the stacked per-well tables into one row of statistics per well.

    Args:
        nr_reads_nr_genes_pool: DataFrame with the columns WellBC, Chr (string
            chromosome names), NumberOfReads and NumberOfGenes.

    Returns:
        DataFrame indexed by WellBC. It contains one read-count column per
        chromosome (0 when a well has no reads for it) followed by SumReads,
        pctMT, pctERCC, pctChrom (percentages rounded to two decimals) and
        NumberOfGenes (genes summed over all chromosomes).
    """
    reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(
        index="WellBC", columns="Chr", values="NumberOfReads",
        fill_value=0, aggfunc="sum")
    reads_per_chromosome.columns.name = None
    chromosome_names = reads_per_chromosome.columns.to_list()

    # Total number of genes from all chromosomes, as a Series aligned on WellBC.
    total_nr_genes = nr_reads_nr_genes_pool.groupby("WellBC")["NumberOfGenes"].sum()

    # Total counts irrespective of chromosome.
    total_sum_of_reads = reads_per_chromosome.sum(numeric_only=True, axis=1)

    def percentage(reads):
        return (reads / total_sum_of_reads * 100).round(2)

    nuclear_chromosomes = [name for name in chromosome_names
                           if NUCLEAR_CHROMOSOME_REGEX.match(name)]

    # This is logic from the original HT pipeline: only when all of the
    # nuclear chromosomes start with "chr" is the mitochondrial chromosome
    # expected to be called "chrM" instead of "MT". Note that all() is also
    # true when no nuclear chromosome was found at all.
    mitochondrial_chr_name = "MT"
    if all(name.startswith("chr") for name in nuclear_chromosomes):
        mitochondrial_chr_name = "chrM"

    # A pool without mitochondrial reads has no such column; count it as 0.
    mitochondrial_reads = reads_per_chromosome.get(mitochondrial_chr_name, 0)
    total_ercc_reads = reads_per_chromosome.filter(regex=r"^ERCC").sum(axis=1)
    total_chromosomal_reads = reads_per_chromosome.loc[:, nuclear_chromosomes].sum(axis=1)

    statistics = reads_per_chromosome.assign(
        SumReads=total_sum_of_reads,
        pctMT=percentage(mitochondrial_reads),
        pctERCC=percentage(total_ercc_reads),
        pctChrom=percentage(total_chromosomal_reads),
        NumberOfGenes=total_nr_genes,
    )
    return statistics[chromosome_names + SUMMARY_COLUMNS]


def main(par):
    """Combine the per-well input tables and write the pool level table."""
    pool_table = read_well_tables(par["nrReadsNrGenesPerChrom"])
    statistics = calculate_pool_statistics(pool_table)
    statistics.reset_index(names="WellBC")\
        .to_csv(par["nrReadsNrGenesPerChromPool"], sep="\t",
                header=True, index=False)


if __name__ == "__main__":
    main(par)
|
||||
|
||||
98
src/stats/generate_pool_statistics/test.py
Normal file
98
src/stats/generate_pool_statistics/test.py
Normal file
@@ -0,0 +1,98 @@
|
||||
from uuid import uuid4
|
||||
from textwrap import dedent
|
||||
from io import StringIO
|
||||
import pandas as pd
|
||||
import pytest
|
||||
import sys
|
||||
|
||||
### VIASH START
|
||||
meta = {
|
||||
"resources_dir": "./src/stats/generate_pool_statistics/",
|
||||
"executable": "target/executable/stats/generate_pool_statistics/generate_pool_statistics",
|
||||
"config": "src/stats/generate_pool_statistics/config.vsh.yaml"
|
||||
}
|
||||
### VIASH END
|
||||
|
||||
@pytest.fixture
def random_path(tmp_path):
    """Provide a factory for unique, not yet existing paths below ``tmp_path``.

    The factory takes an optional extension (without leading dot) that is
    appended to a random UUID file name.
    """
    def make_path(extension=None):
        suffix = f".{extension}" if extension else ""
        return tmp_path / f"{uuid4()}{suffix}"
    return make_path
|
||||
|
||||
|
||||
@pytest.fixture
def random_tsv_path(random_path):
    """Provide a factory for unique temporary paths ending in ``.tsv``."""
    def wrapper():
        # random_path adds the dot itself; passing ".tsv" produced "<uuid>..tsv".
        return random_path("tsv")
    return wrapper
|
||||
|
||||
|
||||
@pytest.fixture
def simple_input_file_one(random_tsv_path, request):
    """Write the per-chromosome table for well ``AGG`` and return its path.

    ``request.param`` is the chromosome name prefix ("chr" or ""); the
    mitochondrial chromosome is called "chrM" with a prefix and "MT" without.
    """
    prefix = request.param
    mito_name = f"{prefix}M{'T' if not prefix else ''}"

    rows = [
        ("WellBC", "Chr", "NumberOfReads", "NumberOfGenes"),
        ("AGG", f"{prefix}1", 2, 1),
        ("AGG", f"{prefix}2", 3, 2),
        ("AGG", f"{prefix}3", 4, 2),
        ("AGG", mito_name, 4, 2),
        ("AGG", f"{prefix}X", 2, 3),
        ("AGG", "ERCC-1", 1, 1),
        ("AGG", "ERCC-2", 1, 1),
    ]
    # The component parses its input with a tab delimiter, so join the fields
    # with an explicit "\t" instead of relying on whitespace in a literal.
    contents = "".join("\t".join(str(field) for field in row) + "\n" for row in rows)
    output_file = random_tsv_path()
    with output_file.open("w") as open_file:
        open_file.write(contents)
    return output_file
|
||||
|
||||
|
||||
@pytest.fixture
def simple_input_file_two(random_tsv_path, request):
    """Write the per-chromosome table for well ``CCC`` and return its path.

    ``request.param`` is the chromosome name prefix ("chr" or ""). This well
    has no mitochondrial reads.
    """
    prefix = request.param
    rows = [
        ("WellBC", "Chr", "NumberOfReads", "NumberOfGenes"),
        ("CCC", f"{prefix}2", 2, 1),
        ("CCC", f"{prefix}3", 3, 2),
        ("CCC", f"{prefix}5", 4, 2),
        ("CCC", f"{prefix}1", 4, 2),
        ("CCC", f"{prefix}Y", 2, 3),
        ("CCC", f"{prefix}X", 2, 3),
        ("CCC", "ERCC-3", 1, 1),
        ("CCC", "ERCC-2", 1, 1),
    ]
    # The component parses its input with a tab delimiter, so join the fields
    # with an explicit "\t" instead of relying on whitespace in a literal.
    contents = "".join("\t".join(str(field) for field in row) + "\n" for row in rows)
    output_file = random_tsv_path()
    with output_file.open("w") as open_file:
        open_file.write(contents)
    return output_file
|
||||
|
||||
@pytest.mark.parametrize("simple_input_file_one,simple_input_file_two,expected", [("chr", "chr", "chr"), ("", "", "")],
                         indirect=["simple_input_file_one", "simple_input_file_two"])
def test_generate_pool_statistics_simple(run_component, simple_input_file_one,
                                         simple_input_file_two, random_tsv_path, expected):
    """Run the component on two wells and compare the pooled pivot table.

    ``expected`` is the chromosome prefix used by both input fixtures.
    """
    output_path = random_tsv_path()
    run_component([
        "--nrReadsNrGenesPerChrom", simple_input_file_one,
        "--nrReadsNrGenesPerChrom", simple_input_file_two,
        "--nrReadsNrGenesPerChromPool", output_path
    ])
    mito_name = f"{expected}M{'T' if not expected else ''}"
    expected_rows = [
        ("WellBC", "ERCC-1", "ERCC-2", "ERCC-3",
         f"{expected}1", f"{expected}2", f"{expected}3", f"{expected}5",
         mito_name, f"{expected}X", f"{expected}Y",
         "SumReads", "pctMT", "pctERCC", "pctChrom", "NumberOfGenes"),
        ("AGG", 1, 1, 0, 2, 3, 4, 0, 4, 2, 0, 17, 23.53, 11.76, 52.94, 12),
        ("CCC", 0, 1, 1, 4, 2, 3, 4, 0, 2, 2, 19, 0.0, 10.53, 68.42, 15),
    ]
    # Both tables are parsed with sep="\t", so the expected text has to be
    # tab-separated as well; build it with explicit separators.
    expected_output = StringIO(
        "".join("\t".join(str(field) for field in row) + "\n" for row in expected_rows))
    assert output_path.is_file()
    contents = pd.read_csv(output_path, sep="\t")
    expected_frame = pd.read_csv(expected_output, sep="\t")
    # check_like=True ignores the order of rows and columns.
    pd.testing.assert_frame_equal(contents, expected_frame, check_like=True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(pytest.main([__file__]))
|
||||
92
src/stats/generate_well_statistics/config.vsh.yaml
Normal file
92
src/stats/generate_well_statistics/config.vsh.yaml
Normal file
@@ -0,0 +1,92 @@
|
||||
name: generate_well_statistics
|
||||
namespace: "stats"
|
||||
description: Generate summary statistics from BAM files generated by STAR solo.
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- name: "--input"
|
||||
type: file
|
||||
description: "The .bam file as returned by the mapping tool STAR."
|
||||
direction: input
|
||||
example: "input.bam"
|
||||
- name: "--barcode"
|
||||
type: string
|
||||
description: |
|
||||
The barcode for the well that is being processed. Is only used to add a metadata
|
||||
column to all output files.
|
||||
required: true
|
||||
- name: "--processedBAMFile"
|
||||
type: file
|
||||
description: |
|
||||
Path to a .tsv file listing, per read in the BAM file,
|
||||
the value for the "CB", "UB", "GX" and "GN" tag, together with the
|
||||
chromosome to which the read was mapped.
|
||||
direction: output
|
||||
default: processedBamFile.txt
|
||||
- name: "--nrReadsNrGenesPerChrom"
|
||||
type: file
|
||||
description: |
|
||||
Path to an output file that contains a .tsv formatted table describing
|
||||
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads
|
||||
column) and the number of genes on that chromosome that had at least one
|
||||
read mapped to it (NumberOfGenes).
|
||||
default: nrReadsNrGenesPerChrom.txt
|
||||
direction: output
|
||||
- name: "--nrReadsNrUMIsPerCB"
|
||||
type: file
|
||||
description: |
|
||||
Path to an output file that contains a .tsv formatted table describing
|
||||
per barcode the number of UMIs (nrUMIs) and the total number of reads (NumberOfReads).
|
||||
direction: output
|
||||
default: nrReadsNrUMIsPerCB.txt
|
||||
- name: "--umiFreqTop"
|
||||
type: file
|
||||
description: |
|
||||
Path to an output file that contains a .tsv formatted table describing
|
||||
per UMI (column UB) the frequency at which they occur in the reads (column
|
||||
N). Only the top 100 UMIs are included.
|
||||
default: umiFreqTop100.txt
|
||||
direction: output
|
||||
- name: "--threads"
|
||||
type: integer
|
||||
description: |
|
||||
Number of threads to use for decompressing BAM files.
|
||||
min: 1
|
||||
default: 1
|
||||
resources:
|
||||
- type: python_script
|
||||
path: script.py
|
||||
|
||||
test_resources:
|
||||
- type: python_script
|
||||
path: test.py
|
||||
- path: test.sam
|
||||
|
||||
engines:
|
||||
- type: docker
|
||||
image: debian:stable-slim
|
||||
setup:
|
||||
- type: docker
|
||||
env:
|
||||
- PIP_BREAK_SYSTEM_PACKAGES=1
|
||||
- HTSLIB_LIBRARY_DIR=/usr/lib/
|
||||
- HTSLIB_INCLUDE_DIR=/usr/include/
|
||||
- type: apt
|
||||
packages:
|
||||
- python3
|
||||
- python3-pip
|
||||
- python3-venv
|
||||
- python-is-python3
|
||||
- libhts-dev
|
||||
- procps
|
||||
- type: python
|
||||
packages:
|
||||
- pysam
|
||||
- pandas
|
||||
test_setup:
|
||||
- type: python
|
||||
packages:
|
||||
- viashpy
|
||||
runners:
|
||||
- type: executable
|
||||
- type: nextflow
|
||||
77
src/stats/generate_well_statistics/script.py
Normal file
77
src/stats/generate_well_statistics/script.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import pysam
import pandas as pd
import logging

### VIASH START
par = {
    "input": "src/stats/generate_well_statistics/test.sam",
    "processedBAMFile": "processedBamFile.txt",
    "nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom.txt",
    "nrReadsNrUMIsPerCB": "nrReadsNrUMIsPerCB.txt",
    "umiFreqTop": "umiFreqTop.txt",
    "threads": 1,
    "barcode": "ACGT"
}
### VIASH END
logger = logging.getLogger()
console_handler = logging.StreamHandler()
logger.addHandler(console_handler)
logger.setLevel(logging.DEBUG)

if __name__ == "__main__":
    logger.info("Component started.")
    parameters_str = [f'\t{param}: {param_val}\n' for param, param_val in par.items()]
    logger.info("Parameters:\n%s", "".join(parameters_str).rstrip())
    logger.info("Opening '%s'", par["input"])

    # One record of tags per alignment; the reference (chromosome) name of
    # each alignment becomes the row index, with "*" for reads without one.
    all_tags = []
    index = []
    tags_selection = ("CB", "UB", "GX", "GN")
    # The context manager closes the alignment file once all reads are consumed.
    with pysam.AlignmentFile(par["input"], "rb", threads=par["threads"]) as samfile:
        for aligned_segment in samfile:
            tags = dict(aligned_segment.get_tags())
            all_tags.append(tags)
            reference_name = aligned_segment.reference_name
            index.append("*" if not reference_name else reference_name)
    # Only the selected tags are kept as columns; absent tags become missing values.
    tag_dataframe = pd.DataFrame.from_records(all_tags, index=index,
                                              columns=tags_selection)
    logger.info("Done reading BAM file. Found %i entries", tag_dataframe.shape[0])
    tag_dataframe.assign(WellBC=par["barcode"])\
        .reset_index(names="Chr")\
        .to_csv(par["processedBAMFile"], sep="\t", na_rep="",
                header=True, index=False,
                columns=("WellBC", "Chr") + tags_selection)
    logger.info("Constructing of dataframe done.")

    # Number of genes that had a read mapped to them per chromosome,
    # and the number of reads mapped to those genes per chromosome.
    # Reads without a GX tag are left out of both counts.
    nr_reads_nr_genes = tag_dataframe.dropna(subset=["GX"]).groupby(level=0).agg(
        NumberOfReads=pd.NamedAgg("GX", aggfunc="size"),
        NumberOfGenes=pd.NamedAgg(column="GX", aggfunc="nunique")
    )
    logger.info("Done calculating number of reads per gene and per chromesome. Writing to %s",
                par['nrReadsNrGenesPerChrom'])
    nr_reads_nr_genes.reset_index(names="Chr").assign(WellBC=par["barcode"])\
        .to_csv(par["nrReadsNrGenesPerChrom"], sep="\t",
                header=True, index=False,
                columns=("WellBC", "Chr", "NumberOfReads", "NumberOfGenes"))

    # Number of reads mapped to the reference, grouped by UMI.
    # An empty-string UMI group is dropped when present; the 100 most frequent remain.
    nr_read_per_umi = tag_dataframe.groupby('UB').size()\
        .drop("", errors="ignore").sort_values(ascending=False).head(100)
    nr_read_per_umi_df = nr_read_per_umi.to_frame(name="N")
    logger.info("Done calculating number of mapped reads per UMI, writing to %s", par["umiFreqTop"])
    nr_read_per_umi_df.assign(WellBC=par["barcode"]).reset_index(names="UB")\
        .to_csv(par["umiFreqTop"], header=True, sep="\t",
                index=False, columns=("WellBC", "UB", "N"))

    # Total number of mapped reads and total number of UMIs (not grouped per chromosome)
    nr_reads_and_umi_per_barcode = tag_dataframe.groupby(by="CB").agg(
        NumberOfReads=pd.NamedAgg("CB", "size"),
        nrUMIs=pd.NamedAgg("UB", "nunique")
    )
    logger.info("Done calculating number of mapped reads and number of UMIs per Cell Barcode, writing to %s",
                par["nrReadsNrUMIsPerCB"])
    nr_reads_and_umi_per_barcode.assign(WellBC=par["barcode"]).reset_index(names="CB")\
        .to_csv(par["nrReadsNrUMIsPerCB"], sep="\t", header=True,
                index=False, columns=("WellBC", "CB", "NumberOfReads", "nrUMIs"))
    logger.info("Finished!")
|
||||
110
src/stats/generate_well_statistics/test.py
Normal file
110
src/stats/generate_well_statistics/test.py
Normal file
@@ -0,0 +1,110 @@
|
||||
import sys
|
||||
import pytest
|
||||
import pysam
|
||||
from uuid import uuid4
|
||||
from pathlib import Path
|
||||
from textwrap import dedent
|
||||
|
||||
### VIASH START
|
||||
meta = {
|
||||
"resources_dir": "./src/stats/generate_well_statistics/",
|
||||
"executable": "target/executable/stats/generate_well_statistics/generate_well_statistics",
|
||||
"config": "src/stats/generate_well_statistics/config.vsh.yaml"
|
||||
}
|
||||
### VIASH END
|
||||
|
||||
def assert_file_content_equals(file_to_check, expected):
    """Assert that the text read from ``file_to_check`` equals ``expected``.

    ``file_to_check`` is a path object that is read in text mode.
    """
    contents = file_to_check.read_text()
    assert contents == expected
|
||||
|
||||
|
||||
@pytest.fixture
def input_sam_path():
    """Return the path of ``test.sam`` inside the component's resources directory."""
    resources_dir = Path(meta["resources_dir"])
    return resources_dir / "test.sam"
|
||||
|
||||
|
||||
@pytest.fixture
def random_path(tmp_path):
    """Provide a factory for unique, not yet existing paths below ``tmp_path``.

    The factory takes an optional extension (without leading dot) that is
    appended to a random UUID file name.
    """
    def make_path(extension=None):
        suffix = f".{extension}" if extension else ""
        return tmp_path / f"{uuid4()}{suffix}"
    return make_path
|
||||
|
||||
@pytest.fixture
def random_bam_path(random_path):
    """Provide a factory for unique temporary paths ending in ``.bam``."""
    def wrapper():
        # random_path adds the dot itself; passing ".bam" produced "<uuid>..bam".
        return random_path("bam")
    return wrapper
|
||||
|
||||
|
||||
@pytest.fixture
def sam_to_bam(random_bam_path):
    """Provide a converter that writes a SAM file as BAM to a fresh temporary path.

    The converter takes the path of the SAM file and returns the path of the
    BAM file, which reuses the header of the input.
    """
    def wrapper(sam_file):
        out_path = random_bam_path()
        # Both handles are closed by the with-statement, so no explicit
        # close() call is needed.
        with pysam.AlignmentFile(sam_file, "r") as infile, \
                pysam.AlignmentFile(out_path, "wb", template=infile) as outfile:
            for s in infile:
                outfile.write(s)
        return out_path
    return wrapper
|
||||
|
||||
|
||||
def test_generate_well_statistics_simple_bam(run_component, input_sam_path, sam_to_bam, random_path):
    """Run the component on the BAM version of ``test.sam`` and check all four tables."""

    def as_tsv(rows):
        # The component writes its tables with sep="\t" and the comparison
        # below is byte-for-byte, so the separators are spelled out explicitly.
        return "".join("\t".join(row) + "\n" for row in rows)

    bam_file = sam_to_bam(input_sam_path)
    processed_bam = random_path("tsv")
    reads_per_chromosome = random_path("tsv")
    nr_reads_nr_umis_per_cb = random_path("tsv")
    top_onehundred_umis = random_path("tsv")
    run_component([
        "--input", bam_file,
        "--processedBAMFile", processed_bam,
        "--nrReadsNrGenesPerChrom", reads_per_chromosome,
        "--nrReadsNrUMIsPerCB", nr_reads_nr_umis_per_cb,
        "--umiFreqTop", top_onehundred_umis,
        "--barcode", "ACGT"
    ])
    for file_path in (processed_bam, reads_per_chromosome,
                      nr_reads_nr_umis_per_cb, top_onehundred_umis):
        assert file_path.is_file()

    expected_processed_bam = as_tsv([
        ("WellBC", "Chr", "CB", "UB", "GX", "GN"),
        ("ACGT", "1", "ACA", "CGG", "gene1", "gene1"),
        ("ACGT", "1", "ACA", "CGG", "gene1", "gene1"),
        ("ACGT", "2", "GGG", "GTT", "gene2", "gene2"),
        ("ACGT", "2", "GGG", "GTC", "gene3", "gene3"),
    ])

    expected_reads_per_chromosome = as_tsv([
        ("WellBC", "Chr", "NumberOfReads", "NumberOfGenes"),
        ("ACGT", "1", "2", "1"),
        ("ACGT", "2", "2", "2"),
    ])

    expected_nr_reads_nr_umis_per_cb = as_tsv([
        ("WellBC", "CB", "NumberOfReads", "nrUMIs"),
        ("ACGT", "ACA", "2", "1"),
        ("ACGT", "GGG", "2", "2"),
    ])

    expected_top_onehundred_umis = as_tsv([
        ("WellBC", "UB", "N"),
        ("ACGT", "CGG", "2"),
        ("ACGT", "GTC", "1"),
        ("ACGT", "GTT", "1"),
    ])

    assert_file_content_equals(processed_bam, expected_processed_bam)
    assert_file_content_equals(reads_per_chromosome, expected_reads_per_chromosome)
    assert_file_content_equals(nr_reads_nr_umis_per_cb, expected_nr_reads_nr_umis_per_cb)
    assert_file_content_equals(top_onehundred_umis, expected_top_onehundred_umis)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(pytest.main([__file__]))
|
||||
7
src/stats/generate_well_statistics/test.sam
Normal file
7
src/stats/generate_well_statistics/test.sam
Normal file
@@ -0,0 +1,7 @@
|
||||
@HD VN:1.4 SO:coordinate
|
||||
@SQ SN:1 LN:200
|
||||
@SQ SN:2 LN:50
|
||||
test_1 16 1 22 255 1M * 0 0 C I NH:i:1 HI:i:1 nM:i:0 AS:i:47 CR:Z:ACA UR:Z:CGG GX:Z:gene1 GN:Z:gene1 CB:Z:ACA UB:Z:CGG
|
||||
test_2 16 1 22 255 1M * 0 0 G ! NH:i:1 HI:i:1 nM:i:0 AS:i:47 CR:Z:ACA UR:Z:CGG GX:Z:gene1 GN:Z:gene1 CB:Z:ACA UB:Z:CGG
|
||||
test_3 0 2 40 255 1M * 0 0 T ! NH:i:1 HI:i:1 nM:i:0 AS:i:47 CR:Z:GGG UR:Z:GTT GX:Z:gene2 GN:Z:gene2 CB:Z:GGG UB:Z:GTT
|
||||
test_4 0 2 60 255 1M * 0 0 C ! NH:i:1 HI:i:1 nM:i:0 AS:i:47 CR:Z:GGG UR:Z:GTC GX:Z:gene3 GN:Z:gene3 CB:Z:GGG UB:Z:GTC
|
||||
@@ -40,6 +40,11 @@ argument_groups:
|
||||
multiple: true
|
||||
required: true
|
||||
default: $id/star/*
|
||||
- name: "--nrReadsNrGenesPerChrom"
|
||||
type: file
|
||||
direction: output
|
||||
required: true
|
||||
default: "nrReadsNrGenesPerChrom.txt"
|
||||
resources:
|
||||
- type: nextflow_script
|
||||
path: main.nf
|
||||
@@ -51,6 +56,10 @@ resources:
|
||||
# entrypoint: test_wf
|
||||
|
||||
dependencies:
|
||||
- name: stats/generate_pool_statistics
|
||||
repository: local
|
||||
- name: stats/generate_well_statistics
|
||||
repository: local
|
||||
- name: workflows/well_demultiplex
|
||||
repository: local
|
||||
- name: workflows/parallel_map_wf
|
||||
|
||||
@@ -68,12 +68,47 @@ workflow run_wf {
|
||||
state + ["star_output": result.output]
|
||||
},
|
||||
)
|
||||
| generate_well_statistics.run(
|
||||
fromState: { id, state ->
|
||||
[
|
||||
"input": state.star_output.resolve('Aligned.sortedByCoord.out.bam'),
|
||||
"barcode": state.barcode,
|
||||
]
|
||||
},
|
||||
toState: [
|
||||
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
|
||||
"nrReadsNrUMIsPerCB": "nrReadsNrUMIsPerCB",
|
||||
]
|
||||
)
|
||||
| map {id, state ->
|
||||
[state.pool, id, state]
|
||||
}
|
||||
| groupTuple(by: 0, sort: "hash")
|
||||
| map {id, well_ids, states ->
|
||||
def collected_state = [
|
||||
"fastq_output_r1": states.collect{it.fastq_output_r1[0]},
|
||||
"fastq_output_r2": states.collect{it.fastq_output_r2[0]},
|
||||
"nrReadsNrGenesPerChrom": states.collect{it.nrReadsNrGenesPerChrom},
|
||||
]
|
||||
def newState = states[0] + collected_state
|
||||
[id, newState]
|
||||
}
|
||||
| generate_pool_statistics.run(
|
||||
fromState: [
|
||||
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
|
||||
],
|
||||
toState: {id, result, state ->
|
||||
state + ["nrReadsNrGenesPerChrom": result.nrReadsNrGenesPerChromPool]
|
||||
}
|
||||
)
|
||||
| niceView()
|
||||
| setState(["star_output", "fastq_output_r1", "fastq_output_r2", "star_output"])
|
||||
|
||||
//| niceView()
|
||||
//
|
||||
//| setState( [ "output": "out" ] )
|
||||
| setState([
|
||||
"star_output",
|
||||
"fastq_output_r1",
|
||||
"fastq_output_r2",
|
||||
"star_output",
|
||||
"nrReadsNrGenesPerChrom",
|
||||
])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -29,7 +29,6 @@ argument_groups:
|
||||
- name: "--output"
|
||||
type: file
|
||||
direction: output
|
||||
multiple: true
|
||||
required: true
|
||||
resources:
|
||||
- type: nextflow_script
|
||||
|
||||
@@ -3,8 +3,7 @@ workflow run_wf {
|
||||
input_ch
|
||||
|
||||
main:
|
||||
output_ch = input_ch
|
||||
| map {id, state -> [id, state + ["orig_id": id]]}
|
||||
pool_ch = input_ch
|
||||
| groupWells.run(
|
||||
fromState: { id, state ->
|
||||
[
|
||||
@@ -19,7 +18,6 @@ workflow run_wf {
|
||||
"wells": result.wells,
|
||||
"input_r1": result.output_r1,
|
||||
"input_r2": result.output_r2,
|
||||
"_meta": ["join_id": state.orig_id]
|
||||
]
|
||||
}
|
||||
)
|
||||
@@ -33,7 +31,7 @@ workflow run_wf {
|
||||
"pool": state.pool,
|
||||
"wellBarcodesLength": 10,
|
||||
"umiLength": 10,
|
||||
"output": state.output[0],
|
||||
"output": state.output,
|
||||
]
|
||||
},
|
||||
toState: { id, result, state ->
|
||||
@@ -43,8 +41,33 @@ workflow run_wf {
|
||||
},
|
||||
directives: [label: ["midmem", "midcpu"]]
|
||||
)
|
||||
| setState(["output", "_meta"])
|
||||
|
||||
| setState(["output"])
|
||||
|
||||
input_join_ch = input_ch
|
||||
| map {id, state ->
|
||||
[state.pool, id, state]
|
||||
}
|
||||
output_ch = input_join_ch.combine(pool_ch, by: 0)
|
||||
| map {pool, well_id, state_well, state_pool ->
|
||||
well_output = state_pool.output.findAll{star_output_dir ->
|
||||
def barcodes_list = []
|
||||
def barcode_file_regex = ~/.*\/raw\/barcodes\.tsv$/
|
||||
star_output_dir.eachFileRecurse{barcode_file ->
|
||||
if (barcode_file =~ barcode_file_regex) {
|
||||
assert barcode_file.countLines() == 1, "Expected only one barcode in a single STAR output."
|
||||
barcodes_list.add(barcode_file.text.trim())
|
||||
}
|
||||
}
|
||||
assert barcodes_list.size() == 1, "Exactly one file should have matched the barcodes file regex (found: $barcodes_list)."
|
||||
def barcode
|
||||
barcodes_list.each{ it -> barcode = it }
|
||||
return barcode == state_well.barcode
|
||||
}
|
||||
assert well_output.size() == 1, "Two or more outputs from the mapping seemed to have processed barcode '$barcode'."
|
||||
[well_id, ["output": well_output[0]]]
|
||||
}
|
||||
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
}
|
||||
@@ -236,7 +236,7 @@ build_info:
|
||||
output: "target/executable/parallel_map"
|
||||
executable: "target/executable/parallel_map/parallel_map"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
|
||||
git_commit: "b98f6367d672368af134843711a46d3b53717187"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
|
||||
@@ -516,9 +516,9 @@ RUN wget -O $STAR_TARGET $STAR_SOURCE && \
|
||||
rm $STAR_TARGET && rm -rf /tmp/STAR_$STAR_VERSION
|
||||
|
||||
LABEL org.opencontainers.image.description="Companion container for running component parallel_map"
|
||||
LABEL org.opencontainers.image.created="2024-08-29T07:58:04Z"
|
||||
LABEL org.opencontainers.image.created="2024-08-29T12:31:01Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="21831c2104098ecce57aa9b372e49f865296cc48"
|
||||
LABEL org.opencontainers.image.revision="b98f6367d672368af134843711a46d3b53717187"
|
||||
LABEL org.opencontainers.image.version="main"
|
||||
|
||||
VIASHDOCKER
|
||||
|
||||
@@ -0,0 +1,186 @@
|
||||
name: "generate_pool_statistics"
|
||||
namespace: "stats"
|
||||
version: "main"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChrom"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
|
||||
column) and the number of genes on that chromosome that had at least one\nread\
|
||||
\ mapped to it (NumberOfGenes).\n"
|
||||
info: null
|
||||
default:
|
||||
- "processedBamFile_well1.tsv"
|
||||
- "processedBamfile_well2.tsv"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChromPool"
|
||||
description: "Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom\
|
||||
\ files. Describes\nper chromosome (as columns) the number of reads, as well\
|
||||
\ as the total number \nof reads per cell barcode and the percentage of nuclear,\
|
||||
\ ERCC and mitochondrial\nreads.\n"
|
||||
info: null
|
||||
example:
|
||||
- "nrReadsNrGenesPerChrom.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.11-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "pandas"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/stats/generate_pool_statistics/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/stats/generate_pool_statistics"
|
||||
executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "b98f6367d672368af134843711a46d3b53717187"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info: null
|
||||
viash_version: "0.9.0-RC7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
1187
target/executable/stats/generate_pool_statistics/generate_pool_statistics
Executable file
1187
target/executable/stats/generate_pool_statistics/generate_pool_statistics
Executable file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,43 @@
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = null
|
||||
|
||||
// Resource labels
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
|
||||
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
/**
 * Cap a requested memory amount at the optional `process.maxMemory` limit.
 *
 * Used by the `lowmem`/`midmem`/`highmem`/`veryhighmem` labels, which pass
 * in a request that grows with `task.attempt`.
 *
 * @param to_compare the requested amount of memory (a MemoryUnit, e.g. `4.GB * task.attempt`)
 * @return `to_compare` when no limit is configured or the request does not
 *         exceed it; otherwise the configured `process.maxMemory`
 */
def get_memory(to_compare) {
  // No limit configured (key absent or null/empty): use the request unchanged.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // When the attempt number equals maxRetries, request the full limit.
    // NOTE(review): `task` is not a parameter of this function; presumably it
    // resolves from the calling context - confirm it is visible here.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Request exceeds the limit: clamp to the limit. compareTo only
    // guarantees a positive value for "greater than", so test `> 0`.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Fixed: this used to return the undefined variable `max_memory`,
      // which threw and landed in the catch block below.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    // Any failure while parsing/comparing the settings is fatal.
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}
|
||||
@@ -0,0 +1,257 @@
|
||||
name: "generate_well_statistics"
|
||||
namespace: "stats"
|
||||
version: "main"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
description: "The .bam file as returned by the mapping tool STAR."
|
||||
info: null
|
||||
example:
|
||||
- "input.bam"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--barcode"
|
||||
description: "The barcode for the well that is being processed. Is only used to\
|
||||
\ add a metadata\ncolumn to all output files.\n"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--processedBAMFile"
|
||||
description: "Path to a .tsv file listing, per read in the BAM file,\nthe value\
|
||||
\ for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromosome\
|
||||
\ to which the read was mapped.\n"
|
||||
info: null
|
||||
default:
|
||||
- "processedBamFile.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChrom"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
|
||||
column) and the number of genes on that chromosome that had at least one\nread\
|
||||
\ mapped to it (NumberOfGenes).\n"
|
||||
info: null
|
||||
default:
|
||||
- "nrReadsNrGenesPerChrom.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrUMIsPerCB"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per barcode the number of UMIs (nrUMIs) and the total number of reads (NumberOfReads).\n"
|
||||
info: null
|
||||
default:
|
||||
- "nrReadsNrUMIsPerCB.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--umiFreqTop"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per UMI (column UB) the frequency at which they occur in the reads (column\n\
|
||||
N). Only the top 100 UMIs are included.\n"
|
||||
info: null
|
||||
default:
|
||||
- "umiFreqTop100.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--threads"
|
||||
description: "Number of threads to use for decompressing BAM files.\n"
|
||||
info: null
|
||||
default:
|
||||
- 1
|
||||
required: false
|
||||
min: 1
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Generate summary statistics from BAM files generated by STAR solo."
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "test.sam"
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "debian:stable-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "docker"
|
||||
env:
|
||||
- "PIP_BREAK_SYSTEM_PACKAGES=1"
|
||||
- "HTSLIB_LIBRARY_DIR=/usr/lib/"
|
||||
- "HTSLIB_INCLUDE_DIR=/usr/include/"
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "python3"
|
||||
- "python3-pip"
|
||||
- "python3-venv"
|
||||
- "python-is-python3"
|
||||
- "libhts-dev"
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "pysam"
|
||||
- "pandas"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/stats/generate_well_statistics/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/stats/generate_well_statistics"
|
||||
executable: "target/executable/stats/generate_well_statistics/generate_well_statistics"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "b98f6367d672368af134843711a46d3b53717187"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info: null
|
||||
viash_version: "0.9.0-RC7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
1319
target/executable/stats/generate_well_statistics/generate_well_statistics
Executable file
1319
target/executable/stats/generate_well_statistics/generate_well_statistics
Executable file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,43 @@
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = null
|
||||
|
||||
// Resource labels
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
|
||||
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
/**
 * Cap a requested memory amount at the optional `process.maxMemory` limit.
 *
 * Used by the `lowmem`/`midmem`/`highmem`/`veryhighmem` labels, which pass
 * in a request that grows with `task.attempt`.
 *
 * @param to_compare the requested amount of memory (a MemoryUnit, e.g. `4.GB * task.attempt`)
 * @return `to_compare` when no limit is configured or the request does not
 *         exceed it; otherwise the configured `process.maxMemory`
 */
def get_memory(to_compare) {
  // No limit configured (key absent or null/empty): use the request unchanged.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // When the attempt number equals maxRetries, request the full limit.
    // NOTE(review): `task` is not a parameter of this function; presumably it
    // resolves from the calling context - confirm it is visible here.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Request exceeds the limit: clamp to the limit. compareTo only
    // guarantees a positive value for "greater than", so test `> 0`.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Fixed: this used to return the undefined variable `max_memory`,
      // which threw and landed in the catch block below.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    // Any failure while parsing/comparing the settings is fatal.
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}
|
||||
@@ -236,7 +236,7 @@ build_info:
|
||||
output: "target/nextflow/parallel_map"
|
||||
executable: "target/nextflow/parallel_map/main.nf"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
|
||||
git_commit: "b98f6367d672368af134843711a46d3b53717187"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
|
||||
@@ -3087,7 +3087,7 @@ meta = [
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/parallel_map",
|
||||
"viash_version" : "0.9.0-RC7",
|
||||
"git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
|
||||
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
|
||||
186
target/nextflow/stats/generate_pool_statistics/.config.vsh.yaml
Normal file
186
target/nextflow/stats/generate_pool_statistics/.config.vsh.yaml
Normal file
@@ -0,0 +1,186 @@
|
||||
name: "generate_pool_statistics"
|
||||
namespace: "stats"
|
||||
version: "main"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChrom"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
|
||||
column) and the number of genes on that chromosome that had at least one\nread\
|
||||
\ mapped to it (NumberOfGenes).\n"
|
||||
info: null
|
||||
default:
|
||||
- "processedBamFile_well1.tsv"
|
||||
- "processedBamfile_well2.tsv"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChromPool"
|
||||
description: "Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom\
|
||||
\ files. Describes\nper chromosome (as columns) the number of reads, as well\
|
||||
\ as the total number \nof reads per cell barcode and the percentage of nuclear,\
|
||||
\ ERCC and mitochondrial\nreads.\n"
|
||||
info: null
|
||||
example:
|
||||
- "nrReadsNrGenesPerChrom.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.11-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "pandas"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/stats/generate_pool_statistics/config.vsh.yaml"
|
||||
runner: "nextflow"
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/stats/generate_pool_statistics"
|
||||
executable: "target/nextflow/stats/generate_pool_statistics/main.nf"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "b98f6367d672368af134843711a46d3b53717187"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info: null
|
||||
viash_version: "0.9.0-RC7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
3609
target/nextflow/stats/generate_pool_statistics/main.nf
Normal file
3609
target/nextflow/stats/generate_pool_statistics/main.nf
Normal file
File diff suppressed because it is too large
Load Diff
124
target/nextflow/stats/generate_pool_statistics/nextflow.config
Normal file
124
target/nextflow/stats/generate_pool_statistics/nextflow.config
Normal file
@@ -0,0 +1,124 @@
|
||||
manifest {
|
||||
name = 'stats/generate_pool_statistics'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'main'
|
||||
}
|
||||
|
||||
process.container = 'nextflow/bash:latest'
|
||||
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
profiles {
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
docker {
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
singularity {
|
||||
singularity.enabled = true
|
||||
singularity.autoMounts = true
|
||||
docker.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
podman {
|
||||
podman.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
shifter {
|
||||
shifter.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
charliecloud {
|
||||
charliecloud.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
}
|
||||
}
|
||||
|
||||
process{
|
||||
withLabel: mem1gb { memory = 1000000000.B }
|
||||
withLabel: mem2gb { memory = 2000000000.B }
|
||||
withLabel: mem5gb { memory = 5000000000.B }
|
||||
withLabel: mem10gb { memory = 10000000000.B }
|
||||
withLabel: mem20gb { memory = 20000000000.B }
|
||||
withLabel: mem50gb { memory = 50000000000.B }
|
||||
withLabel: mem100gb { memory = 100000000000.B }
|
||||
withLabel: mem200gb { memory = 200000000000.B }
|
||||
withLabel: mem500gb { memory = 500000000000.B }
|
||||
withLabel: mem1tb { memory = 1000000000000.B }
|
||||
withLabel: mem2tb { memory = 2000000000000.B }
|
||||
withLabel: mem5tb { memory = 5000000000000.B }
|
||||
withLabel: mem10tb { memory = 10000000000000.B }
|
||||
withLabel: mem20tb { memory = 20000000000000.B }
|
||||
withLabel: mem50tb { memory = 50000000000000.B }
|
||||
withLabel: mem100tb { memory = 100000000000000.B }
|
||||
withLabel: mem200tb { memory = 200000000000000.B }
|
||||
withLabel: mem500tb { memory = 500000000000000.B }
|
||||
withLabel: mem1gib { memory = 1073741824.B }
|
||||
withLabel: mem2gib { memory = 2147483648.B }
|
||||
withLabel: mem4gib { memory = 4294967296.B }
|
||||
withLabel: mem8gib { memory = 8589934592.B }
|
||||
withLabel: mem16gib { memory = 17179869184.B }
|
||||
withLabel: mem32gib { memory = 34359738368.B }
|
||||
withLabel: mem64gib { memory = 68719476736.B }
|
||||
withLabel: mem128gib { memory = 137438953472.B }
|
||||
withLabel: mem256gib { memory = 274877906944.B }
|
||||
withLabel: mem512gib { memory = 549755813888.B }
|
||||
withLabel: mem1tib { memory = 1099511627776.B }
|
||||
withLabel: mem2tib { memory = 2199023255552.B }
|
||||
withLabel: mem4tib { memory = 4398046511104.B }
|
||||
withLabel: mem8tib { memory = 8796093022208.B }
|
||||
withLabel: mem16tib { memory = 17592186044416.B }
|
||||
withLabel: mem32tib { memory = 35184372088832.B }
|
||||
withLabel: mem64tib { memory = 70368744177664.B }
|
||||
withLabel: mem128tib { memory = 140737488355328.B }
|
||||
withLabel: mem256tib { memory = 281474976710656.B }
|
||||
withLabel: mem512tib { memory = 562949953421312.B }
|
||||
withLabel: cpu1 { cpus = 1 }
|
||||
withLabel: cpu2 { cpus = 2 }
|
||||
withLabel: cpu5 { cpus = 5 }
|
||||
withLabel: cpu10 { cpus = 10 }
|
||||
withLabel: cpu20 { cpus = 20 }
|
||||
withLabel: cpu50 { cpus = 50 }
|
||||
withLabel: cpu100 { cpus = 100 }
|
||||
withLabel: cpu200 { cpus = 200 }
|
||||
withLabel: cpu500 { cpus = 500 }
|
||||
withLabel: cpu1000 { cpus = 1000 }
|
||||
}
|
||||
|
||||
includeConfig("nextflow_labels.config")
|
||||
@@ -0,0 +1,43 @@
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = null
|
||||
|
||||
// Resource labels
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
|
||||
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
/**
 * Cap a requested memory amount at the optional `process.maxMemory` limit.
 *
 * Used by the `lowmem`/`midmem`/`highmem`/`veryhighmem` labels, which pass
 * in a request that grows with `task.attempt`.
 *
 * @param to_compare the requested amount of memory (a MemoryUnit, e.g. `4.GB * task.attempt`)
 * @return `to_compare` when no limit is configured or the request does not
 *         exceed it; otherwise the configured `process.maxMemory`
 */
def get_memory(to_compare) {
  // No limit configured (key absent or null/empty): use the request unchanged.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // When the attempt number equals maxRetries, request the full limit.
    // NOTE(review): `task` is not a parameter of this function; presumably it
    // resolves from the calling context - confirm it is visible here.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // Request exceeds the limit: clamp to the limit. compareTo only
    // guarantees a positive value for "greater than", so test `> 0`.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Fixed: this used to return the undefined variable `max_memory`,
      // which threw and landed in the catch block below.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    // Any failure while parsing/comparing the settings is fatal.
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}
|
||||
@@ -0,0 +1,82 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema",
|
||||
"title": "generate_pool_statistics",
|
||||
"description": "No description",
|
||||
"type": "object",
|
||||
"definitions": {
|
||||
|
||||
|
||||
|
||||
"arguments" : {
|
||||
"title": "Arguments",
|
||||
"type": "object",
|
||||
"description": "No description",
|
||||
"properties": {
|
||||
|
||||
|
||||
"nrReadsNrGenesPerChrom": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, default: `processedBamFile_well1.tsv;processedBamfile_well2.tsv`, multiple_sep: `\";\"`. Path to an output file that contains a ",
|
||||
"help_text": "Type: List of `file`, default: `processedBamFile_well1.tsv;processedBamfile_well2.tsv`, multiple_sep: `\";\"`. Path to an output file that contains a .tsv formatted table describing\nper chromosome the number of reads that were mapped to that chromosome (NumberOfReads\ncolumn) and the number of genes on that chromosome that had at least one\nread mapped to it (NumberOfGenes).\n"
|
||||
,
|
||||
"default": "processedBamFile_well1.tsv;processedBamfile_well2.tsv"
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"nrReadsNrGenesPerChromPool": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChromPool.txt`, example: `nrReadsNrGenesPerChrom.txt`. Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom files",
|
||||
"help_text": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChromPool.txt`, example: `nrReadsNrGenesPerChrom.txt`. Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom files. Describes\nper chromosome (as columns) the number of reads, as well as the total number \nof reads per cell barcode and the percentage of nuclear, ERCC and mitochondrial\nreads.\n"
|
||||
,
|
||||
"default": "$id.$key.nrReadsNrGenesPerChromPool.txt"
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
"nextflow input-output arguments" : {
|
||||
"title": "Nextflow input-output arguments",
|
||||
"type": "object",
|
||||
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
||||
"properties": {
|
||||
|
||||
|
||||
"publish_dir": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
||||
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
||||
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"param_list": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
||||
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
||||
"hidden": true
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
},
|
||||
"allOf": [
|
||||
|
||||
{
|
||||
"$ref": "#/definitions/arguments"
|
||||
},
|
||||
|
||||
{
|
||||
"$ref": "#/definitions/nextflow input-output arguments"
|
||||
}
|
||||
]
|
||||
}
|
||||
257
target/nextflow/stats/generate_well_statistics/.config.vsh.yaml
Normal file
257
target/nextflow/stats/generate_well_statistics/.config.vsh.yaml
Normal file
@@ -0,0 +1,257 @@
|
||||
name: "generate_well_statistics"
|
||||
namespace: "stats"
|
||||
version: "main"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
description: "The .bam file as returned by the mapping tool STAR."
|
||||
info: null
|
||||
example:
|
||||
- "input.bam"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--barcode"
|
||||
description: "The barcode for the well that is being processed. Is only used to\
|
||||
\ add a metadata\ncolumn to all output files.\n"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--processedBAMFile"
|
||||
description: "Path to a .tsv file listing, per read in the BAM file,\nthe value\
|
||||
\ for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromosome\
|
||||
\ to which the read was mapped to.\n"
|
||||
info: null
|
||||
default:
|
||||
- "processedBamFile.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChrom"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
|
||||
column) and the number of genes on that chromosome that had at least one\nread\
|
||||
\ mapped to it (NumberOfGenes).\n"
|
||||
info: null
|
||||
default:
|
||||
- "nrReadsNrGenesPerChrom.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrUMIsPerCB"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per barcode the number of UMI's (nrUMIs) and the total number of reads (NumberOfReads).\n"
|
||||
info: null
|
||||
default:
|
||||
- "nrReadsNrUMIsPerCB.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--umiFreqTop"
|
||||
description: "Path to an output file that contains a .tsv formatted table describing\n\
|
||||
per UMI (column UB) the frequency at which they occur in the reads (column\n\
|
||||
N). Only the top 100 UMIs are included.\n"
|
||||
info: null
|
||||
default:
|
||||
- "umiFreqTop100.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--threads"
|
||||
description: "Number of threads to use for decompressing BAM files.\n"
|
||||
info: null
|
||||
default:
|
||||
- 1
|
||||
required: false
|
||||
min: 1
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Generate summary statistics from BAM files generated by STAR solo."
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "test.sam"
|
||||
info: null
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "debian:stable-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "docker"
|
||||
env:
|
||||
- "PIP_BREAK_SYSTEM_PACKAGES=1"
|
||||
- "HTSLIB_LIBRARY_DIR=/usr/lib/"
|
||||
- "HTSLIB_INCLUDE_DIR=/usr/include/"
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "python3"
|
||||
- "python3-pip"
|
||||
- "python3-venv"
|
||||
- "python-is-python3"
|
||||
- "libhts-dev"
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "pysam"
|
||||
- "pandas"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/stats/generate_well_statistics/config.vsh.yaml"
|
||||
runner: "nextflow"
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/stats/generate_well_statistics"
|
||||
executable: "target/nextflow/stats/generate_well_statistics/main.nf"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "b98f6367d672368af134843711a46d3b53717187"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
description: "High-throughput pipeline [WIP]\n"
|
||||
info: null
|
||||
viash_version: "0.9.0-RC7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
|
||||
\ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
|
||||
\ dest: 'nextflow_labels.config'}\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
keywords:
|
||||
- "bioinformatics"
|
||||
- "sequence"
|
||||
- "high-throughput"
|
||||
- "mapping"
|
||||
- "counting"
|
||||
- "pipeline"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/viash-hub/htrnaseq"
|
||||
issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
|
||||
3695
target/nextflow/stats/generate_well_statistics/main.nf
Normal file
3695
target/nextflow/stats/generate_well_statistics/main.nf
Normal file
File diff suppressed because it is too large
Load Diff
125
target/nextflow/stats/generate_well_statistics/nextflow.config
Normal file
125
target/nextflow/stats/generate_well_statistics/nextflow.config
Normal file
@@ -0,0 +1,125 @@
|
||||
manifest {
|
||||
name = 'stats/generate_well_statistics'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'main'
|
||||
description = 'Generate summary statistics from BAM files generated by STAR solo.'
|
||||
}
|
||||
|
||||
process.container = 'nextflow/bash:latest'
|
||||
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
profiles {
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
docker {
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
singularity {
|
||||
singularity.enabled = true
|
||||
singularity.autoMounts = true
|
||||
docker.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
podman {
|
||||
podman.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
shifter {
|
||||
shifter.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
charliecloud {
|
||||
charliecloud.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
}
|
||||
}
|
||||
|
||||
process{
|
||||
withLabel: mem1gb { memory = 1000000000.B }
|
||||
withLabel: mem2gb { memory = 2000000000.B }
|
||||
withLabel: mem5gb { memory = 5000000000.B }
|
||||
withLabel: mem10gb { memory = 10000000000.B }
|
||||
withLabel: mem20gb { memory = 20000000000.B }
|
||||
withLabel: mem50gb { memory = 50000000000.B }
|
||||
withLabel: mem100gb { memory = 100000000000.B }
|
||||
withLabel: mem200gb { memory = 200000000000.B }
|
||||
withLabel: mem500gb { memory = 500000000000.B }
|
||||
withLabel: mem1tb { memory = 1000000000000.B }
|
||||
withLabel: mem2tb { memory = 2000000000000.B }
|
||||
withLabel: mem5tb { memory = 5000000000000.B }
|
||||
withLabel: mem10tb { memory = 10000000000000.B }
|
||||
withLabel: mem20tb { memory = 20000000000000.B }
|
||||
withLabel: mem50tb { memory = 50000000000000.B }
|
||||
withLabel: mem100tb { memory = 100000000000000.B }
|
||||
withLabel: mem200tb { memory = 200000000000000.B }
|
||||
withLabel: mem500tb { memory = 500000000000000.B }
|
||||
withLabel: mem1gib { memory = 1073741824.B }
|
||||
withLabel: mem2gib { memory = 2147483648.B }
|
||||
withLabel: mem4gib { memory = 4294967296.B }
|
||||
withLabel: mem8gib { memory = 8589934592.B }
|
||||
withLabel: mem16gib { memory = 17179869184.B }
|
||||
withLabel: mem32gib { memory = 34359738368.B }
|
||||
withLabel: mem64gib { memory = 68719476736.B }
|
||||
withLabel: mem128gib { memory = 137438953472.B }
|
||||
withLabel: mem256gib { memory = 274877906944.B }
|
||||
withLabel: mem512gib { memory = 549755813888.B }
|
||||
withLabel: mem1tib { memory = 1099511627776.B }
|
||||
withLabel: mem2tib { memory = 2199023255552.B }
|
||||
withLabel: mem4tib { memory = 4398046511104.B }
|
||||
withLabel: mem8tib { memory = 8796093022208.B }
|
||||
withLabel: mem16tib { memory = 17592186044416.B }
|
||||
withLabel: mem32tib { memory = 35184372088832.B }
|
||||
withLabel: mem64tib { memory = 70368744177664.B }
|
||||
withLabel: mem128tib { memory = 140737488355328.B }
|
||||
withLabel: mem256tib { memory = 281474976710656.B }
|
||||
withLabel: mem512tib { memory = 562949953421312.B }
|
||||
withLabel: cpu1 { cpus = 1 }
|
||||
withLabel: cpu2 { cpus = 2 }
|
||||
withLabel: cpu5 { cpus = 5 }
|
||||
withLabel: cpu10 { cpus = 10 }
|
||||
withLabel: cpu20 { cpus = 20 }
|
||||
withLabel: cpu50 { cpus = 50 }
|
||||
withLabel: cpu100 { cpus = 100 }
|
||||
withLabel: cpu200 { cpus = 200 }
|
||||
withLabel: cpu500 { cpus = 500 }
|
||||
withLabel: cpu1000 { cpus = 1000 }
|
||||
}
|
||||
|
||||
includeConfig("nextflow_labels.config")
|
||||
@@ -0,0 +1,43 @@
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
maxMemory = null
|
||||
|
||||
// Resource labels
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
|
||||
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
|
||||
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
|
||||
|
||||
}
|
||||
|
||||
/**
 * Cap a requested amount of memory at `process.maxMemory`.
 *
 * Used by the memory labels above, which pass in `<base>.GB * task.attempt`.
 *
 * @param to_compare the requested memory (a value that supports
 *                   `compareTo` against a `nextflow.util.MemoryUnit`).
 * @return `to_compare` when no `process.maxMemory` is configured or when the
 *         request does not exceed it; otherwise `process.maxMemory`. On the
 *         attempt whose number equals `process.maxRetries` the maximum is
 *         returned regardless of the request.
 *
 * Any error while evaluating the limits is reported on stdout and terminates
 * the JVM with exit code 1.
 */
def get_memory(to_compare) {
  // Without a configured upper bound there is nothing to cap.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // NOTE(review): `task` is not a parameter of this function; this relies on
    // `task` being resolvable from the calling context -- confirm.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      // Attempt number equals maxRetries: go straight to the maximum.
      return process.maxMemory
    }

    def upper_bound = process.maxMemory as nextflow.util.MemoryUnit
    // Only the sign of compareTo() is meaningful, so test `> 0` instead of `== 1`.
    if (to_compare.compareTo(upper_bound) > 0) {
      // The original returned the undefined name `max_memory` here, which
      // always ended up in the catch block below instead of applying the cap.
      return upper_bound
    }
    return to_compare
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}
|
||||
@@ -0,0 +1,135 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema",
|
||||
"title": "generate_well_statistics",
|
||||
"description": "Generate summary statistics from BAM files generated by STAR solo.",
|
||||
"type": "object",
|
||||
"definitions": {
|
||||
|
||||
|
||||
|
||||
"arguments" : {
|
||||
"title": "Arguments",
|
||||
"type": "object",
|
||||
"description": "No description",
|
||||
"properties": {
|
||||
|
||||
|
||||
"input": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `file`, example: `input.bam`. The ",
|
||||
"help_text": "Type: `file`, example: `input.bam`. The .bam file as returned by the mapping tool STAR."
|
||||
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"barcode": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `string`, required. The barcode for the well that is being processed",
|
||||
"help_text": "Type: `string`, required. The barcode for the well that is being processed. Is only used to add a metadata\ncolumn to all output files.\n"
|
||||
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"processedBAMFile": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `file`, default: `$id.$key.processedBAMFile.txt`. Path to a ",
|
||||
"help_text": "Type: `file`, default: `$id.$key.processedBAMFile.txt`. Path to a .tsv file listing, per read in the BAM file,\nthe value for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromosome to which the read was mapped to.\n"
|
||||
,
|
||||
"default": "$id.$key.processedBAMFile.txt"
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"nrReadsNrGenesPerChrom": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. Path to an output file that contains a ",
|
||||
"help_text": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. Path to an output file that contains a .tsv formatted table describing\nper chromosome the number of reads that were mapped to that chromosome (NumberOfReads\ncolumn) and the number of genes on that chromosome that had at least one\nread mapped to it (NumberOfGenes).\n"
|
||||
,
|
||||
"default": "$id.$key.nrReadsNrGenesPerChrom.txt"
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"nrReadsNrUMIsPerCB": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `file`, default: `$id.$key.nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a ",
|
||||
"help_text": "Type: `file`, default: `$id.$key.nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a .tsv formatted table describing\nper barcode the number of UMI\u0027s (nrUMIs) and the total number of reads (NumberOfReads).\n"
|
||||
,
|
||||
"default": "$id.$key.nrReadsNrUMIsPerCB.txt"
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"umiFreqTop": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `file`, default: `$id.$key.umiFreqTop.txt`. Path to an output file that contains a ",
|
||||
"help_text": "Type: `file`, default: `$id.$key.umiFreqTop.txt`. Path to an output file that contains a .tsv formatted table describing\nper UMI (column UB) the frequency at which they occur in the reads (column\nN). Only the top 100 UMIs are included.\n"
|
||||
,
|
||||
"default": "$id.$key.umiFreqTop.txt"
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"threads": {
|
||||
"type":
|
||||
"integer",
|
||||
"description": "Type: `integer`, default: `1`. Number of threads to use for decompressing BAM files",
|
||||
"help_text": "Type: `integer`, default: `1`. Number of threads to use for decompressing BAM files.\n"
|
||||
,
|
||||
"default": "1"
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
"nextflow input-output arguments" : {
|
||||
"title": "Nextflow input-output arguments",
|
||||
"type": "object",
|
||||
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
||||
"properties": {
|
||||
|
||||
|
||||
"publish_dir": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
||||
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
||||
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"param_list": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
||||
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
||||
"hidden": true
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
},
|
||||
"allOf": [
|
||||
|
||||
{
|
||||
"$ref": "#/definitions/arguments"
|
||||
},
|
||||
|
||||
{
|
||||
"$ref": "#/definitions/nextflow input-output arguments"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -80,6 +80,17 @@ argument_groups:
|
||||
direction: "output"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--nrReadsNrGenesPerChrom"
|
||||
info: null
|
||||
default:
|
||||
- "nrReadsNrGenesPerChrom.txt"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "nextflow_script"
|
||||
path: "main.nf"
|
||||
@@ -94,6 +105,12 @@ requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
dependencies:
|
||||
- name: "stats/generate_pool_statistics"
|
||||
repository:
|
||||
type: "local"
|
||||
- name: "stats/generate_well_statistics"
|
||||
repository:
|
||||
type: "local"
|
||||
- name: "workflows/well_demultiplex"
|
||||
repository:
|
||||
type: "local"
|
||||
@@ -198,9 +215,11 @@ build_info:
|
||||
output: "target/nextflow/workflows/htrnaseq"
|
||||
executable: "target/nextflow/workflows/htrnaseq/main.nf"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
|
||||
git_commit: "b98f6367d672368af134843711a46d3b53717187"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
dependencies:
|
||||
- "target/nextflow/stats/generate_pool_statistics"
|
||||
- "target/nextflow/stats/generate_well_statistics"
|
||||
- "target/nextflow/workflows/well_demultiplex"
|
||||
- "target/nextflow/workflows/parallel_map_wf"
|
||||
- "target/nextflow/workflows/utils/groupWells"
|
||||
|
||||
@@ -2893,6 +2893,19 @@ meta = [
|
||||
"direction" : "output",
|
||||
"multiple" : true,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "file",
|
||||
"name" : "--nrReadsNrGenesPerChrom",
|
||||
"default" : [
|
||||
"nrReadsNrGenesPerChrom.txt"
|
||||
],
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
"direction" : "output",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -2917,6 +2930,18 @@ meta = [
|
||||
]
|
||||
},
|
||||
"dependencies" : [
|
||||
{
|
||||
"name" : "stats/generate_pool_statistics",
|
||||
"repository" : {
|
||||
"type" : "local"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name" : "stats/generate_well_statistics",
|
||||
"repository" : {
|
||||
"type" : "local"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name" : "workflows/well_demultiplex",
|
||||
"repository" : {
|
||||
@@ -3054,7 +3079,7 @@ meta = [
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/workflows/htrnaseq",
|
||||
"viash_version" : "0.9.0-RC7",
|
||||
"git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
|
||||
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3090,6 +3115,8 @@ meta = [
|
||||
|
||||
// resolve dependencies dependencies (if any)
|
||||
meta["root_dir"] = getRootDir()
|
||||
include { generate_pool_statistics } from "${meta.resources_dir}/../../../nextflow/stats/generate_pool_statistics/main.nf"
|
||||
include { generate_well_statistics } from "${meta.resources_dir}/../../../nextflow/stats/generate_well_statistics/main.nf"
|
||||
include { well_demultiplex } from "${meta.resources_dir}/../../../nextflow/workflows/well_demultiplex/main.nf"
|
||||
include { parallel_map_wf } from "${meta.resources_dir}/../../../nextflow/workflows/parallel_map_wf/main.nf"
|
||||
include { groupWells } from "${meta.resources_dir}/../../../nextflow/workflows/utils/groupWells/main.nf"
|
||||
@@ -3167,12 +3194,47 @@ workflow run_wf {
|
||||
state + ["star_output": result.output]
|
||||
},
|
||||
)
|
||||
| generate_well_statistics.run(
|
||||
fromState: { id, state ->
|
||||
[
|
||||
"input": state.star_output.resolve('Aligned.sortedByCoord.out.bam'),
|
||||
"barcode": state.barcode,
|
||||
]
|
||||
},
|
||||
toState: [
|
||||
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
|
||||
"nrReadsNrUMIsPerCB": "nrReadsNrUMIsPerCB",
|
||||
]
|
||||
)
|
||||
| map {id, state ->
|
||||
[state.pool, id, state]
|
||||
}
|
||||
| groupTuple(by: 0, sort: "hash")
|
||||
| map {id, well_ids, states ->
|
||||
def collected_state = [
|
||||
"fastq_output_r1": states.collect{it.fastq_output_r1[0]},
|
||||
"fastq_output_r2": states.collect{it.fastq_output_r2[0]},
|
||||
"nrReadsNrGenesPerChrom": states.collect{it.nrReadsNrGenesPerChrom},
|
||||
]
|
||||
def newState = states[0] + collected_state
|
||||
[id, newState]
|
||||
}
|
||||
| generate_pool_statistics.run(
|
||||
fromState: [
|
||||
"nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
|
||||
],
|
||||
toState: {id, result, state ->
|
||||
state + ["nrReadsNrGenesPerChrom": result.nrReadsNrGenesPerChromPool]
|
||||
}
|
||||
)
|
||||
| niceView()
|
||||
| setState(["star_output", "fastq_output_r1", "fastq_output_r2", "star_output"])
|
||||
|
||||
//| niceView()
|
||||
//
|
||||
//| setState( [ "output": "out" ] )
|
||||
| setState([
|
||||
"star_output",
|
||||
"fastq_output_r1",
|
||||
"fastq_output_r2",
|
||||
"star_output",
|
||||
"nrReadsNrGenesPerChrom",
|
||||
])
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
|
||||
@@ -96,6 +96,17 @@
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"nrReadsNrGenesPerChrom": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `file`, required, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. ",
|
||||
"help_text": "Type: `file`, required, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. "
|
||||
,
|
||||
"default": "$id.$key.nrReadsNrGenesPerChrom.txt"
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ argument_groups:
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: true
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "nextflow_script"
|
||||
@@ -161,7 +161,7 @@ build_info:
|
||||
output: "target/nextflow/workflows/parallel_map_wf"
|
||||
executable: "target/nextflow/workflows/parallel_map_wf/main.nf"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
|
||||
git_commit: "b98f6367d672368af134843711a46d3b53717187"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
dependencies:
|
||||
- "target/nextflow/parallel_map"
|
||||
|
||||
@@ -2858,7 +2858,7 @@ meta = [
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
"direction" : "output",
|
||||
"multiple" : true,
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
}
|
||||
]
|
||||
@@ -2996,7 +2996,7 @@ meta = [
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/workflows/parallel_map_wf",
|
||||
"viash_version" : "0.9.0-RC7",
|
||||
"git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
|
||||
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3042,8 +3042,7 @@ workflow run_wf {
|
||||
input_ch
|
||||
|
||||
main:
|
||||
output_ch = input_ch
|
||||
| map {id, state -> [id, state + ["orig_id": id]]}
|
||||
pool_ch = input_ch
|
||||
| groupWells.run(
|
||||
fromState: { id, state ->
|
||||
[
|
||||
@@ -3058,7 +3057,6 @@ workflow run_wf {
|
||||
"wells": result.wells,
|
||||
"input_r1": result.output_r1,
|
||||
"input_r2": result.output_r2,
|
||||
"_meta": ["join_id": state.orig_id]
|
||||
]
|
||||
}
|
||||
)
|
||||
@@ -3072,7 +3070,7 @@ workflow run_wf {
|
||||
"pool": state.pool,
|
||||
"wellBarcodesLength": 10,
|
||||
"umiLength": 10,
|
||||
"output": state.output[0],
|
||||
"output": state.output,
|
||||
]
|
||||
},
|
||||
toState: { id, result, state ->
|
||||
@@ -3082,8 +3080,33 @@ workflow run_wf {
|
||||
},
|
||||
directives: [label: ["midmem", "midcpu"]]
|
||||
)
|
||||
| setState(["output", "_meta"])
|
||||
|
||||
| setState(["output"])
|
||||
|
||||
input_join_ch = input_ch
|
||||
| map {id, state ->
|
||||
[state.pool, id, state]
|
||||
}
|
||||
// Pair every well event with the state of its pool (joined on the first tuple
// element) and keep only the STAR output directory that belongs to this well.
// NOTE(review): assumes pool_ch emits [pool, state] tuples where state.output
// is a list of directories -- confirm against the mapping step.
output_ch = input_join_ch.combine(pool_ch, by: 0)
  | map { pool, well_id, state_well, state_pool ->
    // Matches the barcodes.tsv file inside a "raw" directory of a STAR output.
    def barcode_file_regex = ~/.*\/raw\/barcodes\.tsv$/

    // `def` keeps the result local to this closure invocation; without it the
    // variable becomes a script-level binding shared between all events.
    def well_output = state_pool.output.findAll { star_output_dir ->
      def barcodes_list = []
      star_output_dir.eachFileRecurse { barcode_file ->
        if (barcode_file =~ barcode_file_regex) {
          assert barcode_file.countLines() == 1, "Expected only one barcode in a single STAR output."
          barcodes_list.add(barcode_file.text.trim())
        }
      }
      assert barcodes_list.size() == 1, "Exactly one file should have matched the barcodes file regex (found: $barcodes_list)."
      // Exactly one barcode was collected (asserted above); compare it to the well's barcode.
      return barcodes_list[0] == state_well.barcode
    }

    // The barcode is reported through state_well: the `barcode` local of the
    // closure above is not visible here, and zero matches are an error too.
    assert well_output.size() == 1, "Expected exactly one mapping output for barcode '${state_well.barcode}' (found: ${well_output.size()})."
    [well_id, ["output": well_output[0]]]
  }
|
||||
|
||||
|
||||
emit:
|
||||
output_ch
|
||||
}
|
||||
|
||||
@@ -67,10 +67,10 @@
|
||||
"output": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: List of `file`, required, default: `$id.$key.output_*.output_*`, multiple_sep: `\";\"`. ",
|
||||
"help_text": "Type: List of `file`, required, default: `$id.$key.output_*.output_*`, multiple_sep: `\";\"`. "
|
||||
"description": "Type: `file`, required, default: `$id.$key.output.output`. ",
|
||||
"help_text": "Type: `file`, required, default: `$id.$key.output.output`. "
|
||||
,
|
||||
"default": "$id.$key.output_*.output_*"
|
||||
"default": "$id.$key.output.output"
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ build_info:
|
||||
output: "target/nextflow/workflows/utils/groupWells"
|
||||
executable: "target/nextflow/workflows/utils/groupWells/main.nf"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
|
||||
git_commit: "b98f6367d672368af134843711a46d3b53717187"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
|
||||
@@ -3007,7 +3007,7 @@ meta = [
|
||||
"engine" : "native",
|
||||
"output" : "target/nextflow/workflows/utils/groupWells",
|
||||
"viash_version" : "0.9.0-RC7",
|
||||
"git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
|
||||
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
|
||||
@@ -197,7 +197,7 @@ build_info:
|
||||
output: "target/nextflow/workflows/well_demultiplex"
|
||||
executable: "target/nextflow/workflows/well_demultiplex/main.nf"
|
||||
viash_version: "0.9.0-RC7"
|
||||
git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
|
||||
git_commit: "b98f6367d672368af134843711a46d3b53717187"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
dependencies:
|
||||
- "target/dependencies/vsh/vsh/biobox/v0.1.0/nextflow/cutadapt"
|
||||
|
||||
@@ -3044,7 +3044,7 @@ meta = [
|
||||
"engine" : "native|native",
|
||||
"output" : "target/nextflow/workflows/well_demultiplex",
|
||||
"viash_version" : "0.9.0-RC7",
|
||||
"git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
|
||||
"git_commit" : "b98f6367d672368af134843711a46d3b53717187",
|
||||
"git_remote" : "https://github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
|
||||
Reference in New Issue
Block a user