Build branch main with version main (b98f636)

Build pipeline: viash-hub.htrnaseq.main-48gzk Source commit: b98f6367d6 Source message: Add BAM statistics calculations on pool and well level (#6)
2024-08-29 12:41:46 +00:00
parent 044a3af7a9
commit bde35f120c
43 changed files with 12072 additions and 43 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,8 @@ testData
 # Nextflow related files
 .nextflow
 .nextflow.log*
-work
+work
+
+# Python related files
+*__pycache__*
+.venv
--- a/src/parallel_map/test.sh
+++ b/src/parallel_map/test.sh
@@ -1,7 +1,7 @@
 set -eo pipefail

 ## VIASH START
-meta_executable="target/executable/parallel_map/parallel_map"
+meta_executable=$(realpath "target/executable/parallel_map/parallel_map")
 ## VIASH END

 # Some helper functions
--- a/src/stats/generate_pool_statistics/config.vsh.yaml
+++ b/src/stats/generate_pool_statistics/config.vsh.yaml
@@ -0,0 +1,51 @@
+name: generate_pool_statistics
+namespace: "stats"
+argument_groups:
+  - name: "Arguments"
+    arguments:
+      - name: "--nrReadsNrGenesPerChrom"
+        type: file
+        multiple: true
+        description: |
+          Paths to the per-well input files, each containing a .tsv formatted table describing
+          per chromosome the number of reads that were mapped to that chromosome (NumberOfReads
+          column) and the number of genes on that chromosome that had at least one
+          read mapped to it (NumberOfGenes).
+        direction: input
+        default: [processedBamFile_well1.tsv, processedBamfile_well2.tsv]
+      - name: "--nrReadsNrGenesPerChromPool"
+        direction: output
+        type: file
+        multiple: false
+        description: |
+          Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom files. Describes
+          per chromosome (as columns) the number of reads, as well as the total number 
+          of reads per cell barcode and the percentage of nuclear, ERCC and mitochondrial
+          reads.
+        example: "nrReadsNrGenesPerChrom.txt"
+
+resources:
+- type: python_script
+  path: script.py
+
+test_resources:
+  - type: python_script
+    path: test.py
+
+engines:
+  - type: docker
+    image: python:3.11-slim
+    setup:
+      - type: apt
+        packages:
+          - procps
+      - type: python
+        packages:
+          - pandas
+    test_setup:
+      - type: python
+        packages:
+          - viashpy
+runners:
+  - type: executable
+  - type: nextflow
--- a/src/stats/generate_pool_statistics/script.py
+++ b/src/stats/generate_pool_statistics/script.py
@@ -0,0 +1,75 @@
+import pandas as pd
+import re
+
+### VIASH START
+par = {
+    "nrReadsNrGenesPerChrom": ["test/nrReadsNrGenesPerChrom_2.txt", "test/nrReadsNrGenesPerChrom.txt"],
+    "nrReadsNrGenesPerChromPool": "nrReadsNrGenesPerChrom_pool.txt"
+}
+
+### VIASH END
+
+if __name__ == "__main__":
+    #########
+    # nrReadsNrGenesPerChrom file
+    #########
+    nr_reads_nr_genes_wells = []
+    for nr_reads_nr_genes_file in par["nrReadsNrGenesPerChrom"]:
+        nr_reads_nr_genes_wells.append(pd.read_csv(nr_reads_nr_genes_file, 
+                                                   header=0, delimiter="\t"))
+    nr_reads_nr_genes_pool = pd.concat(nr_reads_nr_genes_wells, ignore_index=True)
+    total_nr_reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(index="WellBC", columns="Chr", 
+                                                                       values=["NumberOfReads"], fill_value=0,
+                                                                       aggfunc="sum").droplevel(0, axis=1)
+    total_nr_reads_per_chromosome.columns.name = None
+
+    ##### Total number of genes from all chromosomes
+    total_nr_genes = nr_reads_nr_genes_pool.loc[:,['WellBC', 'NumberOfGenes']].groupby("WellBC").sum()
+
+    ##### Total counts across (irrespective of chromosome)
+    total_sum_of_reads = total_nr_reads_per_chromosome.sum(numeric_only=True, axis=1) 
+
+    ##### Logic to split up chromosome per type
+    chromosome_names = total_nr_reads_per_chromosome.columns.to_list()
+    chr_regex = re.compile(r"^(chr)?\d+")
+    matching_chromosomes = [chr_name for chr_name 
+                            in chromosome_names
+                            if chr_regex.match(chr_name)]
+    sex_chromosome_names = ["X", "Y"]
+    mitochondrial_chr_name = "MT"
+    # This is logic from the original HT pipeline,
+    # only when all of the matched chromosomes start with "chr", the mitochonrial, X and Y
+    # chromosomes should also start with 'chr'
+    if all(chr_name.startswith("chr") for chr_name in matching_chromosomes):
+       sex_chromosome_names += ["chrX", "chrY"]
+       mitochondrial_chr_name = "chrM"
+
+    ###### Counts for mitochondrial reads
+    try:
+        mitochondrial_reads = total_nr_reads_per_chromosome.loc[:,mitochondrial_chr_name]
+    except KeyError:
+       mitochondrial_reads = 0
+    percentage_mitochondrial_reads = round(mitochondrial_reads / total_sum_of_reads * 100, 2)
+
+    ###### Counts for ERCC reads
+    total_ercc_reads = total_nr_reads_per_chromosome.filter(regex=r"^ERCC").sum(axis=1)
+    percentage_ercc_reads = round(total_ercc_reads / total_sum_of_reads * 100, 2)
+
+    ###### Counts for nuclear chromosomes
+    total_chromosomal_reads = total_nr_reads_per_chromosome.loc[:,matching_chromosomes].sum(axis=1)
+    percentage_chromosomal_reads = round(total_chromosomal_reads / total_sum_of_reads * 100, 2)
+
+    total_nr_reads_per_chromosome = total_nr_reads_per_chromosome.assign(
+        pctChrom=percentage_chromosomal_reads,
+        pctMT=percentage_mitochondrial_reads,
+        pctERCC=percentage_ercc_reads,
+        SumReads=total_sum_of_reads,
+        NumberOfGenes=total_nr_genes,
+    )
+
+    total_nr_reads_per_chromosome.reset_index(names="WellBC")\
+        .to_csv(par["nrReadsNrGenesPerChromPool"], sep="\t",
+                header=True, index=False, 
+                columns=("WellBC",) + tuple(chromosome_names) + \
+                        ("SumReads", "pctMT", "pctERCC", "pctChrom", "NumberOfGenes"))
+
--- a/src/stats/generate_pool_statistics/test.py
+++ b/src/stats/generate_pool_statistics/test.py
@@ -0,0 +1,98 @@
+from uuid import uuid4
+from textwrap import dedent
+from io import StringIO
+import pandas as pd
+import pytest
+import sys
+
+### VIASH START
+meta = {
+    "resources_dir": "./src/stats/generate_pool_statistics/",
+    "executable": "target/executable/stats/generate_pool_statistics/generate_pool_statistics",
+    "config": "src/stats/generate_pool_statistics/config.vsh.yaml"
+}
+### VIASH END
+
+@pytest.fixture
+def random_path(tmp_path):
+    def wrapper(extension=None):
+        extension = "" if not extension else f".{extension}"
+        return tmp_path / f"{uuid4()}{extension}"
+    return wrapper
+
+
+@pytest.fixture
+def random_tsv_path(random_path):
+    def wrapper():
+        return random_path(".tsv")
+    return wrapper
+
+
+@pytest.fixture
+def simple_input_file_one(random_tsv_path, request):
+    """Write a per-chromosome table for well barcode 'AGG' and return its path.
+
+    The chromosome name prefix is taken from request.param (indirect
+    parametrization). The mitochondrial chromosome is named "<prefix>M" when a
+    prefix is given and "MT" when the prefix is empty.
+    """
+    prefix = request.param
+    mito_name = f"{prefix}M{'T' if not prefix else ''}"
+
+    # Tab-separated content; dedent() strips the common leading indentation.
+    contents = dedent(
+    f"""\
+    WellBC	Chr	NumberOfReads	NumberOfGenes
+    AGG	{prefix}1	2	1
+    AGG	{prefix}2	3	2
+    AGG	{prefix}3	4	2
+    AGG	{mito_name}	4	2
+    AGG	{prefix}X	2	3
+    AGG	ERCC-1	1	1
+    AGG	ERCC-2	1	1
+    """)
+    output_file = random_tsv_path()
+    with output_file.open("w") as open_file:
+        open_file.write(contents)
+    return output_file
+
+
+@pytest.fixture
+def simple_input_file_two(random_tsv_path, request):
+    """Write a per-chromosome table for well barcode 'CCC' and return its path.
+
+    The chromosome name prefix is taken from request.param (indirect
+    parametrization). This table contains no mitochondrial chromosome entry.
+    """
+    prefix = request.param
+    # Tab-separated content; dedent() strips the common leading indentation.
+    contents = dedent(
+    f"""\
+    WellBC	Chr	NumberOfReads	NumberOfGenes
+    CCC	{prefix}2	2	1
+    CCC	{prefix}3	3	2
+    CCC	{prefix}5	4	2
+    CCC	{prefix}1	4	2
+    CCC	{prefix}Y	2	3
+    CCC	{prefix}X	2	3
+    CCC	ERCC-3	1	1
+    CCC	ERCC-2	1	1
+    """)
+    output_file = random_tsv_path()
+    with output_file.open("w") as open_file:
+        open_file.write(contents)
+    return output_file
+
+@pytest.mark.parametrize("simple_input_file_one,simple_input_file_two,expected", [("chr", "chr", "chr"), ("", "", "")], 
+                         indirect=["simple_input_file_one", "simple_input_file_two"])
+def test_generate_pool_statistics_simple(run_component, simple_input_file_one,
+                                         simple_input_file_two, random_tsv_path, expected):
+    """Run the component on two well tables and compare the pooled pivot table.
+
+    Runs once with "chr"-prefixed chromosome names and once without prefix;
+    `expected` is the prefix that the chromosome columns of the output must carry.
+    """
+    
+    output_path = random_tsv_path()
+    run_component([
+        "--nrReadsNrGenesPerChrom", simple_input_file_one,
+        "--nrReadsNrGenesPerChrom", simple_input_file_two,
+        "--nrReadsNrGenesPerChromPool", output_path
+    ])
+    # Same naming rule as in the input fixture: "<prefix>M" with prefix, "MT" without.
+    mito_name = f"{expected}M{'T' if not expected else ''}"
+    expected_output = StringIO(dedent(
+    f"""\
+    WellBC	ERCC-1	ERCC-2	ERCC-3	{expected}1	{expected}2	{expected}3	{expected}5	{mito_name}	{expected}X	{expected}Y	SumReads	pctMT	pctERCC	pctChrom	NumberOfGenes
+    AGG	1	1	0	2	3	4	0	4	2	0	17	23.53	11.76	52.94	12
+    CCC	0	1	1	4	2	3	4	0	2	2	19	0.0	10.53	68.42	15
+    """))
+    assert output_path.is_file()
+    contents = pd.read_csv(output_path, sep="\t")
+    expected_frame = pd.read_csv(expected_output, sep="\t")
+    # check_like=True ignores the order of rows and columns.
+    pd.testing.assert_frame_equal(contents, expected_frame, check_like=True)
+
+
+if __name__ == '__main__':
+    sys.exit(pytest.main([__file__]))
--- a/src/stats/generate_well_statistics/config.vsh.yaml
+++ b/src/stats/generate_well_statistics/config.vsh.yaml
@@ -0,0 +1,92 @@
+name: generate_well_statistics
+namespace: "stats"
+description: Generate summary statistics from BAM files generated by STAR solo.
+argument_groups:
+  - name: "Arguments"
+    arguments:
+      - name: "--input"
+        type: file
+        description: "The .bam file as returned by the mapping tool STAR."
+        direction: input
+        example: "input.bam"
+      - name: "--barcode"
+        type: string
+        description: |
+          The barcode for the well that is being processed. Is only used to add a metadata
+          column to all output files.
+        required: true
+      - name: "--processedBAMFile"
+        type: file
+        description: |
+          Path to a .tsv file listing, per read in the BAM file,
+          the value for the "CB", "UB", "GX" and "GN" tags, together with the
+          chromosome to which the read was mapped.
+        direction: output
+        default: processedBamFile.txt
+      - name: "--nrReadsNrGenesPerChrom"
+        type: file
+        description: |
+          Path to an output file that contains a .tsv formatted table describing
+          per chromosome the number of reads that were mapped to that chromosome (NumberOfReads
+          column) and the number of genes on that chromosome that had at least one
+          read mapped to it (NumberOfGenes).
+        default: nrReadsNrGenesPerChrom.txt
+        direction: output
+      - name: "--nrReadsNrUMIsPerCB"
+        type: file
+        description: |
+          Path to an output file that contains a .tsv formatted table describing
+          per barcode the number of UMI's (nrUMIs) and the total number of reads (NumberOfReads).
+        direction: output
+        default: nrReadsNrUMIsPerCB.txt
+      - name: "--umiFreqTop"
+        type: file
+        description: |
+          Path to an output file that contains a .tsv formatted table describing
+          per UMI (column UB) the frequency at which they occur in the reads (column
+          N). Only the top 100 UMIs are included.
+        default: umiFreqTop100.txt
+        direction: output
+      - name: "--threads"
+        type: integer
+        description: |
+          Number of threads to use for decompressing BAM files.
+        min: 1
+        default: 1
+resources:
+- type: python_script
+  path: script.py
+
+test_resources:
+  - type: python_script
+    path: test.py
+  - path: test.sam
+
+engines:
+  - type: docker
+    image: debian:stable-slim
+    setup:
+      - type: docker
+        env:
+          - PIP_BREAK_SYSTEM_PACKAGES=1
+          - HTSLIB_LIBRARY_DIR=/usr/lib/
+          - HTSLIB_INCLUDE_DIR=/usr/include/
+      - type: apt
+        packages:
+          - python3
+          - python3-pip
+          - python3-venv
+          - python-is-python3
+          - libhts-dev
+          - procps
+      - type: python
+        packages:
+          - pysam
+          - pandas
+    test_setup:
+      - type: python
+        packages:
+          - viashpy
+runners:
+  - type: executable
+  - type: nextflow
--- a/src/stats/generate_well_statistics/script.py
+++ b/src/stats/generate_well_statistics/script.py
@@ -0,0 +1,77 @@
+import pysam
+import pandas as pd
+import logging
+
+### VIASH START
+par = {
+    "input": "src/stats/generate_well_statistics/test.sam",
+    "processedBAMFile": "processedBamFile.txt",
+    "nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom.txt",
+    "nrReadsNrUMIsPerCB": "nrReadsNrUMIsPerCB.txt",
+    "umiFreqTop": "umiFreqTop.txt",
+    "threads": 1,
+    "barcode": "ACGT"
+}
+### VIASH END
+logger = logging.getLogger()
+console_handler = logging.StreamHandler()
+logger.addHandler(console_handler)
+logger.setLevel(logging.DEBUG)
+
+if __name__ == "__main__":
+    logger.info("Component started.")
+    parameters_str = [f'\t{param}: {param_val}\n' for param, param_val in par.items()]
+    logger.info("Parameters:\n%s", "".join(parameters_str).rstrip())
+    logger.info("Opening '%s'", par["input"])
+    samfile = pysam.AlignmentFile(par["input"], "rb", threads=par["threads"])
+    all_tags = []
+    index = []
+    tags_selection = ("CB", "UB", "GX", "GN")
+    for aligned_segment in samfile:
+        tags = dict(aligned_segment.get_tags())
+        all_tags.append(tags)
+        reference_name = aligned_segment.reference_name
+        index.append("*" if not reference_name else reference_name)
+    tag_dataframe = pd.DataFrame.from_records(all_tags, index=index,
+                                              columns=tags_selection)
+    tag_dataframe_to_write = tag_dataframe.copy()
+    logger.info("Done reading BAM file. Found %i entries", tag_dataframe.shape[0])
+    tag_dataframe.assign(WellBC=par["barcode"])\
+        .reset_index(names="Chr")\
+        .to_csv(par["processedBAMFile"], sep="\t", na_rep="",
+                header=True, index=False,
+                columns=("WellBC", "Chr") + tags_selection)
+    logger.info("Constructing of dataframe done.")
+    # Number of genes that had a read mapped to them per chromosome,
+    # and the number of reads mapped to those genes per chromosome.
+    nr_reads_nr_genes = tag_dataframe.dropna(subset=["GX"]).groupby(level=0).agg(
+        NumberOfReads=pd.NamedAgg("GX", aggfunc="size"),
+        NumberOfGenes=pd.NamedAgg(column="GX", aggfunc="nunique")
+    )
+    logger.info("Done calculating number of reads per gene and per chromesome. Writing to %s",
+                par['nrReadsNrGenesPerChrom'])
+    nr_reads_nr_genes.reset_index(names="Chr").assign(WellBC=par["barcode"])\
+        .to_csv(par["nrReadsNrGenesPerChrom"], sep="\t",
+                header=True, index=False, 
+                columns=("WellBC", "Chr", "NumberOfReads", "NumberOfGenes"))
+
+    # Number of reads mapped to the reference, grouped by UMI
+    nr_read_per_umi = tag_dataframe.groupby('UB').size()\
+        .drop("", errors="ignore").sort_values(ascending=False).head(100)
+    nr_read_per_umi_df = nr_read_per_umi.to_frame(name="N")
+    logger.info("Done calculating number of mapped reads per UMI, writing to %s", par["umiFreqTop"])
+    nr_read_per_umi_df.assign(WellBC=par["barcode"]).reset_index(names="UB")\
+        .to_csv(par["umiFreqTop"], header=True, sep="\t", 
+                index=False, columns=("WellBC", "UB", "N"))
+
+    # Total number of mapped reads and total number of UMIs (not grouped per chromosome)
+    nr_reads_and_umi_per_barcode = tag_dataframe.groupby(by="CB").agg(
+        NumberOfReads=pd.NamedAgg("CB", "size"),
+        nrUMIs=pd.NamedAgg("UB", "nunique")
+    )
+    logger.info("Done calculating number of mapped reads and number of UMIs per Cell Barcode, writing to %s",
+                par["nrReadsNrUMIsPerCB"])
+    nr_reads_and_umi_per_barcode.assign(WellBC=par["barcode"]).reset_index(names="CB")\
+        .to_csv(par["nrReadsNrUMIsPerCB"], sep="\t", header=True, 
+                index=False, columns=("WellBC", "CB", "NumberOfReads", "nrUMIs"))
+    logger.info("Finished!")
--- a/src/stats/generate_well_statistics/test.py
+++ b/src/stats/generate_well_statistics/test.py
@@ -0,0 +1,110 @@
+import sys
+import pytest
+import pysam
+from uuid import uuid4
+from pathlib import Path
+from textwrap import dedent
+
+### VIASH START
+meta = {
+    "resources_dir": "./src/stats/generate_well_statistics/",
+    "executable": "target/executable/stats/generate_well_statistics/generate_well_statistics",
+    "config": "src/stats/generate_well_statistics/config.vsh.yaml"
+}
+### VIASH END
+
+def assert_file_content_equals(file_to_check, expected):
+    with file_to_check.open('r') as open_file:
+        contents = open_file.read()
+        assert contents == expected
+
+
+@pytest.fixture
+def input_sam_path():
+    return Path(meta["resources_dir"]) / "test.sam"
+
+
+@pytest.fixture
+def random_path(tmp_path):
+    def wrapper(extension=None):
+        extension = "" if not extension else f".{extension}"
+        return tmp_path / f"{uuid4()}{extension}"
+    return wrapper 
+
+@pytest.fixture
+def random_bam_path(random_path):
+    def wrapper():
+        return random_path(".bam")
+    return wrapper
+
+
+@pytest.fixture
+def sam_to_bam(random_bam_path):
+    def wrapper(sam_file):
+        out_path = random_bam_path()
+        with pysam.AlignmentFile(sam_file, "r") as infile, \
+            pysam.AlignmentFile(out_path, "wb", template=infile) as outfile:
+            for s in infile:
+                outfile.write(s)
+        infile.close()
+        return out_path
+    return wrapper
+
+
+def test_generate_well_statistics_simple_bam(run_component, input_sam_path, sam_to_bam, random_path):
+    """Run the component on the BAM version of test.sam and compare all four outputs.
+
+    The output files are compared verbatim (as text) against the expected tables.
+    """
+    bam_file = sam_to_bam(input_sam_path)
+    processed_bam = random_path("tsv")
+    reads_per_chromosome = random_path("tsv")
+    nr_reads_nr_umis_per_cb = random_path("tsv")
+    top_onehundred_umis = random_path("tsv")
+    run_component([
+        "--input", bam_file,
+        "--processedBAMFile", processed_bam,
+        "--nrReadsNrGenesPerChrom", reads_per_chromosome,
+        "--nrReadsNrUMIsPerCB", nr_reads_nr_umis_per_cb,
+        "--umiFreqTop", top_onehundred_umis,
+        "--barcode", "ACGT"
+    ])
+    # All four output files must have been created.
+    for file_path in (processed_bam, reads_per_chromosome,
+                      nr_reads_nr_umis_per_cb, top_onehundred_umis):
+        assert file_path.is_file()
+
+    # One row per read of test.sam.
+    expected_processed_bam = \
+    dedent("""\
+    WellBC	Chr	CB	UB	GX	GN
+    ACGT	1	ACA	CGG	gene1	gene1
+    ACGT	1	ACA	CGG	gene1	gene1
+    ACGT	2	GGG	GTT	gene2	gene2
+    ACGT	2	GGG	GTC	gene3	gene3
+    """)
+
+    # Reads and distinct genes per chromosome.
+    expected_reads_per_chromosome = \
+    dedent("""\
+    WellBC	Chr	NumberOfReads	NumberOfGenes
+    ACGT	1	2	1
+    ACGT	2	2	2
+    """)
+
+    # Reads and distinct UMIs per cell barcode.
+    expected_nr_reads_nr_umis_per_cb = \
+    dedent("""\
+    WellBC	CB	NumberOfReads	nrUMIs
+    ACGT	ACA	2	1
+    ACGT	GGG	2	2
+    """)
+
+    # Read count per UMI, most frequent first.
+    expected_top_onehundred_umis = \
+    dedent("""\
+    WellBC	UB	N
+    ACGT	CGG	2
+    ACGT	GTC	1
+    ACGT	GTT	1
+    """)
+
+    assert_file_content_equals(processed_bam, expected_processed_bam)
+    assert_file_content_equals(reads_per_chromosome, expected_reads_per_chromosome)
+    assert_file_content_equals(nr_reads_nr_umis_per_cb, expected_nr_reads_nr_umis_per_cb)
+    assert_file_content_equals(top_onehundred_umis, expected_top_onehundred_umis)
+
+
+if __name__ == '__main__':
+    sys.exit(pytest.main([__file__]))
--- a/src/stats/generate_well_statistics/test.sam
+++ b/src/stats/generate_well_statistics/test.sam
@@ -0,0 +1,7 @@
+@HD	VN:1.4	SO:coordinate
+@SQ	SN:1	LN:200
+@SQ	SN:2	LN:50
+test_1	16	1	22	255	1M	*	0	0	C	I	NH:i:1	HI:i:1	nM:i:0	AS:i:47	CR:Z:ACA	UR:Z:CGG	GX:Z:gene1	GN:Z:gene1	CB:Z:ACA	UB:Z:CGG
+test_2	16	1	22	255	1M	*	0	0	G	!	NH:i:1	HI:i:1	nM:i:0	AS:i:47	CR:Z:ACA	UR:Z:CGG	GX:Z:gene1	GN:Z:gene1	CB:Z:ACA	UB:Z:CGG
+test_3	0	2	40	255	1M	*	0	0	T	!	NH:i:1	HI:i:1	nM:i:0	AS:i:47	CR:Z:GGG	UR:Z:GTT	GX:Z:gene2	GN:Z:gene2	CB:Z:GGG	UB:Z:GTT
+test_4	0	2	60	255	1M	*	0	0	C	!	NH:i:1	HI:i:1	nM:i:0	AS:i:47	CR:Z:GGG	UR:Z:GTC	GX:Z:gene3	GN:Z:gene3	CB:Z:GGG	UB:Z:GTC
--- a/src/workflows/htrnaseq/config.vsh.yaml
+++ b/src/workflows/htrnaseq/config.vsh.yaml
@@ -40,6 +40,11 @@ argument_groups:
        multiple: true
        required: true
        default: $id/star/*
+      - name: "--nrReadsNrGenesPerChrom"
+        type: file
+        direction: output
+        required: true
+        default: "nrReadsNrGenesPerChrom.txt"
 resources:
  - type: nextflow_script
    path: main.nf
@@ -51,6 +56,10 @@ resources:
 #     entrypoint: test_wf

 dependencies:
+  - name: stats/generate_pool_statistics
+    repository: local
+  - name: stats/generate_well_statistics
+    repository: local
  - name: workflows/well_demultiplex
    repository: local
  - name: workflows/parallel_map_wf
--- a/src/workflows/htrnaseq/main.nf
+++ b/src/workflows/htrnaseq/main.nf
@@ -68,12 +68,47 @@ workflow run_wf {
          state + ["star_output": result.output]
        },
      )
+      | generate_well_statistics.run(
+        fromState: { id, state ->
+          [
+            "input": state.star_output.resolve('Aligned.sortedByCoord.out.bam'),
+            "barcode": state.barcode,
+          ]
+        },
+        toState: [
+          "nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
+          "nrReadsNrUMIsPerCB": "nrReadsNrUMIsPerCB",
+        ]
+      )
+      | map {id, state -> 
+        [state.pool, id, state]
+      }
+      | groupTuple(by: 0, sort: "hash")
+      | map {id, well_ids, states ->
+        def collected_state = [
+          "fastq_output_r1": states.collect{it.fastq_output_r1[0]},
+          "fastq_output_r2": states.collect{it.fastq_output_r2[0]},
+          "nrReadsNrGenesPerChrom": states.collect{it.nrReadsNrGenesPerChrom},
+        ]
+        def newState = states[0] + collected_state
+        [id, newState]
+      }
+      | generate_pool_statistics.run(
+        fromState: [
+          "nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
+        ],
+        toState: {id, result, state -> 
+          state + ["nrReadsNrGenesPerChrom": result.nrReadsNrGenesPerChromPool]
+        }
+      )
      | niceView()
-      | setState(["star_output", "fastq_output_r1", "fastq_output_r2", "star_output"])
-      
-      //| niceView()
-      //
-      //| setState( [ "output": "out" ] )
+      | setState([
+        "star_output", 
+        "fastq_output_r1",
+        "fastq_output_r2",
+        "star_output",
+        "nrReadsNrGenesPerChrom",
+      ])

  emit:
    output_ch
--- a/src/workflows/parallel_map_wf/config.vsh.yaml
+++ b/src/workflows/parallel_map_wf/config.vsh.yaml
@@ -29,7 +29,6 @@ argument_groups:
      - name: "--output"
        type: file
        direction: output
-        multiple: true
        required: true
 resources:
  - type: nextflow_script
--- a/src/workflows/parallel_map_wf/main.nf
+++ b/src/workflows/parallel_map_wf/main.nf
@@ -3,8 +3,7 @@ workflow run_wf {
    input_ch

    main:
-    output_ch = input_ch
-      | map {id, state -> [id, state + ["orig_id": id]]}
+    pool_ch = input_ch
      | groupWells.run(
        fromState: { id, state ->
          [
@@ -19,7 +18,6 @@ workflow run_wf {
            "wells": result.wells,
            "input_r1": result.output_r1,
            "input_r2": result.output_r2,
-            "_meta": ["join_id": state.orig_id]
          ]
        }
      )
@@ -33,7 +31,7 @@ workflow run_wf {
           "pool": state.pool,
           "wellBarcodesLength": 10,
           "umiLength": 10,
-           "output": state.output[0],
+           "output": state.output,
         ]
        },
        toState: { id, result, state ->
@@ -43,8 +41,33 @@ workflow run_wf {
        },
        directives: [label: ["midmem", "midcpu"]]
      )
-      | setState(["output", "_meta"])
-      
+      | setState(["output"])
+
+    // Key every well-level event by its pool so it can be combined with the
+    // pool-level mapping results.
+    input_join_ch = input_ch
+      | map {id, state ->
+        [state.pool, id, state]
+      }
+    // For every well, pick from the pool's STAR output directories the one whose
+    // raw/barcodes.tsv file holds the barcode of that well.
+    output_ch = input_join_ch.combine(pool_ch, by: 0)
+      | map {pool, well_id, state_well, state_pool ->
+        // 'def' keeps the variable local to this closure invocation.
+        def well_output = state_pool.output.findAll{star_output_dir ->
+          def barcodes_list = []
+          def barcode_file_regex = ~/.*\/raw\/barcodes\.tsv$/
+          star_output_dir.eachFileRecurse{barcode_file ->
+            if (barcode_file =~ barcode_file_regex) {
+              assert barcode_file.countLines() == 1, "Expected only one barcode in a single STAR output."
+              barcodes_list.add(barcode_file.text.trim())
+            }
+          }
+          assert barcodes_list.size() == 1, "Exactly one file should have matched the barcodes file regex (found: $barcodes_list)."
+          return barcodes_list[0] == state_well.barcode
+        }
+        // The barcode is taken from the well state: the value read inside the
+        // closure above is not in scope here.
+        assert well_output.size() == 1, "Expected exactly one output from the mapping for barcode '${state_well.barcode}', found ${well_output.size()}."
+        [well_id, ["output": well_output[0]]]
+      }
+
+
    emit:
    output_ch
 }
--- a/target/executable/parallel_map/.config.vsh.yaml
+++ b/target/executable/parallel_map/.config.vsh.yaml
@@ -236,7 +236,7 @@ build_info:
  output: "target/executable/parallel_map"
  executable: "target/executable/parallel_map/parallel_map"
  viash_version: "0.9.0-RC7"
-  git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
+  git_commit: "b98f6367d672368af134843711a46d3b53717187"
  git_remote: "https://github.com/viash-hub/htrnaseq"
 package_config:
  name: "htrnaseq"
--- a/target/executable/parallel_map/parallel_map
+++ b/target/executable/parallel_map/parallel_map
@@ -516,9 +516,9 @@ RUN wget -O $STAR_TARGET $STAR_SOURCE && \
  rm $STAR_TARGET && rm -rf /tmp/STAR_$STAR_VERSION

 LABEL org.opencontainers.image.description="Companion container for running component parallel_map"
-LABEL org.opencontainers.image.created="2024-08-29T07:58:04Z"
+LABEL org.opencontainers.image.created="2024-08-29T12:31:01Z"
 LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
-LABEL org.opencontainers.image.revision="21831c2104098ecce57aa9b372e49f865296cc48"
+LABEL org.opencontainers.image.revision="b98f6367d672368af134843711a46d3b53717187"
 LABEL org.opencontainers.image.version="main"

 VIASHDOCKER
--- a/target/executable/stats/generate_pool_statistics/.config.vsh.yaml
+++ b/target/executable/stats/generate_pool_statistics/.config.vsh.yaml
@@ -0,0 +1,186 @@
+name: "generate_pool_statistics"
+namespace: "stats"
+version: "main"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--nrReadsNrGenesPerChrom"
+    description: "Path to an output file that contains a .tsv formatted table describing\n\
+      per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
+      column) and the number of genes on that chromosome that had at least one\nread\
+      \ mapped to it (NumberOfGenes).\n"
+    info: null
+    default:
+    - "processedBamFile_well1.tsv"
+    - "processedBamfile_well2.tsv"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "file"
+    name: "--nrReadsNrGenesPerChromPool"
+    description: "Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom\
+      \ files. Describes\nper chromosome (as columns) the number of reads, as well\
+      \ as the total number \nof reads per cell barcode and the percentage of nuclear,\
+      \ ERCC and mitochondrial\nreads.\n"
+    info: null
+    example:
+    - "nrReadsNrGenesPerChrom.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+info: null
+status: "enabled"
+requirements:
+  commands:
+  - "ps"
+license: "MIT"
+links:
+  repository: "https://github.com/viash-hub/htrnaseq"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.11-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "pandas"
+    upgrade: true
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/stats/generate_pool_statistics/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/stats/generate_pool_statistics"
+  executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics"
+  viash_version: "0.9.0-RC7"
+  git_commit: "b98f6367d672368af134843711a46d3b53717187"
+  git_remote: "https://github.com/viash-hub/htrnaseq"
+package_config:
+  name: "htrnaseq"
+  version: "main"
+  description: "High-throughput pipeline [WIP]\n"
+  info: null
+  viash_version: "0.9.0-RC7"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
+    \ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
+    \ dest: 'nextflow_labels.config'}\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "bioinformatics"
+  - "sequence"
+  - "high-throughput"
+  - "mapping"
+  - "counting"
+  - "pipeline"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/viash-hub/htrnaseq"
+    issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
--- a/target/executable/stats/generate_pool_statistics/generate_pool_statistics
+++ b/target/executable/stats/generate_pool_statistics/generate_pool_statistics
--- a/target/executable/stats/generate_pool_statistics/nextflow_labels.config
+++ b/target/executable/stats/generate_pool_statistics/nextflow_labels.config
@@ -0,0 +1,43 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }  // scales with the attempt number; not routed through get_memory(), so maxMemory does not cap it
+  cpus = 1
+
+  // Retry when the exit status is in 137..140 (treated here as memory-related); any other status terminates
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+  maxMemory = null  // optional cap read by get_memory(); while null, get_memory() returns the request unchanged
+
+  // Resource labels: CPU labels are fixed counts; memory labels scale with task.attempt and pass through get_memory()
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
+  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
+
+}
+
+// Clamp a requested memory amount (to_compare) to the optional cap configured in process.maxMemory.
+// Returns to_compare unchanged when no cap is set; on the attempt equal to process.maxRetries the cap itself is returned.
+def get_memory(to_compare) {
+    if (!process.containsKey("maxMemory") || !process.maxMemory) {
+      return to_compare
+    }
+    try {  // NOTE(review): relies on `task` resolving here when called from a directive closure - confirm
+      if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+        return process.maxMemory
+      }
+      // Request exceeds the cap: clamp it (compareTo is only guaranteed to be positive, not exactly 1).
+      if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
+        return process.maxMemory as nextflow.util.MemoryUnit
+      }
+      return to_compare
+    } catch (all) {
+          println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+          System.exit(1)
+    }
+  }
--- a/target/executable/stats/generate_well_statistics/.config.vsh.yaml
+++ b/target/executable/stats/generate_well_statistics/.config.vsh.yaml
@@ -0,0 +1,257 @@
+name: "generate_well_statistics"
+namespace: "stats"
+version: "main"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    description: "The .bam file as returned by the mapping tool STAR."
+    info: null
+    example:
+    - "input.bam"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--barcode"
+    description: "The barcode for the well that is being processed. Is only used to\
+      \ add a metadata\ncolumn to all output files.\n"
+    info: null
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--processedBAMFile"
+    description: "Path to a .tsv file listing, per read in the BAM file,\nthe value\
+      \ for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromosome\
+      \ to which the read was mapped.\n"
+    info: null
+    default:
+    - "processedBamFile.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--nrReadsNrGenesPerChrom"
+    description: "Path to an output file that contains a .tsv formatted table describing\n\
+      per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
+      column) and the number of genes on that chromosome that had at least one\nread\
+      \ mapped to it (NumberOfGenes).\n"
+    info: null
+    default:
+    - "nrReadsNrGenesPerChrom.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--nrReadsNrUMIsPerCB"
+    description: "Path to an output file that contains a .tsv formatted table describing\n\
+      per barcode the number of UMI's (nrUMIs) and the total number of reads (NumberOfReads).\n"
+    info: null
+    default:
+    - "nrReadsNrUMIsPerCB.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--umiFreqTop"
+    description: "Path to an output file that contains a .tsv formatted table describing\n\
+      per UMI (column UB) the frequency at which they occur in the reads (column\n\
+      N). Only the top 100 UMIs are included.\n"
+    info: null
+    default:
+    - "umiFreqTop100.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--threads"
+    description: "Number of threads to use for decompressing BAM files.\n"
+    info: null
+    default:
+    - 1
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Generate summary statistics from BAM files generated by STAR solo."
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "test.sam"
+info: null
+status: "enabled"
+requirements:
+  commands:
+  - "ps"
+license: "MIT"
+links:
+  repository: "https://github.com/viash-hub/htrnaseq"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "debian:stable-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "docker"
+    env:
+    - "PIP_BREAK_SYSTEM_PACKAGES=1"
+    - "HTSLIB_LIBRARY_DIR=/usr/lib/"
+    - "HTSLIB_INCLUDE_DIR=/usr/include/"
+  - type: "apt"
+    packages:
+    - "python3"
+    - "python3-pip"
+    - "python3-venv"
+    - "python-is-python3"
+    - "libhts-dev"
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "pysam"
+    - "pandas"
+    upgrade: true
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/stats/generate_well_statistics/config.vsh.yaml"
+  runner: "executable"
+  engine: "docker|native"
+  output: "target/executable/stats/generate_well_statistics"
+  executable: "target/executable/stats/generate_well_statistics/generate_well_statistics"
+  viash_version: "0.9.0-RC7"
+  git_commit: "b98f6367d672368af134843711a46d3b53717187"
+  git_remote: "https://github.com/viash-hub/htrnaseq"
+package_config:
+  name: "htrnaseq"
+  version: "main"
+  description: "High-throughput pipeline [WIP]\n"
+  info: null
+  viash_version: "0.9.0-RC7"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
+    \ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
+    \ dest: 'nextflow_labels.config'}\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "bioinformatics"
+  - "sequence"
+  - "high-throughput"
+  - "mapping"
+  - "counting"
+  - "pipeline"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/viash-hub/htrnaseq"
+    issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
--- a/target/executable/stats/generate_well_statistics/generate_well_statistics
+++ b/target/executable/stats/generate_well_statistics/generate_well_statistics
--- a/target/executable/stats/generate_well_statistics/nextflow_labels.config
+++ b/target/executable/stats/generate_well_statistics/nextflow_labels.config
@@ -0,0 +1,43 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }  // scales with the attempt number; not routed through get_memory(), so maxMemory does not cap it
+  cpus = 1
+
+  // Retry when the exit status is in 137..140 (treated here as memory-related); any other status terminates
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+  maxMemory = null  // optional cap read by get_memory(); while null, get_memory() returns the request unchanged
+
+  // Resource labels: CPU labels are fixed counts; memory labels scale with task.attempt and pass through get_memory()
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
+  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
+
+}
+
+// Clamp a requested memory amount (to_compare) to the optional cap configured in process.maxMemory.
+// Returns to_compare unchanged when no cap is set; on the attempt equal to process.maxRetries the cap itself is returned.
+def get_memory(to_compare) {
+    if (!process.containsKey("maxMemory") || !process.maxMemory) {
+      return to_compare
+    }
+    try {  // NOTE(review): relies on `task` resolving here when called from a directive closure - confirm
+      if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+        return process.maxMemory
+      }
+      // Request exceeds the cap: clamp it (compareTo is only guaranteed to be positive, not exactly 1).
+      if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
+        return process.maxMemory as nextflow.util.MemoryUnit
+      }
+      return to_compare
+    } catch (all) {
+          println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+          System.exit(1)
+    }
+  }
--- a/target/nextflow/parallel_map/.config.vsh.yaml
+++ b/target/nextflow/parallel_map/.config.vsh.yaml
@@ -236,7 +236,7 @@ build_info:
  output: "target/nextflow/parallel_map"
  executable: "target/nextflow/parallel_map/main.nf"
  viash_version: "0.9.0-RC7"
-  git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
+  git_commit: "b98f6367d672368af134843711a46d3b53717187"
  git_remote: "https://github.com/viash-hub/htrnaseq"
 package_config:
  name: "htrnaseq"
--- a/target/nextflow/parallel_map/main.nf
+++ b/target/nextflow/parallel_map/main.nf
@@ -3087,7 +3087,7 @@ meta = [
    "engine" : "docker|native",
    "output" : "target/nextflow/parallel_map",
    "viash_version" : "0.9.0-RC7",
-    "git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
+    "git_commit" : "b98f6367d672368af134843711a46d3b53717187",
    "git_remote" : "https://github.com/viash-hub/htrnaseq"
  },
  "package_config" : {
--- a/target/nextflow/stats/generate_pool_statistics/.config.vsh.yaml
+++ b/target/nextflow/stats/generate_pool_statistics/.config.vsh.yaml
@@ -0,0 +1,186 @@
+name: "generate_pool_statistics"
+namespace: "stats"
+version: "main"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--nrReadsNrGenesPerChrom"
+    description: "Paths to input files that each contain a .tsv formatted table describing\n\
+      per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
+      column) and the number of genes on that chromosome that had at least one\nread\
+      \ mapped to it (NumberOfGenes).\n"
+    info: null
+    default:
+    - "processedBamFile_well1.tsv"
+    - "processedBamfile_well2.tsv"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: true
+    multiple_sep: ";"
+  - type: "file"
+    name: "--nrReadsNrGenesPerChromPool"
+    description: "Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom\
+      \ files. Describes\nper chromosome (as columns) the number of reads, as well\
+      \ as the total number \nof reads per cell barcode and the percentage of nuclear,\
+      \ ERCC and mitochondrial\nreads.\n"
+    info: null
+    example:
+    - "nrReadsNrGenesPerChrom.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+info: null
+status: "enabled"
+requirements:
+  commands:
+  - "ps"
+license: "MIT"
+links:
+  repository: "https://github.com/viash-hub/htrnaseq"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "python:3.11-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "apt"
+    packages:
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "pandas"
+    upgrade: true
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/stats/generate_pool_statistics/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "docker|native"
+  output: "target/nextflow/stats/generate_pool_statistics"
+  executable: "target/nextflow/stats/generate_pool_statistics/main.nf"
+  viash_version: "0.9.0-RC7"
+  git_commit: "b98f6367d672368af134843711a46d3b53717187"
+  git_remote: "https://github.com/viash-hub/htrnaseq"
+package_config:
+  name: "htrnaseq"
+  version: "main"
+  description: "High-throughput pipeline [WIP]\n"
+  info: null
+  viash_version: "0.9.0-RC7"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
+    \ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
+    \ dest: 'nextflow_labels.config'}\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "bioinformatics"
+  - "sequence"
+  - "high-throughput"
+  - "mapping"
+  - "counting"
+  - "pipeline"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/viash-hub/htrnaseq"
+    issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
--- a/target/nextflow/stats/generate_pool_statistics/main.nf
+++ b/target/nextflow/stats/generate_pool_statistics/main.nf
--- a/target/nextflow/stats/generate_pool_statistics/nextflow.config
+++ b/target/nextflow/stats/generate_pool_statistics/nextflow.config
@@ -0,0 +1,124 @@
+manifest {
+  name = 'stats/generate_pool_statistics'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = 'main'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// detect tempdir
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+profiles {
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/nextflow/stats/generate_pool_statistics/nextflow_labels.config
+++ b/target/nextflow/stats/generate_pool_statistics/nextflow_labels.config
@@ -0,0 +1,43 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }  // scales with the attempt number; not routed through get_memory(), so maxMemory does not cap it
+  cpus = 1
+
+  // Retry when the exit status is in 137..140 (treated here as memory-related); any other status terminates
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+  maxMemory = null  // optional cap read by get_memory(); while null, get_memory() returns the request unchanged
+
+  // Resource labels: CPU labels are fixed counts; memory labels scale with task.attempt and pass through get_memory()
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
+  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
+
+}
+
+// Clamp a requested memory amount (to_compare) to the optional cap configured in process.maxMemory.
+// Returns to_compare unchanged when no cap is set; on the attempt equal to process.maxRetries the cap itself is returned.
+def get_memory(to_compare) {
+    if (!process.containsKey("maxMemory") || !process.maxMemory) {
+      return to_compare
+    }
+    try {  // NOTE(review): relies on `task` resolving here when called from a directive closure - confirm
+      if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+        return process.maxMemory
+      }
+      // Request exceeds the cap: clamp it (compareTo is only guaranteed to be positive, not exactly 1).
+      if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
+        return process.maxMemory as nextflow.util.MemoryUnit
+      }
+      return to_compare
+    } catch (all) {
+          println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+          System.exit(1)
+    }
+  }
--- a/target/nextflow/stats/generate_pool_statistics/nextflow_schema.json
+++ b/target/nextflow/stats/generate_pool_statistics/nextflow_schema.json
@@ -0,0 +1,82 @@
+{
+"$schema": "http://json-schema.org/draft-07/schema",
+"title": "generate_pool_statistics",
+"description": "No description",
+"type": "object",
+"definitions": {
+
+    
+    
+    "arguments" : {
+    "title": "Arguments",
+    "type": "object",
+    "description": "No description",
+    "properties": {
+    
+        
+                "nrReadsNrGenesPerChrom": {
+                "type":
+                "string",
+                "description": "Type: List of `file`, default: `processedBamFile_well1.tsv;processedBamfile_well2.tsv`, multiple_sep: `\";\"`. Paths to input files that each contain a ",
+                "help_text": "Type: List of `file`, default: `processedBamFile_well1.tsv;processedBamfile_well2.tsv`, multiple_sep: `\";\"`. Paths to input files that each contain a .tsv formatted table describing\nper chromosome the number of reads that were mapped to that chromosome (NumberOfReads\ncolumn) and the number of genes on that chromosome that had at least one\nread mapped to it (NumberOfGenes).\n"
+            ,
+                "default": "processedBamFile_well1.tsv;processedBamfile_well2.tsv"
+            }
+    
+
+        ,
+                "nrReadsNrGenesPerChromPool": {
+                "type":
+                "string",
+                "description": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChromPool.txt`, example: `nrReadsNrGenesPerChrom.txt`. Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom files",
+                "help_text": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChromPool.txt`, example: `nrReadsNrGenesPerChrom.txt`. Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom files. Describes\nper chromosome (as columns) the number of reads, as well as the total number \nof reads per cell barcode and the percentage of nuclear, ERCC and mitochondrial\nreads.\n"
+            ,
+                "default": "$id.$key.nrReadsNrGenesPerChromPool.txt"
+            }
+    
+
+}
+},
+    
+    
+    "nextflow input-output arguments" : {
+    "title": "Nextflow input-output arguments",
+    "type": "object",
+    "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+    "properties": {
+    
+        
+                "publish_dir": {
+                "type":
+                "string",
+                "description": "Type: `string`, required, example: `output/`. Path to an output directory",
+                "help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
+            
+            }
+    
+
+        ,
+                "param_list": {
+                "type":
+                "string",
+                "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
+                "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
+                "hidden": true
+            
+            }
+    
+
+}
+}
+},
+"allOf": [
+
+    {
+    "$ref": "#/definitions/arguments"
+    },
+
+    {
+    "$ref": "#/definitions/nextflow input-output arguments"
+    }
+]
+}
--- a/target/nextflow/stats/generate_well_statistics/.config.vsh.yaml
+++ b/target/nextflow/stats/generate_well_statistics/.config.vsh.yaml
@@ -0,0 +1,257 @@
+name: "generate_well_statistics"
+namespace: "stats"
+version: "main"
+argument_groups:
+- name: "Arguments"
+  arguments:
+  - type: "file"
+    name: "--input"
+    description: "The .bam file as returned by the mapping tool STAR."
+    info: null
+    example:
+    - "input.bam"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "string"
+    name: "--barcode"
+    description: "The barcode for the well that is being processed. Is only used to\
+      \ add a metadata\ncolumn to all output files.\n"
+    info: null
+    required: true
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--processedBAMFile"
+    description: "Path to a .tsv file listing, per read in the BAM file,\nthe value\
+      \ for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromosome\
+      \ to which the read was mapped.\n"
+    info: null
+    default:
+    - "processedBamFile.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--nrReadsNrGenesPerChrom"
+    description: "Path to an output file that contains a .tsv formatted table describing\n\
+      per chromosome the number of reads that were mapped to that chromosome (NumberOfReads\n\
+      column) and the number of genes on that chromosome that had at least one\nread\
+      \ mapped to it (NumberOfGenes).\n"
+    info: null
+    default:
+    - "nrReadsNrGenesPerChrom.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--nrReadsNrUMIsPerCB"
+    description: "Path to an output file that contains a .tsv formatted table describing\n\
+      per barcode the number of UMI's (nrUMIs) and the total number of reads (NumberOfReads).\n"
+    info: null
+    default:
+    - "nrReadsNrUMIsPerCB.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "file"
+    name: "--umiFreqTop"
+    description: "Path to an output file that contains a .tsv formatted table describing\n\
+      per UMI (column UB) the frequency at which they occur in the reads (column\n\
+      N). Only the top 100 UMIs are included.\n"
+    info: null
+    default:
+    - "umiFreqTop100.txt"
+    must_exist: true
+    create_parent: true
+    required: false
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
+  - type: "integer"
+    name: "--threads"
+    description: "Number of threads to use for decompressing BAM files.\n"
+    info: null
+    default:
+    - 1
+    required: false
+    min: 1
+    direction: "input"
+    multiple: false
+    multiple_sep: ";"
+resources:
+- type: "python_script"
+  path: "script.py"
+  is_executable: true
+- type: "file"
+  path: "nextflow_labels.config"
+  dest: "nextflow_labels.config"
+description: "Generate summary statistics from BAM files generated by STAR solo."
+test_resources:
+- type: "python_script"
+  path: "test.py"
+  is_executable: true
+- type: "file"
+  path: "test.sam"
+info: null
+status: "enabled"
+requirements:
+  commands:
+  - "ps"
+license: "MIT"
+links:
+  repository: "https://github.com/viash-hub/htrnaseq"
+runners:
+- type: "executable"
+  id: "executable"
+  docker_setup_strategy: "ifneedbepullelsecachedbuild"
+- type: "nextflow"
+  id: "nextflow"
+  directives:
+    tag: "$id"
+  auto:
+    simplifyInput: true
+    simplifyOutput: false
+    transcript: false
+    publish: false
+  config:
+    labels:
+      mem1gb: "memory = 1000000000.B"
+      mem2gb: "memory = 2000000000.B"
+      mem5gb: "memory = 5000000000.B"
+      mem10gb: "memory = 10000000000.B"
+      mem20gb: "memory = 20000000000.B"
+      mem50gb: "memory = 50000000000.B"
+      mem100gb: "memory = 100000000000.B"
+      mem200gb: "memory = 200000000000.B"
+      mem500gb: "memory = 500000000000.B"
+      mem1tb: "memory = 1000000000000.B"
+      mem2tb: "memory = 2000000000000.B"
+      mem5tb: "memory = 5000000000000.B"
+      mem10tb: "memory = 10000000000000.B"
+      mem20tb: "memory = 20000000000000.B"
+      mem50tb: "memory = 50000000000000.B"
+      mem100tb: "memory = 100000000000000.B"
+      mem200tb: "memory = 200000000000000.B"
+      mem500tb: "memory = 500000000000000.B"
+      mem1gib: "memory = 1073741824.B"
+      mem2gib: "memory = 2147483648.B"
+      mem4gib: "memory = 4294967296.B"
+      mem8gib: "memory = 8589934592.B"
+      mem16gib: "memory = 17179869184.B"
+      mem32gib: "memory = 34359738368.B"
+      mem64gib: "memory = 68719476736.B"
+      mem128gib: "memory = 137438953472.B"
+      mem256gib: "memory = 274877906944.B"
+      mem512gib: "memory = 549755813888.B"
+      mem1tib: "memory = 1099511627776.B"
+      mem2tib: "memory = 2199023255552.B"
+      mem4tib: "memory = 4398046511104.B"
+      mem8tib: "memory = 8796093022208.B"
+      mem16tib: "memory = 17592186044416.B"
+      mem32tib: "memory = 35184372088832.B"
+      mem64tib: "memory = 70368744177664.B"
+      mem128tib: "memory = 140737488355328.B"
+      mem256tib: "memory = 281474976710656.B"
+      mem512tib: "memory = 562949953421312.B"
+      cpu1: "cpus = 1"
+      cpu2: "cpus = 2"
+      cpu5: "cpus = 5"
+      cpu10: "cpus = 10"
+      cpu20: "cpus = 20"
+      cpu50: "cpus = 50"
+      cpu100: "cpus = 100"
+      cpu200: "cpus = 200"
+      cpu500: "cpus = 500"
+      cpu1000: "cpus = 1000"
+    script:
+    - "includeConfig(\"nextflow_labels.config\")"
+  debug: false
+  container: "docker"
+engines:
+- type: "docker"
+  id: "docker"
+  image: "debian:stable-slim"
+  target_registry: "images.viash-hub.com"
+  target_tag: "main"
+  namespace_separator: "/"
+  setup:
+  - type: "docker"
+    env:
+    - "PIP_BREAK_SYSTEM_PACKAGES=1"
+    - "HTSLIB_LIBRARY_DIR=/usr/lib/"
+    - "HTSLIB_INCLUDE_DIR=/usr/include/"
+  - type: "apt"
+    packages:
+    - "python3"
+    - "python3-pip"
+    - "python3-venv"
+    - "python-is-python3"
+    - "libhts-dev"
+    - "procps"
+    interactive: false
+  - type: "python"
+    user: false
+    packages:
+    - "pysam"
+    - "pandas"
+    upgrade: true
+  test_setup:
+  - type: "python"
+    user: false
+    packages:
+    - "viashpy"
+    upgrade: true
+  entrypoint: []
+  cmd: null
+- type: "native"
+  id: "native"
+build_info:
+  config: "src/stats/generate_well_statistics/config.vsh.yaml"
+  runner: "nextflow"
+  engine: "docker|native"
+  output: "target/nextflow/stats/generate_well_statistics"
+  executable: "target/nextflow/stats/generate_well_statistics/main.nf"
+  viash_version: "0.9.0-RC7"
+  git_commit: "b98f6367d672368af134843711a46d3b53717187"
+  git_remote: "https://github.com/viash-hub/htrnaseq"
+package_config:
+  name: "htrnaseq"
+  version: "main"
+  description: "High-throughput pipeline [WIP]\n"
+  info: null
+  viash_version: "0.9.0-RC7"
+  source: "src"
+  target: "target"
+  config_mods:
+  - ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].config.script\
+    \ := 'includeConfig(\"nextflow_labels.config\")'\n.resources += {path: '/src/config/labels.config',\
+    \ dest: 'nextflow_labels.config'}\n"
+  - ".engines += { type: \"native\" }"
+  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
+  - ".engines[.type == 'docker'].target_tag := 'main'"
+  keywords:
+  - "bioinformatics"
+  - "sequence"
+  - "high-throughput"
+  - "mapping"
+  - "counting"
+  - "pipeline"
+  license: "MIT"
+  organization: "vsh"
+  links:
+    repository: "https://github.com/viash-hub/htrnaseq"
+    issue_tracker: "https://github.com/viash-hub/htrnaseq/issues"
--- a/target/nextflow/stats/generate_well_statistics/main.nf
+++ b/target/nextflow/stats/generate_well_statistics/main.nf
--- a/target/nextflow/stats/generate_well_statistics/nextflow.config
+++ b/target/nextflow/stats/generate_well_statistics/nextflow.config
@@ -0,0 +1,125 @@
+manifest {
+  name = 'stats/generate_well_statistics'
+  mainScript = 'main.nf'
+  nextflowVersion = '!>=20.12.1-edge'
+  version = 'main'
+  description = 'Generate summary statistics from BAM files generated by STAR solo.'
+}
+
+process.container = 'nextflow/bash:latest'
+
+// detect tempdir
+tempDir = java.nio.file.Paths.get(
+  System.getenv('NXF_TEMP') ?:
+    System.getenv('VIASH_TEMP') ?: 
+    System.getenv('TEMPDIR') ?: 
+    System.getenv('TMPDIR') ?: 
+    '/tmp'
+).toAbsolutePath()
+
+profiles {
+  no_publish {
+    process {
+      withName: '.*' {
+        publishDir = [
+          enabled: false
+        ]
+      }
+    }
+  }
+  mount_temp {
+    docker.temp            = tempDir
+    podman.temp            = tempDir
+    charliecloud.temp      = tempDir
+  }
+  docker {
+    docker.enabled         = true
+    // docker.userEmulation   = true
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  singularity {
+    singularity.enabled    = true
+    singularity.autoMounts = true
+    docker.enabled         = false
+    podman.enabled         = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  podman {
+    podman.enabled         = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    shifter.enabled        = false
+    charliecloud.enabled   = false
+  }
+  shifter {
+    shifter.enabled        = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    charliecloud.enabled   = false
+  }
+  charliecloud {
+    charliecloud.enabled   = true
+    docker.enabled         = false
+    singularity.enabled    = false
+    podman.enabled         = false
+    shifter.enabled        = false
+  }
+}
+
+process{
+  withLabel: mem1gb { memory = 1000000000.B }
+  withLabel: mem2gb { memory = 2000000000.B }
+  withLabel: mem5gb { memory = 5000000000.B }
+  withLabel: mem10gb { memory = 10000000000.B }
+  withLabel: mem20gb { memory = 20000000000.B }
+  withLabel: mem50gb { memory = 50000000000.B }
+  withLabel: mem100gb { memory = 100000000000.B }
+  withLabel: mem200gb { memory = 200000000000.B }
+  withLabel: mem500gb { memory = 500000000000.B }
+  withLabel: mem1tb { memory = 1000000000000.B }
+  withLabel: mem2tb { memory = 2000000000000.B }
+  withLabel: mem5tb { memory = 5000000000000.B }
+  withLabel: mem10tb { memory = 10000000000000.B }
+  withLabel: mem20tb { memory = 20000000000000.B }
+  withLabel: mem50tb { memory = 50000000000000.B }
+  withLabel: mem100tb { memory = 100000000000000.B }
+  withLabel: mem200tb { memory = 200000000000000.B }
+  withLabel: mem500tb { memory = 500000000000000.B }
+  withLabel: mem1gib { memory = 1073741824.B }
+  withLabel: mem2gib { memory = 2147483648.B }
+  withLabel: mem4gib { memory = 4294967296.B }
+  withLabel: mem8gib { memory = 8589934592.B }
+  withLabel: mem16gib { memory = 17179869184.B }
+  withLabel: mem32gib { memory = 34359738368.B }
+  withLabel: mem64gib { memory = 68719476736.B }
+  withLabel: mem128gib { memory = 137438953472.B }
+  withLabel: mem256gib { memory = 274877906944.B }
+  withLabel: mem512gib { memory = 549755813888.B }
+  withLabel: mem1tib { memory = 1099511627776.B }
+  withLabel: mem2tib { memory = 2199023255552.B }
+  withLabel: mem4tib { memory = 4398046511104.B }
+  withLabel: mem8tib { memory = 8796093022208.B }
+  withLabel: mem16tib { memory = 17592186044416.B }
+  withLabel: mem32tib { memory = 35184372088832.B }
+  withLabel: mem64tib { memory = 70368744177664.B }
+  withLabel: mem128tib { memory = 140737488355328.B }
+  withLabel: mem256tib { memory = 281474976710656.B }
+  withLabel: mem512tib { memory = 562949953421312.B }
+  withLabel: cpu1 { cpus = 1 }
+  withLabel: cpu2 { cpus = 2 }
+  withLabel: cpu5 { cpus = 5 }
+  withLabel: cpu10 { cpus = 10 }
+  withLabel: cpu20 { cpus = 20 }
+  withLabel: cpu50 { cpus = 50 }
+  withLabel: cpu100 { cpus = 100 }
+  withLabel: cpu200 { cpus = 200 }
+  withLabel: cpu500 { cpus = 500 }
+  withLabel: cpu1000 { cpus = 1000 }
+}
+
+includeConfig("nextflow_labels.config")
--- a/target/nextflow/stats/generate_well_statistics/nextflow_labels.config
+++ b/target/nextflow/stats/generate_well_statistics/nextflow_labels.config
@@ -0,0 +1,43 @@
+process {
+  // Default resources for components that hardly do any processing
+  memory = { 2.GB * task.attempt }
+  cpus = 1
+
+  // Retry for exit codes that have something to do with memory issues
+  errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+  maxRetries = 3
+  maxMemory = null
+
+  // Resource labels
+  withLabel: singlecpu { cpus = 1 }
+  withLabel: lowcpu { cpus = 4 }
+  withLabel: midcpu { cpus = 10 }
+  withLabel: highcpu { cpus = 20 }
+  
+  withLabel: lowmem { memory = { get_memory( 4.GB * task.attempt ) } }
+  withLabel: midmem { memory = { get_memory( 25.GB * task.attempt ) } }
+  withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
+  withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
+
+}
+
+// Clamp a requested memory amount to process.maxMemory; a no-op when maxMemory is unset.
+// On the attempt equal to process.maxRetries, maxMemory itself is returned.
+def get_memory(to_compare) {
+    if (!process.containsKey("maxMemory") || !process.maxMemory) {
+      return to_compare
+    }
+    try {
+      if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
+        return process.maxMemory
+      }
+      // Fix: this branch used the undefined name `max_memory`, so every over-limit request hit the catch below.
+      if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
+        return process.maxMemory as nextflow.util.MemoryUnit
+      }
+      return to_compare
+    } catch (all) {
+          println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
+          System.exit(1)
+    }
+  }
--- a/target/nextflow/stats/generate_well_statistics/nextflow_schema.json
+++ b/target/nextflow/stats/generate_well_statistics/nextflow_schema.json
@@ -0,0 +1,135 @@
+{
+"$schema": "http://json-schema.org/draft-07/schema",
+"title": "generate_well_statistics",
+"description": "Generate summary statistics from BAM files generated by STAR solo.",
+"type": "object",
+"definitions": {
+
+    
+    
+    "arguments" : {
+    "title": "Arguments",
+    "type": "object",
+    "description": "No description",
+    "properties": {
+    
+        
+                "input": {
+                "type":
+                "string",
+                "description": "Type: `file`, example: `input.bam`. The ",
+                "help_text": "Type: `file`, example: `input.bam`. The .bam file as returned by the mapping tool STAR."
+            
+            }
+    
+
+        ,
+                "barcode": {
+                "type":
+                "string",
+                "description": "Type: `string`, required. The barcode for the well that is being processed",
+                "help_text": "Type: `string`, required. The barcode for the well that is being processed. Is only used to add a metadata\ncolumn to all output files.\n"
+            
+            }
+    
+
+        ,
+                "processedBAMFile": {
+                "type":
+                "string",
+                "description": "Type: `file`, default: `$id.$key.processedBAMFile.txt`. Path to a ",
+                "help_text": "Type: `file`, default: `$id.$key.processedBAMFile.txt`. Path to a .tsv file listing, per read in the BAM file,\nthe value for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromosome to which the read was mapped.\n"
+            ,
+                "default": "$id.$key.processedBAMFile.txt"
+            }
+    
+
+        ,
+                "nrReadsNrGenesPerChrom": {
+                "type":
+                "string",
+                "description": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. Path to an output file that contains a ",
+                "help_text": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. Path to an output file that contains a .tsv formatted table describing\nper chromosome the number of reads that were mapped to that chromosome (NumberOfReads\ncolumn) and the number of genes on that chromosome that had at least one\nread mapped to it (NumberOfGenes).\n"
+            ,
+                "default": "$id.$key.nrReadsNrGenesPerChrom.txt"
+            }
+    
+
+        ,
+                "nrReadsNrUMIsPerCB": {
+                "type":
+                "string",
+                "description": "Type: `file`, default: `$id.$key.nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a ",
+                "help_text": "Type: `file`, default: `$id.$key.nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a .tsv formatted table describing\nper barcode the number of UMI\u0027s (nrUMIs) and the total number of reads (NumberOfReads).\n"
+            ,
+                "default": "$id.$key.nrReadsNrUMIsPerCB.txt"
+            }
+    
+
+        ,
+                "umiFreqTop": {
+                "type":
+                "string",
+                "description": "Type: `file`, default: `$id.$key.umiFreqTop.txt`. Path to an output file that contains a ",
+                "help_text": "Type: `file`, default: `$id.$key.umiFreqTop.txt`. Path to an output file that contains a .tsv formatted table describing\nper UMI (column UB) the frequency at which they occur in the reads (column\nN). Only the top 100 UMIs are included.\n"
+            ,
+                "default": "$id.$key.umiFreqTop.txt"
+            }
+    
+
+        ,
+                "threads": {
+                "type":
+                "integer",
+                "description": "Type: `integer`, default: `1`. Number of threads to use for decompressing BAM files",
+                "help_text": "Type: `integer`, default: `1`. Number of threads to use for decompressing BAM files.\n"
+            ,
+                "default": "1"
+            }
+    
+
+}
+},
+    
+    
+    "nextflow input-output arguments" : {
+    "title": "Nextflow input-output arguments",
+    "type": "object",
+    "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+    "properties": {
+    
+        
+                "publish_dir": {
+                "type":
+                "string",
+                "description": "Type: `string`, required, example: `output/`. Path to an output directory",
+                "help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
+            
+            }
+    
+
+        ,
+                "param_list": {
+                "type":
+                "string",
+                "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
+                "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
+                "hidden": true
+            
+            }
+    
+
+}
+}
+},
+"allOf": [
+
+    {
+    "$ref": "#/definitions/arguments"
+    },
+
+    {
+    "$ref": "#/definitions/nextflow input-output arguments"
+    }
+]
+}
--- a/target/nextflow/workflows/htrnaseq/.config.vsh.yaml
+++ b/target/nextflow/workflows/htrnaseq/.config.vsh.yaml
@@ -80,6 +80,17 @@ argument_groups:
    direction: "output"
    multiple: true
    multiple_sep: ";"
+  - type: "file"
+    name: "--nrReadsNrGenesPerChrom"
+    info: null
+    default:
+    - "nrReadsNrGenesPerChrom.txt"
+    must_exist: true
+    create_parent: true
+    required: true
+    direction: "output"
+    multiple: false
+    multiple_sep: ";"
 resources:
 - type: "nextflow_script"
  path: "main.nf"
@@ -94,6 +105,12 @@ requirements:
  commands:
  - "ps"
 dependencies:
+- name: "stats/generate_pool_statistics"
+  repository:
+    type: "local"
+- name: "stats/generate_well_statistics"
+  repository:
+    type: "local"
 - name: "workflows/well_demultiplex"
  repository:
    type: "local"
@@ -198,9 +215,11 @@ build_info:
  output: "target/nextflow/workflows/htrnaseq"
  executable: "target/nextflow/workflows/htrnaseq/main.nf"
  viash_version: "0.9.0-RC7"
-  git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
+  git_commit: "b98f6367d672368af134843711a46d3b53717187"
  git_remote: "https://github.com/viash-hub/htrnaseq"
  dependencies:
+  - "target/nextflow/stats/generate_pool_statistics"
+  - "target/nextflow/stats/generate_well_statistics"
  - "target/nextflow/workflows/well_demultiplex"
  - "target/nextflow/workflows/parallel_map_wf"
  - "target/nextflow/workflows/utils/groupWells"
--- a/target/nextflow/workflows/htrnaseq/main.nf
+++ b/target/nextflow/workflows/htrnaseq/main.nf
@@ -2893,6 +2893,19 @@ meta = [
          "direction" : "output",
          "multiple" : true,
          "multiple_sep" : ";"
+        },
+        {
+          "type" : "file",
+          "name" : "--nrReadsNrGenesPerChrom",
+          "default" : [
+            "nrReadsNrGenesPerChrom.txt"
+          ],
+          "must_exist" : true,
+          "create_parent" : true,
+          "required" : true,
+          "direction" : "output",
+          "multiple" : false,
+          "multiple_sep" : ";"
        }
      ]
    }
@@ -2917,6 +2930,18 @@ meta = [
    ]
  },
  "dependencies" : [
+    {
+      "name" : "stats/generate_pool_statistics",
+      "repository" : {
+        "type" : "local"
+      }
+    },
+    {
+      "name" : "stats/generate_well_statistics",
+      "repository" : {
+        "type" : "local"
+      }
+    },
    {
      "name" : "workflows/well_demultiplex",
      "repository" : {
@@ -3054,7 +3079,7 @@ meta = [
    "engine" : "native|native",
    "output" : "target/nextflow/workflows/htrnaseq",
    "viash_version" : "0.9.0-RC7",
-    "git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
+    "git_commit" : "b98f6367d672368af134843711a46d3b53717187",
    "git_remote" : "https://github.com/viash-hub/htrnaseq"
  },
  "package_config" : {
@@ -3090,6 +3115,8 @@ meta = [

 // resolve dependencies dependencies (if any)
 meta["root_dir"] = getRootDir()
+include { generate_pool_statistics } from "${meta.resources_dir}/../../../nextflow/stats/generate_pool_statistics/main.nf"
+include { generate_well_statistics } from "${meta.resources_dir}/../../../nextflow/stats/generate_well_statistics/main.nf"
 include { well_demultiplex } from "${meta.resources_dir}/../../../nextflow/workflows/well_demultiplex/main.nf"
 include { parallel_map_wf } from "${meta.resources_dir}/../../../nextflow/workflows/parallel_map_wf/main.nf"
 include { groupWells } from "${meta.resources_dir}/../../../nextflow/workflows/utils/groupWells/main.nf"
@@ -3167,12 +3194,47 @@ workflow run_wf {
          state + ["star_output": result.output]
        },
      )
+      | generate_well_statistics.run(
+        fromState: { id, state ->
+          [
+            "input": state.star_output.resolve('Aligned.sortedByCoord.out.bam'),
+            "barcode": state.barcode,
+          ]
+        },
+        toState: [
+          "nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
+          "nrReadsNrUMIsPerCB": "nrReadsNrUMIsPerCB",
+        ]
+      )
+      | map {id, state -> 
+        [state.pool, id, state]
+      }
+      | groupTuple(by: 0, sort: "hash")
+      | map {id, well_ids, states ->
+        def collected_state = [
+          "fastq_output_r1": states.collect{it.fastq_output_r1[0]},
+          "fastq_output_r2": states.collect{it.fastq_output_r2[0]},
+          "nrReadsNrGenesPerChrom": states.collect{it.nrReadsNrGenesPerChrom},
+        ]
+        def newState = states[0] + collected_state
+        [id, newState]
+      }
+      | generate_pool_statistics.run(
+        fromState: [
+          "nrReadsNrGenesPerChrom": "nrReadsNrGenesPerChrom",
+        ],
+        toState: {id, result, state -> 
+          state + ["nrReadsNrGenesPerChrom": result.nrReadsNrGenesPerChromPool]
+        }
+      )
      | niceView()
-      | setState(["star_output", "fastq_output_r1", "fastq_output_r2", "star_output"])
-      
-      //| niceView()
-      //
-      //| setState( [ "output": "out" ] )
+      | setState([
+        "star_output", 
+        "fastq_output_r1",
+        "fastq_output_r2",
+        "star_output",
+        "nrReadsNrGenesPerChrom",
+      ])

  emit:
    output_ch
--- a/target/nextflow/workflows/htrnaseq/nextflow_schema.json
+++ b/target/nextflow/workflows/htrnaseq/nextflow_schema.json
@@ -96,6 +96,17 @@
            }
    

+        ,
+                "nrReadsNrGenesPerChrom": {
+                "type":
+                "string",
+                "description": "Type: `file`, required, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. ",
+                "help_text": "Type: `file`, required, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. "
+            ,
+                "default": "$id.$key.nrReadsNrGenesPerChrom.txt"
+            }
+    
+
 }
 },
    
--- a/target/nextflow/workflows/parallel_map_wf/.config.vsh.yaml
+++ b/target/nextflow/workflows/parallel_map_wf/.config.vsh.yaml
@@ -52,7 +52,7 @@ argument_groups:
    create_parent: true
    required: true
    direction: "output"
-    multiple: true
+    multiple: false
    multiple_sep: ";"
 resources:
 - type: "nextflow_script"
@@ -161,7 +161,7 @@ build_info:
  output: "target/nextflow/workflows/parallel_map_wf"
  executable: "target/nextflow/workflows/parallel_map_wf/main.nf"
  viash_version: "0.9.0-RC7"
-  git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
+  git_commit: "b98f6367d672368af134843711a46d3b53717187"
  git_remote: "https://github.com/viash-hub/htrnaseq"
  dependencies:
  - "target/nextflow/parallel_map"
--- a/target/nextflow/workflows/parallel_map_wf/main.nf
+++ b/target/nextflow/workflows/parallel_map_wf/main.nf
@@ -2858,7 +2858,7 @@ meta = [
          "create_parent" : true,
          "required" : true,
          "direction" : "output",
-          "multiple" : true,
+          "multiple" : false,
          "multiple_sep" : ";"
        }
      ]
@@ -2996,7 +2996,7 @@ meta = [
    "engine" : "native|native",
    "output" : "target/nextflow/workflows/parallel_map_wf",
    "viash_version" : "0.9.0-RC7",
-    "git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
+    "git_commit" : "b98f6367d672368af134843711a46d3b53717187",
    "git_remote" : "https://github.com/viash-hub/htrnaseq"
  },
  "package_config" : {
@@ -3042,8 +3042,7 @@ workflow run_wf {
    input_ch

    main:
-    output_ch = input_ch
-      | map {id, state -> [id, state + ["orig_id": id]]}
+    pool_ch = input_ch
      | groupWells.run(
        fromState: { id, state ->
          [
@@ -3058,7 +3057,6 @@ workflow run_wf {
            "wells": result.wells,
            "input_r1": result.output_r1,
            "input_r2": result.output_r2,
-            "_meta": ["join_id": state.orig_id]
          ]
        }
      )
@@ -3072,7 +3070,7 @@ workflow run_wf {
           "pool": state.pool,
           "wellBarcodesLength": 10,
           "umiLength": 10,
-           "output": state.output[0],
+           "output": state.output,
         ]
        },
        toState: { id, result, state ->
@@ -3082,8 +3080,33 @@ workflow run_wf {
        },
        directives: [label: ["midmem", "midcpu"]]
      )
-      | setState(["output", "_meta"])
-      
+      | setState(["output"])
+
+    input_join_ch = input_ch
+      | map {id, state ->
+        [state.pool, id, state]
+      }
+    // Pair every well with the one STAR output directory of its pool whose barcodes.tsv holds the well's barcode.
+    output_ch = input_join_ch.combine(pool_ch, by: 0)
+      | map {pool, well_id, state_well, state_pool ->
+        // `def` keeps this closure-local; an undeclared name is a script-global shared between invocations.
+        def well_output = state_pool.output.findAll{star_output_dir ->
+          def barcodes_list = []
+          def barcode_file_regex = ~/.*\/raw\/barcodes\.tsv$/
+          star_output_dir.eachFileRecurse{barcode_file ->
+            if (barcode_file =~ barcode_file_regex) {
+              assert barcode_file.countLines() == 1, "Expected only one barcode in a single STAR output."
+              barcodes_list.add(barcode_file.text.trim())
+            }
+          }
+          assert barcodes_list.size() == 1, "Exactly one file should have matched the barcodes file regex (found: $barcodes_list)."
+          return barcodes_list[0] == state_well.barcode
+        }
+        assert well_output.size() == 1, "Expected exactly one mapping output for barcode '${state_well.barcode}' (found: ${well_output.size()})."
+        [well_id, ["output": well_output[0]]]
+      }
+
+
    emit:
    output_ch
 }
--- a/target/nextflow/workflows/parallel_map_wf/nextflow_schema.json
+++ b/target/nextflow/workflows/parallel_map_wf/nextflow_schema.json
@@ -67,10 +67,10 @@
                "output": {
                "type":
                "string",
-                "description": "Type: List of `file`, required, default: `$id.$key.output_*.output_*`, multiple_sep: `\";\"`. ",
-                "help_text": "Type: List of `file`, required, default: `$id.$key.output_*.output_*`, multiple_sep: `\";\"`. "
+                "description": "Type: `file`, required, default: `$id.$key.output.output`. ",
+                "help_text": "Type: `file`, required, default: `$id.$key.output.output`. "
            ,
-                "default": "$id.$key.output_*.output_*"
+                "default": "$id.$key.output.output"
            }
    

--- a/target/nextflow/workflows/utils/groupWells/.config.vsh.yaml
+++ b/target/nextflow/workflows/utils/groupWells/.config.vsh.yaml
@@ -171,7 +171,7 @@ build_info:
  output: "target/nextflow/workflows/utils/groupWells"
  executable: "target/nextflow/workflows/utils/groupWells/main.nf"
  viash_version: "0.9.0-RC7"
-  git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
+  git_commit: "b98f6367d672368af134843711a46d3b53717187"
  git_remote: "https://github.com/viash-hub/htrnaseq"
 package_config:
  name: "htrnaseq"
--- a/target/nextflow/workflows/utils/groupWells/main.nf
+++ b/target/nextflow/workflows/utils/groupWells/main.nf
@@ -3007,7 +3007,7 @@ meta = [
    "engine" : "native",
    "output" : "target/nextflow/workflows/utils/groupWells",
    "viash_version" : "0.9.0-RC7",
-    "git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
+    "git_commit" : "b98f6367d672368af134843711a46d3b53717187",
    "git_remote" : "https://github.com/viash-hub/htrnaseq"
  },
  "package_config" : {
--- a/target/nextflow/workflows/well_demultiplex/.config.vsh.yaml
+++ b/target/nextflow/workflows/well_demultiplex/.config.vsh.yaml
@@ -197,7 +197,7 @@ build_info:
  output: "target/nextflow/workflows/well_demultiplex"
  executable: "target/nextflow/workflows/well_demultiplex/main.nf"
  viash_version: "0.9.0-RC7"
-  git_commit: "21831c2104098ecce57aa9b372e49f865296cc48"
+  git_commit: "b98f6367d672368af134843711a46d3b53717187"
  git_remote: "https://github.com/viash-hub/htrnaseq"
  dependencies:
  - "target/dependencies/vsh/vsh/biobox/v0.1.0/nextflow/cutadapt"
--- a/target/nextflow/workflows/well_demultiplex/main.nf
+++ b/target/nextflow/workflows/well_demultiplex/main.nf
@@ -3044,7 +3044,7 @@ meta = [
    "engine" : "native|native",
    "output" : "target/nextflow/workflows/well_demultiplex",
    "viash_version" : "0.9.0-RC7",
-    "git_commit" : "21831c2104098ecce57aa9b372e49f865296cc48",
+    "git_commit" : "b98f6367d672368af134843711a46d3b53717187",
    "git_remote" : "https://github.com/viash-hub/htrnaseq"
  },
  "package_config" : {