Build branch main with version main (82647a4)

Build pipeline: viash-hub.htrnaseq.main-8kbhw

Source commit: 82647a421d

Source message: Assert that the Well ID matches the required format (#22)
This commit is contained in:
CI
2024-12-17 15:30:58 +00:00
parent f2ff92c6ac
commit 554d5253fe
142 changed files with 12149 additions and 384 deletions

View File

@@ -1,6 +1,21 @@
name: "combine_star_logs"
namespace: "stats"
version: "main"
authors:
- name: "Dries Schaumont"
roles:
- "author"
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Arguments"
arguments:
@@ -183,8 +198,8 @@ build_info:
output: "target/executable/stats/combine_star_logs"
executable: "target/executable/stats/combine_star_logs/combine_star_logs"
viash_version: "0.9.0"
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
git_commit: "82647a421dae521a9563f7f02050f13a1319eb4a"
git_remote: "https://x-access-token:ghs_GvoC19gNBNw8DS3yDc8aa44laHZP4K2GBiY3@github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"

View File

@@ -10,6 +10,9 @@
# authors of this component should specify the license in the header of such
# files, or include a separate license file detailing the licenses of all included
# files.
#
# Component authors:
# * Dries Schaumont (author, maintainer)
set -e
@@ -485,10 +488,11 @@ RUN apt-get update && \
RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "pandas"
LABEL org.opencontainers.image.authors="Dries Schaumont"
LABEL org.opencontainers.image.description="Companion container for running component stats combine_star_logs"
LABEL org.opencontainers.image.created="2024-11-05T15:39:41Z"
LABEL org.opencontainers.image.created="2024-12-17T13:58:22Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="65dd41d8b1b4a307735c72320c96c0880c75f17f"
LABEL org.opencontainers.image.revision="82647a421dae521a9563f7f02050f13a1319eb4a"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -70,11 +70,14 @@ profiles {
local {
// This config is for local processing.
process {
withName: ".*parallel_map_process" {
maxForks = 1
}
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 12 }
withLabel: highcpu { cpus = 8 }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }

View File

@@ -1,6 +1,33 @@
name: "generate_pool_statistics"
namespace: "stats"
version: "main"
authors:
- name: "Dries Schaumont"
roles:
- "author"
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
- name: "Marijke Van Moerbeke"
roles:
- "contributor"
info:
links:
github: "mvanmoerbeke"
orcid: "0000-0002-3097-5621"
linkedin: "marijke-van-moerbeke-84303a34"
organizations:
- name: "OpenAnalytics"
href: "https://www.openanalytics.eu"
role: "Statistical Consultant"
argument_groups:
- name: "Arguments"
arguments:
@@ -155,8 +182,8 @@ build_info:
output: "target/executable/stats/generate_pool_statistics"
executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics"
viash_version: "0.9.0"
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
git_commit: "82647a421dae521a9563f7f02050f13a1319eb4a"
git_remote: "https://x-access-token:ghs_GvoC19gNBNw8DS3yDc8aa44laHZP4K2GBiY3@github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"

View File

@@ -10,6 +10,10 @@
# authors of this component should specify the license in the header of such
# files, or include a separate license file detailing the licenses of all included
# files.
#
# Component authors:
# * Dries Schaumont (author, maintainer)
# * Marijke Van Moerbeke (contributor)
set -e
@@ -477,10 +481,11 @@ RUN apt-get update && \
RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "pandas"
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
LABEL org.opencontainers.image.description="Companion container for running component stats generate_pool_statistics"
LABEL org.opencontainers.image.created="2024-11-05T15:39:43Z"
LABEL org.opencontainers.image.created="2024-12-17T13:58:22Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="65dd41d8b1b4a307735c72320c96c0880c75f17f"
LABEL org.opencontainers.image.revision="82647a421dae521a9563f7f02050f13a1319eb4a"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER
@@ -1073,6 +1078,8 @@ dep = {
### VIASH END
INDEX_COL = ["WellBC", "WellID"]
if __name__ == "__main__":
#########
# nrReadsNrGenesPerChrom file
@@ -1082,17 +1089,18 @@ if __name__ == "__main__":
nr_reads_nr_genes_wells.append(pd.read_csv(nr_reads_nr_genes_file,
header=0, delimiter="\\t",
dtype={"WellBC": pd.StringDtype(),
"WellID": pd.StringDtype(),
"Chr": pd.StringDtype(),
"NumberOfReads": pd.UInt64Dtype(),
"NumberOfGenes": pd.UInt64Dtype()}))
nr_reads_nr_genes_pool = pd.concat(nr_reads_nr_genes_wells, ignore_index=True,)
total_nr_reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(index="WellBC", columns="Chr",
total_nr_reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(index=INDEX_COL, columns="Chr",
values=["NumberOfReads"], fill_value=0,
aggfunc="sum").droplevel(0, axis=1)
total_nr_reads_per_chromosome.columns.name = None
##### Total number of genes from all chromosomes
total_nr_genes = nr_reads_nr_genes_pool.loc[:,['WellBC', 'NumberOfGenes']].groupby("WellBC").sum()
total_nr_genes = nr_reads_nr_genes_pool.loc[:, INDEX_COL + ['NumberOfGenes']].groupby(["WellBC", "WellID"]).sum()
##### Total read counts per well, summed across all chromosomes
total_sum_of_reads = total_nr_reads_per_chromosome.sum(numeric_only=True, axis=1)
@@ -1141,10 +1149,10 @@ if __name__ == "__main__":
**cols_to_add
)
total_nr_reads_per_chromosome.reset_index(names="WellBC")\\
total_nr_reads_per_chromosome.reset_index(names=INDEX_COL)\\
.to_csv(par["nrReadsNrGenesPerChromPool"], sep="\\t",
header=True, index=False, float_format="%g",
columns=("WellBC",) + tuple(chromosome_names) + tuple(cols_to_add.keys())
columns=tuple(INDEX_COL) + tuple(chromosome_names) + tuple(cols_to_add.keys())
)
VIASHMAIN
python -B "\$tempscript" &

View File

@@ -70,11 +70,14 @@ profiles {
local {
// This config is for local processing.
process {
withName: ".*parallel_map_process" {
maxForks = 1
}
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 12 }
withLabel: highcpu { cpus = 8 }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }

View File

@@ -1,6 +1,33 @@
name: "generate_well_statistics"
namespace: "stats"
version: "main"
authors:
- name: "Dries Schaumont"
roles:
- "author"
- "maintainer"
info:
links:
email: "dries@data-intuitive.com"
github: "DriesSchaumont"
orcid: "0000-0002-4389-0440"
linkedin: "dries-schaumont"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
- name: "Marijke Van Moerbeke"
roles:
- "contributor"
info:
links:
github: "mvanmoerbeke"
orcid: "0000-0002-3097-5621"
linkedin: "marijke-van-moerbeke-84303a34"
organizations:
- name: "OpenAnalytics"
href: "https://www.openanalytics.eu"
role: "Statistical Consultant"
argument_groups:
- name: "Arguments"
arguments:
@@ -25,6 +52,15 @@ argument_groups:
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--well_id"
description: "ID of this well. Only used to add a metadata column to the output\
\ files.\n"
info: null
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--processedBAMFile"
description: "Path to a .tsv file listing, per read in the BAM file,\nthe value\
@@ -226,8 +262,8 @@ build_info:
output: "target/executable/stats/generate_well_statistics"
executable: "target/executable/stats/generate_well_statistics/generate_well_statistics"
viash_version: "0.9.0"
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
git_commit: "82647a421dae521a9563f7f02050f13a1319eb4a"
git_remote: "https://x-access-token:ghs_GvoC19gNBNw8DS3yDc8aa44laHZP4K2GBiY3@github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"
version: "main"

View File

@@ -10,6 +10,10 @@
# authors of this component should specify the license in the header of such
# files, or include a separate license file detailing the licenses of all included
# files.
#
# Component authors:
# * Dries Schaumont (author, maintainer)
# * Marijke Van Moerbeke (contributor)
set -e
@@ -187,6 +191,10 @@ function ViashHelp {
echo " metadata"
echo " column to all output files."
echo ""
echo " --well_id"
echo " type: string, required parameter"
echo " ID of this well. Only used to add a metadata column to the output files."
echo ""
echo " --processedBAMFile"
echo " type: file, output, file must exist"
echo " default: processedBamFile.txt"
@@ -510,10 +518,11 @@ RUN apt-get update && \
RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "pysam" "pandas"
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
LABEL org.opencontainers.image.description="Companion container for running component stats generate_well_statistics"
LABEL org.opencontainers.image.created="2024-11-05T15:39:43Z"
LABEL org.opencontainers.image.created="2024-12-17T13:58:23Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="65dd41d8b1b4a307735c72320c96c0880c75f17f"
LABEL org.opencontainers.image.revision="82647a421dae521a9563f7f02050f13a1319eb4a"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER
@@ -675,6 +684,17 @@ while [[ $# -gt 0 ]]; do
VIASH_PAR_BARCODE=$(ViashRemoveFlags "$1")
shift 1
;;
--well_id)
[ -n "$VIASH_PAR_WELL_ID" ] && ViashError Bad arguments for option \'--well_id\': \'$VIASH_PAR_WELL_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_WELL_ID="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --well_id. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--well_id=*)
[ -n "$VIASH_PAR_WELL_ID" ] && ViashError Bad arguments for option \'--well_id=*\': \'$VIASH_PAR_WELL_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_WELL_ID=$(ViashRemoveFlags "$1")
shift 1
;;
--processedBAMFile)
[ -n "$VIASH_PAR_PROCESSEDBAMFILE" ] && ViashError Bad arguments for option \'--processedBAMFile\': \'$VIASH_PAR_PROCESSEDBAMFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_PROCESSEDBAMFILE="$2"
@@ -906,6 +926,10 @@ if [ -z ${VIASH_PAR_BARCODE+x} ]; then
ViashError '--barcode' is a required argument. Use "--help" to get more information on the parameters.
exit 1
fi
if [ -z ${VIASH_PAR_WELL_ID+x} ]; then
ViashError '--well_id' is a required argument. Use "--help" to get more information on the parameters.
exit 1
fi
if [ -z ${VIASH_META_NAME+x} ]; then
ViashError 'name' is a required argument. Use "--help" to get more information on the parameters.
exit 1
@@ -1164,6 +1188,7 @@ import logging
par = {
'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'barcode': $( if [ ! -z ${VIASH_PAR_BARCODE+x} ]; then echo "r'${VIASH_PAR_BARCODE//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'well_id': $( if [ ! -z ${VIASH_PAR_WELL_ID+x} ]; then echo "r'${VIASH_PAR_WELL_ID//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'processedBAMFile': $( if [ ! -z ${VIASH_PAR_PROCESSEDBAMFILE+x} ]; then echo "r'${VIASH_PAR_PROCESSEDBAMFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'nrReadsNrGenesPerChrom': $( if [ ! -z ${VIASH_PAR_NRREADSNRGENESPERCHROM+x} ]; then echo "r'${VIASH_PAR_NRREADSNRGENESPERCHROM//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'nrReadsNrUMIsPerCB': $( if [ ! -z ${VIASH_PAR_NRREADSNRUMISPERCB+x} ]; then echo "r'${VIASH_PAR_NRREADSNRUMISPERCB//\'/\'\"\'\"r\'}'"; else echo None; fi ),
@@ -1218,11 +1243,11 @@ if __name__ == "__main__":
columns=tags_selection)
tag_dataframe_to_write = tag_dataframe.copy()
logger.info("Done reading BAM file. Found %i entries", tag_dataframe.shape[0])
tag_dataframe.assign(WellBC=par["barcode"])\\
tag_dataframe.assign(WellBC=par["barcode"], WellID=par["well_id"])\\
.reset_index(names="Chr")\\
.to_csv(par["processedBAMFile"], sep="\\t", na_rep="",
header=True, index=False,
columns=("WellBC", "Chr") + tags_selection)
columns=("WellBC", "WellID", "Chr") + tags_selection)
logger.info("Constructing of dataframe done.")
# Number of genes that had a read mapped to them per chromosome,
# and the number of reads mapped to those genes per chromosome.
@@ -1232,19 +1257,19 @@ if __name__ == "__main__":
)
logger.info("Done calculating number of reads per gene and per chromesome. Writing to %s",
par['nrReadsNrGenesPerChrom'])
nr_reads_nr_genes.reset_index(names="Chr").assign(WellBC=par["barcode"])\\
nr_reads_nr_genes.reset_index(names="Chr").assign(WellBC=par["barcode"], WellID=par["well_id"])\\
.to_csv(par["nrReadsNrGenesPerChrom"], sep="\\t",
header=True, index=False,
columns=("WellBC", "Chr", "NumberOfReads", "NumberOfGenes"))
columns=("WellBC", "WellID", "Chr", "NumberOfReads", "NumberOfGenes"))
# Number of reads mapped to the reference, grouped by UMI
nr_read_per_umi = tag_dataframe.groupby('UB').size()\\
.drop("", errors="ignore").sort_values(ascending=False).head(100)
nr_read_per_umi_df = nr_read_per_umi.to_frame(name="N")
logger.info("Done calculating number of mapped reads per UMI, writing to %s", par["umiFreqTop"])
nr_read_per_umi_df.assign(WellBC=par["barcode"]).reset_index(names="UB")\\
nr_read_per_umi_df.assign(WellBC=par["barcode"], WellID=par["well_id"]).reset_index(names="UB")\\
.to_csv(par["umiFreqTop"], header=True, sep="\\t",
index=False, columns=("WellBC", "UB", "N"))
index=False, columns=("WellBC", "WellID", "UB", "N"))
# Total number of mapped reads and total number of UMIs (not grouped per chromosome)
nr_reads_and_umi_per_barcode = tag_dataframe.groupby(by="CB").agg(
@@ -1253,9 +1278,9 @@ if __name__ == "__main__":
)
logger.info("Done calculating number of mapped reads and number of UMIs per Cell Barcode, writing to %s",
par["nrReadsNrUMIsPerCB"])
nr_reads_and_umi_per_barcode.assign(WellBC=par["barcode"]).reset_index(names="CB")\\
nr_reads_and_umi_per_barcode.assign(WellBC=par["barcode"], WellID=par["well_id"]).reset_index(names="CB")\\
.to_csv(par["nrReadsNrUMIsPerCB"], sep="\\t", header=True,
index=False, columns=("WellBC", "CB", "NumberOfReads", "nrUMIs"))
index=False, columns=("WellBC", "WellID", "CB", "NumberOfReads", "nrUMIs"))
logger.info("Finished!")
VIASHMAIN
python -B "\$tempscript" &

View File

@@ -70,11 +70,14 @@ profiles {
local {
// This config is for local processing.
process {
withName: ".*parallel_map_process" {
maxForks = 1
}
maxMemory = 25.GB
withLabel: verylowcpu { cpus = 2 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 6 }
withLabel: highcpu { cpus = 12 }
withLabel: highcpu { cpus = 8 }
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }