Build branch main with version main (82647a4)
Build pipeline: viash-hub.htrnaseq.main-8kbhw
Source commit: 82647a421d
Source message: Assert that the Well ID matches the required format (#22)
This commit is contained in:
@@ -1,6 +1,21 @@
|
||||
name: "combine_star_logs"
|
||||
namespace: "stats"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
@@ -183,8 +198,8 @@ build_info:
|
||||
output: "target/executable/stats/combine_star_logs"
|
||||
executable: "target/executable/stats/combine_star_logs/combine_star_logs"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
git_commit: "82647a421dae521a9563f7f02050f13a1319eb4a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
|
||||
@@ -10,6 +10,9 @@
|
||||
# authors of this component should specify the license in the header of such
|
||||
# files, or include a separate license file detailing the licenses of all included
|
||||
# files.
|
||||
#
|
||||
# Component authors:
|
||||
# * Dries Schaumont (author, maintainer)
|
||||
|
||||
set -e
|
||||
|
||||
@@ -485,10 +488,11 @@ RUN apt-get update && \
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install --upgrade --no-cache-dir "pandas"
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component stats combine_star_logs"
|
||||
LABEL org.opencontainers.image.created="2024-11-05T15:39:41Z"
|
||||
LABEL org.opencontainers.image.created="2024-12-17T13:58:22Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
LABEL org.opencontainers.image.revision="82647a421dae521a9563f7f02050f13a1319eb4a"
|
||||
LABEL org.opencontainers.image.version="main"
|
||||
|
||||
VIASHDOCKER
|
||||
|
||||
@@ -70,11 +70,14 @@ profiles {
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 12 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
|
||||
@@ -1,6 +1,33 @@
|
||||
name: "generate_pool_statistics"
|
||||
namespace: "stats"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Marijke Van Moerbeke"
|
||||
roles:
|
||||
- "contributor"
|
||||
info:
|
||||
links:
|
||||
github: "mvanmoerbeke"
|
||||
orcid: "0000-0002-3097-5621"
|
||||
linkedin: "marijke-van-moerbeke-84303a34"
|
||||
organizations:
|
||||
- name: "OpenAnalytics"
|
||||
href: "https://www.openanalytics.eu"
|
||||
role: "Statistical Consultant"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
@@ -155,8 +182,8 @@ build_info:
|
||||
output: "target/executable/stats/generate_pool_statistics"
|
||||
executable: "target/executable/stats/generate_pool_statistics/generate_pool_statistics"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
git_commit: "82647a421dae521a9563f7f02050f13a1319eb4a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
|
||||
@@ -10,6 +10,10 @@
|
||||
# authors of this component should specify the license in the header of such
|
||||
# files, or include a separate license file detailing the licenses of all included
|
||||
# files.
|
||||
#
|
||||
# Component authors:
|
||||
# * Dries Schaumont (author, maintainer)
|
||||
# * Marijke Van Moerbeke (contributor)
|
||||
|
||||
set -e
|
||||
|
||||
@@ -477,10 +481,11 @@ RUN apt-get update && \
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install --upgrade --no-cache-dir "pandas"
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component stats generate_pool_statistics"
|
||||
LABEL org.opencontainers.image.created="2024-11-05T15:39:43Z"
|
||||
LABEL org.opencontainers.image.created="2024-12-17T13:58:22Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
LABEL org.opencontainers.image.revision="82647a421dae521a9563f7f02050f13a1319eb4a"
|
||||
LABEL org.opencontainers.image.version="main"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -1073,6 +1078,8 @@ dep = {
|
||||
|
||||
### VIASH END
|
||||
|
||||
INDEX_COL = ["WellBC", "WellID"]
|
||||
|
||||
if __name__ == "__main__":
|
||||
#########
|
||||
# nrReadsNrGenesPerChrom file
|
||||
@@ -1082,17 +1089,18 @@ if __name__ == "__main__":
|
||||
nr_reads_nr_genes_wells.append(pd.read_csv(nr_reads_nr_genes_file,
|
||||
header=0, delimiter="\\t",
|
||||
dtype={"WellBC": pd.StringDtype(),
|
||||
"WellID": pd.StringDtype(),
|
||||
"Chr": pd.StringDtype(),
|
||||
"NumberOfReads": pd.UInt64Dtype(),
|
||||
"NumberOfGenes": pd.UInt64Dtype()}))
|
||||
nr_reads_nr_genes_pool = pd.concat(nr_reads_nr_genes_wells, ignore_index=True,)
|
||||
total_nr_reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(index="WellBC", columns="Chr",
|
||||
total_nr_reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(index=INDEX_COL, columns="Chr",
|
||||
values=["NumberOfReads"], fill_value=0,
|
||||
aggfunc="sum").droplevel(0, axis=1)
|
||||
total_nr_reads_per_chromosome.columns.name = None
|
||||
|
||||
##### Total number of genes from all chromosomes
|
||||
total_nr_genes = nr_reads_nr_genes_pool.loc[:,['WellBC', 'NumberOfGenes']].groupby("WellBC").sum()
|
||||
total_nr_genes = nr_reads_nr_genes_pool.loc[:, INDEX_COL + ['NumberOfGenes']].groupby(["WellBC", "WellID"]).sum()
|
||||
|
||||
##### Total number of reads, summed across all chromosomes
|
||||
total_sum_of_reads = total_nr_reads_per_chromosome.sum(numeric_only=True, axis=1)
|
||||
@@ -1141,10 +1149,10 @@ if __name__ == "__main__":
|
||||
**cols_to_add
|
||||
)
|
||||
|
||||
total_nr_reads_per_chromosome.reset_index(names="WellBC")\\
|
||||
total_nr_reads_per_chromosome.reset_index(names=INDEX_COL)\\
|
||||
.to_csv(par["nrReadsNrGenesPerChromPool"], sep="\\t",
|
||||
header=True, index=False, float_format="%g",
|
||||
columns=("WellBC",) + tuple(chromosome_names) + tuple(cols_to_add.keys())
|
||||
columns=tuple(INDEX_COL) + tuple(chromosome_names) + tuple(cols_to_add.keys())
|
||||
)
|
||||
VIASHMAIN
|
||||
python -B "\$tempscript" &
|
||||
|
||||
@@ -70,11 +70,14 @@ profiles {
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 12 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
|
||||
@@ -1,6 +1,33 @@
|
||||
name: "generate_well_statistics"
|
||||
namespace: "stats"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Marijke Van Moerbeke"
|
||||
roles:
|
||||
- "contributor"
|
||||
info:
|
||||
links:
|
||||
github: "mvanmoerbeke"
|
||||
orcid: "0000-0002-3097-5621"
|
||||
linkedin: "marijke-van-moerbeke-84303a34"
|
||||
organizations:
|
||||
- name: "OpenAnalytics"
|
||||
href: "https://www.openanalytics.eu"
|
||||
role: "Statistical Consultant"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
@@ -25,6 +52,15 @@ argument_groups:
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--well_id"
|
||||
description: "ID of this well. Only used to add a metadata column to the output\
|
||||
\ files.\n"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--processedBAMFile"
|
||||
description: "Path to a .tsv file listing, per read in the BAM file,\nthe value\
|
||||
@@ -226,8 +262,8 @@ build_info:
|
||||
output: "target/executable/stats/generate_well_statistics"
|
||||
executable: "target/executable/stats/generate_well_statistics/generate_well_statistics"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
git_commit: "82647a421dae521a9563f7f02050f13a1319eb4a"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
|
||||
@@ -10,6 +10,10 @@
|
||||
# authors of this component should specify the license in the header of such
|
||||
# files, or include a separate license file detailing the licenses of all included
|
||||
# files.
|
||||
#
|
||||
# Component authors:
|
||||
# * Dries Schaumont (author, maintainer)
|
||||
# * Marijke Van Moerbeke (contributor)
|
||||
|
||||
set -e
|
||||
|
||||
@@ -187,6 +191,10 @@ function ViashHelp {
|
||||
echo " metadata"
|
||||
echo " column to all output files."
|
||||
echo ""
|
||||
echo " --well_id"
|
||||
echo " type: string, required parameter"
|
||||
echo " ID of this well. Only used to add a metadata column to the output files."
|
||||
echo ""
|
||||
echo " --processedBAMFile"
|
||||
echo " type: file, output, file must exist"
|
||||
echo " default: processedBamFile.txt"
|
||||
@@ -510,10 +518,11 @@ RUN apt-get update && \
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install --upgrade --no-cache-dir "pysam" "pandas"
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component stats generate_well_statistics"
|
||||
LABEL org.opencontainers.image.created="2024-11-05T15:39:43Z"
|
||||
LABEL org.opencontainers.image.created="2024-12-17T13:58:23Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
LABEL org.opencontainers.image.revision="65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
LABEL org.opencontainers.image.revision="82647a421dae521a9563f7f02050f13a1319eb4a"
|
||||
LABEL org.opencontainers.image.version="main"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -675,6 +684,17 @@ while [[ $# -gt 0 ]]; do
|
||||
VIASH_PAR_BARCODE=$(ViashRemoveFlags "$1")
|
||||
shift 1
|
||||
;;
|
||||
--well_id)
|
||||
[ -n "$VIASH_PAR_WELL_ID" ] && ViashError Bad arguments for option \'--well_id\': \'$VIASH_PAR_WELL_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
|
||||
VIASH_PAR_WELL_ID="$2"
|
||||
[ $# -lt 2 ] && ViashError Not enough arguments passed to --well_id. Use "--help" to get more information on the parameters. && exit 1
|
||||
shift 2
|
||||
;;
|
||||
--well_id=*)
|
||||
[ -n "$VIASH_PAR_WELL_ID" ] && ViashError Bad arguments for option \'--well_id=*\': \'$VIASH_PAR_WELL_ID\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
|
||||
VIASH_PAR_WELL_ID=$(ViashRemoveFlags "$1")
|
||||
shift 1
|
||||
;;
|
||||
--processedBAMFile)
|
||||
[ -n "$VIASH_PAR_PROCESSEDBAMFILE" ] && ViashError Bad arguments for option \'--processedBAMFile\': \'$VIASH_PAR_PROCESSEDBAMFILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
|
||||
VIASH_PAR_PROCESSEDBAMFILE="$2"
|
||||
@@ -906,6 +926,10 @@ if [ -z ${VIASH_PAR_BARCODE+x} ]; then
|
||||
ViashError '--barcode' is a required argument. Use "--help" to get more information on the parameters.
|
||||
exit 1
|
||||
fi
|
||||
if [ -z ${VIASH_PAR_WELL_ID+x} ]; then
|
||||
ViashError '--well_id' is a required argument. Use "--help" to get more information on the parameters.
|
||||
exit 1
|
||||
fi
|
||||
if [ -z ${VIASH_META_NAME+x} ]; then
|
||||
ViashError 'name' is a required argument. Use "--help" to get more information on the parameters.
|
||||
exit 1
|
||||
@@ -1164,6 +1188,7 @@ import logging
|
||||
par = {
|
||||
'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
||||
'barcode': $( if [ ! -z ${VIASH_PAR_BARCODE+x} ]; then echo "r'${VIASH_PAR_BARCODE//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
||||
'well_id': $( if [ ! -z ${VIASH_PAR_WELL_ID+x} ]; then echo "r'${VIASH_PAR_WELL_ID//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
||||
'processedBAMFile': $( if [ ! -z ${VIASH_PAR_PROCESSEDBAMFILE+x} ]; then echo "r'${VIASH_PAR_PROCESSEDBAMFILE//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
||||
'nrReadsNrGenesPerChrom': $( if [ ! -z ${VIASH_PAR_NRREADSNRGENESPERCHROM+x} ]; then echo "r'${VIASH_PAR_NRREADSNRGENESPERCHROM//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
||||
'nrReadsNrUMIsPerCB': $( if [ ! -z ${VIASH_PAR_NRREADSNRUMISPERCB+x} ]; then echo "r'${VIASH_PAR_NRREADSNRUMISPERCB//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
||||
@@ -1218,11 +1243,11 @@ if __name__ == "__main__":
|
||||
columns=tags_selection)
|
||||
tag_dataframe_to_write = tag_dataframe.copy()
|
||||
logger.info("Done reading BAM file. Found %i entries", tag_dataframe.shape[0])
|
||||
tag_dataframe.assign(WellBC=par["barcode"])\\
|
||||
tag_dataframe.assign(WellBC=par["barcode"], WellID=par["well_id"])\\
|
||||
.reset_index(names="Chr")\\
|
||||
.to_csv(par["processedBAMFile"], sep="\\t", na_rep="",
|
||||
header=True, index=False,
|
||||
columns=("WellBC", "Chr") + tags_selection)
|
||||
columns=("WellBC", "WellID", "Chr") + tags_selection)
|
||||
logger.info("Constructing of dataframe done.")
|
||||
# Number of genes that had a read mapped to them per chromosome,
|
||||
# and the number of reads mapped to those genes per chromosome.
|
||||
@@ -1232,19 +1257,19 @@ if __name__ == "__main__":
|
||||
)
|
||||
logger.info("Done calculating number of reads per gene and per chromesome. Writing to %s",
|
||||
par['nrReadsNrGenesPerChrom'])
|
||||
nr_reads_nr_genes.reset_index(names="Chr").assign(WellBC=par["barcode"])\\
|
||||
nr_reads_nr_genes.reset_index(names="Chr").assign(WellBC=par["barcode"], WellID=par["well_id"])\\
|
||||
.to_csv(par["nrReadsNrGenesPerChrom"], sep="\\t",
|
||||
header=True, index=False,
|
||||
columns=("WellBC", "Chr", "NumberOfReads", "NumberOfGenes"))
|
||||
columns=("WellBC", "WellID", "Chr", "NumberOfReads", "NumberOfGenes"))
|
||||
|
||||
# Number of reads mapped to the reference, grouped by UMI
|
||||
nr_read_per_umi = tag_dataframe.groupby('UB').size()\\
|
||||
.drop("", errors="ignore").sort_values(ascending=False).head(100)
|
||||
nr_read_per_umi_df = nr_read_per_umi.to_frame(name="N")
|
||||
logger.info("Done calculating number of mapped reads per UMI, writing to %s", par["umiFreqTop"])
|
||||
nr_read_per_umi_df.assign(WellBC=par["barcode"]).reset_index(names="UB")\\
|
||||
nr_read_per_umi_df.assign(WellBC=par["barcode"], WellID=par["well_id"]).reset_index(names="UB")\\
|
||||
.to_csv(par["umiFreqTop"], header=True, sep="\\t",
|
||||
index=False, columns=("WellBC", "UB", "N"))
|
||||
index=False, columns=("WellBC", "WellID", "UB", "N"))
|
||||
|
||||
# Total number of mapped reads and total number of UMIs (not grouped per chromosome)
|
||||
nr_reads_and_umi_per_barcode = tag_dataframe.groupby(by="CB").agg(
|
||||
@@ -1253,9 +1278,9 @@ if __name__ == "__main__":
|
||||
)
|
||||
logger.info("Done calculating number of mapped reads and number of UMIs per Cell Barcode, writing to %s",
|
||||
par["nrReadsNrUMIsPerCB"])
|
||||
nr_reads_and_umi_per_barcode.assign(WellBC=par["barcode"]).reset_index(names="CB")\\
|
||||
nr_reads_and_umi_per_barcode.assign(WellBC=par["barcode"], WellID=par["well_id"]).reset_index(names="CB")\\
|
||||
.to_csv(par["nrReadsNrUMIsPerCB"], sep="\\t", header=True,
|
||||
index=False, columns=("WellBC", "CB", "NumberOfReads", "nrUMIs"))
|
||||
index=False, columns=("WellBC", "WellID", "CB", "NumberOfReads", "nrUMIs"))
|
||||
logger.info("Finished!")
|
||||
VIASHMAIN
|
||||
python -B "\$tempscript" &
|
||||
|
||||
@@ -70,11 +70,14 @@ profiles {
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 12 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
|
||||
Reference in New Issue
Block a user