Build branch main with version main (82647a4)
Build pipeline: viash-hub.htrnaseq.main-8kbhw
Source commit: 82647a421d
Source message: Assert that the Well ID matches the required format (#22)
This commit is contained in:
@@ -1,6 +1,21 @@
|
||||
name: "combine_star_logs"
|
||||
namespace: "stats"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
@@ -183,8 +198,8 @@ build_info:
|
||||
output: "target/nextflow/stats/combine_star_logs"
|
||||
executable: "target/nextflow/stats/combine_star_logs/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
git_commit: "82647a421dae521a9563f7f02050f13a1319eb4a"
|
||||
git_remote: "https://x-access-token:ghs_GvoC19gNBNw8DS3yDc8aa44laHZP4K2GBiY3@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
|
||||
@@ -8,6 +8,9 @@
|
||||
// authors of this component should specify the license in the header of such
|
||||
// files, or include a separate license file detailing the licenses of all included
|
||||
// files.
|
||||
//
|
||||
// Component authors:
|
||||
// * Dries Schaumont (author, maintainer)
|
||||
|
||||
////////////////////////////
|
||||
// VDSL3 helper functions //
|
||||
@@ -2807,6 +2810,30 @@ meta = [
|
||||
"name" : "combine_star_logs",
|
||||
"namespace" : "stats",
|
||||
"version" : "main",
|
||||
"authors" : [
|
||||
{
|
||||
"name" : "Dries Schaumont",
|
||||
"roles" : [
|
||||
"author",
|
||||
"maintainer"
|
||||
],
|
||||
"info" : {
|
||||
"links" : {
|
||||
"email" : "dries@data-intuitive.com",
|
||||
"github" : "DriesSchaumont",
|
||||
"orcid" : "0000-0002-4389-0440",
|
||||
"linkedin" : "dries-schaumont"
|
||||
},
|
||||
"organizations" : [
|
||||
{
|
||||
"name" : "Data Intuitive",
|
||||
"href" : "https://www.data-intuitive.com",
|
||||
"role" : "Data Scientist"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"argument_groups" : [
|
||||
{
|
||||
"name" : "Arguments",
|
||||
@@ -3033,8 +3060,8 @@ meta = [
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/stats/combine_star_logs",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "65dd41d8b1b4a307735c72320c96c0880c75f17f",
|
||||
"git_remote" : "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
"git_commit" : "82647a421dae521a9563f7f02050f13a1319eb4a",
|
||||
"git_remote" : "https://x-access-token:ghs_GvoC19gNBNw8DS3yDc8aa44laHZP4K2GBiY3@github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "htrnaseq",
|
||||
|
||||
@@ -3,6 +3,7 @@ manifest {
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'main'
|
||||
author = 'Dries Schaumont'
|
||||
}
|
||||
|
||||
process.container = 'nextflow/bash:latest'
|
||||
|
||||
@@ -70,11 +70,14 @@ profiles {
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 12 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
|
||||
@@ -60,7 +60,7 @@
|
||||
"description": "Type: `file`, default: `$id.$key.output.txt`. Tab-delimited file describing for each barcode (as the rows), the metrics (as columns)\ngathered from the different input files",
|
||||
"help_text": "Type: `file`, default: `$id.$key.output.txt`. Tab-delimited file describing for each barcode (as the rows), the metrics (as columns)\ngathered from the different input files. \n"
|
||||
,
|
||||
"default": "$id.$key.output.txt"
|
||||
"default":"$id.$key.output.txt"
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,33 @@
|
||||
name: "generate_pool_statistics"
|
||||
namespace: "stats"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Marijke Van Moerbeke"
|
||||
roles:
|
||||
- "contributor"
|
||||
info:
|
||||
links:
|
||||
github: "mvanmoerbeke"
|
||||
orcid: "0000-0002-3097-5621"
|
||||
linkedin: "marijke-van-moerbeke-84303a34"
|
||||
organizations:
|
||||
- name: "OpenAnalytics"
|
||||
href: "https://www.openanalytics.eu"
|
||||
role: "Statistical Consultant"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
@@ -155,8 +182,8 @@ build_info:
|
||||
output: "target/nextflow/stats/generate_pool_statistics"
|
||||
executable: "target/nextflow/stats/generate_pool_statistics/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
git_commit: "82647a421dae521a9563f7f02050f13a1319eb4a"
|
||||
git_remote: "https://x-access-token:ghs_GvoC19gNBNw8DS3yDc8aa44laHZP4K2GBiY3@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
|
||||
@@ -8,6 +8,10 @@
|
||||
// authors of this component should specify the license in the header of such
|
||||
// files, or include a separate license file detailing the licenses of all included
|
||||
// files.
|
||||
//
|
||||
// Component authors:
|
||||
// * Dries Schaumont (author, maintainer)
|
||||
// * Marijke Van Moerbeke (contributor)
|
||||
|
||||
////////////////////////////
|
||||
// VDSL3 helper functions //
|
||||
@@ -2807,6 +2811,50 @@ meta = [
|
||||
"name" : "generate_pool_statistics",
|
||||
"namespace" : "stats",
|
||||
"version" : "main",
|
||||
"authors" : [
|
||||
{
|
||||
"name" : "Dries Schaumont",
|
||||
"roles" : [
|
||||
"author",
|
||||
"maintainer"
|
||||
],
|
||||
"info" : {
|
||||
"links" : {
|
||||
"email" : "dries@data-intuitive.com",
|
||||
"github" : "DriesSchaumont",
|
||||
"orcid" : "0000-0002-4389-0440",
|
||||
"linkedin" : "dries-schaumont"
|
||||
},
|
||||
"organizations" : [
|
||||
{
|
||||
"name" : "Data Intuitive",
|
||||
"href" : "https://www.data-intuitive.com",
|
||||
"role" : "Data Scientist"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name" : "Marijke Van Moerbeke",
|
||||
"roles" : [
|
||||
"contributor"
|
||||
],
|
||||
"info" : {
|
||||
"links" : {
|
||||
"github" : "mvanmoerbeke",
|
||||
"orcid" : "0000-0002-3097-5621",
|
||||
"linkedin" : "marijke-van-moerbeke-84303a34"
|
||||
},
|
||||
"organizations" : [
|
||||
{
|
||||
"name" : "OpenAnalytics",
|
||||
"href" : "https://www.openanalytics.eu",
|
||||
"role" : "Statistical Consultant"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"argument_groups" : [
|
||||
{
|
||||
"name" : "Arguments",
|
||||
@@ -2996,8 +3044,8 @@ meta = [
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/stats/generate_pool_statistics",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "65dd41d8b1b4a307735c72320c96c0880c75f17f",
|
||||
"git_remote" : "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
"git_commit" : "82647a421dae521a9563f7f02050f13a1319eb4a",
|
||||
"git_remote" : "https://x-access-token:ghs_GvoC19gNBNw8DS3yDc8aa44laHZP4K2GBiY3@github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "htrnaseq",
|
||||
@@ -3082,6 +3130,8 @@ dep = {
|
||||
|
||||
### VIASH END
|
||||
|
||||
INDEX_COL = ["WellBC", "WellID"]
|
||||
|
||||
if __name__ == "__main__":
|
||||
#########
|
||||
# nrReadsNrGenesPerChrom file
|
||||
@@ -3091,17 +3141,18 @@ if __name__ == "__main__":
|
||||
nr_reads_nr_genes_wells.append(pd.read_csv(nr_reads_nr_genes_file,
|
||||
header=0, delimiter="\\\\t",
|
||||
dtype={"WellBC": pd.StringDtype(),
|
||||
"WellID": pd.StringDtype(),
|
||||
"Chr": pd.StringDtype(),
|
||||
"NumberOfReads": pd.UInt64Dtype(),
|
||||
"NumberOfGenes": pd.UInt64Dtype()}))
|
||||
nr_reads_nr_genes_pool = pd.concat(nr_reads_nr_genes_wells, ignore_index=True,)
|
||||
total_nr_reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(index="WellBC", columns="Chr",
|
||||
total_nr_reads_per_chromosome = nr_reads_nr_genes_pool.pivot_table(index=INDEX_COL, columns="Chr",
|
||||
values=["NumberOfReads"], fill_value=0,
|
||||
aggfunc="sum").droplevel(0, axis=1)
|
||||
total_nr_reads_per_chromosome.columns.name = None
|
||||
|
||||
##### Total number of genes from all chromosomes
|
||||
total_nr_genes = nr_reads_nr_genes_pool.loc[:,['WellBC', 'NumberOfGenes']].groupby("WellBC").sum()
|
||||
total_nr_genes = nr_reads_nr_genes_pool.loc[:, INDEX_COL + ['NumberOfGenes']].groupby(["WellBC", "WellID"]).sum()
|
||||
|
||||
##### Total counts across (irrespective of chromosome)
|
||||
total_sum_of_reads = total_nr_reads_per_chromosome.sum(numeric_only=True, axis=1)
|
||||
@@ -3150,10 +3201,10 @@ if __name__ == "__main__":
|
||||
**cols_to_add
|
||||
)
|
||||
|
||||
total_nr_reads_per_chromosome.reset_index(names="WellBC")\\\\
|
||||
total_nr_reads_per_chromosome.reset_index(names=INDEX_COL)\\\\
|
||||
.to_csv(par["nrReadsNrGenesPerChromPool"], sep="\\\\t",
|
||||
header=True, index=False, float_format="%g",
|
||||
columns=("WellBC",) + tuple(chromosome_names) + tuple(cols_to_add.keys())
|
||||
columns=tuple(INDEX_COL) + tuple(chromosome_names) + tuple(cols_to_add.keys())
|
||||
)
|
||||
VIASHMAIN
|
||||
python -B "$tempscript"
|
||||
|
||||
@@ -3,6 +3,7 @@ manifest {
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'main'
|
||||
author = 'Dries Schaumont, Marijke Van Moerbeke'
|
||||
}
|
||||
|
||||
process.container = 'nextflow/bash:latest'
|
||||
|
||||
@@ -70,11 +70,14 @@ profiles {
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 12 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
"description": "Type: List of `file`, default: `processedBamFile_well1.tsv;processedBamfile_well2.tsv`, multiple_sep: `\";\"`. Path to an output file that contains a ",
|
||||
"help_text": "Type: List of `file`, default: `processedBamFile_well1.tsv;processedBamfile_well2.tsv`, multiple_sep: `\";\"`. Path to an output file that contains a .tsv formatted table describing\nper chromosome the number of reads that were mapped to that chromosome (NumberOfReads\ncolumn) and the number of genes on that chromosome that had at least one\nread mapped to it (NumberOfGenes).\n"
|
||||
,
|
||||
"default": "processedBamFile_well1.tsv;processedBamfile_well2.tsv"
|
||||
"default":"processedBamFile_well1.tsv;processedBamfile_well2.tsv"
|
||||
}
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
"description": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChromPool.txt`, example: `nrReadsNrGenesPerChrom.txt`. Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom files",
|
||||
"help_text": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChromPool.txt`, example: `nrReadsNrGenesPerChrom.txt`. Pivot table in tsv format of the combined input nrReadsNrGenesPerChrom files. Describes\nper chromosome (as columns) the number of reads, as well as the total number \nof reads per cell barcode and the percentage of nuclear, ERCC and mitochondrial\nreads.\n"
|
||||
,
|
||||
"default": "$id.$key.nrReadsNrGenesPerChromPool.txt"
|
||||
"default":"$id.$key.nrReadsNrGenesPerChromPool.txt"
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,33 @@
|
||||
name: "generate_well_statistics"
|
||||
namespace: "stats"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "author"
|
||||
- "maintainer"
|
||||
info:
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Marijke Van Moerbeke"
|
||||
roles:
|
||||
- "contributor"
|
||||
info:
|
||||
links:
|
||||
github: "mvanmoerbeke"
|
||||
orcid: "0000-0002-3097-5621"
|
||||
linkedin: "marijke-van-moerbeke-84303a34"
|
||||
organizations:
|
||||
- name: "OpenAnalytics"
|
||||
href: "https://www.openanalytics.eu"
|
||||
role: "Statistical Consultant"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
@@ -25,6 +52,15 @@ argument_groups:
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--well_id"
|
||||
description: "ID of this well. Only used to add a metadata column to the output\
|
||||
\ files.\n"
|
||||
info: null
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--processedBAMFile"
|
||||
description: "Path to a .tsv file listing, per read in the BAM file,\nthe value\
|
||||
@@ -226,8 +262,8 @@ build_info:
|
||||
output: "target/nextflow/stats/generate_well_statistics"
|
||||
executable: "target/nextflow/stats/generate_well_statistics/main.nf"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "65dd41d8b1b4a307735c72320c96c0880c75f17f"
|
||||
git_remote: "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
git_commit: "82647a421dae521a9563f7f02050f13a1319eb4a"
|
||||
git_remote: "https://x-access-token:ghs_GvoC19gNBNw8DS3yDc8aa44laHZP4K2GBiY3@github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
version: "main"
|
||||
|
||||
@@ -8,6 +8,10 @@
|
||||
// authors of this component should specify the license in the header of such
|
||||
// files, or include a separate license file detailing the licenses of all included
|
||||
// files.
|
||||
//
|
||||
// Component authors:
|
||||
// * Dries Schaumont (author, maintainer)
|
||||
// * Marijke Van Moerbeke (contributor)
|
||||
|
||||
////////////////////////////
|
||||
// VDSL3 helper functions //
|
||||
@@ -2807,6 +2811,50 @@ meta = [
|
||||
"name" : "generate_well_statistics",
|
||||
"namespace" : "stats",
|
||||
"version" : "main",
|
||||
"authors" : [
|
||||
{
|
||||
"name" : "Dries Schaumont",
|
||||
"roles" : [
|
||||
"author",
|
||||
"maintainer"
|
||||
],
|
||||
"info" : {
|
||||
"links" : {
|
||||
"email" : "dries@data-intuitive.com",
|
||||
"github" : "DriesSchaumont",
|
||||
"orcid" : "0000-0002-4389-0440",
|
||||
"linkedin" : "dries-schaumont"
|
||||
},
|
||||
"organizations" : [
|
||||
{
|
||||
"name" : "Data Intuitive",
|
||||
"href" : "https://www.data-intuitive.com",
|
||||
"role" : "Data Scientist"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name" : "Marijke Van Moerbeke",
|
||||
"roles" : [
|
||||
"contributor"
|
||||
],
|
||||
"info" : {
|
||||
"links" : {
|
||||
"github" : "mvanmoerbeke",
|
||||
"orcid" : "0000-0002-3097-5621",
|
||||
"linkedin" : "marijke-van-moerbeke-84303a34"
|
||||
},
|
||||
"organizations" : [
|
||||
{
|
||||
"name" : "OpenAnalytics",
|
||||
"href" : "https://www.openanalytics.eu",
|
||||
"role" : "Statistical Consultant"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"argument_groups" : [
|
||||
{
|
||||
"name" : "Arguments",
|
||||
@@ -2834,6 +2882,15 @@ meta = [
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "string",
|
||||
"name" : "--well_id",
|
||||
"description" : "ID of this well. Only used to add a metadata column to the output files.\n",
|
||||
"required" : true,
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "file",
|
||||
"name" : "--processedBAMFile",
|
||||
@@ -3078,8 +3135,8 @@ meta = [
|
||||
"engine" : "docker|native",
|
||||
"output" : "target/nextflow/stats/generate_well_statistics",
|
||||
"viash_version" : "0.9.0",
|
||||
"git_commit" : "65dd41d8b1b4a307735c72320c96c0880c75f17f",
|
||||
"git_remote" : "https://x-access-token:ghs_McZDF0yobnnHmOEb2Q4JaaB3pzr9mz1VbIOs@github.com/viash-hub/htrnaseq"
|
||||
"git_commit" : "82647a421dae521a9563f7f02050f13a1319eb4a",
|
||||
"git_remote" : "https://x-access-token:ghs_GvoC19gNBNw8DS3yDc8aa44laHZP4K2GBiY3@github.com/viash-hub/htrnaseq"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "htrnaseq",
|
||||
@@ -3138,6 +3195,7 @@ import logging
|
||||
par = {
|
||||
'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'barcode': $( if [ ! -z ${VIASH_PAR_BARCODE+x} ]; then echo "r'${VIASH_PAR_BARCODE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'well_id': $( if [ ! -z ${VIASH_PAR_WELL_ID+x} ]; then echo "r'${VIASH_PAR_WELL_ID//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'processedBAMFile': $( if [ ! -z ${VIASH_PAR_PROCESSEDBAMFILE+x} ]; then echo "r'${VIASH_PAR_PROCESSEDBAMFILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'nrReadsNrGenesPerChrom': $( if [ ! -z ${VIASH_PAR_NRREADSNRGENESPERCHROM+x} ]; then echo "r'${VIASH_PAR_NRREADSNRGENESPERCHROM//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'nrReadsNrUMIsPerCB': $( if [ ! -z ${VIASH_PAR_NRREADSNRUMISPERCB+x} ]; then echo "r'${VIASH_PAR_NRREADSNRUMISPERCB//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
@@ -3192,11 +3250,11 @@ if __name__ == "__main__":
|
||||
columns=tags_selection)
|
||||
tag_dataframe_to_write = tag_dataframe.copy()
|
||||
logger.info("Done reading BAM file. Found %i entries", tag_dataframe.shape[0])
|
||||
tag_dataframe.assign(WellBC=par["barcode"])\\\\
|
||||
tag_dataframe.assign(WellBC=par["barcode"], WellID=par["well_id"])\\\\
|
||||
.reset_index(names="Chr")\\\\
|
||||
.to_csv(par["processedBAMFile"], sep="\\\\t", na_rep="",
|
||||
header=True, index=False,
|
||||
columns=("WellBC", "Chr") + tags_selection)
|
||||
columns=("WellBC", "WellID", "Chr") + tags_selection)
|
||||
logger.info("Constructing of dataframe done.")
|
||||
# Number of genes that had a read mapped to them per chromosome,
|
||||
# and the number of reads mapped to those genes per chromosome.
|
||||
@@ -3206,19 +3264,19 @@ if __name__ == "__main__":
|
||||
)
|
||||
logger.info("Done calculating number of reads per gene and per chromesome. Writing to %s",
|
||||
par['nrReadsNrGenesPerChrom'])
|
||||
nr_reads_nr_genes.reset_index(names="Chr").assign(WellBC=par["barcode"])\\\\
|
||||
nr_reads_nr_genes.reset_index(names="Chr").assign(WellBC=par["barcode"], WellID=par["well_id"])\\\\
|
||||
.to_csv(par["nrReadsNrGenesPerChrom"], sep="\\\\t",
|
||||
header=True, index=False,
|
||||
columns=("WellBC", "Chr", "NumberOfReads", "NumberOfGenes"))
|
||||
columns=("WellBC", "WellID", "Chr", "NumberOfReads", "NumberOfGenes"))
|
||||
|
||||
# Number of reads mapped to the reference, grouped by UMI
|
||||
nr_read_per_umi = tag_dataframe.groupby('UB').size()\\\\
|
||||
.drop("", errors="ignore").sort_values(ascending=False).head(100)
|
||||
nr_read_per_umi_df = nr_read_per_umi.to_frame(name="N")
|
||||
logger.info("Done calculating number of mapped reads per UMI, writing to %s", par["umiFreqTop"])
|
||||
nr_read_per_umi_df.assign(WellBC=par["barcode"]).reset_index(names="UB")\\\\
|
||||
nr_read_per_umi_df.assign(WellBC=par["barcode"], WellID=par["well_id"]).reset_index(names="UB")\\\\
|
||||
.to_csv(par["umiFreqTop"], header=True, sep="\\\\t",
|
||||
index=False, columns=("WellBC", "UB", "N"))
|
||||
index=False, columns=("WellBC", "WellID", "UB", "N"))
|
||||
|
||||
# Total number of mapped reads and total number of UMIs (not grouped per chromosome)
|
||||
nr_reads_and_umi_per_barcode = tag_dataframe.groupby(by="CB").agg(
|
||||
@@ -3227,9 +3285,9 @@ if __name__ == "__main__":
|
||||
)
|
||||
logger.info("Done calculating number of mapped reads and number of UMIs per Cell Barcode, writing to %s",
|
||||
par["nrReadsNrUMIsPerCB"])
|
||||
nr_reads_and_umi_per_barcode.assign(WellBC=par["barcode"]).reset_index(names="CB")\\\\
|
||||
nr_reads_and_umi_per_barcode.assign(WellBC=par["barcode"], WellID=par["well_id"]).reset_index(names="CB")\\\\
|
||||
.to_csv(par["nrReadsNrUMIsPerCB"], sep="\\\\t", header=True,
|
||||
index=False, columns=("WellBC", "CB", "NumberOfReads", "nrUMIs"))
|
||||
index=False, columns=("WellBC", "WellID", "CB", "NumberOfReads", "nrUMIs"))
|
||||
logger.info("Finished!")
|
||||
VIASHMAIN
|
||||
python -B "$tempscript"
|
||||
|
||||
@@ -4,6 +4,7 @@ manifest {
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'main'
|
||||
description = 'Generate summary statistics from BAM files generated by STAR solo.'
|
||||
author = 'Dries Schaumont, Marijke Van Moerbeke'
|
||||
}
|
||||
|
||||
process.container = 'nextflow/bash:latest'
|
||||
|
||||
@@ -70,11 +70,14 @@ profiles {
|
||||
local {
|
||||
// This config is for local processing.
|
||||
process {
|
||||
withName: ".*parallel_map_process" {
|
||||
maxForks = 1
|
||||
}
|
||||
maxMemory = 25.GB
|
||||
withLabel: verylowcpu { cpus = 2 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 6 }
|
||||
withLabel: highcpu { cpus = 12 }
|
||||
withLabel: highcpu { cpus = 8 }
|
||||
|
||||
withLabel: lowmem { memory = { get_memory( 8.GB * task.attempt ) } }
|
||||
withLabel: midmem { memory = { get_memory( 12.GB * task.attempt ) } }
|
||||
|
||||
@@ -33,6 +33,16 @@
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"well_id": {
|
||||
"type":
|
||||
"string",
|
||||
"description": "Type: `string`, required. ID of this well",
|
||||
"help_text": "Type: `string`, required. ID of this well. Only used to add a metadata column to the output files.\n"
|
||||
|
||||
}
|
||||
|
||||
|
||||
,
|
||||
"processedBAMFile": {
|
||||
"type":
|
||||
@@ -40,7 +50,7 @@
|
||||
"description": "Type: `file`, default: `$id.$key.processedBAMFile.txt`. Path to a ",
|
||||
"help_text": "Type: `file`, default: `$id.$key.processedBAMFile.txt`. Path to a .tsv file listing, per read in the BAM file,\nthe value for the \"CB\", \"UX\", \"GX\" and \"GN\" tag, together with the\nchromsome to which the read was mapped to.\n"
|
||||
,
|
||||
"default": "$id.$key.processedBAMFile.txt"
|
||||
"default":"$id.$key.processedBAMFile.txt"
|
||||
}
|
||||
|
||||
|
||||
@@ -51,7 +61,7 @@
|
||||
"description": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. Path to an output file that contains a ",
|
||||
"help_text": "Type: `file`, default: `$id.$key.nrReadsNrGenesPerChrom.txt`. Path to an output file that contains a .tsv formatted table describing\nper chromosome the number of reads that were mapped to that chromosome (NumberOfReads\ncolumn) and the number of genes on that chromosome that had at least one\nread mapped to it (NumberOfGenes).\n"
|
||||
,
|
||||
"default": "$id.$key.nrReadsNrGenesPerChrom.txt"
|
||||
"default":"$id.$key.nrReadsNrGenesPerChrom.txt"
|
||||
}
|
||||
|
||||
|
||||
@@ -62,7 +72,7 @@
|
||||
"description": "Type: `file`, default: `$id.$key.nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a ",
|
||||
"help_text": "Type: `file`, default: `$id.$key.nrReadsNrUMIsPerCB.txt`. Path to an output file that contains a .tsv formatted table describing\nper barcode the number of UMI\u0027s (nrUMIs) and the total number of reads (NumberOfReads).\n"
|
||||
,
|
||||
"default": "$id.$key.nrReadsNrUMIsPerCB.txt"
|
||||
"default":"$id.$key.nrReadsNrUMIsPerCB.txt"
|
||||
}
|
||||
|
||||
|
||||
@@ -73,7 +83,7 @@
|
||||
"description": "Type: `file`, default: `$id.$key.umiFreqTop.txt`. Path to an output file that contains a ",
|
||||
"help_text": "Type: `file`, default: `$id.$key.umiFreqTop.txt`. Path to an output file that contains a .tsv formatted table describing\nper UMI (column UB) the frequency at which they occur in the reads (column\nN). Only the top 100 UMIs are included.\n"
|
||||
,
|
||||
"default": "$id.$key.umiFreqTop.txt"
|
||||
"default":"$id.$key.umiFreqTop.txt"
|
||||
}
|
||||
|
||||
|
||||
@@ -84,7 +94,7 @@
|
||||
"description": "Type: `integer`, default: `1`. Number of threads to use for decompressing BAM files",
|
||||
"help_text": "Type: `integer`, default: `1`. Number of threads to use for decompressing BAM files.\n"
|
||||
,
|
||||
"default": "1"
|
||||
"default":1
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user