Build branch openpipeline_spatial/main with version main to openpipeline_spatial on branch main (4449aa6)

Build pipeline: openpipelines-bio.openpipeline-spatial.main-rbsrs

Source commit: 4449aa6094

Source message: Merge pull request #56 from openpipelines-bio/op_v4_1_0

Update OpenPipeline dependency to v4.1.0
This commit is contained in:
CI
2026-06-01 14:56:23 +00:00
parent 55eb76cde5
commit 4ba32ce343
366 changed files with 15045 additions and 4149 deletions

View File

@@ -8,10 +8,24 @@
* `convert/from_h5mu_to_seurat_with_fov`: Added converter component for H5MU data to Seurat objects with spatial FOV (PR #51).
## MAJOR CHANGES
* Pin OpenPipeline dependency to v4.1.0 (PR #56).
## MINOR CHANGES
* Pin ome-zarr to 0.13.0 to avoid a chunk shape incompatibility with zarr 3.x (PR #48).
* Bump Viash to 0.9.7 (PR #57).
* Bump anndata to 0.12.16 (PR #57).
* Bump mudata to 0.3.8 (PR #57).
* Bump scanpy to 1.11.4 (PR #57).
* Testing: bump viashpy to 0.10.0 (PR #57).
## BUG FIXES
* `convert/from_h5mu_to_spatialdata`: Make sure the AnnData table is properly parsed before inserting into the new SpatialData object (PR #53).

View File

@@ -1,4 +1,4 @@
viash_version: 0.9.4
viash_version: 0.9.7
source: src
target: target
name: openpipeline_spatial
@@ -10,7 +10,7 @@ repositories:
- name: openpipeline
repo: openpipeline
type: vsh
tag: v4.0.3
tag: v4.1.0
info:
test_resources:
- type: s3

View File

@@ -1,3 +1,4 @@
packages:
- anndata~=0.12.7
- awkward
- anndata~=0.12.16
- awkward
- scipy~=1.17.1 # Exclude scipy 1.17.0 because https://github.com/scverse/anndata/issues/339

View File

@@ -1,5 +1,5 @@
__merge__: [/src/base/requirements/anndata.yaml, .]
packages:
- mudata~=0.3.2
- mudata~=0.3.8
script: |
exec("try:\n import zarr; from importlib.metadata import version\nexcept ModuleNotFoundError:\n exit(0)\nelse: assert int(version(\"zarr\").partition(\".\")[0]) > 2")

View File

@@ -1,2 +1,2 @@
packages:
- scanpy~=1.10.4
- scanpy~=1.11.4

View File

@@ -1,2 +1,2 @@
packages:
- viashpy==0.9.0
- viashpy==0.10.0

View File

@@ -144,6 +144,15 @@ argument_groups:
type: integer
description: Minimum of non-zero values per protein.
- name: "Cross-modality filtering"
arguments:
- name: "--intersect_obs"
type: boolean_true
description: |
After per-modality filtering and multisample processing, remove observations
that are not present in all processed modalities so that each modality shares
the same set of cells.
- name: "Highly variable features detection"
arguments:
- name: "--highly_variable_features_var_output"

View File

@@ -34,6 +34,7 @@ workflow run_wf {
"prot_min_proteins_per_cell": state.prot_min_proteins_per_cell,
"prot_max_proteins_per_cell": state.prot_max_proteins_per_cell,
"prot_min_cells_per_protein": state.prot_min_cells_per_protein,
"intersect_obs": state.intersect_obs,
"highly_variable_features_var_output": state.highly_variable_features_var_output,
"highly_variable_features_obs_batch_key": state.highly_variable_features_obs_batch_key,
"var_gene_names": state.var_gene_names,

View File

@@ -112,7 +112,7 @@ repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v4.0.3"
tag: "v4.1.0"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"
@@ -201,7 +201,7 @@ engines:
- type: "python"
user: false
packages:
- "scanpy~=1.10.4"
- "scanpy~=1.11.4"
- "squidpy~=1.8.1"
upgrade: true
test_setup:
@@ -212,7 +212,7 @@ engines:
- type: "python"
user: false
packages:
- "viashpy==0.9.0"
- "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true
@@ -226,8 +226,8 @@ build_info:
engine: "docker|native"
output: "target/_private/executable/filter/subset_cosmx"
executable: "target/_private/executable/filter/subset_cosmx/subset_cosmx"
viash_version: "0.9.4"
git_commit: "e087099616271bd0f11e825b7817ec14ebef62d7"
viash_version: "0.9.7"
git_commit: "4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"
@@ -241,8 +241,8 @@ package_config:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v4.0.3"
viash_version: "0.9.4"
tag: "v4.1.0"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# subset_cosmx main
#
# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -454,13 +454,13 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "scanpy~=1.10.4" "squidpy~=1.8.1"
pip install --upgrade --no-cache-dir "scanpy~=1.11.4" "squidpy~=1.8.1"
LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz"
LABEL org.opencontainers.image.description="Companion container for running component filter subset_cosmx"
LABEL org.opencontainers.image.created="2026-05-22T11:53:11Z"
LABEL org.opencontainers.image.created="2026-06-01T12:50:40Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="e087099616271bd0f11e825b7817ec14ebef62d7"
LABEL org.opencontainers.image.revision="4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -112,7 +112,7 @@ repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v4.0.3"
tag: "v4.1.0"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"
@@ -201,7 +201,7 @@ engines:
- type: "python"
user: false
packages:
- "scanpy~=1.10.4"
- "scanpy~=1.11.4"
- "squidpy~=1.8.1"
upgrade: true
test_setup:
@@ -212,7 +212,7 @@ engines:
- type: "python"
user: false
packages:
- "viashpy==0.9.0"
- "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true
@@ -226,8 +226,8 @@ build_info:
engine: "docker|native"
output: "target/_private/nextflow/filter/subset_cosmx"
executable: "target/_private/nextflow/filter/subset_cosmx/main.nf"
viash_version: "0.9.4"
git_commit: "e087099616271bd0f11e825b7817ec14ebef62d7"
viash_version: "0.9.7"
git_commit: "4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"
@@ -241,8 +241,8 @@ package_config:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v4.0.3"
viash_version: "0.9.4"
tag: "v4.1.0"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// subset_cosmx main
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1354,47 +1354,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1710,10 +1705,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3187,7 +3197,7 @@ meta = [
"type" : "vsh",
"name" : "openpipeline",
"repo" : "openpipeline",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
}
],
"links" : {
@@ -3295,7 +3305,7 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"scanpy~=1.10.4",
"scanpy~=1.11.4",
"squidpy~=1.8.1"
],
"upgrade" : true
@@ -3313,7 +3323,7 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"viashpy==0.9.0"
"viashpy==0.10.0"
],
"github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3332,8 +3342,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/_private/nextflow/filter/subset_cosmx",
"viash_version" : "0.9.4",
"git_commit" : "e087099616271bd0f11e825b7817ec14ebef62d7",
"viash_version" : "0.9.7",
"git_commit" : "4449aa6094a686af0a5fbe99b9aaafd01ae75aff",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {
@@ -3353,10 +3363,10 @@ meta = [
"type" : "vsh",
"name" : "openpipeline",
"repo" : "openpipeline",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
}
],
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [

View File

@@ -48,7 +48,7 @@ repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v4.0.3"
tag: "v4.1.0"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"
@@ -135,10 +135,11 @@ engines:
- type: "python"
user: false
packages:
- "anndata~=0.12.7"
- "anndata~=0.12.16"
- "awkward"
- "mudata~=0.3.2"
- "viashpy==0.9.0"
- "scipy~=1.17.1"
- "mudata~=0.3.8"
- "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
script:
@@ -156,8 +157,8 @@ build_info:
engine: "docker|native"
output: "target/_test/executable/test_workflows/ingestion/spaceranger_mapping_test"
executable: "target/_test/executable/test_workflows/ingestion/spaceranger_mapping_test/spaceranger_mapping_test"
viash_version: "0.9.4"
git_commit: "e087099616271bd0f11e825b7817ec14ebef62d7"
viash_version: "0.9.7"
git_commit: "4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"
@@ -171,8 +172,8 @@ package_config:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v4.0.3"
viash_version: "0.9.4"
tag: "v4.1.0"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:

View File

@@ -2,7 +2,7 @@
# spaceranger_mapping_test main
#
# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
# This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
@@ -453,15 +453,15 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "anndata~=0.12.7" "awkward" "mudata~=0.3.2" "viashpy==0.9.0" && \
pip install --upgrade --no-cache-dir "anndata~=0.12.16" "awkward" "scipy~=1.17.1" "mudata~=0.3.8" "viashpy==0.10.0" && \
pip install --upgrade --no-cache-dir "git+https://github.com/openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" && \
python -c 'exec("try:\n import zarr; from importlib.metadata import version\nexcept ModuleNotFoundError:\n exit(0)\nelse: assert int(version(\"zarr\").partition(\".\")[0]) > 2")'
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component test_workflows/ingestion spaceranger_mapping_test"
LABEL org.opencontainers.image.created="2026-05-22T11:53:09Z"
LABEL org.opencontainers.image.created="2026-06-01T12:50:39Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="e087099616271bd0f11e825b7817ec14ebef62d7"
LABEL org.opencontainers.image.revision="4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER

View File

@@ -48,7 +48,7 @@ repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v4.0.3"
tag: "v4.1.0"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"
@@ -135,10 +135,11 @@ engines:
- type: "python"
user: false
packages:
- "anndata~=0.12.7"
- "anndata~=0.12.16"
- "awkward"
- "mudata~=0.3.2"
- "viashpy==0.9.0"
- "scipy~=1.17.1"
- "mudata~=0.3.8"
- "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
script:
@@ -156,8 +157,8 @@ build_info:
engine: "docker|native"
output: "target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test"
executable: "target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test/main.nf"
viash_version: "0.9.4"
git_commit: "e087099616271bd0f11e825b7817ec14ebef62d7"
viash_version: "0.9.7"
git_commit: "4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"
@@ -171,8 +172,8 @@ package_config:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v4.0.3"
viash_version: "0.9.4"
tag: "v4.1.0"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:

View File

@@ -1,6 +1,6 @@
// spaceranger_mapping_test main
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1709,10 +1704,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3104,7 +3114,7 @@ meta = [
"type" : "vsh",
"name" : "openpipeline",
"repo" : "openpipeline",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
}
],
"links" : {
@@ -3209,10 +3219,11 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"anndata~=0.12.7",
"anndata~=0.12.16",
"awkward",
"mudata~=0.3.2",
"viashpy==0.9.0"
"scipy~=1.17.1",
"mudata~=0.3.8",
"viashpy==0.10.0"
],
"github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3234,8 +3245,8 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test",
"viash_version" : "0.9.4",
"git_commit" : "e087099616271bd0f11e825b7817ec14ebef62d7",
"viash_version" : "0.9.7",
"git_commit" : "4449aa6094a686af0a5fbe99b9aaafd01ae75aff",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {
@@ -3255,10 +3266,10 @@ meta = [
"type" : "vsh",
"name" : "openpipeline",
"repo" : "openpipeline",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
}
],
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [

View File

@@ -1,31 +0,0 @@
def subset_vars(adata, subset_col):
    """Return a copy of `adata` restricted to the features flagged in a `.var` column.

    Parameters
    ----------
    adata : AnnData
        Annotated data object
    subset_col : str
        Name of the `adata.var` column holding the per-feature selection flags.
        The column must have dtype `bool`, or dtype `boolean` without missing values.

    Returns
    -------
    AnnData
        Copy of `adata` containing only the features whose flag is set.

    Raises
    ------
    ValueError
        If `subset_col` is not a column of `adata.var`.
    AssertionError
        If the column has dtype `boolean` and contains missing values, or if its
        dtype is not `bool` after the optional conversion.

    Notes
    -----
    A `boolean` column is converted to `bool` by assigning back into `adata.var`,
    so the input object itself is modified in that case.
    """
    column_missing = subset_col not in adata.var.columns
    if column_missing:
        raise ValueError(
            f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
        )

    selector = adata.var[subset_col]
    if selector.dtype == "boolean":
        n_missing = selector.isna().sum()
        assert n_missing == 0, (
            f"The .var column `{subset_col}` contains NaN values. Can not subset data."
        )
        # Write the converted column back onto the input object, then re-read it below
        # through `adata.var` rather than reusing a reference taken before the assignment.
        adata.var[subset_col] = selector.astype("bool")

    selector = adata.var[subset_col]
    assert selector.dtype == "bool", (
        f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {selector.dtype}. Can not subset data."
    )

    subset_view = adata[:, selector]
    return subset_view.copy()

View File

@@ -0,0 +1,567 @@
name: "process_singlesample_base"
namespace: "workflows/multiomics"
version: "v4.1.0"
authors:
- name: "Dorien Roosen"
roles:
- "author"
info:
role: "Core Team Member"
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Output State"
arguments:
- type: "string"
name: "--output_modality"
info: null
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Path to the sample."
info: null
example:
- "input.h5mu"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--rna_layer"
description: "Input layer for the gene expression modality. If not specified,\
\ .X is used."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--prot_layer"
description: "Input layer for the antibody capture modality. If not specified,\
\ .X is used."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--gdo_layer"
description: "Input layer for the guide-derived oligonucleotide (GDO) data. If\
\ not specified, .X is used."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "Destination path to the output."
info: null
example:
- "output.h5mu"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Sample ID options"
description: "Options for adding the id to .obs on the MuData object. Having a sample\
\ \nid present is a requirement of several components for this pipeline.\n"
arguments:
- type: "boolean"
name: "--add_id_to_obs"
description: "Add the value passed with --id to .obs."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--add_id_obs_output"
description: ".Obs column to add the sample IDs to. Required and only used when\
\ \n--add_id_to_obs is set to 'true'\n"
info: null
default:
- "sample_id"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--add_id_make_observation_keys_unique"
description: "Join the id to the .obs index (.obs_names). \nOnly used when --add_id_to_obs\
\ is set to 'true'.\n"
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "RNA filtering options"
arguments:
- type: "integer"
name: "--rna_min_counts"
description: "Minimum number of counts captured per cell."
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--rna_max_counts"
description: "Maximum number of counts captured per cell."
info: null
example:
- 5000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--rna_min_genes_per_cell"
description: "Minimum of non-zero values per cell."
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--rna_max_genes_per_cell"
description: "Maximum of non-zero values per cell."
info: null
example:
- 1500000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--rna_min_cells_per_gene"
description: "Minimum of non-zero values per gene."
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--rna_min_fraction_mito"
description: "Minimum fraction of UMIs that are mitochondrial."
info: null
example:
- 0.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--rna_max_fraction_mito"
description: "Maximum fraction of UMIs that are mitochondrial."
info: null
example:
- 0.2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--rna_min_fraction_ribo"
description: "Minimum fraction of UMIs that are ribosomal."
info: null
example:
- 0.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--rna_max_fraction_ribo"
description: "Maximum fraction of UMIs that are ribosomal."
info: null
example:
- 0.2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--skip_scrublet_doublet_detection"
description: "Skip the scrublet doublet detection step."
info: null
direction: "input"
- name: "CITE-seq filtering options"
arguments:
- type: "integer"
name: "--prot_min_counts"
description: "Minimum number of counts per cell."
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--prot_max_counts"
description: "Maximum number of counts per cell."
info: null
example:
- 5000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--prot_min_proteins_per_cell"
description: "Minimum of non-zero values per cell."
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--prot_max_proteins_per_cell"
description: "Maximum of non-zero values per cell."
info: null
example:
- 100000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--prot_min_cells_per_protein"
description: "Minimum of non-zero values per protein."
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "GDO filtering options"
arguments:
- type: "integer"
name: "--gdo_min_counts"
description: "Minimum number of counts per cell."
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gdo_max_counts"
description: "Maximum number of counts per cell."
info: null
example:
- 5000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gdo_min_guides_per_cell"
description: "Minimum of non-zero values per cell."
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gdo_max_guides_per_cell"
description: "Maximum of non-zero values per cell."
info: null
example:
- 100000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gdo_min_cells_per_guide"
description: "Minimum of non-zero values per guide."
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Cross-modality filtering"
arguments:
- type: "boolean_true"
name: "--intersect_obs"
description: "After per-modality filtering, remove observations that are not present\n\
in all processed modalities so that each modality shares the same set of cells.\n"
info: null
direction: "input"
- name: "Mitochondrial & Ribosomal Gene Detection"
arguments:
- type: "string"
name: "--var_gene_names"
description: ".var column name to be used to detect mitochondrial/ribosomal genes\
\ instead of .var_names (default if not set).\nGene names matching with the\
\ regex value from --mitochondrial_gene_regex or --ribosomal_gene_regex will\
\ be \nidentified as mitochondrial or ribosomal genes, respectively. \n"
info: null
example:
- "gene_symbol"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--var_name_mitochondrial_genes"
description: "In which .var slot to store a boolean array corresponding to the mitochondrial\
\ genes.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--obs_name_mitochondrial_fraction"
description: "When specified, write the fraction of counts originating from mitochondrial\
\ genes \n(based on --mitochondrial_gene_regex) to an .obs column with the specified\
\ name.\nRequires --var_name_mitochondrial_genes.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--mitochondrial_gene_regex"
description: "Regex string that identifies mitochondrial genes from --var_gene_names.\n\
By default will detect human and mouse mitochondrial genes from a gene symbol.\n"
info: null
default:
- "^[mM][tT]-"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--var_name_ribosomal_genes"
description: "In which .var slot to store a boolean array corresponding to the ribosomal\
\ genes.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--obs_name_ribosomal_fraction"
description: "When specified, write the fraction of counts originating from ribosomal\
\ genes \n(based on --ribosomal_gene_regex) to an .obs column with the specified\
\ name.\nRequires --var_name_ribosomal_genes.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--ribosomal_gene_regex"
description: "Regex string that identifies ribosomal genes from --var_gene_names.\n\
By default will detect human and mouse ribosomal genes from a gene symbol.\n"
info: null
default:
- "^[Mm]?[Rr][Pp][LlSs]"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
- type: "file"
path: "utils"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "A pipeline to analyse a single multiomics sample."
info: null
status: "enabled"
scope:
image: "private"
target: "private"
dependencies:
- name: "metadata/add_id"
repository:
type: "local"
- name: "workflows/multiomics/split_modalities"
alias: "split_modalities_workflow"
repository:
type: "local"
- name: "workflows/rna/rna_singlesample"
repository:
type: "local"
- name: "workflows/prot/prot_singlesample"
repository:
type: "local"
- name: "workflows/gdo/gdo_singlesample"
repository:
type: "local"
license: "MIT"
links:
repository: "https://github.com/openpipelines-bio/openpipeline"
docker_registry: "ghcr.io"
# Runner definitions: a single Nextflow runner.
runners:
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
# Label name -> Nextflow process setting.
# "mem<N>gb" / "mem<N>tb" labels use decimal byte counts (powers of 10).
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
# "mem<N>gib" / "mem<N>tib" labels use binary byte counts (powers of 2).
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
# "cpu<N>" labels set the number of CPUs.
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
# Extra config script line; includes the nextflow_labels.config resource.
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
# Provenance of this build: the source config, the runner and engine it was
# built for, the output locations, the Viash version and the git commit/remote.
build_info:
config: "src/workflows/multiomics/process_singlesample_base/config.vsh.yaml"
runner: "nextflow"
engine: "native"
output: "target/_private/nextflow/workflows/multiomics/process_singlesample_base"
executable: "target/_private/nextflow/workflows/multiomics/process_singlesample_base/main.nf"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
# Build output locations of the components named in the top-level
# "dependencies" list of this file.
dependencies:
- "target/nextflow/metadata/add_id"
- "target/_private/nextflow/workflows/multiomics/split_modalities"
- "target/nextflow/workflows/rna/rna_singlesample"
- "target/nextflow/workflows/prot/prot_singlesample"
- "target/nextflow/workflows/gdo/gdo_singlesample"
package_config:
name: "openpipeline"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
\nIn terms of workflows, the following has been made available, but keep in mind\
\ that\nindividual tools and functionality can be executed as standalone components\
\ as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
\ * Ingestion: Read mapping and generating a count matrix.\n * Single sample\
\ processing: cell filtering and doublet detection.\n * Multisample processing:\
\ Count transformation, normalization, QC metric calculations.\n * Integration:\
\ Clustering, integration and batch correction using single and multimodal methods.\n\
\ * Downstream analysis workflows\n"
info:
test_resources:
- type: "s3"
path: "s3://openpipelines-data"
dest: "resources_test"
nextflow_labels_ci:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/openpipelines-bio/openpipeline"
docker_registry: "ghcr.io"
homepage: "https://openpipelines.bio"
documentation: "https://openpipelines.bio/fundamentals"
issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"

View File

@@ -0,0 +1,126 @@
// Pipeline metadata for workflows/multiomics/process_singlesample_base.
manifest {
name = 'workflows/multiomics/process_singlesample_base'
mainScript = 'main.nf'
// NOTE(review): per the Nextflow manifest documentation, the leading '!' should
// make this minimum-version requirement a hard stop instead of a warning — confirm.
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.1.0'
description = 'A pipeline to analyse a single multiomics sample.'
author = 'Dorien Roosen'
}
// Container image set on the process scope.
process.container = 'nextflow/bash:latest'
// Detect the temporary directory: take the first of NXF_TEMP, VIASH_TEMP,
// TEMPDIR or TMPDIR that is set to a non-empty value, fall back to '/tmp',
// and resolve the result to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Named configuration profiles.
profiles {
// no_publish: disable publishDir for every process (name selector '.*').
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// mount_temp: point the docker, podman and charliecloud temp settings at the
// tempDir value computed above.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Container-engine profiles: each one enables a single engine and explicitly
// disables the other four.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// The singularity profile additionally turns on autoMounts.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Resource settings selected by process label.
process{
// mem<N>gb / mem<N>tb: memory as a decimal byte count (powers of 10).
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// mem<N>gib / mem<N>tib: memory as a binary byte count (powers of 2).
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// cpu<N>: number of CPUs.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}
// Include the settings from nextflow_labels.config after the label definitions above.
includeConfig("nextflow_labels.config")

View File

@@ -1,6 +1,6 @@
name: "split_modalities"
namespace: "workflows/multiomics"
version: "v4.0.3"
version: "v4.1.0"
authors:
- name: "Dries Schaumont"
roles:
@@ -182,14 +182,14 @@ build_info:
engine: "native"
output: "target/_private/nextflow/workflows/multiomics/split_modalities"
executable: "target/_private/nextflow/workflows/multiomics/split_modalities/main.nf"
viash_version: "0.9.4"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
dependencies:
- "target/nextflow/dataflow/split_modalities"
package_config:
name: "openpipeline"
version: "v4.0.3"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -210,7 +210,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.4"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
@@ -219,7 +219,7 @@ package_config:
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"

View File

@@ -1,6 +1,6 @@
// split_modalities v4.0.3
// split_modalities v4.1.0
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1709,10 +1704,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{
"name" : "split_modalities",
"namespace" : "workflows/multiomics",
"version" : "v4.0.3",
"version" : "v4.1.0",
"authors" : [
{
"name" : "Dries Schaumont",
@@ -3273,13 +3283,13 @@ meta = [
"runner" : "nextflow",
"engine" : "native",
"output" : "/workdir/root/repo/target/_private/nextflow/workflows/multiomics/split_modalities",
"viash_version" : "0.9.4",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
"viash_version" : "0.9.7",
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
},
"package_config" : {
"name" : "openpipeline",
"version" : "v4.0.3",
"version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : {
@@ -3297,14 +3307,14 @@ meta = [
}
]
},
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
],
"keywords" : [
"single-cell",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'workflows/multiomics/split_modalities'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3'
version = 'v4.1.0'
description = 'A pipeline to split a multimodal mudata files into several unimodal mudata files.'
author = 'Dries Schaumont'
}

View File

@@ -1,6 +1,6 @@
name: "log_normalize"
namespace: "workflows/rna"
version: "v4.0.3"
version: "v4.1.0"
authors:
- name: "Dries Schaumont"
roles:
@@ -197,8 +197,8 @@ build_info:
engine: "native"
output: "target/_private/nextflow/workflows/rna/log_normalize"
executable: "target/_private/nextflow/workflows/rna/log_normalize/main.nf"
viash_version: "0.9.4"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
dependencies:
- "target/nextflow/transform/normalize_total"
@@ -206,7 +206,7 @@ build_info:
- "target/nextflow/transform/delete_layer"
package_config:
name: "openpipeline"
version: "v4.0.3"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -227,7 +227,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.4"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
@@ -236,7 +236,7 @@ package_config:
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"

View File

@@ -1,6 +1,6 @@
// log_normalize v4.0.3
// log_normalize v4.1.0
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1709,10 +1704,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{
"name" : "log_normalize",
"namespace" : "workflows/rna",
"version" : "v4.0.3",
"version" : "v4.1.0",
"authors" : [
{
"name" : "Dries Schaumont",
@@ -3293,13 +3303,13 @@ meta = [
"runner" : "nextflow",
"engine" : "native",
"output" : "/workdir/root/repo/target/_private/nextflow/workflows/rna/log_normalize",
"viash_version" : "0.9.4",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
"viash_version" : "0.9.7",
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
},
"package_config" : {
"name" : "openpipeline",
"version" : "v4.0.3",
"version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : {
@@ -3317,14 +3327,14 @@ meta = [
}
]
},
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
],
"keywords" : [
"single-cell",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'workflows/rna/log_normalize'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3'
version = 'v4.1.0'
description = 'Performs normalization and subsequent log-transformation of raw count data.'
author = 'Dries Schaumont'
}

View File

@@ -1,6 +1,6 @@
name: "leiden"
namespace: "cluster"
version: "v4.0.3"
version: "v4.1.0"
authors:
- name: "Dries De Maeyer"
roles:
@@ -77,6 +77,41 @@ argument_groups:
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--flavor"
description: "Which package's implementation to use.\n"
info: null
default:
- "leidenalg"
required: false
choices:
- "leidenalg"
- "igraph"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--n_iterations"
description: "How many iterations of the Leiden clustering algorithm to perform.\n\
When defined, positive values above 2 define the total number of iterations\
\ to perform.\nWhen not set, the algorithm will run until it reaches its optimal\
\ clustering.\n"
info: null
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--seed"
description: "Fix the initialization of the optimization. Can be used to increase\
\ reproducibility.\n"
info: null
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--resolution"
description: "A parameter value controlling the coarseness of the clustering.\
@@ -91,8 +126,8 @@ argument_groups:
multiple_sep: ";"
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
By default no compression is applied.\n"
description: "Compression format to use for the output AnnData and/or Mudata H5\
\ files.\nBy default no compression is applied.\n"
info: null
example:
- "gzip"
@@ -215,7 +250,7 @@ engines:
id: "docker"
image: "python:3.13-slim"
target_registry: "images.viash-hub.com"
target_tag: "v4.0.3"
target_tag: "v4.1.0"
namespace_separator: "/"
setup:
- type: "apt"
@@ -225,11 +260,12 @@ engines:
- type: "python"
user: false
packages:
- "anndata~=0.12.7"
- "anndata~=0.12.16"
- "awkward"
- "mudata~=0.3.2"
- "scipy~=1.17.1"
- "mudata~=0.3.8"
- "scanpy~=1.11.4"
- "leidenalg~=0.10.0"
- "leidenalg~=0.11.0"
script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -243,7 +279,7 @@ engines:
- type: "python"
user: false
packages:
- "viashpy==0.8.0"
- "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true
@@ -257,12 +293,12 @@ build_info:
engine: "docker|native"
output: "target/nextflow/cluster/leiden"
executable: "target/nextflow/cluster/leiden/main.nf"
viash_version: "0.9.4"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config:
name: "openpipeline"
version: "v4.0.3"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -283,7 +319,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.4"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
@@ -292,7 +328,7 @@ package_config:
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"

View File

@@ -1,6 +1,6 @@
// leiden v4.0.3
// leiden v4.1.0
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1709,10 +1704,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{
"name" : "leiden",
"namespace" : "cluster",
"version" : "v4.0.3",
"version" : "v4.1.0",
"authors" : [
{
"name" : "Dries De Maeyer",
@@ -3133,6 +3143,42 @@ meta = [
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--flavor",
"description" : "Which package's implementation to use.\n",
"default" : [
"leidenalg"
],
"required" : false,
"choices" : [
"leidenalg",
"igraph"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--n_iterations",
"description" : "How many iterations of the Leiden clustering algorithm to perform.\nWhen defined, positive values above 2 define the total number of iterations to perform.\nWhen not set, the algorithm will run until it reaches its optimal clustering.\n",
"required" : false,
"min" : 1,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--seed",
"description" : "Fix the initialization of the optimization. Can be used to increase reproducibility.\n",
"required" : false,
"min" : 0,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "double",
"name" : "--resolution",
@@ -3148,7 +3194,7 @@ meta = [
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
@@ -3294,7 +3340,7 @@ meta = [
"id" : "docker",
"image" : "python:3.13-slim",
"target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3",
"target_tag" : "v4.1.0",
"namespace_separator" : "/",
"setup" : [
{
@@ -3308,11 +3354,12 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"anndata~=0.12.7",
"anndata~=0.12.16",
"awkward",
"mudata~=0.3.2",
"scipy~=1.17.1",
"mudata~=0.3.8",
"scanpy~=1.11.4",
"leidenalg~=0.10.0"
"leidenalg~=0.11.0"
],
"script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3332,7 +3379,7 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"viashpy==0.8.0"
"viashpy==0.10.0"
],
"github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3351,13 +3398,13 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/cluster/leiden",
"viash_version" : "0.9.4",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
"viash_version" : "0.9.7",
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
},
"package_config" : {
"name" : "openpipeline",
"version" : "v4.0.3",
"version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : {
@@ -3375,14 +3422,14 @@ meta = [
}
]
},
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
],
"keywords" : [
"single-cell",
@@ -3416,7 +3463,6 @@ import os
import time
import logging
import logging.handlers
import warnings
import h5py
import mudata as mu
import pandas as pd
@@ -3438,6 +3484,9 @@ par = {
'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'obsm_name': $( if [ ! -z ${VIASH_PAR_OBSM_NAME+x} ]; then echo "r'${VIASH_PAR_OBSM_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'flavor': $( if [ ! -z ${VIASH_PAR_FLAVOR+x} ]; then echo "r'${VIASH_PAR_FLAVOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'n_iterations': $( if [ ! -z ${VIASH_PAR_N_ITERATIONS+x} ]; then echo "int(r'${VIASH_PAR_N_ITERATIONS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'seed': $( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "int(r'${VIASH_PAR_SEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'resolution': $( if [ ! -z ${VIASH_PAR_RESOLUTION+x} ]; then echo "list(map(float, r'${VIASH_PAR_RESOLUTION//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ),
'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi )
}
@@ -3473,6 +3522,9 @@ from compress_h5mu import compress_h5mu
_shared_logger_name = "leiden"
if not par["n_iterations"]:
par["n_iterations"] = -1
# Function to check available space in /dev/shm
def get_available_shared_memory():
@@ -3571,18 +3623,18 @@ def run_single_resolution(shared_csr_matrix, obs_names, resolution):
try:
connectivities = shared_csr_matrix.to_csr_matrix()
adata = create_empty_anndata_with_connectivities(connectivities, obs_names)
with warnings.catch_warnings():
# In the future, the default backend for leiden will be igraph instead of leidenalg.
warnings.simplefilter(action="ignore", category=FutureWarning)
adata_out = sc.tl.leiden(
adata,
resolution=resolution,
key_added=str(resolution),
obsp="connectivities",
copy=True,
)
sc.tl.leiden(
adata,
resolution=resolution,
key_added=str(resolution),
obsp="connectivities",
flavor=par["flavor"],
n_iterations=par["n_iterations"],
random_state=par["seed"],
copy=False, # A copy was already created above
)
logger.info(f"Returning result for resolution {resolution}")
return adata_out.obs[str(resolution)]
return adata.obs[str(resolution)]
finally:
obs_names.shm.close()
shared_csr_matrix.close()
@@ -4147,7 +4199,7 @@ meta["defaults"] = [
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/cluster/leiden",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
},
"label" : [
"highcpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'cluster/leiden'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3'
version = 'v4.1.0'
description = 'Cluster cells using the [Leiden algorithm] [Traag18] implemented in the [Scanpy framework] [Wolf18]. \nLeiden is an improved version of the [Louvain algorithm] [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15] [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\n[Blondel08]: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech. \n[Levine15]: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \n[Traag18]: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv. \n[Wolf18]: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology. \n'
author = 'Dries De Maeyer'
}

View File

@@ -41,6 +41,26 @@
"help_text": "Type: `string`, multiple: `False`, default: `\"leiden\"`. ",
"default": "leiden"
},
"flavor": {
"type": "string",
"description": "Which package's implementation to use.\n",
"help_text": "Type: `string`, multiple: `False`, default: `\"leidenalg\"`, choices: ``leidenalg`, `igraph``. ",
"enum": [
"leidenalg",
"igraph"
],
"default": "leidenalg"
},
"n_iterations": {
"type": "integer",
"description": "How many iterations of the Leiden clustering algorithm to perform.\nWhen defined, positive values above 2 define the total number of iterations to perform.\nWhen not set, the algorithm will run until it reaches its optimal clustering.\n",
"help_text": "Type: `integer`, multiple: `False`. "
},
"seed": {
"type": "integer",
"description": "Fix the initialization of the optimization",
"help_text": "Type: `integer`, multiple: `False`. "
},
"resolution": {
"type": "array",
"items": {
@@ -54,7 +74,7 @@
},
"output_compression": {
"type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [
"gzip",

View File

@@ -1,6 +1,6 @@
name: "concatenate_h5mu"
namespace: "dataflow"
version: "v4.0.3"
version: "v4.1.0"
authors:
- name: "Dries Schaumont"
roles:
@@ -136,8 +136,8 @@ argument_groups:
multiple_sep: ";"
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
By default no compression is applied.\n"
description: "Compression format to use for the output AnnData and/or Mudata H5\
\ files.\nBy default no compression is applied.\n"
info: null
example:
- "gzip"
@@ -253,7 +253,7 @@ engines:
id: "docker"
image: "python:3.13-slim"
target_registry: "images.viash-hub.com"
target_tag: "v4.0.3"
target_tag: "v4.1.0"
namespace_separator: "/"
setup:
- type: "apt"
@@ -263,9 +263,10 @@ engines:
- type: "python"
user: false
packages:
- "anndata~=0.12.7"
- "anndata~=0.12.16"
- "awkward"
- "mudata~=0.3.2"
- "scipy~=1.17.1"
- "mudata~=0.3.8"
script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -279,14 +280,14 @@ engines:
- type: "python"
user: false
packages:
- "viashpy==0.8.0"
- "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true
- type: "python"
user: false
packages:
- "viashpy==0.8.0"
- "viashpy==0.10.0"
- "pytest-benchmark"
upgrade: true
entrypoint: []
@@ -299,12 +300,12 @@ build_info:
engine: "docker|native"
output: "target/nextflow/dataflow/concatenate_h5mu"
executable: "target/nextflow/dataflow/concatenate_h5mu/main.nf"
viash_version: "0.9.4"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config:
name: "openpipeline"
version: "v4.0.3"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -325,7 +326,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.4"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
@@ -334,7 +335,7 @@ package_config:
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"

View File

@@ -1,6 +1,6 @@
// concatenate_h5mu v4.0.3
// concatenate_h5mu v4.1.0
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1709,10 +1704,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{
"name" : "concatenate_h5mu",
"namespace" : "dataflow",
"version" : "v4.0.3",
"version" : "v4.1.0",
"authors" : [
{
"name" : "Dries Schaumont",
@@ -3179,7 +3189,7 @@ meta = [
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
@@ -3328,7 +3338,7 @@ meta = [
"id" : "docker",
"image" : "python:3.13-slim",
"target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3",
"target_tag" : "v4.1.0",
"namespace_separator" : "/",
"setup" : [
{
@@ -3342,9 +3352,10 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"anndata~=0.12.7",
"anndata~=0.12.16",
"awkward",
"mudata~=0.3.2"
"scipy~=1.17.1",
"mudata~=0.3.8"
],
"script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3364,7 +3375,7 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"viashpy==0.8.0"
"viashpy==0.10.0"
],
"github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3375,7 +3386,7 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"viashpy==0.8.0",
"viashpy==0.10.0",
"pytest-benchmark"
],
"upgrade" : true
@@ -3392,13 +3403,13 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/dataflow/concatenate_h5mu",
"viash_version" : "0.9.4",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
"viash_version" : "0.9.7",
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
},
"package_config" : {
"name" : "openpipeline",
"version" : "v4.0.3",
"version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : {
@@ -3416,14 +3427,14 @@ meta = [
}
]
},
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
],
"keywords" : [
"single-cell",
@@ -4266,7 +4277,7 @@ meta["defaults"] = [
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/dataflow/concatenate_h5mu",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
},
"label" : [
"midcpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'dataflow/concatenate_h5mu'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3'
version = 'v4.1.0'
description = 'Concatenate observations from samples in several (uni- and/or multi-modal) MuData files into a single file.\n'
author = 'Dries Schaumont'
}

View File

@@ -85,7 +85,7 @@
},
"output_compression": {
"type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [
"gzip",

View File

@@ -1,6 +1,6 @@
name: "merge"
namespace: "dataflow"
version: "v4.0.3"
version: "v4.1.0"
authors:
- name: "Dries Schaumont"
roles:
@@ -25,6 +25,9 @@ argument_groups:
- "-i"
description: "Paths to the single-modality .h5mu files that need to be combined"
info: null
example:
- "/path/to/modality_1.h5mu"
- "/path/to/modality_2.h5mu"
default:
- "sample_paths"
must_exist: true
@@ -163,7 +166,7 @@ engines:
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "v4.0.3"
target_tag: "v4.1.0"
namespace_separator: "/"
setup:
- type: "apt"
@@ -173,9 +176,10 @@ engines:
- type: "python"
user: false
packages:
- "anndata~=0.12.7"
- "anndata~=0.12.16"
- "awkward"
- "mudata~=0.3.2"
- "scipy~=1.17.1"
- "mudata~=0.3.8"
script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -189,7 +193,7 @@ engines:
- type: "python"
user: false
packages:
- "viashpy==0.8.0"
- "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true
@@ -203,12 +207,12 @@ build_info:
engine: "docker|native"
output: "target/nextflow/dataflow/merge"
executable: "target/nextflow/dataflow/merge/main.nf"
viash_version: "0.9.4"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config:
name: "openpipeline"
version: "v4.0.3"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -229,7 +233,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.4"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
@@ -238,7 +242,7 @@ package_config:
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"

View File

@@ -1,6 +1,6 @@
// merge v4.0.3
// merge v4.1.0
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1709,10 +1704,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{
"name" : "merge",
"namespace" : "dataflow",
"version" : "v4.0.3",
"version" : "v4.1.0",
"authors" : [
{
"name" : "Dries Schaumont",
@@ -3071,6 +3081,10 @@ meta = [
"-i"
],
"description" : "Paths to the single-modality .h5mu files that need to be combined",
"example" : [
"/path/to/modality_1.h5mu",
"/path/to/modality_2.h5mu"
],
"default" : [
"sample_paths"
],
@@ -3246,7 +3260,7 @@ meta = [
"id" : "docker",
"image" : "python:3.12-slim",
"target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3",
"target_tag" : "v4.1.0",
"namespace_separator" : "/",
"setup" : [
{
@@ -3260,9 +3274,10 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"anndata~=0.12.7",
"anndata~=0.12.16",
"awkward",
"mudata~=0.3.2"
"scipy~=1.17.1",
"mudata~=0.3.8"
],
"script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3282,7 +3297,7 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"viashpy==0.8.0"
"viashpy==0.10.0"
],
"github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3301,13 +3316,13 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/dataflow/merge",
"viash_version" : "0.9.4",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
"viash_version" : "0.9.7",
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
},
"package_config" : {
"name" : "openpipeline",
"version" : "v4.0.3",
"version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : {
@@ -3325,14 +3340,14 @@ meta = [
}
]
},
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
],
"keywords" : [
"single-cell",
@@ -3847,7 +3862,7 @@ meta["defaults"] = [
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/dataflow/merge",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
},
"label" : [
"singlecpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'dataflow/merge'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3'
version = 'v4.1.0'
description = 'Combine one or more single-modality .h5mu files together into one .h5mu file.\n'
author = 'Dries Schaumont'
}

View File

@@ -17,7 +17,7 @@
"format": "path",
"exists": true,
"description": "Paths to the single-modality .h5mu files that need to be combined",
"help_text": "Type: `file`, multiple: `True`, required, default: `[\"sample_paths\"]`, direction: `input`. ",
"help_text": "Type: `file`, multiple: `True`, required, default: `[\"sample_paths\"]`, direction: `input`, example: `[\"/path/to/modality_1.h5mu\";\"/path/to/modality_2.h5mu\"]`. ",
"default": [
"sample_paths"
]

View File

@@ -1,6 +1,6 @@
name: "split_modalities"
namespace: "dataflow"
version: "v4.0.3"
version: "v4.1.0"
authors:
- name: "Dries Schaumont"
roles:
@@ -42,6 +42,8 @@ argument_groups:
- "-i"
description: "Path to a single .h5mu file."
info: null
example:
- "input.h5mu"
default:
- "sample_path"
must_exist: true
@@ -79,8 +81,8 @@ argument_groups:
multiple_sep: ";"
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
By default no compression is applied.\n"
description: "Compression format to use for the output AnnData and/or Mudata H5\
\ files.\nBy default no compression is applied.\n"
info: null
example:
- "gzip"
@@ -97,6 +99,8 @@ resources:
is_executable: true
- type: "file"
path: "setup_logger.py"
- type: "file"
path: "mudata_opener.py"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
@@ -190,7 +194,7 @@ engines:
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "v4.0.3"
target_tag: "v4.1.0"
namespace_separator: "/"
setup:
- type: "apt"
@@ -200,9 +204,10 @@ engines:
- type: "python"
user: false
packages:
- "anndata~=0.12.7"
- "anndata~=0.12.16"
- "awkward"
- "mudata~=0.3.2"
- "scipy~=1.17.1"
- "mudata~=0.3.8"
script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -216,7 +221,7 @@ engines:
- type: "python"
user: false
packages:
- "viashpy==0.8.0"
- "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true
@@ -230,12 +235,12 @@ build_info:
engine: "docker|native"
output: "target/nextflow/dataflow/split_modalities"
executable: "target/nextflow/dataflow/split_modalities/main.nf"
viash_version: "0.9.4"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config:
name: "openpipeline"
version: "v4.0.3"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -256,7 +261,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.4"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
@@ -265,7 +270,7 @@ package_config:
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"

View File

@@ -1,6 +1,6 @@
// split_modalities v4.0.3
// split_modalities v4.1.0
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1354,47 +1354,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1710,10 +1705,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3036,7 +3046,7 @@ meta = [
"config": processConfig(readJsonBlob('''{
"name" : "split_modalities",
"namespace" : "dataflow",
"version" : "v4.0.3",
"version" : "v4.1.0",
"authors" : [
{
"name" : "Dries Schaumont",
@@ -3099,6 +3109,9 @@ meta = [
"-i"
],
"description" : "Path to a single .h5mu file.",
"example" : [
"input.h5mu"
],
"default" : [
"sample_path"
],
@@ -3143,7 +3156,7 @@ meta = [
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
@@ -3169,6 +3182,10 @@ meta = [
"type" : "file",
"path" : "/src/utils/setup_logger.py"
},
{
"type" : "file",
"path" : "/src/utils/mudata_opener.py"
},
{
"type" : "file",
"path" : "/src/workflows/utils/labels.config",
@@ -3280,7 +3297,7 @@ meta = [
"id" : "docker",
"image" : "python:3.12-slim",
"target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3",
"target_tag" : "v4.1.0",
"namespace_separator" : "/",
"setup" : [
{
@@ -3294,9 +3311,10 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"anndata~=0.12.7",
"anndata~=0.12.16",
"awkward",
"mudata~=0.3.2"
"scipy~=1.17.1",
"mudata~=0.3.8"
],
"script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3316,7 +3334,7 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"viashpy==0.8.0"
"viashpy==0.10.0"
],
"github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3335,13 +3353,13 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/dataflow/split_modalities",
"viash_version" : "0.9.4",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
"viash_version" : "0.9.7",
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
},
"package_config" : {
"name" : "openpipeline",
"version" : "v4.0.3",
"version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : {
@@ -3359,14 +3377,14 @@ meta = [
}
]
},
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
],
"keywords" : [
"single-cell",
@@ -3395,11 +3413,14 @@ def innerWorkflowFactory(args) {
tempscript=".viash_script.py"
cat > "$tempscript" << VIASHMAIN
from __future__ import annotations
from functools import partial
import sys
import mudata as md
import anndata as ad
from pathlib import Path
import pandas as pd
### VIASH START
# The following code has been auto-generated by Viash.
par = {
@@ -3436,6 +3457,7 @@ dep = {
sys.path.append(meta["resources_dir"])
from setup_logger import setup_logger
from mudata_opener import mudata_opener
logger = setup_logger()
@@ -3447,36 +3469,54 @@ def main() -> None:
logger.info("Creating %s", output_dir)
output_dir.mkdir(parents=True)
logger.info("Reading input file '%s'", par["input"])
input_file = Path(par["input"].strip())
sample = md.read_h5mu(input_file)
logger.info("Creating output types CSV.")
modalities = list(sample.mod.keys())
logger.info("Found the following modalities:\\\\n%s", "\\\\n".join(modalities))
names = {mod_name: f"{input_file.stem}_{mod_name}.h5mu" for mod_name in modalities}
output_files = list(names.values())
logger.info(
"Will be creating the following output .h5mu files:\\\\n%s",
"\\\\n".join(output_files),
)
df = pd.DataFrame({"name": modalities, "filename": output_files})
logger.info("Writing output_types CSV file to '%s'.", par["output_types"])
df.to_csv(par["output_types"], index=False)
logger.info("Splitting input file into unimodal output files.")
for mod_name, mod in sample.mod.items():
logger.info("Processing modality '%s'", mod_name)
new_sample = md.MuData({mod_name: mod})
input_file = Path(par["input"])
logger.info("Checking which modalities exist for '%s'", par["input"])
with mudata_opener(input_file, mode="r") as (open_mudata, input_is_zarr):
logger.info(
"Writing to '%s', with compression '%s'",
names[mod_name],
par["output_compression"],
"Opened %s in %s format.", par["input"], "zarr" if input_is_zarr else "h5"
)
new_sample.write_h5mu(
output_dir / names[mod_name], compression=par["output_compression"]
modalities = list(open_mudata["mod"].keys())
logger.info("Found the following modalities:\\\\n%s", "\\\\n".join(modalities))
logger.info("Creating output types CSV.")
output_extension = "zarr" if input_is_zarr else "h5mu"
names = {
mod_name: f"{input_file.stem}_{mod_name}.{output_extension}"
for mod_name in modalities
}
output_files = list(names.values())
logger.info(
"Will be creating the following output .%s files:\\\\n%s",
output_extension,
"\\\\n".join(output_files),
)
df = pd.DataFrame({"name": modalities, "filename": output_files})
logger.info("Writing output_types CSV file to '%s'.", par["output_types"])
df.to_csv(par["output_types"], index=False)
logger.info("Splitting input file into unimodal output files.")
for mod_name in modalities:
logger.info("Processing modality '%s'", mod_name)
elem_key = f"/mod/{mod_name}"
elem = open_mudata[elem_key]
logger.info("Reading %s", elem_key)
new_ad = ad.io.read_elem(elem)
logger.info("Creating MuData object.")
new_sample = md.MuData({mod_name: new_ad})
logger.info(
"Writing to '%s', with compression '%s'",
names[mod_name],
par["output_compression"],
)
writer = (
partial(md.MuData.write_zarr, zarr_format=3)
if input_is_zarr
else partial(
md.MuData.write_h5mu, compression=par["output_compression"]
)
)
writer(new_sample, output_dir / names[mod_name])
logger.info("Done writing output file.")
logger.info("Finished")
@@ -3865,7 +3905,7 @@ meta["defaults"] = [
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/dataflow/split_modalities",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
},
"label" : [
"singlecpu",

View File

@@ -0,0 +1,27 @@
import zarr
import h5py
from contextlib import contextmanager
@contextmanager
def mudata_opener(file_loc, mode=None):
    """Open a MuData store that is either a zarr group or an HDF5 file.

    The location is first opened with ``zarr.open`` (zarr format 3). If that
    fails with ``GroupNotFoundError`` or ``NotADirectoryError`` the location
    is opened with ``h5py.File`` instead.

    Args:
        file_loc: Location of the store; passed unchanged to ``zarr.open``
            and, on fallback, to ``h5py.File``.
        mode: Open mode, passed unchanged to whichever opener is used.

    Yields:
        A tuple ``(open_mudata, input_is_zarr)``: the opened handle and a
        boolean that is True when the handle came from ``zarr.open``.

    Raises:
        FileNotFoundError, IsADirectoryError, KeyError: when the HDF5
            fallback cannot open ``file_loc``. A note naming the file is
            attached to the exception before it is re-raised.
    """
    input_is_zarr = False
    # Open the handle *before* yielding. Exceptions raised inside the caller's
    # ``with`` body are thrown into this generator at the ``yield``; keeping
    # the ``yield`` outside the open/fallback handlers guarantees that such
    # exceptions are never mistaken for a failure to open the file (which
    # would trigger a second ``yield`` or attach a misleading note).
    try:
        open_mudata = zarr.open(file_loc, zarr_format=3, mode=mode)
        input_is_zarr = True
    except (zarr.errors.GroupNotFoundError, NotADirectoryError):
        try:
            open_mudata = h5py.File(file_loc, mode=mode)
        except (FileNotFoundError, IsADirectoryError, KeyError) as e:
            e.add_note(f"Could not open file {file_loc}.")
            raise

    try:
        yield open_mudata, input_is_zarr
    finally:
        # Close the handle when it exposes ``close()``. The original code
        # tolerated handles without such a method, so a missing ``close`` is
        # not an error. Looking the method up explicitly avoids evaluating the
        # truthiness of the handle and avoids hiding an AttributeError raised
        # from inside ``close()`` itself.
        close = getattr(open_mudata, "close", None)
        if callable(close):
            close()

View File

@@ -2,7 +2,7 @@ manifest {
name = 'dataflow/split_modalities'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3'
version = 'v4.1.0'
description = 'Split the modalities from a single .h5mu multimodal sample into seperate .h5mu files. \n'
author = 'Dries Schaumont, Robrecht Cannoodt'
}

View File

@@ -14,7 +14,7 @@
"format": "path",
"exists": true,
"description": "Path to a single .h5mu file.",
"help_text": "Type: `file`, multiple: `False`, required, default: `\"sample_path\"`, direction: `input`. ",
"help_text": "Type: `file`, multiple: `False`, required, default: `\"sample_path\"`, direction: `input`, example: `\"input.h5mu\"`. ",
"default": "sample_path"
},
"output": {
@@ -33,7 +33,7 @@
},
"output_compression": {
"type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [
"gzip",

View File

@@ -1,6 +1,6 @@
name: "pca"
namespace: "dimred"
version: "v4.0.3"
version: "v4.1.0"
authors:
- name: "Dries De Maeyer"
roles:
@@ -17,6 +17,22 @@ authors:
role: "Principal Scientist"
argument_groups:
- name: "Arguments"
arguments:
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata H5\
\ files.\nBy default no compression is applied.\n"
info: null
example:
- "gzip"
required: false
choices:
- "gzip"
- "lzf"
direction: "input"
multiple: false
multiple_sep: ";"
- name: "inputs"
arguments:
- type: "file"
name: "--input"
@@ -62,6 +78,47 @@ argument_groups:
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Options"
arguments:
- type: "integer"
name: "--num_components"
description: "Number of principal components to compute. Defaults to 50, or 1\
\ - minimum dimension size of selected representation."
info: null
example:
- 25
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--chunked"
description: "If True, perform an incremental PCA on segments of a predefined\
\ size. Setting this flag automatically implies zero centering.\nMust be specified\
\ together with --chunk_size.\n"
info: null
direction: "input"
- type: "integer"
name: "--chunk_size"
description: "Number of observations to include in each chunk. Required if chunked=True\
\ was passed.\n"
info: null
required: false
min: 2
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--seed"
description: "Used to set the initial states for the optimization. \n"
info: null
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
alternatives:
@@ -106,36 +163,11 @@ argument_groups:
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--num_components"
description: "Number of principal components to compute. Defaults to 50, or 1\
\ - minimum dimension size of selected representation."
info: null
example:
- 25
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--overwrite"
description: "Allow overwriting .obsm, .varm and .uns slots."
info: null
direction: "input"
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
By default no compression is applied.\n"
info: null
example:
- "gzip"
required: false
choices:
- "gzip"
- "lzf"
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
@@ -147,8 +179,7 @@ resources:
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Computes PCA coordinates, loadings and variance decomposition. Uses\
\ the implementation of scikit-learn [Pedregosa11].\n"
description: "Computes PCA coordinates, loadings and variance decomposition.\n"
test_resources:
- type: "python_script"
path: "test.py"
@@ -238,9 +269,9 @@ runners:
engines:
- type: "docker"
id: "docker"
image: "python:3.12-slim"
image: "python:3.13-slim"
target_registry: "images.viash-hub.com"
target_tag: "v4.0.3"
target_tag: "v4.1.0"
namespace_separator: "/"
setup:
- type: "apt"
@@ -250,9 +281,10 @@ engines:
- type: "python"
user: false
packages:
- "anndata~=0.12.7"
- "anndata~=0.12.16"
- "awkward"
- "mudata~=0.3.2"
- "scipy~=1.17.1"
- "mudata~=0.3.8"
- "scanpy~=1.11.4"
script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
@@ -260,10 +292,16 @@ engines:
).partition(\\\".\\\")[0]) > 2\")"
upgrade: true
test_setup:
- type: "apt"
packages:
- "git"
interactive: false
- type: "python"
user: false
packages:
- "viashpy==0.8.0"
- "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true
entrypoint: []
cmd: null
@@ -275,12 +313,12 @@ build_info:
engine: "docker|native"
output: "target/nextflow/dimred/pca"
executable: "target/nextflow/dimred/pca/main.nf"
viash_version: "0.9.4"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config:
name: "openpipeline"
version: "v4.0.3"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -301,7 +339,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.4"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
@@ -310,7 +348,7 @@ package_config:
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"

View File

@@ -1,6 +1,6 @@
// pca v4.0.3
// pca v4.1.0
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1709,10 +1704,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{
"name" : "pca",
"namespace" : "dimred",
"version" : "v4.0.3",
"version" : "v4.1.0",
"authors" : [
{
"name" : "Dries De Maeyer",
@@ -3062,6 +3072,27 @@ meta = [
"argument_groups" : [
{
"name" : "Arguments",
"arguments" : [
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
"required" : false,
"choices" : [
"gzip",
"lzf"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
{
"name" : "inputs",
"arguments" : [
{
"type" : "file",
@@ -3112,7 +3143,55 @@ meta = [
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
{
"name" : "Options",
"arguments" : [
{
"type" : "integer",
"name" : "--num_components",
"description" : "Number of principal components to compute. Defaults to 50, or 1 - minimum dimension size of selected representation.",
"example" : [
25
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "boolean_true",
"name" : "--chunked",
"description" : "If True, perform an incremental PCA on segments of a predefined size. Setting this flag automatically implies zero centering.\nMust be specified together with --chunk_size.\n",
"direction" : "input"
},
{
"type" : "integer",
"name" : "--chunk_size",
"description" : "Number of observations to include in each chunk. Required if chunked=True was passed.\n",
"required" : false,
"min" : 2,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--seed",
"description" : "Used to set the initial states for the optimization. \n",
"required" : false,
"min" : 0,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
{
"name" : "Outputs",
"arguments" : [
{
"type" : "file",
"name" : "--output",
@@ -3166,39 +3245,11 @@ meta = [
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--num_components",
"description" : "Number of principal components to compute. Defaults to 50, or 1 - minimum dimension size of selected representation.",
"example" : [
25
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "boolean_true",
"name" : "--overwrite",
"description" : "Allow overwriting .obsm, .varm and .uns slots.",
"direction" : "input"
},
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
"required" : false,
"choices" : [
"gzip",
"lzf"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
}
@@ -3223,7 +3274,7 @@ meta = [
"dest" : "nextflow_labels.config"
}
],
"description" : "Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n",
"description" : "Computes PCA coordinates, loadings and variance decomposition.\n",
"test_resources" : [
{
"type" : "python_script",
@@ -3331,9 +3382,9 @@ meta = [
{
"type" : "docker",
"id" : "docker",
"image" : "python:3.12-slim",
"image" : "python:3.13-slim",
"target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3",
"target_tag" : "v4.1.0",
"namespace_separator" : "/",
"setup" : [
{
@@ -3347,9 +3398,10 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"anndata~=0.12.7",
"anndata~=0.12.16",
"awkward",
"mudata~=0.3.2",
"scipy~=1.17.1",
"mudata~=0.3.8",
"scanpy~=1.11.4"
],
"script" : [
@@ -3359,11 +3411,21 @@ meta = [
}
],
"test_setup" : [
{
"type" : "apt",
"packages" : [
"git"
],
"interactive" : false
},
{
"type" : "python",
"user" : false,
"packages" : [
"viashpy==0.8.0"
"viashpy==0.10.0"
],
"github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
],
"upgrade" : true
}
@@ -3379,13 +3441,13 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/dimred/pca",
"viash_version" : "0.9.4",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
"viash_version" : "0.9.7",
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
},
"package_config" : {
"name" : "openpipeline",
"version" : "v4.0.3",
"version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : {
@@ -3403,14 +3465,14 @@ meta = [
}
]
},
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
],
"keywords" : [
"single-cell",
@@ -3441,22 +3503,26 @@ cat > "$tempscript" << VIASHMAIN
import scanpy as sc
import mudata as mu
import sys
import pandas as pd
from anndata import AnnData
## VIASH START
# The following code has been auto-generated by Viash.
par = {
'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'chunked': $( if [ ! -z ${VIASH_PAR_CHUNKED+x} ]; then echo "r'${VIASH_PAR_CHUNKED//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ),
'chunk_size': $( if [ ! -z ${VIASH_PAR_CHUNK_SIZE+x} ]; then echo "int(r'${VIASH_PAR_CHUNK_SIZE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'seed': $( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "int(r'${VIASH_PAR_SEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'varm_output': $( if [ ! -z ${VIASH_PAR_VARM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_VARM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'overwrite': $( if [ ! -z ${VIASH_PAR_OVERWRITE+x} ]; then echo "r'${VIASH_PAR_OVERWRITE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ),
'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi )
'overwrite': $( if [ ! -z ${VIASH_PAR_OVERWRITE+x} ]; then echo "r'${VIASH_PAR_OVERWRITE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi )
}
meta = {
'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
@@ -3496,11 +3562,24 @@ data = mu.read_h5ad(par["input"], mod=par["modality"])
logger.info("Computing PCA components for modality '%s'", par["modality"])
if par["layer"] and par["layer"] not in data.layers:
raise ValueError(f"{par['layer']} was not found in modality {par['modality']}.")
layer = data.X if not par["layer"] else data.layers[par["layer"]]
adata_input_layer = AnnData(layer)
adata_input_layer.var.index = data.var.index
use_highly_variable = False
chunked, chunk_size = par["chunked"], par["chunk_size"]
if chunked:
if not chunk_size:
raise ValueError(
"Requested to perform an incremental PCA "
"('chunked'), but the chunk size is not set."
)
if chunk_size < par["num_components"]:
raise ValueError(
f"The requested chunk size ({chunk_size}) must not be smaller "
f"than the number of components ({par['num_components']})"
)
layer = data.X if not par["layer"] else data.layers[par["layer"]]
adata_input_layer = AnnData(layer, var=pd.DataFrame([], index=data.var.index))
mask_var = None
if par["var_input"]:
if par["var_input"] not in data.var.columns:
raise ValueError(
@@ -3508,15 +3587,18 @@ if par["var_input"]:
"as a selection of genes to run the PCA on, "
f"but the column is not available for modality {par['modality']}"
)
use_highly_variable = True
adata_input_layer.var["highly_variable"] = data.var[par["var_input"]]
mask_var = data.var[par["var_input"]]
# run pca
output_adata = sc.tl.pca(
sc.tl.pca(
adata_input_layer,
n_comps=par["num_components"],
copy=True,
use_highly_variable=use_highly_variable,
copy=False, # A copy was already created
return_info=True,
mask_var=mask_var,
chunked=chunked,
chunk_size=chunk_size,
random_state=par["seed"],
)
# store output in specific objects
@@ -3535,11 +3617,11 @@ for parameter_name, field in check_exist_dict.items():
)
del getattr(data, field)[par[parameter_name]]
data.obsm[par["obsm_output"]] = output_adata.obsm["X_pca"]
data.varm[par["varm_output"]] = output_adata.varm["PCs"]
data.obsm[par["obsm_output"]] = adata_input_layer.obsm["X_pca"]
data.varm[par["varm_output"]] = adata_input_layer.varm["PCs"]
data.uns[par["uns_output"]] = {
"variance": output_adata.uns["pca"]["variance"],
"variance_ratio": output_adata.uns["pca"]["variance_ratio"],
"variance": adata_input_layer.uns["pca"]["variance"],
"variance_ratio": adata_input_layer.uns["pca"]["variance_ratio"],
}
@@ -3933,7 +4015,7 @@ meta["defaults"] = [
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/dimred/pca",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
},
"label" : [
"highcpu",

View File

@@ -2,8 +2,8 @@ manifest {
name = 'dimred/pca'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3'
description = 'Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n'
version = 'v4.1.0'
description = 'Computes PCA coordinates, loadings and variance decomposition.\n'
author = 'Dries De Maeyer'
}

View File

@@ -1,37 +1,14 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "pca",
"description": "Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n",
"description": "Computes PCA coordinates, loadings and variance decomposition.\n",
"type": "object",
"$defs": {
"arguments": {
"title": "Arguments",
"outputs": {
"title": "Outputs",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type": "string",
"format": "path",
"exists": true,
"description": "Input h5mu file",
"help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
},
"modality": {
"type": "string",
"description": "Which modality from the input MuData file to process.\n",
"help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
"default": "rna"
},
"layer": {
"type": "string",
"description": "Use specified layer for expression values instead of the .X object from the modality.",
"help_text": "Type: `string`, multiple: `False`. "
},
"var_input": {
"type": "string",
"description": "Column name in .var matrix that will be used to select which genes to run the PCA on.",
"help_text": "Type: `string`, multiple: `False`, example: `\"filter_with_hvg\"`. "
},
"output": {
"type": "string",
"format": "path",
@@ -57,20 +34,22 @@
"help_text": "Type: `string`, multiple: `False`, default: `\"pca_variance\"`. ",
"default": "pca_variance"
},
"num_components": {
"type": "integer",
"description": "Number of principal components to compute",
"help_text": "Type: `integer`, multiple: `False`, example: `25`. "
},
"overwrite": {
"type": "boolean",
"description": "Allow overwriting .obsm, .varm and .uns slots.",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
}
}
},
"arguments": {
"title": "Arguments",
"type": "object",
"description": "No description",
"properties": {
"output_compression": {
"type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [
"gzip",
@@ -79,6 +58,64 @@
}
}
},
"inputs": {
"title": "inputs",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type": "string",
"format": "path",
"exists": true,
"description": "Input h5mu file",
"help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
},
"modality": {
"type": "string",
"description": "Which modality from the input MuData file to process.\n",
"help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
"default": "rna"
},
"layer": {
"type": "string",
"description": "Use specified layer for expression values instead of the .X object from the modality.",
"help_text": "Type: `string`, multiple: `False`. "
},
"var_input": {
"type": "string",
"description": "Column name in .var matrix that will be used to select which genes to run the PCA on.",
"help_text": "Type: `string`, multiple: `False`, example: `\"filter_with_hvg\"`. "
}
}
},
"options": {
"title": "Options",
"type": "object",
"description": "No description",
"properties": {
"num_components": {
"type": "integer",
"description": "Number of principal components to compute",
"help_text": "Type: `integer`, multiple: `False`, example: `25`. "
},
"chunked": {
"type": "boolean",
"description": "If True, perform an incremental PCA on segments of a predefined size",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"chunk_size": {
"type": "integer",
"description": "Number of observations to include in each chunk",
"help_text": "Type: `integer`, multiple: `False`. "
},
"seed": {
"type": "integer",
"description": "Used to set the initial states for the optimization",
"help_text": "Type: `integer`, multiple: `False`. "
}
}
},
"nextflow input-output arguments": {
"title": "Nextflow input-output arguments",
"type": "object",
@@ -93,9 +130,18 @@
}
},
"allOf": [
{
"$ref": "#/$defs/outputs"
},
{
"$ref": "#/$defs/arguments"
},
{
"$ref": "#/$defs/inputs"
},
{
"$ref": "#/$defs/options"
},
{
"$ref": "#/$defs/nextflow input-output arguments"
}

View File

@@ -1,6 +1,6 @@
name: "umap"
namespace: "dimred"
version: "v4.0.3"
version: "v4.1.0"
authors:
- name: "Dries De Maeyer"
roles:
@@ -78,8 +78,8 @@ argument_groups:
multiple_sep: ";"
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
By default no compression is applied.\n"
description: "Compression format to use for the output AnnData and/or Mudata H5\
\ files.\nBy default no compression is applied.\n"
info: null
example:
- "gzip"
@@ -294,7 +294,7 @@ engines:
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "v4.0.3"
target_tag: "v4.1.0"
namespace_separator: "/"
setup:
- type: "apt"
@@ -304,9 +304,10 @@ engines:
- type: "python"
user: false
packages:
- "anndata~=0.12.7"
- "anndata~=0.12.16"
- "awkward"
- "mudata~=0.3.2"
- "scipy~=1.17.1"
- "mudata~=0.3.8"
- "scanpy~=1.11.4"
script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
@@ -317,7 +318,7 @@ engines:
- type: "python"
user: false
packages:
- "viashpy==0.8.0"
- "viashpy==0.10.0"
upgrade: true
entrypoint: []
cmd: null
@@ -329,12 +330,12 @@ build_info:
engine: "docker|native"
output: "target/nextflow/dimred/umap"
executable: "target/nextflow/dimred/umap/main.nf"
viash_version: "0.9.4"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config:
name: "openpipeline"
version: "v4.0.3"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -355,7 +356,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.4"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
@@ -364,7 +365,7 @@ package_config:
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"

View File

@@ -1,6 +1,6 @@
// umap v4.0.3
// umap v4.1.0
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1709,10 +1704,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{
"name" : "umap",
"namespace" : "dimred",
"version" : "v4.0.3",
"version" : "v4.1.0",
"authors" : [
{
"name" : "Dries De Maeyer",
@@ -3138,7 +3148,7 @@ meta = [
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
@@ -3382,7 +3392,7 @@ meta = [
"id" : "docker",
"image" : "python:3.12-slim",
"target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3",
"target_tag" : "v4.1.0",
"namespace_separator" : "/",
"setup" : [
{
@@ -3396,9 +3406,10 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"anndata~=0.12.7",
"anndata~=0.12.16",
"awkward",
"mudata~=0.3.2",
"scipy~=1.17.1",
"mudata~=0.3.8",
"scanpy~=1.11.4"
],
"script" : [
@@ -3412,7 +3423,7 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"viashpy==0.8.0"
"viashpy==0.10.0"
],
"upgrade" : true
}
@@ -3428,13 +3439,13 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/dimred/umap",
"viash_version" : "0.9.4",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
"viash_version" : "0.9.7",
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
},
"package_config" : {
"name" : "openpipeline",
"version" : "v4.0.3",
"version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : {
@@ -3452,14 +3463,14 @@ meta = [
}
]
},
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
],
"keywords" : [
"single-cell",
@@ -3971,7 +3982,7 @@ meta["defaults"] = [
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/dimred/umap",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
},
"label" : [
"highcpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'dimred/umap'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3'
version = 'v4.1.0'
description = 'UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to be faster than tSNE, it optimizes the embedding such that it best reflects the topology of the data, which we represent throughout Scanpy using a neighborhood graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of umap-learn [McInnes18]. For a few comparisons of UMAP with tSNE, see this preprint.\n'
author = 'Dries De Maeyer'
}

View File

@@ -50,7 +50,7 @@
},
"output_compression": {
"type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [
"gzip",

View File

@@ -1,6 +1,6 @@
name: "highly_variable_features_scanpy"
namespace: "feature_annotation"
version: "v4.0.3"
version: "v4.1.0"
authors:
- name: "Dries De Maeyer"
roles:
@@ -75,6 +75,20 @@ argument_groups:
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--features_to_exclude"
description: "User-defined list of feature names to exclude before HVG calculation.\
\ \nThese features will be excluded from HVG selection but will remain in the\
\ output data.\n"
info: null
example:
- "MT-CO1"
- "MT-CO2"
- "MT-ND1"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--output"
description: "Output h5mu file."
@@ -214,8 +228,8 @@ argument_groups:
multiple_sep: ";"
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
By default no compression is applied.\n"
description: "Compression format to use for the output AnnData and/or Mudata H5\
\ files.\nBy default no compression is applied.\n"
info: null
example:
- "gzip"
@@ -342,15 +356,16 @@ engines:
id: "docker"
image: "python:3.12"
target_registry: "images.viash-hub.com"
target_tag: "v4.0.3"
target_tag: "v4.1.0"
namespace_separator: "/"
setup:
- type: "python"
user: false
packages:
- "anndata~=0.12.7"
- "anndata~=0.12.16"
- "awkward"
- "mudata~=0.3.2"
- "scipy~=1.17.1"
- "mudata~=0.3.8"
- "scanpy~=1.11.4"
- "scikit-misc"
script:
@@ -366,7 +381,7 @@ engines:
- type: "python"
user: false
packages:
- "viashpy==0.8.0"
- "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true
@@ -380,12 +395,12 @@ build_info:
engine: "docker|native"
output: "target/nextflow/feature_annotation/highly_variable_features_scanpy"
executable: "target/nextflow/feature_annotation/highly_variable_features_scanpy/main.nf"
viash_version: "0.9.4"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config:
name: "openpipeline"
version: "v4.0.3"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -406,7 +421,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.4"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
@@ -415,7 +430,7 @@ package_config:
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"

View File

@@ -1,6 +1,6 @@
// highly_variable_features_scanpy v4.0.3
// highly_variable_features_scanpy v4.1.0
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1354,47 +1354,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1710,10 +1705,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3036,7 +3046,7 @@ meta = [
"config": processConfig(readJsonBlob('''{
"name" : "highly_variable_features_scanpy",
"namespace" : "feature_annotation",
"version" : "v4.0.3",
"version" : "v4.1.0",
"authors" : [
{
"name" : "Dries De Maeyer",
@@ -3136,6 +3146,20 @@ meta = [
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--features_to_exclude",
"description" : "User-defined list of feature names to exclude before HVG calculation. \nThese features will be excluded from HVG selection but will remain in the output data.\n",
"example" : [
"MT-CO1",
"MT-CO2",
"MT-ND1"
],
"required" : false,
"direction" : "input",
"multiple" : true,
"multiple_sep" : ";"
},
{
"type" : "file",
"name" : "--output",
@@ -3281,7 +3305,7 @@ meta = [
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
@@ -3430,16 +3454,17 @@ meta = [
"id" : "docker",
"image" : "python:3.12",
"target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3",
"target_tag" : "v4.1.0",
"namespace_separator" : "/",
"setup" : [
{
"type" : "python",
"user" : false,
"packages" : [
"anndata~=0.12.7",
"anndata~=0.12.16",
"awkward",
"mudata~=0.3.2",
"scipy~=1.17.1",
"mudata~=0.3.8",
"scanpy~=1.11.4",
"scikit-misc"
],
@@ -3461,7 +3486,7 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"viashpy==0.8.0"
"viashpy==0.10.0"
],
"github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3480,13 +3505,13 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/feature_annotation/highly_variable_features_scanpy",
"viash_version" : "0.9.4",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
"viash_version" : "0.9.7",
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
},
"package_config" : {
"name" : "openpipeline",
"version" : "v4.0.3",
"version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : {
@@ -3504,14 +3529,14 @@ meta = [
}
]
},
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
],
"keywords" : [
"single-cell",
@@ -3553,6 +3578,7 @@ par = {
'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'features_to_exclude': $( if [ ! -z ${VIASH_PAR_FEATURES_TO_EXCLUDE+x} ]; then echo "r'${VIASH_PAR_FEATURES_TO_EXCLUDE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ),
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'var_name_filter': $( if [ ! -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'varm_name': $( if [ ! -z ${VIASH_PAR_VARM_NAME+x} ]; then echo "r'${VIASH_PAR_VARM_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
@@ -3651,6 +3677,38 @@ if par["var_input"]:
input_anndata.var[par["var_input"]] = data.var[par["var_input"]]
input_anndata = subset_vars(input_anndata, par["var_input"])
# Exclude user-specified features from HVG calculation
excluded_features_mask = None
if par.get("features_to_exclude"):
features_to_exclude = set(par["features_to_exclude"])
logger.info(
"\\\\tExcluding %d specified features from HVG calculation",
len(features_to_exclude),
)
excluded_features_mask = input_anndata.var_names.isin(features_to_exclude)
n_excluded = excluded_features_mask.sum()
n_not_found = len(features_to_exclude) - n_excluded
if n_not_found > 0:
not_found = features_to_exclude - set(
input_anndata.var_names[excluded_features_mask]
)
logger.warning(
"\\\\t%d features to exclude were not found in the data: %s",
n_not_found,
list(not_found)[:10],
)
logger.info("\\\\tExcluding %d features from HVG calculation", n_excluded)
if n_excluded == input_anndata.n_vars:
raise ValueError(
f"All features ({n_excluded}) are in the exclusion list. "
"Please check your --features_to_exclude list."
)
# Store original var_names for later reindexing
original_var_names = input_anndata.var_names.copy()
# Subset to non-excluded features for HVG calculation using subset_vars
input_anndata = subset_vars(input_anndata, ~excluded_features_mask)
logger.info("\\\\t%d features remaining for HVG calculation", input_anndata.n_vars)
logger.info("\\\\tUnfiltered data: %s", data)
logger.info("\\\\tComputing hvg")
@@ -3695,6 +3753,17 @@ try:
assert (out.index == data.var.index).all(), (
"Expected output index values to be equivalent to the input index"
)
elif par.get("features_to_exclude") is not None:
# Reindex to include excluded features, marking them as non-HVG
out.index = input_anndata.var.index
out = out.reindex(index=original_var_names, method=None)
out.highly_variable = out.highly_variable.fillna(False)
# Further reindex to match data.var.index (for consistency with var_input path)
out = out.reindex(index=data.var.index, method=None)
out.highly_variable = out.highly_variable.fillna(False)
assert (out.index == data.var.index).all(), (
"Expected output index values to be equivalent to the input index"
)
elif par["obs_batch_key"] is not None:
out = out.reindex(index=data.var.index, method=None)
assert (out.index == data.var.index).all(), (
@@ -4110,7 +4179,7 @@ meta["defaults"] = [
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/feature_annotation/highly_variable_features_scanpy",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
},
"label" : [
"singlecpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'feature_annotation/highly_variable_features_scanpy'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3'
version = 'v4.1.0'
description = 'Annotate highly variable features [Satija15] [Zheng17] [Stuart19].\n\nExpects logarithmized data, except when flavor=\'seurat_v3\' in which count data is expected.\n\nDepending on flavor, this reproduces the R-implementations of Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for features falling into a given bin for mean expression of features. This means that for each bin of mean expression, highly variable features are selected.\n\nFor [Stuart19], a normalized variance for each feature is computed. First, the data are standardized (i.e., z-score normalization per feature) with a regularized standard deviation. Next, the normalized variance is computed as the variance of each feature after the transformation. Features are ranked by the normalized variance.\n'
author = 'Dries De Maeyer, Robrecht Cannoodt'
}

View File

@@ -32,6 +32,14 @@
"description": "If specified, use boolean array in adata.var[var_input] to calculate hvg on subset of vars.\n",
"help_text": "Type: `string`, multiple: `False`. "
},
"features_to_exclude": {
"type": "array",
"items": {
"type": "string"
},
"description": "User-defined list of feature names to exclude before HVG calculation",
"help_text": "Type: `string`, multiple: `True`, example: `[\"MT-CO1\";\"MT-CO2\";\"MT-ND1\"]`. "
},
"output": {
"type": "string",
"format": "path",
@@ -109,7 +117,7 @@
},
"output_compression": {
"type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [
"gzip",

View File

@@ -0,0 +1,49 @@
def subset_vars(adata, subset_col):
    """
    Subset AnnData object on highly variable genes or a boolean mask.

    Parameters
    ----------
    adata : AnnData
        Annotated data object
    subset_col : str, pd.Series, pd.Index, np.ndarray, or list
        Name of the boolean column in `adata.var` that contains the information if features should be used or not,
        or a boolean mask (same length as adata.var)

    Returns
    -------
    AnnData
        Copy of `adata` with subsetted features

    Raises
    ------
    ValueError
        If the requested column is not present in `adata.var`, or if the mask
        is not boolean, contains missing values, or does not have exactly one
        entry per variable.
    TypeError
        If `subset_col` is not one of the supported input types.
    """
    import pandas as pd
    import numpy as np

    # Convert all input types to a pandas Series
    if isinstance(subset_col, str):
        if subset_col not in adata.var.columns:
            raise ValueError(
                f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
            )
        mask = adata.var[subset_col]
    elif isinstance(subset_col, pd.Series):
        mask = subset_col
    elif isinstance(subset_col, (pd.Index, np.ndarray, list)):
        # Check the length *before* attaching the .var index: building a Series
        # with an explicit index fails on a length mismatch with a generic
        # pandas message, so the dedicated check further down was never
        # reached for these input types.
        # Zero-dimensional arrays have no len(); those are left to pandas.
        if getattr(subset_col, "ndim", 1) > 0 and len(subset_col) != adata.n_vars:
            raise ValueError(
                f"Mask length {len(subset_col)} does not match number of variables {adata.n_vars}."
            )
        mask = pd.Series(subset_col, index=adata.var.index)
    else:
        raise TypeError(
            "subset_col must be a string (column name) or a boolean mask (Series, Index, ndarray, or list)."
        )

    # Validate mask
    if not pd.api.types.is_bool_dtype(mask):
        raise ValueError(
            f"Expected mask to be boolean, but found {mask.dtype}. Can not subset data."
        )
    if mask.isna().sum() > 0:
        raise ValueError("Mask contains NaN values. Can not subset data.")
    # Still needed for Series input, which is used as-is above.
    if len(mask) != adata.n_vars:
        raise ValueError(
            f"Mask length {len(mask)} does not match number of variables {adata.n_vars}."
        )

    return adata[:, mask].copy()

View File

@@ -1,6 +1,6 @@
name: "delimit_fraction"
namespace: "filter"
version: "v4.0.3"
version: "v4.1.0"
authors:
- name: "Dries Schaumont"
roles:
@@ -78,8 +78,8 @@ argument_groups:
multiple_sep: ";"
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
By default no compression is applied.\n"
description: "Compression format to use for the output AnnData and/or Mudata H5\
\ files.\nBy default no compression is applied.\n"
info: null
example:
- "gzip"
@@ -219,7 +219,7 @@ engines:
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "v4.0.3"
target_tag: "v4.1.0"
namespace_separator: "/"
setup:
- type: "apt"
@@ -229,9 +229,10 @@ engines:
- type: "python"
user: false
packages:
- "anndata~=0.12.7"
- "anndata~=0.12.16"
- "awkward"
- "mudata~=0.3.2"
- "scipy~=1.17.1"
- "mudata~=0.3.8"
script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -245,7 +246,7 @@ engines:
- type: "python"
user: false
packages:
- "viashpy==0.8.0"
- "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true
@@ -259,12 +260,12 @@ build_info:
engine: "docker|native"
output: "target/nextflow/filter/delimit_fraction"
executable: "target/nextflow/filter/delimit_fraction/main.nf"
viash_version: "0.9.4"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config:
name: "openpipeline"
version: "v4.0.3"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -285,7 +286,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.4"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
@@ -294,7 +295,7 @@ package_config:
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"

View File

@@ -1,6 +1,6 @@
// delimit_fraction v4.0.3
// delimit_fraction v4.1.0
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1709,10 +1704,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{
"name" : "delimit_fraction",
"namespace" : "filter",
"version" : "v4.0.3",
"version" : "v4.1.0",
"authors" : [
{
"name" : "Dries Schaumont",
@@ -3134,7 +3144,7 @@ meta = [
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
@@ -3312,7 +3322,7 @@ meta = [
"id" : "docker",
"image" : "python:3.12-slim",
"target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3",
"target_tag" : "v4.1.0",
"namespace_separator" : "/",
"setup" : [
{
@@ -3326,9 +3336,10 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"anndata~=0.12.7",
"anndata~=0.12.16",
"awkward",
"mudata~=0.3.2"
"scipy~=1.17.1",
"mudata~=0.3.8"
],
"script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3348,7 +3359,7 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"viashpy==0.8.0"
"viashpy==0.10.0"
],
"github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3367,13 +3378,13 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/filter/delimit_fraction",
"viash_version" : "0.9.4",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
"viash_version" : "0.9.7",
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
},
"package_config" : {
"name" : "openpipeline",
"version" : "v4.0.3",
"version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : {
@@ -3391,14 +3402,14 @@ meta = [
}
]
},
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
],
"keywords" : [
"single-cell",
@@ -3928,7 +3939,7 @@ meta["defaults"] = [
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/filter/delimit_fraction",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
},
"label" : [
"singlecpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'filter/delimit_fraction'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3'
version = 'v4.1.0'
description = 'Turns a column containing values between 0 and 1 into a boolean column based on thresholds.\n'
author = 'Dries Schaumont'
}

View File

@@ -48,7 +48,7 @@
},
"output_compression": {
"type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [
"gzip",

View File

@@ -1,6 +1,6 @@
name: "do_filter"
namespace: "filter"
version: "v4.0.3"
version: "v4.1.0"
authors:
- name: "Robrecht Cannoodt"
roles:
@@ -79,8 +79,8 @@ argument_groups:
multiple_sep: ";"
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
By default no compression is applied.\n"
description: "Compression format to use for the output AnnData and/or Mudata H5\
\ files.\nBy default no compression is applied.\n"
info: null
example:
- "gzip"
@@ -193,7 +193,7 @@ engines:
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "v4.0.3"
target_tag: "v4.1.0"
namespace_separator: "/"
setup:
- type: "apt"
@@ -203,9 +203,10 @@ engines:
- type: "python"
user: false
packages:
- "anndata~=0.12.7"
- "anndata~=0.12.16"
- "awkward"
- "mudata~=0.3.2"
- "scipy~=1.17.1"
- "mudata~=0.3.8"
script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -215,7 +216,7 @@ engines:
- type: "python"
user: false
packages:
- "viashpy==0.8.0"
- "viashpy==0.10.0"
upgrade: true
entrypoint: []
cmd: null
@@ -227,12 +228,12 @@ build_info:
engine: "docker|native"
output: "target/nextflow/filter/do_filter"
executable: "target/nextflow/filter/do_filter/main.nf"
viash_version: "0.9.4"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config:
name: "openpipeline"
version: "v4.0.3"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -253,7 +254,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.4"
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
@@ -262,7 +263,7 @@ package_config:
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"

View File

@@ -1,6 +1,6 @@
// do_filter v4.0.3
// do_filter v4.1.0
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive.
//
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile)
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1
def header = null
while (br.ready() && header == null) {
def line = br.readLine()
row++
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
while (header == null && (line = br.readLine()) != null) {
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect { field ->
def m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
output.add(dataMap)
row++
}
assert header != null : "CSV file should contain a header"
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
}
}
@@ -1709,10 +1704,25 @@ process publishFilesProc {
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
]
} else {
// no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{
"name" : "do_filter",
"namespace" : "filter",
"version" : "v4.0.3",
"version" : "v4.1.0",
"authors" : [
{
"name" : "Robrecht Cannoodt",
@@ -3137,7 +3147,7 @@ meta = [
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
@@ -3282,7 +3292,7 @@ meta = [
"id" : "docker",
"image" : "python:3.12-slim",
"target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3",
"target_tag" : "v4.1.0",
"namespace_separator" : "/",
"setup" : [
{
@@ -3296,9 +3306,10 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"anndata~=0.12.7",
"anndata~=0.12.16",
"awkward",
"mudata~=0.3.2"
"scipy~=1.17.1",
"mudata~=0.3.8"
],
"script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3311,7 +3322,7 @@ meta = [
"type" : "python",
"user" : false,
"packages" : [
"viashpy==0.8.0"
"viashpy==0.10.0"
],
"upgrade" : true
}
@@ -3327,13 +3338,13 @@ meta = [
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/filter/do_filter",
"viash_version" : "0.9.4",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
"viash_version" : "0.9.7",
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
},
"package_config" : {
"name" : "openpipeline",
"version" : "v4.0.3",
"version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : {
@@ -3351,14 +3362,14 @@ meta = [
}
]
},
"viash_version" : "0.9.4",
"viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target",
"config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
],
"keywords" : [
"single-cell",
@@ -3851,7 +3862,7 @@ meta["defaults"] = [
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/filter/do_filter",
"tag" : "v4.0.3"
"tag" : "v4.1.0"
},
"label" : [
"singlecpu",

Some files were not shown because too many files have changed in this diff Show More