Build branch openpipeline_spatial/main with version main to openpipeline_spatial on branch main (4449aa6)

Build pipeline: openpipelines-bio.openpipeline-spatial.main-rbsrs

Source commit: 4449aa6094

Source message: Merge pull request #56 from openpipelines-bio/op_v4_1_0

Update OpenPipeline dependency to v4.1.0
This commit is contained in:
CI
2026-06-01 14:56:23 +00:00
parent 55eb76cde5
commit 4ba32ce343
366 changed files with 15045 additions and 4149 deletions

View File

@@ -8,10 +8,24 @@
* `convert/from_h5mu_to_seurat_with_fov`: Added converter component for H5MU data to Seurat objects with spatial FOV (PR #51). * `convert/from_h5mu_to_seurat_with_fov`: Added converter component for H5MU data to Seurat objects with spatial FOV (PR #51).
## MAJOR CHANGES
* Pin OpenPipeline dependency to v4.1.0 (PR #56).
## MINOR CHANGES ## MINOR CHANGES
* Pin ome-zarr to 0.13.0 to avoid a chunk shape incompatibility with zarr 3.x (PR #48). * Pin ome-zarr to 0.13.0 to avoid a chunk shape incompatibility with zarr 3.x (PR #48).
* Bump Viash to 0.9.7 (PR #57).
* Bump anndata to 0.12.16 (PR #57).
* Bump mudata to 0.3.8 (PR #57).
* Bump scanpy to 1.11.4 (PR #57).
* Testing: bump viashpy to 0.10.0 (PR #57).
## BUG FIXES ## BUG FIXES
* `convert/from_h5mu_to_spatialdata`: Make sure the AnnData table is properly parsed before inserting into the new SpatialData object (PR #53). * `convert/from_h5mu_to_spatialdata`: Make sure the AnnData table is properly parsed before inserting into the new SpatialData object (PR #53).

View File

@@ -1,4 +1,4 @@
viash_version: 0.9.4 viash_version: 0.9.7
source: src source: src
target: target target: target
name: openpipeline_spatial name: openpipeline_spatial
@@ -10,7 +10,7 @@ repositories:
- name: openpipeline - name: openpipeline
repo: openpipeline repo: openpipeline
type: vsh type: vsh
tag: v4.0.3 tag: v4.1.0
info: info:
test_resources: test_resources:
- type: s3 - type: s3

View File

@@ -1,3 +1,4 @@
packages: packages:
- anndata~=0.12.7 - anndata~=0.12.16
- awkward - awkward
- scipy~=1.17.1 # Exclude scipy 1.17.0 because https://github.com/scverse/anndata/issues/339

View File

@@ -1,5 +1,5 @@
__merge__: [/src/base/requirements/anndata.yaml, .] __merge__: [/src/base/requirements/anndata.yaml, .]
packages: packages:
- mudata~=0.3.2 - mudata~=0.3.8
script: | script: |
exec("try:\n import zarr; from importlib.metadata import version\nexcept ModuleNotFoundError:\n exit(0)\nelse: assert int(version(\"zarr\").partition(\".\")[0]) > 2") exec("try:\n import zarr; from importlib.metadata import version\nexcept ModuleNotFoundError:\n exit(0)\nelse: assert int(version(\"zarr\").partition(\".\")[0]) > 2")

View File

@@ -1,2 +1,2 @@
packages: packages:
- scanpy~=1.10.4 - scanpy~=1.11.4

View File

@@ -1,2 +1,2 @@
packages: packages:
- viashpy==0.9.0 - viashpy==0.10.0

View File

@@ -144,6 +144,15 @@ argument_groups:
type: integer type: integer
description: Minimum of non-zero values per protein. description: Minimum of non-zero values per protein.
- name: "Cross-modality filtering"
arguments:
- name: "--intersect_obs"
type: boolean_true
description: |
After per-modality filtering and multisample processing, remove observations
that are not present in all processed modalities so that each modality shares
the same set of cells.
- name: "Highly variable features detection" - name: "Highly variable features detection"
arguments: arguments:
- name: "--highly_variable_features_var_output" - name: "--highly_variable_features_var_output"

View File

@@ -34,6 +34,7 @@ workflow run_wf {
"prot_min_proteins_per_cell": state.prot_min_proteins_per_cell, "prot_min_proteins_per_cell": state.prot_min_proteins_per_cell,
"prot_max_proteins_per_cell": state.prot_max_proteins_per_cell, "prot_max_proteins_per_cell": state.prot_max_proteins_per_cell,
"prot_min_cells_per_protein": state.prot_min_cells_per_protein, "prot_min_cells_per_protein": state.prot_min_cells_per_protein,
"intersect_obs": state.intersect_obs,
"highly_variable_features_var_output": state.highly_variable_features_var_output, "highly_variable_features_var_output": state.highly_variable_features_var_output,
"highly_variable_features_obs_batch_key": state.highly_variable_features_obs_batch_key, "highly_variable_features_obs_batch_key": state.highly_variable_features_obs_batch_key,
"var_gene_names": state.var_gene_names, "var_gene_names": state.var_gene_names,

View File

@@ -112,7 +112,7 @@ repositories:
- type: "vsh" - type: "vsh"
name: "openpipeline" name: "openpipeline"
repo: "openpipeline" repo: "openpipeline"
tag: "v4.0.3" tag: "v4.1.0"
links: links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial" repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io" docker_registry: "ghcr.io"
@@ -201,7 +201,7 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "scanpy~=1.10.4" - "scanpy~=1.11.4"
- "squidpy~=1.8.1" - "squidpy~=1.8.1"
upgrade: true upgrade: true
test_setup: test_setup:
@@ -212,7 +212,7 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.9.0" - "viashpy==0.10.0"
github: github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true upgrade: true
@@ -226,8 +226,8 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/_private/executable/filter/subset_cosmx" output: "target/_private/executable/filter/subset_cosmx"
executable: "target/_private/executable/filter/subset_cosmx/subset_cosmx" executable: "target/_private/executable/filter/subset_cosmx/subset_cosmx"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "e087099616271bd0f11e825b7817ec14ebef62d7" git_commit: "4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config: package_config:
name: "openpipeline_spatial" name: "openpipeline_spatial"
@@ -241,8 +241,8 @@ package_config:
- type: "vsh" - type: "vsh"
name: "openpipeline" name: "openpipeline"
repo: "openpipeline" repo: "openpipeline"
tag: "v4.0.3" tag: "v4.1.0"
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:

View File

@@ -2,7 +2,7 @@
# subset_cosmx main # subset_cosmx main
# #
# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive. # Intuitive.
# #
@@ -454,13 +454,13 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip && \ RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "scanpy~=1.10.4" "squidpy~=1.8.1" pip install --upgrade --no-cache-dir "scanpy~=1.11.4" "squidpy~=1.8.1"
LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz" LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz"
LABEL org.opencontainers.image.description="Companion container for running component filter subset_cosmx" LABEL org.opencontainers.image.description="Companion container for running component filter subset_cosmx"
LABEL org.opencontainers.image.created="2026-05-22T11:53:11Z" LABEL org.opencontainers.image.created="2026-06-01T12:50:40Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="e087099616271bd0f11e825b7817ec14ebef62d7" LABEL org.opencontainers.image.revision="4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
LABEL org.opencontainers.image.version="main" LABEL org.opencontainers.image.version="main"
VIASHDOCKER VIASHDOCKER

View File

@@ -112,7 +112,7 @@ repositories:
- type: "vsh" - type: "vsh"
name: "openpipeline" name: "openpipeline"
repo: "openpipeline" repo: "openpipeline"
tag: "v4.0.3" tag: "v4.1.0"
links: links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial" repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io" docker_registry: "ghcr.io"
@@ -201,7 +201,7 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "scanpy~=1.10.4" - "scanpy~=1.11.4"
- "squidpy~=1.8.1" - "squidpy~=1.8.1"
upgrade: true upgrade: true
test_setup: test_setup:
@@ -212,7 +212,7 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.9.0" - "viashpy==0.10.0"
github: github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true upgrade: true
@@ -226,8 +226,8 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/_private/nextflow/filter/subset_cosmx" output: "target/_private/nextflow/filter/subset_cosmx"
executable: "target/_private/nextflow/filter/subset_cosmx/main.nf" executable: "target/_private/nextflow/filter/subset_cosmx/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "e087099616271bd0f11e825b7817ec14ebef62d7" git_commit: "4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config: package_config:
name: "openpipeline_spatial" name: "openpipeline_spatial"
@@ -241,8 +241,8 @@ package_config:
- type: "vsh" - type: "vsh"
name: "openpipeline" name: "openpipeline"
repo: "openpipeline" repo: "openpipeline"
tag: "v4.0.3" tag: "v4.1.0"
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:

View File

@@ -1,6 +1,6 @@
// subset_cosmx main // subset_cosmx main
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1354,47 +1354,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1710,10 +1705,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3187,7 +3197,7 @@ meta = [
"type" : "vsh", "type" : "vsh",
"name" : "openpipeline", "name" : "openpipeline",
"repo" : "openpipeline", "repo" : "openpipeline",
"tag" : "v4.0.3" "tag" : "v4.1.0"
} }
], ],
"links" : { "links" : {
@@ -3295,7 +3305,7 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"scanpy~=1.10.4", "scanpy~=1.11.4",
"squidpy~=1.8.1" "squidpy~=1.8.1"
], ],
"upgrade" : true "upgrade" : true
@@ -3313,7 +3323,7 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"viashpy==0.9.0" "viashpy==0.10.0"
], ],
"github" : [ "github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3332,8 +3342,8 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "docker|native", "engine" : "docker|native",
"output" : "/workdir/root/repo/target/_private/nextflow/filter/subset_cosmx", "output" : "/workdir/root/repo/target/_private/nextflow/filter/subset_cosmx",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "e087099616271bd0f11e825b7817ec14ebef62d7", "git_commit" : "4449aa6094a686af0a5fbe99b9aaafd01ae75aff",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
}, },
"package_config" : { "package_config" : {
@@ -3353,10 +3363,10 @@ meta = [
"type" : "vsh", "type" : "vsh",
"name" : "openpipeline", "name" : "openpipeline",
"repo" : "openpipeline", "repo" : "openpipeline",
"tag" : "v4.0.3" "tag" : "v4.1.0"
} }
], ],
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [

View File

@@ -48,7 +48,7 @@ repositories:
- type: "vsh" - type: "vsh"
name: "openpipeline" name: "openpipeline"
repo: "openpipeline" repo: "openpipeline"
tag: "v4.0.3" tag: "v4.1.0"
links: links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial" repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io" docker_registry: "ghcr.io"
@@ -135,10 +135,11 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "anndata~=0.12.7" - "anndata~=0.12.16"
- "awkward" - "awkward"
- "mudata~=0.3.2" - "scipy~=1.17.1"
- "viashpy==0.9.0" - "mudata~=0.3.8"
- "viashpy==0.10.0"
github: github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
script: script:
@@ -156,8 +157,8 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/_test/executable/test_workflows/ingestion/spaceranger_mapping_test" output: "target/_test/executable/test_workflows/ingestion/spaceranger_mapping_test"
executable: "target/_test/executable/test_workflows/ingestion/spaceranger_mapping_test/spaceranger_mapping_test" executable: "target/_test/executable/test_workflows/ingestion/spaceranger_mapping_test/spaceranger_mapping_test"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "e087099616271bd0f11e825b7817ec14ebef62d7" git_commit: "4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config: package_config:
name: "openpipeline_spatial" name: "openpipeline_spatial"
@@ -171,8 +172,8 @@ package_config:
- type: "vsh" - type: "vsh"
name: "openpipeline" name: "openpipeline"
repo: "openpipeline" repo: "openpipeline"
tag: "v4.0.3" tag: "v4.1.0"
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:

View File

@@ -2,7 +2,7 @@
# spaceranger_mapping_test main # spaceranger_mapping_test main
# #
# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data # work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive. # Intuitive.
# #
@@ -453,15 +453,15 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip && \ RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "anndata~=0.12.7" "awkward" "mudata~=0.3.2" "viashpy==0.9.0" && \ pip install --upgrade --no-cache-dir "anndata~=0.12.16" "awkward" "scipy~=1.17.1" "mudata~=0.3.8" "viashpy==0.10.0" && \
pip install --upgrade --no-cache-dir "git+https://github.com/openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" && \ pip install --upgrade --no-cache-dir "git+https://github.com/openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" && \
python -c 'exec("try:\n import zarr; from importlib.metadata import version\nexcept ModuleNotFoundError:\n exit(0)\nelse: assert int(version(\"zarr\").partition(\".\")[0]) > 2")' python -c 'exec("try:\n import zarr; from importlib.metadata import version\nexcept ModuleNotFoundError:\n exit(0)\nelse: assert int(version(\"zarr\").partition(\".\")[0]) > 2")'
LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component test_workflows/ingestion spaceranger_mapping_test" LABEL org.opencontainers.image.description="Companion container for running component test_workflows/ingestion spaceranger_mapping_test"
LABEL org.opencontainers.image.created="2026-05-22T11:53:09Z" LABEL org.opencontainers.image.created="2026-06-01T12:50:39Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="e087099616271bd0f11e825b7817ec14ebef62d7" LABEL org.opencontainers.image.revision="4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
LABEL org.opencontainers.image.version="main" LABEL org.opencontainers.image.version="main"
VIASHDOCKER VIASHDOCKER

View File

@@ -48,7 +48,7 @@ repositories:
- type: "vsh" - type: "vsh"
name: "openpipeline" name: "openpipeline"
repo: "openpipeline" repo: "openpipeline"
tag: "v4.0.3" tag: "v4.1.0"
links: links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial" repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io" docker_registry: "ghcr.io"
@@ -135,10 +135,11 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "anndata~=0.12.7" - "anndata~=0.12.16"
- "awkward" - "awkward"
- "mudata~=0.3.2" - "scipy~=1.17.1"
- "viashpy==0.9.0" - "mudata~=0.3.8"
- "viashpy==0.10.0"
github: github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
script: script:
@@ -156,8 +157,8 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test" output: "target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test"
executable: "target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test/main.nf" executable: "target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "e087099616271bd0f11e825b7817ec14ebef62d7" git_commit: "4449aa6094a686af0a5fbe99b9aaafd01ae75aff"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config: package_config:
name: "openpipeline_spatial" name: "openpipeline_spatial"
@@ -171,8 +172,8 @@ package_config:
- type: "vsh" - type: "vsh"
name: "openpipeline" name: "openpipeline"
repo: "openpipeline" repo: "openpipeline"
tag: "v4.0.3" tag: "v4.1.0"
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:

View File

@@ -1,6 +1,6 @@
// spaceranger_mapping_test main // spaceranger_mapping_test main
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1709,10 +1704,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3104,7 +3114,7 @@ meta = [
"type" : "vsh", "type" : "vsh",
"name" : "openpipeline", "name" : "openpipeline",
"repo" : "openpipeline", "repo" : "openpipeline",
"tag" : "v4.0.3" "tag" : "v4.1.0"
} }
], ],
"links" : { "links" : {
@@ -3209,10 +3219,11 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"anndata~=0.12.7", "anndata~=0.12.16",
"awkward", "awkward",
"mudata~=0.3.2", "scipy~=1.17.1",
"viashpy==0.9.0" "mudata~=0.3.8",
"viashpy==0.10.0"
], ],
"github" : [ "github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3234,8 +3245,8 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "docker|native", "engine" : "docker|native",
"output" : "/workdir/root/repo/target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test", "output" : "/workdir/root/repo/target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "e087099616271bd0f11e825b7817ec14ebef62d7", "git_commit" : "4449aa6094a686af0a5fbe99b9aaafd01ae75aff",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
}, },
"package_config" : { "package_config" : {
@@ -3255,10 +3266,10 @@ meta = [
"type" : "vsh", "type" : "vsh",
"name" : "openpipeline", "name" : "openpipeline",
"repo" : "openpipeline", "repo" : "openpipeline",
"tag" : "v4.0.3" "tag" : "v4.1.0"
} }
], ],
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [

View File

@@ -1,31 +0,0 @@
def subset_vars(adata, subset_col):
    """Subset AnnData object on highly variable genes

    The selection column is read from ``adata.var``; the input object itself
    is left untouched (its ``.var`` column keeps its original dtype).

    Parameters
    ----------
    adata : AnnData
        Annotated data object
    subset_col : str
        Name of the boolean column in `adata.var` that contains the information if features should be used or not

    Returns
    -------
    AnnData
        Copy of `adata` with subsetted features. When the selection column
        used the nullable ``boolean`` dtype, it is stored as plain ``bool``
        in the returned copy.

    Raises
    ------
    ValueError
        If `subset_col` is not a column of `adata.var`.
    AssertionError
        If the column contains missing values, or if its dtype is neither
        ``bool`` nor nullable ``boolean``.
    """
    if subset_col not in adata.var.columns:
        raise ValueError(
            f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
        )

    # Work on a local mask instead of writing the converted column back into
    # the caller's `adata.var`, so that subsetting has no side effects.
    selection = adata.var[subset_col]
    was_nullable_boolean = selection.dtype == "boolean"
    if was_nullable_boolean:
        assert selection.isna().sum() == 0, (
            f"The .var column `{subset_col}` contains NaN values. Can not subset data."
        )
        selection = selection.astype("bool")

    assert selection.dtype == "bool", (
        f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {selection.dtype}. Can not subset data."
    )

    subset = adata[:, selection].copy()
    if was_nullable_boolean:
        # Keep the returned object's column as plain `bool`; only the
        # freshly created copy is modified here.
        subset.var[subset_col] = subset.var[subset_col].astype("bool")
    return subset

View File

@@ -0,0 +1,567 @@
name: "process_singlesample_base"
namespace: "workflows/multiomics"
version: "v4.1.0"
authors:
- name: "Dorien Roosen"
roles:
- "author"
info:
role: "Core Team Member"
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Output State"
arguments:
- type: "string"
name: "--output_modality"
info: null
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Path to the sample."
info: null
example:
- "input.h5mu"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--rna_layer"
description: "Input layer for the gene expression modality. If not specified,\
\ .X is used."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--prot_layer"
description: "Input layer for the antibody capture modality. If not specified,\
\ .X is used."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--gdo_layer"
description: "Input layer for the guide-derived oligonucleotide (GDO) data. If\
\ not specified, .X is used."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "Destination path to the output."
info: null
example:
- "output.h5mu"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- name: "Sample ID options"
description: "Options for adding the id to .obs on the MuData object. Having a sample\
\ \nid present is a requirement of several components for this pipeline.\n"
arguments:
- type: "boolean"
name: "--add_id_to_obs"
description: "Add the value passed with --id to .obs."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--add_id_obs_output"
description: ".Obs column to add the sample IDs to. Required and only used when\
\ \n--add_id_to_obs is set to 'true'\n"
info: null
default:
- "sample_id"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--add_id_make_observation_keys_unique"
description: "Join the id to the .obs index (.obs_names). \nOnly used when --add_id_to_obs\
\ is set to 'true'.\n"
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "RNA filtering options"
arguments:
- type: "integer"
name: "--rna_min_counts"
description: "Minimum number of counts captured per cell."
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--rna_max_counts"
description: "Maximum number of counts captured per cell."
info: null
example:
- 5000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--rna_min_genes_per_cell"
description: "Minimum of non-zero values per cell."
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--rna_max_genes_per_cell"
description: "Maximum of non-zero values per cell."
info: null
example:
- 1500000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--rna_min_cells_per_gene"
description: "Minimum of non-zero values per gene."
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--rna_min_fraction_mito"
description: "Minimum fraction of UMIs that are mitochondrial."
info: null
example:
- 0.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--rna_max_fraction_mito"
description: "Maximum fraction of UMIs that are mitochondrial."
info: null
example:
- 0.2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--rna_min_fraction_ribo"
description: "Minimum fraction of UMIs that are ribosomal."
info: null
example:
- 0.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--rna_max_fraction_ribo"
description: "Maximum fraction of UMIs that are ribosomal."
info: null
example:
- 0.2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--skip_scrublet_doublet_detection"
description: "Skip the scrublet doublet detection step."
info: null
direction: "input"
- name: "CITE-seq filtering options"
arguments:
- type: "integer"
name: "--prot_min_counts"
description: "Minimum number of counts per cell."
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--prot_max_counts"
description: "Maximum number of counts per cell."
info: null
example:
- 5000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--prot_min_proteins_per_cell"
description: "Minimum of non-zero values per cell."
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--prot_max_proteins_per_cell"
description: "Maximum of non-zero values per cell."
info: null
example:
- 100000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--prot_min_cells_per_protein"
description: "Minimum of non-zero values per protein."
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "GDO filtering options"
arguments:
- type: "integer"
name: "--gdo_min_counts"
description: "Minimum number of counts per cell."
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gdo_max_counts"
description: "Maximum number of counts per cell."
info: null
example:
- 5000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gdo_min_guides_per_cell"
description: "Minimum of non-zero values per cell."
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gdo_max_guides_per_cell"
description: "Maximum of non-zero values per cell."
info: null
example:
- 100000000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gdo_min_cells_per_guide"
description: "Minimum of non-zero values per guide."
info: null
example:
- 3
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Cross-modality filtering"
arguments:
- type: "boolean_true"
name: "--intersect_obs"
description: "After per-modality filtering, remove observations that are not present\n\
in all processed modalities so that each modality shares the same set of cells.\n"
info: null
direction: "input"
- name: "Mitochondrial & Ribosomal Gene Detection"
arguments:
- type: "string"
name: "--var_gene_names"
description: ".var column name to be used to detect mitochondrial/ribosomal genes\
\ instead of .var_names (default if not set).\nGene names matching with the\
\ regex value from --mitochondrial_gene_regex or --ribosomal_gene_regex will\
\ be \nidentified as mitochondrial or ribosomal genes, respectively. \n"
info: null
example:
- "gene_symbol"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--var_name_mitochondrial_genes"
description: "In which .var slot to store a boolean array corresponding to the mitochondrial\
\ genes.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--obs_name_mitochondrial_fraction"
description: "When specified, write the fraction of counts originating from mitochondrial\
\ genes \n(based on --mitochondrial_gene_regex) to an .obs column with the specified\
\ name.\nRequires --var_name_mitochondrial_genes.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--mitochondrial_gene_regex"
description: "Regex string that identifies mitochondrial genes from --var_gene_names.\n\
By default will detect human and mouse mitochondrial genes from a gene symbol.\n"
info: null
default:
- "^[mM][tT]-"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--var_name_ribosomal_genes"
description: "In which .var slot to store a boolean array corresponding to the ribosomal\
\ genes.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--obs_name_ribosomal_fraction"
description: "When specified, write the fraction of counts originating from ribosomal\
\ genes \n(based on --ribosomal_gene_regex) to an .obs column with the specified\
\ name.\nRequires --var_name_ribosomal_genes.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--ribosomal_gene_regex"
description: "Regex string that identifies ribosomal genes from --var_gene_names.\n\
By default will detect human and mouse ribosomal genes from a gene symbol.\n"
info: null
default:
- "^[Mm]?[Rr][Pp][LlSs]"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
- type: "file"
path: "utils"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "A pipeline to analyse a single multiomics sample."
info: null
status: "enabled"
scope:
image: "private"
target: "private"
dependencies:
- name: "metadata/add_id"
repository:
type: "local"
- name: "workflows/multiomics/split_modalities"
alias: "split_modalities_workflow"
repository:
type: "local"
- name: "workflows/rna/rna_singlesample"
repository:
type: "local"
- name: "workflows/prot/prot_singlesample"
repository:
type: "local"
- name: "workflows/gdo/gdo_singlesample"
repository:
type: "local"
license: "MIT"
links:
repository: "https://github.com/openpipelines-bio/openpipeline"
docker_registry: "ghcr.io"
runners:
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
build_info:
config: "src/workflows/multiomics/process_singlesample_base/config.vsh.yaml"
runner: "nextflow"
engine: "native"
output: "target/_private/nextflow/workflows/multiomics/process_singlesample_base"
executable: "target/_private/nextflow/workflows/multiomics/process_singlesample_base/main.nf"
viash_version: "0.9.7"
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline"
dependencies:
- "target/nextflow/metadata/add_id"
- "target/_private/nextflow/workflows/multiomics/split_modalities"
- "target/nextflow/workflows/rna/rna_singlesample"
- "target/nextflow/workflows/prot/prot_singlesample"
- "target/nextflow/workflows/gdo/gdo_singlesample"
package_config:
name: "openpipeline"
version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
\nIn terms of workflows, the following has been made available, but keep in mind\
\ that\nindividual tools and functionality can be executed as standalone components\
\ as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
\ * Ingestion: Read mapping and generating a count matrix.\n * Single sample\
\ processing: cell filtering and doublet detection.\n * Multisample processing:\
\ Count transformation, normalization, QC metric calculations.\n * Integration:\
\ Clustering, integration and batch correction using single and multimodal methods.\n\
\ * Downstream analysis workflows\n"
info:
test_resources:
- type: "s3"
path: "s3://openpipelines-data"
dest: "resources_test"
nextflow_labels_ci:
- path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI."
viash_version: "0.9.7"
source: "src"
target: "target"
config_mods:
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords:
- "single-cell"
- "multimodal"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/openpipelines-bio/openpipeline"
docker_registry: "ghcr.io"
homepage: "https://openpipelines.bio"
documentation: "https://openpipelines.bio/fundamentals"
issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"

View File

@@ -0,0 +1,126 @@
// Pipeline metadata for the workflows/multiomics/process_singlesample_base workflow.
manifest {
name = 'workflows/multiomics/process_singlesample_base'
// Script that is executed when this pipeline is launched.
mainScript = 'main.nf'
// Minimum required Nextflow version.
// NOTE(review): per the Nextflow docs the leading '!' makes the requirement
// strict (execution stops on a mismatch instead of only warning) — confirm.
nextflowVersion = '!>=20.12.1-edge'
version = 'v4.1.0'
description = 'A pipeline to analyse a single multiomics sample.'
author = 'Dorien Roosen'
}
// Container image assigned to processes through the `process.container` setting.
process.container = 'nextflow/bash:latest'
// detect tempdir
// The first of NXF_TEMP, VIASH_TEMP, TEMPDIR and TMPDIR that is set (and
// non-empty, per the Groovy Elvis operator) is used; otherwise '/tmp'.
// The result is stored as an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Named configuration profiles. Each container-engine profile enables exactly
// one engine and explicitly disables all the others.
profiles {
// Disables publishDir for every process (the selector '.*' matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Points the `temp` setting of docker, podman and charliecloud at the
// `tempDir` value computed earlier in this file.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Run with Docker only.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Run with Singularity only; autoMounts is additionally switched on.
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Run with Podman only.
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
// Run with Shifter only.
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
// Run with Charliecloud only.
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Maps process labels to resource directives. A process carrying one of these
// labels gets the corresponding `memory` or `cpus` value.
process{
// Decimal memory labels: values are powers of ten in bytes (1 gb = 10^9 B).
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Binary memory labels: values are powers of two in bytes (1 gib = 2^30 B).
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU labels: the number in the label name is the CPU count requested.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}
// Pulls in the settings from nextflow_labels.config.
// NOTE(review): because it is included after the block above, it presumably
// can override those label definitions — confirm against its contents.
includeConfig("nextflow_labels.config")

View File

@@ -1,6 +1,6 @@
name: "split_modalities" name: "split_modalities"
namespace: "workflows/multiomics" namespace: "workflows/multiomics"
version: "v4.0.3" version: "v4.1.0"
authors: authors:
- name: "Dries Schaumont" - name: "Dries Schaumont"
roles: roles:
@@ -182,14 +182,14 @@ build_info:
engine: "native" engine: "native"
output: "target/_private/nextflow/workflows/multiomics/split_modalities" output: "target/_private/nextflow/workflows/multiomics/split_modalities"
executable: "target/_private/nextflow/workflows/multiomics/split_modalities/main.nf" executable: "target/_private/nextflow/workflows/multiomics/split_modalities/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206" git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline" git_remote: "https://github.com/openpipelines-bio/openpipeline"
dependencies: dependencies:
- "target/nextflow/dataflow/split_modalities" - "target/nextflow/dataflow/split_modalities"
package_config: package_config:
name: "openpipeline" name: "openpipeline"
version: "v4.0.3" version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n" summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\ description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\ \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -210,7 +210,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config" - path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\ description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI." \ Hub CI."
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:
@@ -219,7 +219,7 @@ package_config:
)'" )'"
- ".engines += { type: \"native\" }" - ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'" - ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords: keywords:
- "single-cell" - "single-cell"
- "multimodal" - "multimodal"

View File

@@ -1,6 +1,6 @@
// split_modalities v4.0.3 // split_modalities v4.1.0
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1709,10 +1704,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{ "config": processConfig(readJsonBlob('''{
"name" : "split_modalities", "name" : "split_modalities",
"namespace" : "workflows/multiomics", "namespace" : "workflows/multiomics",
"version" : "v4.0.3", "version" : "v4.1.0",
"authors" : [ "authors" : [
{ {
"name" : "Dries Schaumont", "name" : "Dries Schaumont",
@@ -3273,13 +3283,13 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "native", "engine" : "native",
"output" : "/workdir/root/repo/target/_private/nextflow/workflows/multiomics/split_modalities", "output" : "/workdir/root/repo/target/_private/nextflow/workflows/multiomics/split_modalities",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206", "git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline" "git_remote" : "https://github.com/openpipelines-bio/openpipeline"
}, },
"package_config" : { "package_config" : {
"name" : "openpipeline", "name" : "openpipeline",
"version" : "v4.0.3", "version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n", "summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n", "description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : { "info" : {
@@ -3297,14 +3307,14 @@ meta = [
} }
] ]
}, },
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }", ".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'" ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
], ],
"keywords" : [ "keywords" : [
"single-cell", "single-cell",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'workflows/multiomics/split_modalities' name = 'workflows/multiomics/split_modalities'
mainScript = 'main.nf' mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge' nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3' version = 'v4.1.0'
description = 'A pipeline to split a multimodal mudata files into several unimodal mudata files.' description = 'A pipeline to split a multimodal mudata files into several unimodal mudata files.'
author = 'Dries Schaumont' author = 'Dries Schaumont'
} }

View File

@@ -1,6 +1,6 @@
name: "log_normalize" name: "log_normalize"
namespace: "workflows/rna" namespace: "workflows/rna"
version: "v4.0.3" version: "v4.1.0"
authors: authors:
- name: "Dries Schaumont" - name: "Dries Schaumont"
roles: roles:
@@ -197,8 +197,8 @@ build_info:
engine: "native" engine: "native"
output: "target/_private/nextflow/workflows/rna/log_normalize" output: "target/_private/nextflow/workflows/rna/log_normalize"
executable: "target/_private/nextflow/workflows/rna/log_normalize/main.nf" executable: "target/_private/nextflow/workflows/rna/log_normalize/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206" git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline" git_remote: "https://github.com/openpipelines-bio/openpipeline"
dependencies: dependencies:
- "target/nextflow/transform/normalize_total" - "target/nextflow/transform/normalize_total"
@@ -206,7 +206,7 @@ build_info:
- "target/nextflow/transform/delete_layer" - "target/nextflow/transform/delete_layer"
package_config: package_config:
name: "openpipeline" name: "openpipeline"
version: "v4.0.3" version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n" summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\ description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\ \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -227,7 +227,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config" - path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\ description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI." \ Hub CI."
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:
@@ -236,7 +236,7 @@ package_config:
)'" )'"
- ".engines += { type: \"native\" }" - ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'" - ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords: keywords:
- "single-cell" - "single-cell"
- "multimodal" - "multimodal"

View File

@@ -1,6 +1,6 @@
// log_normalize v4.0.3 // log_normalize v4.1.0
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1709,10 +1704,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{ "config": processConfig(readJsonBlob('''{
"name" : "log_normalize", "name" : "log_normalize",
"namespace" : "workflows/rna", "namespace" : "workflows/rna",
"version" : "v4.0.3", "version" : "v4.1.0",
"authors" : [ "authors" : [
{ {
"name" : "Dries Schaumont", "name" : "Dries Schaumont",
@@ -3293,13 +3303,13 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "native", "engine" : "native",
"output" : "/workdir/root/repo/target/_private/nextflow/workflows/rna/log_normalize", "output" : "/workdir/root/repo/target/_private/nextflow/workflows/rna/log_normalize",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206", "git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline" "git_remote" : "https://github.com/openpipelines-bio/openpipeline"
}, },
"package_config" : { "package_config" : {
"name" : "openpipeline", "name" : "openpipeline",
"version" : "v4.0.3", "version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n", "summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n", "description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : { "info" : {
@@ -3317,14 +3327,14 @@ meta = [
} }
] ]
}, },
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }", ".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'" ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
], ],
"keywords" : [ "keywords" : [
"single-cell", "single-cell",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'workflows/rna/log_normalize' name = 'workflows/rna/log_normalize'
mainScript = 'main.nf' mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge' nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3' version = 'v4.1.0'
description = 'Performs normalization and subsequent log-transformation of raw count data.' description = 'Performs normalization and subsequent log-transformation of raw count data.'
author = 'Dries Schaumont' author = 'Dries Schaumont'
} }

View File

@@ -1,6 +1,6 @@
name: "leiden" name: "leiden"
namespace: "cluster" namespace: "cluster"
version: "v4.0.3" version: "v4.1.0"
authors: authors:
- name: "Dries De Maeyer" - name: "Dries De Maeyer"
roles: roles:
@@ -77,6 +77,41 @@ argument_groups:
direction: "input" direction: "input"
multiple: false multiple: false
multiple_sep: ";" multiple_sep: ";"
- type: "string"
name: "--flavor"
description: "Which package's implementation to use.\n"
info: null
default:
- "leidenalg"
required: false
choices:
- "leidenalg"
- "igraph"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--n_iterations"
description: "How many iterations of the Leiden clustering algorithm to perform.\n\
When defined, positive values above 2 define the total number of iterations\
\ to perform.\nWhen not set, the algorithm will run until it reaches its optimal\
\ clustering.\n"
info: null
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--seed"
description: "Fix the initialization of the optimization. Can be used to increase\
\ reproducibility.\n"
info: null
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double" - type: "double"
name: "--resolution" name: "--resolution"
description: "A parameter value controlling the coarseness of the clustering.\ description: "A parameter value controlling the coarseness of the clustering.\
@@ -91,8 +126,8 @@ argument_groups:
multiple_sep: ";" multiple_sep: ";"
- type: "string" - type: "string"
name: "--output_compression" name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\ description: "Compression format to use for the output AnnData and/or Mudata H5\
By default no compression is applied.\n" \ files.\nBy default no compression is applied.\n"
info: null info: null
example: example:
- "gzip" - "gzip"
@@ -215,7 +250,7 @@ engines:
id: "docker" id: "docker"
image: "python:3.13-slim" image: "python:3.13-slim"
target_registry: "images.viash-hub.com" target_registry: "images.viash-hub.com"
target_tag: "v4.0.3" target_tag: "v4.1.0"
namespace_separator: "/" namespace_separator: "/"
setup: setup:
- type: "apt" - type: "apt"
@@ -225,11 +260,12 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "anndata~=0.12.7" - "anndata~=0.12.16"
- "awkward" - "awkward"
- "mudata~=0.3.2" - "scipy~=1.17.1"
- "mudata~=0.3.8"
- "scanpy~=1.11.4" - "scanpy~=1.11.4"
- "leidenalg~=0.10.0" - "leidenalg~=0.11.0"
script: script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\ - "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\ \ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -243,7 +279,7 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.8.0" - "viashpy==0.10.0"
github: github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true upgrade: true
@@ -257,12 +293,12 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/nextflow/cluster/leiden" output: "target/nextflow/cluster/leiden"
executable: "target/nextflow/cluster/leiden/main.nf" executable: "target/nextflow/cluster/leiden/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206" git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline" git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config: package_config:
name: "openpipeline" name: "openpipeline"
version: "v4.0.3" version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n" summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\ description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\ \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -283,7 +319,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config" - path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\ description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI." \ Hub CI."
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:
@@ -292,7 +328,7 @@ package_config:
)'" )'"
- ".engines += { type: \"native\" }" - ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'" - ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords: keywords:
- "single-cell" - "single-cell"
- "multimodal" - "multimodal"

View File

@@ -1,6 +1,6 @@
// leiden v4.0.3 // leiden v4.1.0
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1709,10 +1704,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{ "config": processConfig(readJsonBlob('''{
"name" : "leiden", "name" : "leiden",
"namespace" : "cluster", "namespace" : "cluster",
"version" : "v4.0.3", "version" : "v4.1.0",
"authors" : [ "authors" : [
{ {
"name" : "Dries De Maeyer", "name" : "Dries De Maeyer",
@@ -3133,6 +3143,42 @@ meta = [
"multiple" : false, "multiple" : false,
"multiple_sep" : ";" "multiple_sep" : ";"
}, },
{
"type" : "string",
"name" : "--flavor",
"description" : "Which package's implementation to use.\n",
"default" : [
"leidenalg"
],
"required" : false,
"choices" : [
"leidenalg",
"igraph"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--n_iterations",
"description" : "How many iterations of the Leiden clustering algorithm to perform.\nWhen defined, positive values above 2 define the total number of iterations to perform.\nWhen not set, the algorithm will run until it reaches its optimal clustering.\n",
"required" : false,
"min" : 1,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--seed",
"description" : "Fix the initialization of the optimization. Can be used to increase reproducibility.\n",
"required" : false,
"min" : 0,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{ {
"type" : "double", "type" : "double",
"name" : "--resolution", "name" : "--resolution",
@@ -3148,7 +3194,7 @@ meta = [
{ {
"type" : "string", "type" : "string",
"name" : "--output_compression", "name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [ "example" : [
"gzip" "gzip"
], ],
@@ -3294,7 +3340,7 @@ meta = [
"id" : "docker", "id" : "docker",
"image" : "python:3.13-slim", "image" : "python:3.13-slim",
"target_registry" : "images.viash-hub.com", "target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3", "target_tag" : "v4.1.0",
"namespace_separator" : "/", "namespace_separator" : "/",
"setup" : [ "setup" : [
{ {
@@ -3308,11 +3354,12 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"anndata~=0.12.7", "anndata~=0.12.16",
"awkward", "awkward",
"mudata~=0.3.2", "scipy~=1.17.1",
"mudata~=0.3.8",
"scanpy~=1.11.4", "scanpy~=1.11.4",
"leidenalg~=0.10.0" "leidenalg~=0.11.0"
], ],
"script" : [ "script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")" "exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3332,7 +3379,7 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"viashpy==0.8.0" "viashpy==0.10.0"
], ],
"github" : [ "github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3351,13 +3398,13 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "docker|native", "engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/cluster/leiden", "output" : "/workdir/root/repo/target/nextflow/cluster/leiden",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206", "git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline" "git_remote" : "https://github.com/openpipelines-bio/openpipeline"
}, },
"package_config" : { "package_config" : {
"name" : "openpipeline", "name" : "openpipeline",
"version" : "v4.0.3", "version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n", "summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n", "description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : { "info" : {
@@ -3375,14 +3422,14 @@ meta = [
} }
] ]
}, },
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }", ".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'" ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
], ],
"keywords" : [ "keywords" : [
"single-cell", "single-cell",
@@ -3416,7 +3463,6 @@ import os
import time import time
import logging import logging
import logging.handlers import logging.handlers
import warnings
import h5py import h5py
import mudata as mu import mudata as mu
import pandas as pd import pandas as pd
@@ -3438,6 +3484,9 @@ par = {
'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'obsm_name': $( if [ ! -z ${VIASH_PAR_OBSM_NAME+x} ]; then echo "r'${VIASH_PAR_OBSM_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'obsm_name': $( if [ ! -z ${VIASH_PAR_OBSM_NAME+x} ]; then echo "r'${VIASH_PAR_OBSM_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'flavor': $( if [ ! -z ${VIASH_PAR_FLAVOR+x} ]; then echo "r'${VIASH_PAR_FLAVOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'n_iterations': $( if [ ! -z ${VIASH_PAR_N_ITERATIONS+x} ]; then echo "int(r'${VIASH_PAR_N_ITERATIONS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'seed': $( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "int(r'${VIASH_PAR_SEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'resolution': $( if [ ! -z ${VIASH_PAR_RESOLUTION+x} ]; then echo "list(map(float, r'${VIASH_PAR_RESOLUTION//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ), 'resolution': $( if [ ! -z ${VIASH_PAR_RESOLUTION+x} ]; then echo "list(map(float, r'${VIASH_PAR_RESOLUTION//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ),
'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi )
} }
@@ -3473,6 +3522,9 @@ from compress_h5mu import compress_h5mu
_shared_logger_name = "leiden" _shared_logger_name = "leiden"
if not par["n_iterations"]:
par["n_iterations"] = -1
# Function to check available space in /dev/shm # Function to check available space in /dev/shm
def get_available_shared_memory(): def get_available_shared_memory():
@@ -3571,18 +3623,18 @@ def run_single_resolution(shared_csr_matrix, obs_names, resolution):
try: try:
connectivities = shared_csr_matrix.to_csr_matrix() connectivities = shared_csr_matrix.to_csr_matrix()
adata = create_empty_anndata_with_connectivities(connectivities, obs_names) adata = create_empty_anndata_with_connectivities(connectivities, obs_names)
with warnings.catch_warnings(): sc.tl.leiden(
# In the future, the default backend for leiden will be igraph instead of leidenalg. adata,
warnings.simplefilter(action="ignore", category=FutureWarning) resolution=resolution,
adata_out = sc.tl.leiden( key_added=str(resolution),
adata, obsp="connectivities",
resolution=resolution, flavor=par["flavor"],
key_added=str(resolution), n_iterations=par["n_iterations"],
obsp="connectivities", random_state=par["seed"],
copy=True, copy=False, # A copy was already created above
) )
logger.info(f"Returning result for resolution {resolution}") logger.info(f"Returning result for resolution {resolution}")
return adata_out.obs[str(resolution)] return adata.obs[str(resolution)]
finally: finally:
obs_names.shm.close() obs_names.shm.close()
shared_csr_matrix.close() shared_csr_matrix.close()
@@ -4147,7 +4199,7 @@ meta["defaults"] = [
"container" : { "container" : {
"registry" : "images.viash-hub.com", "registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/cluster/leiden", "image" : "vsh/openpipeline/cluster/leiden",
"tag" : "v4.0.3" "tag" : "v4.1.0"
}, },
"label" : [ "label" : [
"highcpu", "highcpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'cluster/leiden' name = 'cluster/leiden'
mainScript = 'main.nf' mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge' nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3' version = 'v4.1.0'
description = 'Cluster cells using the [Leiden algorithm] [Traag18] implemented in the [Scanpy framework] [Wolf18]. \nLeiden is an improved version of the [Louvain algorithm] [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15] [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\n[Blondel08]: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech. \n[Levine15]: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \n[Traag18]: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv. \n[Wolf18]: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology. \n' description = 'Cluster cells using the [Leiden algorithm] [Traag18] implemented in the [Scanpy framework] [Wolf18]. \nLeiden is an improved version of the [Louvain algorithm] [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15] [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\n[Blondel08]: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech. \n[Levine15]: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \n[Traag18]: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv. \n[Wolf18]: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology. \n'
author = 'Dries De Maeyer' author = 'Dries De Maeyer'
} }

View File

@@ -41,6 +41,26 @@
"help_text": "Type: `string`, multiple: `False`, default: `\"leiden\"`. ", "help_text": "Type: `string`, multiple: `False`, default: `\"leiden\"`. ",
"default": "leiden" "default": "leiden"
}, },
"flavor": {
"type": "string",
"description": "Which package's implementation to use.\n",
"help_text": "Type: `string`, multiple: `False`, default: `\"leidenalg\"`, choices: ``leidenalg`, `igraph``. ",
"enum": [
"leidenalg",
"igraph"
],
"default": "leidenalg"
},
"n_iterations": {
"type": "integer",
"description": "How many iterations of the Leiden clustering algorithm to perform.\nWhen defined, positive values above 2 define the total number of iterations to perform.\nWhen not set, the algorithm will run until it reaches its optimal clustering.\n",
"help_text": "Type: `integer`, multiple: `False`. "
},
"seed": {
"type": "integer",
"description": "Fix the initialization of the optimization",
"help_text": "Type: `integer`, multiple: `False`. "
},
"resolution": { "resolution": {
"type": "array", "type": "array",
"items": { "items": {
@@ -54,7 +74,7 @@
}, },
"output_compression": { "output_compression": {
"type": "string", "type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ", "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [ "enum": [
"gzip", "gzip",

View File

@@ -1,6 +1,6 @@
name: "concatenate_h5mu" name: "concatenate_h5mu"
namespace: "dataflow" namespace: "dataflow"
version: "v4.0.3" version: "v4.1.0"
authors: authors:
- name: "Dries Schaumont" - name: "Dries Schaumont"
roles: roles:
@@ -136,8 +136,8 @@ argument_groups:
multiple_sep: ";" multiple_sep: ";"
- type: "string" - type: "string"
name: "--output_compression" name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\ description: "Compression format to use for the output AnnData and/or Mudata H5\
By default no compression is applied.\n" \ files.\nBy default no compression is applied.\n"
info: null info: null
example: example:
- "gzip" - "gzip"
@@ -253,7 +253,7 @@ engines:
id: "docker" id: "docker"
image: "python:3.13-slim" image: "python:3.13-slim"
target_registry: "images.viash-hub.com" target_registry: "images.viash-hub.com"
target_tag: "v4.0.3" target_tag: "v4.1.0"
namespace_separator: "/" namespace_separator: "/"
setup: setup:
- type: "apt" - type: "apt"
@@ -263,9 +263,10 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "anndata~=0.12.7" - "anndata~=0.12.16"
- "awkward" - "awkward"
- "mudata~=0.3.2" - "scipy~=1.17.1"
- "mudata~=0.3.8"
script: script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\ - "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\ \ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -279,14 +280,14 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.8.0" - "viashpy==0.10.0"
github: github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true upgrade: true
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.8.0" - "viashpy==0.10.0"
- "pytest-benchmark" - "pytest-benchmark"
upgrade: true upgrade: true
entrypoint: [] entrypoint: []
@@ -299,12 +300,12 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/nextflow/dataflow/concatenate_h5mu" output: "target/nextflow/dataflow/concatenate_h5mu"
executable: "target/nextflow/dataflow/concatenate_h5mu/main.nf" executable: "target/nextflow/dataflow/concatenate_h5mu/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206" git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline" git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config: package_config:
name: "openpipeline" name: "openpipeline"
version: "v4.0.3" version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n" summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\ description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\ \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -325,7 +326,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config" - path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\ description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI." \ Hub CI."
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:
@@ -334,7 +335,7 @@ package_config:
)'" )'"
- ".engines += { type: \"native\" }" - ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'" - ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords: keywords:
- "single-cell" - "single-cell"
- "multimodal" - "multimodal"

View File

@@ -1,6 +1,6 @@
// concatenate_h5mu v4.0.3 // concatenate_h5mu v4.1.0
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1709,10 +1704,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
                 For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{ "config": processConfig(readJsonBlob('''{
"name" : "concatenate_h5mu", "name" : "concatenate_h5mu",
"namespace" : "dataflow", "namespace" : "dataflow",
"version" : "v4.0.3", "version" : "v4.1.0",
"authors" : [ "authors" : [
{ {
"name" : "Dries Schaumont", "name" : "Dries Schaumont",
@@ -3179,7 +3189,7 @@ meta = [
{ {
"type" : "string", "type" : "string",
"name" : "--output_compression", "name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [ "example" : [
"gzip" "gzip"
], ],
@@ -3328,7 +3338,7 @@ meta = [
"id" : "docker", "id" : "docker",
"image" : "python:3.13-slim", "image" : "python:3.13-slim",
"target_registry" : "images.viash-hub.com", "target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3", "target_tag" : "v4.1.0",
"namespace_separator" : "/", "namespace_separator" : "/",
"setup" : [ "setup" : [
{ {
@@ -3342,9 +3352,10 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"anndata~=0.12.7", "anndata~=0.12.16",
"awkward", "awkward",
"mudata~=0.3.2" "scipy~=1.17.1",
"mudata~=0.3.8"
], ],
"script" : [ "script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")" "exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3364,7 +3375,7 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"viashpy==0.8.0" "viashpy==0.10.0"
], ],
"github" : [ "github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3375,7 +3386,7 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"viashpy==0.8.0", "viashpy==0.10.0",
"pytest-benchmark" "pytest-benchmark"
], ],
"upgrade" : true "upgrade" : true
@@ -3392,13 +3403,13 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "docker|native", "engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/dataflow/concatenate_h5mu", "output" : "/workdir/root/repo/target/nextflow/dataflow/concatenate_h5mu",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206", "git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline" "git_remote" : "https://github.com/openpipelines-bio/openpipeline"
}, },
"package_config" : { "package_config" : {
"name" : "openpipeline", "name" : "openpipeline",
"version" : "v4.0.3", "version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n", "summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n", "description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : { "info" : {
@@ -3416,14 +3427,14 @@ meta = [
} }
] ]
}, },
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }", ".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'" ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
], ],
"keywords" : [ "keywords" : [
"single-cell", "single-cell",
@@ -4266,7 +4277,7 @@ meta["defaults"] = [
"container" : { "container" : {
"registry" : "images.viash-hub.com", "registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/dataflow/concatenate_h5mu", "image" : "vsh/openpipeline/dataflow/concatenate_h5mu",
"tag" : "v4.0.3" "tag" : "v4.1.0"
}, },
"label" : [ "label" : [
"midcpu", "midcpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'dataflow/concatenate_h5mu' name = 'dataflow/concatenate_h5mu'
mainScript = 'main.nf' mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge' nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3' version = 'v4.1.0'
description = 'Concatenate observations from samples in several (uni- and/or multi-modal) MuData files into a single file.\n' description = 'Concatenate observations from samples in several (uni- and/or multi-modal) MuData files into a single file.\n'
author = 'Dries Schaumont' author = 'Dries Schaumont'
} }

View File

@@ -85,7 +85,7 @@
}, },
"output_compression": { "output_compression": {
"type": "string", "type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ", "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [ "enum": [
"gzip", "gzip",

View File

@@ -1,6 +1,6 @@
name: "merge" name: "merge"
namespace: "dataflow" namespace: "dataflow"
version: "v4.0.3" version: "v4.1.0"
authors: authors:
- name: "Dries Schaumont" - name: "Dries Schaumont"
roles: roles:
@@ -25,6 +25,9 @@ argument_groups:
- "-i" - "-i"
description: "Paths to the single-modality .h5mu files that need to be combined" description: "Paths to the single-modality .h5mu files that need to be combined"
info: null info: null
example:
- "/path/to/modality_1.h5mu"
- "/path/to/modality_2.h5mu"
default: default:
- "sample_paths" - "sample_paths"
must_exist: true must_exist: true
@@ -163,7 +166,7 @@ engines:
id: "docker" id: "docker"
image: "python:3.12-slim" image: "python:3.12-slim"
target_registry: "images.viash-hub.com" target_registry: "images.viash-hub.com"
target_tag: "v4.0.3" target_tag: "v4.1.0"
namespace_separator: "/" namespace_separator: "/"
setup: setup:
- type: "apt" - type: "apt"
@@ -173,9 +176,10 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "anndata~=0.12.7" - "anndata~=0.12.16"
- "awkward" - "awkward"
- "mudata~=0.3.2" - "scipy~=1.17.1"
- "mudata~=0.3.8"
script: script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\ - "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\ \ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -189,7 +193,7 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.8.0" - "viashpy==0.10.0"
github: github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true upgrade: true
@@ -203,12 +207,12 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/nextflow/dataflow/merge" output: "target/nextflow/dataflow/merge"
executable: "target/nextflow/dataflow/merge/main.nf" executable: "target/nextflow/dataflow/merge/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206" git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline" git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config: package_config:
name: "openpipeline" name: "openpipeline"
version: "v4.0.3" version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n" summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\ description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\ \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -229,7 +233,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config" - path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\ description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI." \ Hub CI."
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:
@@ -238,7 +242,7 @@ package_config:
)'" )'"
- ".engines += { type: \"native\" }" - ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'" - ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords: keywords:
- "single-cell" - "single-cell"
- "multimodal" - "multimodal"

View File

@@ -1,6 +1,6 @@
// merge v4.0.3 // merge v4.1.0
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1709,10 +1704,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
                 For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{ "config": processConfig(readJsonBlob('''{
"name" : "merge", "name" : "merge",
"namespace" : "dataflow", "namespace" : "dataflow",
"version" : "v4.0.3", "version" : "v4.1.0",
"authors" : [ "authors" : [
{ {
"name" : "Dries Schaumont", "name" : "Dries Schaumont",
@@ -3071,6 +3081,10 @@ meta = [
"-i" "-i"
], ],
"description" : "Paths to the single-modality .h5mu files that need to be combined", "description" : "Paths to the single-modality .h5mu files that need to be combined",
"example" : [
"/path/to/modality_1.h5mu",
"/path/to/modality_2.h5mu"
],
"default" : [ "default" : [
"sample_paths" "sample_paths"
], ],
@@ -3246,7 +3260,7 @@ meta = [
"id" : "docker", "id" : "docker",
"image" : "python:3.12-slim", "image" : "python:3.12-slim",
"target_registry" : "images.viash-hub.com", "target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3", "target_tag" : "v4.1.0",
"namespace_separator" : "/", "namespace_separator" : "/",
"setup" : [ "setup" : [
{ {
@@ -3260,9 +3274,10 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"anndata~=0.12.7", "anndata~=0.12.16",
"awkward", "awkward",
"mudata~=0.3.2" "scipy~=1.17.1",
"mudata~=0.3.8"
], ],
"script" : [ "script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")" "exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3282,7 +3297,7 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"viashpy==0.8.0" "viashpy==0.10.0"
], ],
"github" : [ "github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3301,13 +3316,13 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "docker|native", "engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/dataflow/merge", "output" : "/workdir/root/repo/target/nextflow/dataflow/merge",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206", "git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline" "git_remote" : "https://github.com/openpipelines-bio/openpipeline"
}, },
"package_config" : { "package_config" : {
"name" : "openpipeline", "name" : "openpipeline",
"version" : "v4.0.3", "version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n", "summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n", "description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : { "info" : {
@@ -3325,14 +3340,14 @@ meta = [
} }
] ]
}, },
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }", ".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'" ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
], ],
"keywords" : [ "keywords" : [
"single-cell", "single-cell",
@@ -3847,7 +3862,7 @@ meta["defaults"] = [
"container" : { "container" : {
"registry" : "images.viash-hub.com", "registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/dataflow/merge", "image" : "vsh/openpipeline/dataflow/merge",
"tag" : "v4.0.3" "tag" : "v4.1.0"
}, },
"label" : [ "label" : [
"singlecpu", "singlecpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'dataflow/merge' name = 'dataflow/merge'
mainScript = 'main.nf' mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge' nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3' version = 'v4.1.0'
description = 'Combine one or more single-modality .h5mu files together into one .h5mu file.\n' description = 'Combine one or more single-modality .h5mu files together into one .h5mu file.\n'
author = 'Dries Schaumont' author = 'Dries Schaumont'
} }

View File

@@ -17,7 +17,7 @@
"format": "path", "format": "path",
"exists": true, "exists": true,
"description": "Paths to the single-modality .h5mu files that need to be combined", "description": "Paths to the single-modality .h5mu files that need to be combined",
"help_text": "Type: `file`, multiple: `True`, required, default: `[\"sample_paths\"]`, direction: `input`. ", "help_text": "Type: `file`, multiple: `True`, required, default: `[\"sample_paths\"]`, direction: `input`, example: `[\"/path/to/modality_1.h5mu\";\"/path/to/modality_2.h5mu\"]`. ",
"default": [ "default": [
"sample_paths" "sample_paths"
] ]

View File

@@ -1,6 +1,6 @@
name: "split_modalities" name: "split_modalities"
namespace: "dataflow" namespace: "dataflow"
version: "v4.0.3" version: "v4.1.0"
authors: authors:
- name: "Dries Schaumont" - name: "Dries Schaumont"
roles: roles:
@@ -42,6 +42,8 @@ argument_groups:
- "-i" - "-i"
description: "Path to a single .h5mu file." description: "Path to a single .h5mu file."
info: null info: null
example:
- "input.h5mu"
default: default:
- "sample_path" - "sample_path"
must_exist: true must_exist: true
@@ -79,8 +81,8 @@ argument_groups:
multiple_sep: ";" multiple_sep: ";"
- type: "string" - type: "string"
name: "--output_compression" name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\ description: "Compression format to use for the output AnnData and/or Mudata H5\
By default no compression is applied.\n" \ files.\nBy default no compression is applied.\n"
info: null info: null
example: example:
- "gzip" - "gzip"
@@ -97,6 +99,8 @@ resources:
is_executable: true is_executable: true
- type: "file" - type: "file"
path: "setup_logger.py" path: "setup_logger.py"
- type: "file"
path: "mudata_opener.py"
- type: "file" - type: "file"
path: "nextflow_labels.config" path: "nextflow_labels.config"
dest: "nextflow_labels.config" dest: "nextflow_labels.config"
@@ -190,7 +194,7 @@ engines:
id: "docker" id: "docker"
image: "python:3.12-slim" image: "python:3.12-slim"
target_registry: "images.viash-hub.com" target_registry: "images.viash-hub.com"
target_tag: "v4.0.3" target_tag: "v4.1.0"
namespace_separator: "/" namespace_separator: "/"
setup: setup:
- type: "apt" - type: "apt"
@@ -200,9 +204,10 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "anndata~=0.12.7" - "anndata~=0.12.16"
- "awkward" - "awkward"
- "mudata~=0.3.2" - "scipy~=1.17.1"
- "mudata~=0.3.8"
script: script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\ - "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\ \ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -216,7 +221,7 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.8.0" - "viashpy==0.10.0"
github: github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true upgrade: true
@@ -230,12 +235,12 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/nextflow/dataflow/split_modalities" output: "target/nextflow/dataflow/split_modalities"
executable: "target/nextflow/dataflow/split_modalities/main.nf" executable: "target/nextflow/dataflow/split_modalities/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206" git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline" git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config: package_config:
name: "openpipeline" name: "openpipeline"
version: "v4.0.3" version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n" summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\ description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\ \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -256,7 +261,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config" - path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\ description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI." \ Hub CI."
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:
@@ -265,7 +270,7 @@ package_config:
)'" )'"
- ".engines += { type: \"native\" }" - ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'" - ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords: keywords:
- "single-cell" - "single-cell"
- "multimodal" - "multimodal"

View File

@@ -1,6 +1,6 @@
// split_modalities v4.0.3 // split_modalities v4.1.0
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1354,47 +1354,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1710,10 +1705,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3036,7 +3046,7 @@ meta = [
"config": processConfig(readJsonBlob('''{ "config": processConfig(readJsonBlob('''{
"name" : "split_modalities", "name" : "split_modalities",
"namespace" : "dataflow", "namespace" : "dataflow",
"version" : "v4.0.3", "version" : "v4.1.0",
"authors" : [ "authors" : [
{ {
"name" : "Dries Schaumont", "name" : "Dries Schaumont",
@@ -3099,6 +3109,9 @@ meta = [
"-i" "-i"
], ],
"description" : "Path to a single .h5mu file.", "description" : "Path to a single .h5mu file.",
"example" : [
"input.h5mu"
],
"default" : [ "default" : [
"sample_path" "sample_path"
], ],
@@ -3143,7 +3156,7 @@ meta = [
{ {
"type" : "string", "type" : "string",
"name" : "--output_compression", "name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [ "example" : [
"gzip" "gzip"
], ],
@@ -3169,6 +3182,10 @@ meta = [
"type" : "file", "type" : "file",
"path" : "/src/utils/setup_logger.py" "path" : "/src/utils/setup_logger.py"
}, },
{
"type" : "file",
"path" : "/src/utils/mudata_opener.py"
},
{ {
"type" : "file", "type" : "file",
"path" : "/src/workflows/utils/labels.config", "path" : "/src/workflows/utils/labels.config",
@@ -3280,7 +3297,7 @@ meta = [
"id" : "docker", "id" : "docker",
"image" : "python:3.12-slim", "image" : "python:3.12-slim",
"target_registry" : "images.viash-hub.com", "target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3", "target_tag" : "v4.1.0",
"namespace_separator" : "/", "namespace_separator" : "/",
"setup" : [ "setup" : [
{ {
@@ -3294,9 +3311,10 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"anndata~=0.12.7", "anndata~=0.12.16",
"awkward", "awkward",
"mudata~=0.3.2" "scipy~=1.17.1",
"mudata~=0.3.8"
], ],
"script" : [ "script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")" "exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3316,7 +3334,7 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"viashpy==0.8.0" "viashpy==0.10.0"
], ],
"github" : [ "github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3335,13 +3353,13 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "docker|native", "engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/dataflow/split_modalities", "output" : "/workdir/root/repo/target/nextflow/dataflow/split_modalities",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206", "git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline" "git_remote" : "https://github.com/openpipelines-bio/openpipeline"
}, },
"package_config" : { "package_config" : {
"name" : "openpipeline", "name" : "openpipeline",
"version" : "v4.0.3", "version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n", "summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n", "description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : { "info" : {
@@ -3359,14 +3377,14 @@ meta = [
} }
] ]
}, },
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }", ".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'" ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
], ],
"keywords" : [ "keywords" : [
"single-cell", "single-cell",
@@ -3395,11 +3413,14 @@ def innerWorkflowFactory(args) {
tempscript=".viash_script.py" tempscript=".viash_script.py"
cat > "$tempscript" << VIASHMAIN cat > "$tempscript" << VIASHMAIN
from __future__ import annotations from __future__ import annotations
from functools import partial
import sys import sys
import mudata as md import mudata as md
import anndata as ad
from pathlib import Path from pathlib import Path
import pandas as pd import pandas as pd
### VIASH START ### VIASH START
# The following code has been auto-generated by Viash. # The following code has been auto-generated by Viash.
par = { par = {
@@ -3436,6 +3457,7 @@ dep = {
sys.path.append(meta["resources_dir"]) sys.path.append(meta["resources_dir"])
from setup_logger import setup_logger from setup_logger import setup_logger
from mudata_opener import mudata_opener
logger = setup_logger() logger = setup_logger()
@@ -3447,36 +3469,54 @@ def main() -> None:
logger.info("Creating %s", output_dir) logger.info("Creating %s", output_dir)
output_dir.mkdir(parents=True) output_dir.mkdir(parents=True)
logger.info("Reading input file '%s'", par["input"]) input_file = Path(par["input"])
input_file = Path(par["input"].strip()) logger.info("Checking which modalities exist for '%s'", par["input"])
sample = md.read_h5mu(input_file) with mudata_opener(input_file, mode="r") as (open_mudata, input_is_zarr):
logger.info("Creating output types CSV.")
modalities = list(sample.mod.keys())
logger.info("Found the following modalities:\\\\n%s", "\\\\n".join(modalities))
names = {mod_name: f"{input_file.stem}_{mod_name}.h5mu" for mod_name in modalities}
output_files = list(names.values())
logger.info(
"Will be creating the following output .h5mu files:\\\\n%s",
"\\\\n".join(output_files),
)
df = pd.DataFrame({"name": modalities, "filename": output_files})
logger.info("Writing output_types CSV file to '%s'.", par["output_types"])
df.to_csv(par["output_types"], index=False)
logger.info("Splitting input file into unimodal output files.")
for mod_name, mod in sample.mod.items():
logger.info("Processing modality '%s'", mod_name)
new_sample = md.MuData({mod_name: mod})
logger.info( logger.info(
"Writing to '%s', with compression '%s'", "Opened %s in %s format.", par["input"], "zarr" if input_is_zarr else "h5"
names[mod_name],
par["output_compression"],
) )
new_sample.write_h5mu(
output_dir / names[mod_name], compression=par["output_compression"] modalities = list(open_mudata["mod"].keys())
logger.info("Found the following modalities:\\\\n%s", "\\\\n".join(modalities))
logger.info("Creating output types CSV.")
output_extension = "zarr" if input_is_zarr else "h5mu"
names = {
mod_name: f"{input_file.stem}_{mod_name}.{output_extension}"
for mod_name in modalities
}
output_files = list(names.values())
logger.info(
"Will be creating the following output .%s files:\\\\n%s",
output_extension,
"\\\\n".join(output_files),
) )
df = pd.DataFrame({"name": modalities, "filename": output_files})
logger.info("Writing output_types CSV file to '%s'.", par["output_types"])
df.to_csv(par["output_types"], index=False)
logger.info("Splitting input file into unimodal output files.")
for mod_name in modalities:
logger.info("Processing modality '%s'", mod_name)
elem_key = f"/mod/{mod_name}"
elem = open_mudata[elem_key]
logger.info("Reading %s", elem_key)
new_ad = ad.io.read_elem(elem)
logger.info("Creating MuData object.")
new_sample = md.MuData({mod_name: new_ad})
logger.info(
"Writing to '%s', with compression '%s'",
names[mod_name],
par["output_compression"],
)
writer = (
partial(md.MuData.write_zarr, zarr_format=3)
if input_is_zarr
else partial(
md.MuData.write_h5mu, compression=par["output_compression"]
)
)
writer(new_sample, output_dir / names[mod_name])
logger.info("Done writing output file.") logger.info("Done writing output file.")
logger.info("Finished") logger.info("Finished")
@@ -3865,7 +3905,7 @@ meta["defaults"] = [
"container" : { "container" : {
"registry" : "images.viash-hub.com", "registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/dataflow/split_modalities", "image" : "vsh/openpipeline/dataflow/split_modalities",
"tag" : "v4.0.3" "tag" : "v4.1.0"
}, },
"label" : [ "label" : [
"singlecpu", "singlecpu",

View File

@@ -0,0 +1,27 @@
import zarr
import h5py
from contextlib import contextmanager
@contextmanager
def mudata_opener(file_loc, mode=None):
    """Open a MuData store as either a zarr group or an HDF5 file.

    ``zarr.open`` (``zarr_format=3``) is tried first. If it raises
    ``zarr.errors.GroupNotFoundError`` or ``NotADirectoryError``, the
    location is opened with ``h5py.File`` instead.

    Args:
        file_loc: Location of the store; passed unchanged to ``zarr.open``
            and, on fallback, to ``h5py.File``.
        mode: Open mode, forwarded as-is to whichever opener is used.

    Yields:
        A tuple ``(open_mudata, input_is_zarr)``: the opened handle and a
        bool that is ``True`` when the zarr opener succeeded and ``False``
        when the h5py fallback was used.

    Raises:
        FileNotFoundError, IsADirectoryError, KeyError: re-raised from the
            h5py fallback with a note naming ``file_loc``.
        Any other exception from the openers, or from the body of the
        ``with`` statement, propagates unchanged.
    """
    # Opening is kept strictly separate from yielding. If the yield were
    # inside these try blocks, an exception raised by the caller's `with`
    # body (e.g. a NotADirectoryError or KeyError) would be mistaken for an
    # open failure, triggering a second yield ("generator didn't stop after
    # throw()") or a misleading "Could not open file" note.
    input_is_zarr = True
    try:
        open_mudata = zarr.open(file_loc, zarr_format=3, mode=mode)
    except (zarr.errors.GroupNotFoundError, NotADirectoryError):
        input_is_zarr = False
        try:
            open_mudata = h5py.File(file_loc, mode=mode)
        except (FileNotFoundError, IsADirectoryError, KeyError) as e:
            e.add_note(f"Could not open file {file_loc}.")
            raise

    try:
        yield open_mudata, input_is_zarr
    finally:
        # Only handles that expose close() are closed; handles without one
        # (presumably zarr groups - TODO confirm) are simply dropped.
        close = getattr(open_mudata, "close", None)
        if callable(close):
            close()

View File

@@ -2,7 +2,7 @@ manifest {
name = 'dataflow/split_modalities' name = 'dataflow/split_modalities'
mainScript = 'main.nf' mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge' nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3' version = 'v4.1.0'
description = 'Split the modalities from a single .h5mu multimodal sample into seperate .h5mu files. \n' description = 'Split the modalities from a single .h5mu multimodal sample into seperate .h5mu files. \n'
author = 'Dries Schaumont, Robrecht Cannoodt' author = 'Dries Schaumont, Robrecht Cannoodt'
} }

View File

@@ -14,7 +14,7 @@
"format": "path", "format": "path",
"exists": true, "exists": true,
"description": "Path to a single .h5mu file.", "description": "Path to a single .h5mu file.",
"help_text": "Type: `file`, multiple: `False`, required, default: `\"sample_path\"`, direction: `input`. ", "help_text": "Type: `file`, multiple: `False`, required, default: `\"sample_path\"`, direction: `input`, example: `\"input.h5mu\"`. ",
"default": "sample_path" "default": "sample_path"
}, },
"output": { "output": {
@@ -33,7 +33,7 @@
}, },
"output_compression": { "output_compression": {
"type": "string", "type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ", "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [ "enum": [
"gzip", "gzip",

View File

@@ -1,6 +1,6 @@
name: "pca" name: "pca"
namespace: "dimred" namespace: "dimred"
version: "v4.0.3" version: "v4.1.0"
authors: authors:
- name: "Dries De Maeyer" - name: "Dries De Maeyer"
roles: roles:
@@ -17,6 +17,22 @@ authors:
role: "Principal Scientist" role: "Principal Scientist"
argument_groups: argument_groups:
- name: "Arguments" - name: "Arguments"
arguments:
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata H5\
\ files.\nBy default no compression is applied.\n"
info: null
example:
- "gzip"
required: false
choices:
- "gzip"
- "lzf"
direction: "input"
multiple: false
multiple_sep: ";"
- name: "inputs"
arguments: arguments:
- type: "file" - type: "file"
name: "--input" name: "--input"
@@ -62,6 +78,47 @@ argument_groups:
direction: "input" direction: "input"
multiple: false multiple: false
multiple_sep: ";" multiple_sep: ";"
- name: "Options"
arguments:
- type: "integer"
name: "--num_components"
description: "Number of principal components to compute. Defaults to 50, or 1\
\ - minimum dimension size of selected representation."
info: null
example:
- 25
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--chunked"
description: "If True, perform an incremental PCA on segments of a predefined\
\ size. Setting this flag automatically implies zero centering.\nMust be specified\
\ together with --chunk_size.\n"
info: null
direction: "input"
- type: "integer"
name: "--chunk_size"
description: "Number of observations to include in each chunk. Required if chunked=True\
\ was passed.\n"
info: null
required: false
min: 2
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--seed"
description: "Used to set the initial states for the optimization. \n"
info: null
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file" - type: "file"
name: "--output" name: "--output"
alternatives: alternatives:
@@ -106,36 +163,11 @@ argument_groups:
direction: "input" direction: "input"
multiple: false multiple: false
multiple_sep: ";" multiple_sep: ";"
- type: "integer"
name: "--num_components"
description: "Number of principal components to compute. Defaults to 50, or 1\
\ - minimum dimension size of selected representation."
info: null
example:
- 25
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true" - type: "boolean_true"
name: "--overwrite" name: "--overwrite"
description: "Allow overwriting .obsm, .varm and .uns slots." description: "Allow overwriting .obsm, .varm and .uns slots."
info: null info: null
direction: "input" direction: "input"
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
By default no compression is applied.\n"
info: null
example:
- "gzip"
required: false
choices:
- "gzip"
- "lzf"
direction: "input"
multiple: false
multiple_sep: ";"
resources: resources:
- type: "python_script" - type: "python_script"
path: "script.py" path: "script.py"
@@ -147,8 +179,7 @@ resources:
- type: "file" - type: "file"
path: "nextflow_labels.config" path: "nextflow_labels.config"
dest: "nextflow_labels.config" dest: "nextflow_labels.config"
description: "Computes PCA coordinates, loadings and variance decomposition. Uses\ description: "Computes PCA coordinates, loadings and variance decomposition.\n"
\ the implementation of scikit-learn [Pedregosa11].\n"
test_resources: test_resources:
- type: "python_script" - type: "python_script"
path: "test.py" path: "test.py"
@@ -238,9 +269,9 @@ runners:
engines: engines:
- type: "docker" - type: "docker"
id: "docker" id: "docker"
image: "python:3.12-slim" image: "python:3.13-slim"
target_registry: "images.viash-hub.com" target_registry: "images.viash-hub.com"
target_tag: "v4.0.3" target_tag: "v4.1.0"
namespace_separator: "/" namespace_separator: "/"
setup: setup:
- type: "apt" - type: "apt"
@@ -250,9 +281,10 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "anndata~=0.12.7" - "anndata~=0.12.16"
- "awkward" - "awkward"
- "mudata~=0.3.2" - "scipy~=1.17.1"
- "mudata~=0.3.8"
- "scanpy~=1.11.4" - "scanpy~=1.11.4"
script: script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\ - "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
@@ -260,10 +292,16 @@ engines:
).partition(\\\".\\\")[0]) > 2\")" ).partition(\\\".\\\")[0]) > 2\")"
upgrade: true upgrade: true
test_setup: test_setup:
- type: "apt"
packages:
- "git"
interactive: false
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.8.0" - "viashpy==0.10.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true upgrade: true
entrypoint: [] entrypoint: []
cmd: null cmd: null
@@ -275,12 +313,12 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/nextflow/dimred/pca" output: "target/nextflow/dimred/pca"
executable: "target/nextflow/dimred/pca/main.nf" executable: "target/nextflow/dimred/pca/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206" git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline" git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config: package_config:
name: "openpipeline" name: "openpipeline"
version: "v4.0.3" version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n" summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\ description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\ \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -301,7 +339,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config" - path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\ description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI." \ Hub CI."
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:
@@ -310,7 +348,7 @@ package_config:
)'" )'"
- ".engines += { type: \"native\" }" - ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'" - ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords: keywords:
- "single-cell" - "single-cell"
- "multimodal" - "multimodal"

View File

@@ -1,6 +1,6 @@
// pca v4.0.3 // pca v4.1.0
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1709,10 +1704,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{ "config": processConfig(readJsonBlob('''{
"name" : "pca", "name" : "pca",
"namespace" : "dimred", "namespace" : "dimred",
"version" : "v4.0.3", "version" : "v4.1.0",
"authors" : [ "authors" : [
{ {
"name" : "Dries De Maeyer", "name" : "Dries De Maeyer",
@@ -3062,6 +3072,27 @@ meta = [
"argument_groups" : [ "argument_groups" : [
{ {
"name" : "Arguments", "name" : "Arguments",
"arguments" : [
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
"required" : false,
"choices" : [
"gzip",
"lzf"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
{
"name" : "inputs",
"arguments" : [ "arguments" : [
{ {
"type" : "file", "type" : "file",
@@ -3112,7 +3143,55 @@ meta = [
"direction" : "input", "direction" : "input",
"multiple" : false, "multiple" : false,
"multiple_sep" : ";" "multiple_sep" : ";"
}
]
},
{
"name" : "Options",
"arguments" : [
{
"type" : "integer",
"name" : "--num_components",
"description" : "Number of principal components to compute. Defaults to 50, or 1 - minimum dimension size of selected representation.",
"example" : [
25
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}, },
{
"type" : "boolean_true",
"name" : "--chunked",
"description" : "If True, perform an incremental PCA on segments of a predefined size. Setting this flag automatically implies zero centering.\nMust be specified together with --chunk_size.\n",
"direction" : "input"
},
{
"type" : "integer",
"name" : "--chunk_size",
"description" : "Number of observations to include in each chunk. Required if chunked=True was passed.\n",
"required" : false,
"min" : 2,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--seed",
"description" : "Used to set the initial states for the optimization. \n",
"required" : false,
"min" : 0,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
{
"name" : "Outputs",
"arguments" : [
{ {
"type" : "file", "type" : "file",
"name" : "--output", "name" : "--output",
@@ -3166,39 +3245,11 @@ meta = [
"multiple" : false, "multiple" : false,
"multiple_sep" : ";" "multiple_sep" : ";"
}, },
{
"type" : "integer",
"name" : "--num_components",
"description" : "Number of principal components to compute. Defaults to 50, or 1 - minimum dimension size of selected representation.",
"example" : [
25
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{ {
"type" : "boolean_true", "type" : "boolean_true",
"name" : "--overwrite", "name" : "--overwrite",
"description" : "Allow overwriting .obsm, .varm and .uns slots.", "description" : "Allow overwriting .obsm, .varm and .uns slots.",
"direction" : "input" "direction" : "input"
},
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
"required" : false,
"choices" : [
"gzip",
"lzf"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
} }
] ]
} }
@@ -3223,7 +3274,7 @@ meta = [
"dest" : "nextflow_labels.config" "dest" : "nextflow_labels.config"
} }
], ],
"description" : "Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n", "description" : "Computes PCA coordinates, loadings and variance decomposition.\n",
"test_resources" : [ "test_resources" : [
{ {
"type" : "python_script", "type" : "python_script",
@@ -3331,9 +3382,9 @@ meta = [
{ {
"type" : "docker", "type" : "docker",
"id" : "docker", "id" : "docker",
"image" : "python:3.12-slim", "image" : "python:3.13-slim",
"target_registry" : "images.viash-hub.com", "target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3", "target_tag" : "v4.1.0",
"namespace_separator" : "/", "namespace_separator" : "/",
"setup" : [ "setup" : [
{ {
@@ -3347,9 +3398,10 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"anndata~=0.12.7", "anndata~=0.12.16",
"awkward", "awkward",
"mudata~=0.3.2", "scipy~=1.17.1",
"mudata~=0.3.8",
"scanpy~=1.11.4" "scanpy~=1.11.4"
], ],
"script" : [ "script" : [
@@ -3359,11 +3411,21 @@ meta = [
} }
], ],
"test_setup" : [ "test_setup" : [
{
"type" : "apt",
"packages" : [
"git"
],
"interactive" : false
},
{ {
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"viashpy==0.8.0" "viashpy==0.10.0"
],
"github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
], ],
"upgrade" : true "upgrade" : true
} }
@@ -3379,13 +3441,13 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "docker|native", "engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/dimred/pca", "output" : "/workdir/root/repo/target/nextflow/dimred/pca",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206", "git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline" "git_remote" : "https://github.com/openpipelines-bio/openpipeline"
}, },
"package_config" : { "package_config" : {
"name" : "openpipeline", "name" : "openpipeline",
"version" : "v4.0.3", "version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n", "summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n", "description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : { "info" : {
@@ -3403,14 +3465,14 @@ meta = [
} }
] ]
}, },
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }", ".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'" ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
], ],
"keywords" : [ "keywords" : [
"single-cell", "single-cell",
@@ -3441,22 +3503,26 @@ cat > "$tempscript" << VIASHMAIN
import scanpy as sc import scanpy as sc
import mudata as mu import mudata as mu
import sys import sys
import pandas as pd
from anndata import AnnData from anndata import AnnData
## VIASH START ## VIASH START
# The following code has been auto-generated by Viash. # The following code has been auto-generated by Viash.
par = { par = {
'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'chunked': $( if [ ! -z ${VIASH_PAR_CHUNKED+x} ]; then echo "r'${VIASH_PAR_CHUNKED//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ),
'chunk_size': $( if [ ! -z ${VIASH_PAR_CHUNK_SIZE+x} ]; then echo "int(r'${VIASH_PAR_CHUNK_SIZE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'seed': $( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "int(r'${VIASH_PAR_SEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'varm_output': $( if [ ! -z ${VIASH_PAR_VARM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_VARM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'varm_output': $( if [ ! -z ${VIASH_PAR_VARM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_VARM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), 'overwrite': $( if [ ! -z ${VIASH_PAR_OVERWRITE+x} ]; then echo "r'${VIASH_PAR_OVERWRITE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi )
'overwrite': $( if [ ! -z ${VIASH_PAR_OVERWRITE+x} ]; then echo "r'${VIASH_PAR_OVERWRITE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ),
'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi )
} }
meta = { meta = {
'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
@@ -3496,11 +3562,24 @@ data = mu.read_h5ad(par["input"], mod=par["modality"])
logger.info("Computing PCA components for modality '%s'", par["modality"]) logger.info("Computing PCA components for modality '%s'", par["modality"])
if par["layer"] and par["layer"] not in data.layers: if par["layer"] and par["layer"] not in data.layers:
raise ValueError(f"{par['layer']} was not found in modality {par['modality']}.") raise ValueError(f"{par['layer']} was not found in modality {par['modality']}.")
layer = data.X if not par["layer"] else data.layers[par["layer"]]
adata_input_layer = AnnData(layer)
adata_input_layer.var.index = data.var.index
use_highly_variable = False chunked, chunk_size = par["chunked"], par["chunk_size"]
if chunked:
if not chunk_size:
raise ValueError(
"Requested to perform an incremental PCA "
"('chunked'), but the chunk size is not set."
)
if chunk_size < par["num_components"]:
raise ValueError(
f"The requested chunk size ({chunk_size}) must not be smaller "
f"than the number of components ({par['num_components']})"
)
layer = data.X if not par["layer"] else data.layers[par["layer"]]
adata_input_layer = AnnData(layer, var=pd.DataFrame([], index=data.var.index))
mask_var = None
if par["var_input"]: if par["var_input"]:
if par["var_input"] not in data.var.columns: if par["var_input"] not in data.var.columns:
raise ValueError( raise ValueError(
@@ -3508,15 +3587,18 @@ if par["var_input"]:
"as a selection of genes to run the PCA on, " "as a selection of genes to run the PCA on, "
f"but the column is not available for modality {par['modality']}" f"but the column is not available for modality {par['modality']}"
) )
use_highly_variable = True mask_var = data.var[par["var_input"]]
adata_input_layer.var["highly_variable"] = data.var[par["var_input"]]
# run pca # run pca
output_adata = sc.tl.pca( sc.tl.pca(
adata_input_layer, adata_input_layer,
n_comps=par["num_components"], n_comps=par["num_components"],
copy=True, copy=False, # A copy was already created
use_highly_variable=use_highly_variable, return_info=True,
mask_var=mask_var,
chunked=chunked,
chunk_size=chunk_size,
random_state=par["seed"],
) )
# store output in specific objects # store output in specific objects
@@ -3535,11 +3617,11 @@ for parameter_name, field in check_exist_dict.items():
) )
del getattr(data, field)[par[parameter_name]] del getattr(data, field)[par[parameter_name]]
data.obsm[par["obsm_output"]] = output_adata.obsm["X_pca"] data.obsm[par["obsm_output"]] = adata_input_layer.obsm["X_pca"]
data.varm[par["varm_output"]] = output_adata.varm["PCs"] data.varm[par["varm_output"]] = adata_input_layer.varm["PCs"]
data.uns[par["uns_output"]] = { data.uns[par["uns_output"]] = {
"variance": output_adata.uns["pca"]["variance"], "variance": adata_input_layer.uns["pca"]["variance"],
"variance_ratio": output_adata.uns["pca"]["variance_ratio"], "variance_ratio": adata_input_layer.uns["pca"]["variance_ratio"],
} }
@@ -3933,7 +4015,7 @@ meta["defaults"] = [
"container" : { "container" : {
"registry" : "images.viash-hub.com", "registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/dimred/pca", "image" : "vsh/openpipeline/dimred/pca",
"tag" : "v4.0.3" "tag" : "v4.1.0"
}, },
"label" : [ "label" : [
"highcpu", "highcpu",

View File

@@ -2,8 +2,8 @@ manifest {
name = 'dimred/pca' name = 'dimred/pca'
mainScript = 'main.nf' mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge' nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3' version = 'v4.1.0'
description = 'Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n' description = 'Computes PCA coordinates, loadings and variance decomposition.\n'
author = 'Dries De Maeyer' author = 'Dries De Maeyer'
} }

View File

@@ -1,37 +1,14 @@
{ {
"$schema": "https://json-schema.org/draft/2020-12/schema", "$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "pca", "title": "pca",
"description": "Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n", "description": "Computes PCA coordinates, loadings and variance decomposition.\n",
"type": "object", "type": "object",
"$defs": { "$defs": {
"arguments": { "outputs": {
"title": "Arguments", "title": "Outputs",
"type": "object", "type": "object",
"description": "No description", "description": "No description",
"properties": { "properties": {
"input": {
"type": "string",
"format": "path",
"exists": true,
"description": "Input h5mu file",
"help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
},
"modality": {
"type": "string",
"description": "Which modality from the input MuData file to process.\n",
"help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
"default": "rna"
},
"layer": {
"type": "string",
"description": "Use specified layer for expression values instead of the .X object from the modality.",
"help_text": "Type: `string`, multiple: `False`. "
},
"var_input": {
"type": "string",
"description": "Column name in .var matrix that will be used to select which genes to run the PCA on.",
"help_text": "Type: `string`, multiple: `False`, example: `\"filter_with_hvg\"`. "
},
"output": { "output": {
"type": "string", "type": "string",
"format": "path", "format": "path",
@@ -57,20 +34,22 @@
"help_text": "Type: `string`, multiple: `False`, default: `\"pca_variance\"`. ", "help_text": "Type: `string`, multiple: `False`, default: `\"pca_variance\"`. ",
"default": "pca_variance" "default": "pca_variance"
}, },
"num_components": {
"type": "integer",
"description": "Number of principal components to compute",
"help_text": "Type: `integer`, multiple: `False`, example: `25`. "
},
"overwrite": { "overwrite": {
"type": "boolean", "type": "boolean",
"description": "Allow overwriting .obsm, .varm and .uns slots.", "description": "Allow overwriting .obsm, .varm and .uns slots.",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ", "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false "default": false
}, }
}
},
"arguments": {
"title": "Arguments",
"type": "object",
"description": "No description",
"properties": {
"output_compression": { "output_compression": {
"type": "string", "type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ", "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [ "enum": [
"gzip", "gzip",
@@ -79,6 +58,64 @@
} }
} }
}, },
"inputs": {
"title": "inputs",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type": "string",
"format": "path",
"exists": true,
"description": "Input h5mu file",
"help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
},
"modality": {
"type": "string",
"description": "Which modality from the input MuData file to process.\n",
"help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ",
"default": "rna"
},
"layer": {
"type": "string",
"description": "Use specified layer for expression values instead of the .X object from the modality.",
"help_text": "Type: `string`, multiple: `False`. "
},
"var_input": {
"type": "string",
"description": "Column name in .var matrix that will be used to select which genes to run the PCA on.",
"help_text": "Type: `string`, multiple: `False`, example: `\"filter_with_hvg\"`. "
}
}
},
"options": {
"title": "Options",
"type": "object",
"description": "No description",
"properties": {
"num_components": {
"type": "integer",
"description": "Number of principal components to compute",
"help_text": "Type: `integer`, multiple: `False`, example: `25`. "
},
"chunked": {
"type": "boolean",
"description": "If True, perform an incremental PCA on segments of a predefined size",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"chunk_size": {
"type": "integer",
"description": "Number of observations to include in each chunk",
"help_text": "Type: `integer`, multiple: `False`. "
},
"seed": {
"type": "integer",
"description": "Used to set the initial states for the optimization",
"help_text": "Type: `integer`, multiple: `False`. "
}
}
},
"nextflow input-output arguments": { "nextflow input-output arguments": {
"title": "Nextflow input-output arguments", "title": "Nextflow input-output arguments",
"type": "object", "type": "object",
@@ -93,9 +130,18 @@
} }
}, },
"allOf": [ "allOf": [
{
"$ref": "#/$defs/outputs"
},
{ {
"$ref": "#/$defs/arguments" "$ref": "#/$defs/arguments"
}, },
{
"$ref": "#/$defs/inputs"
},
{
"$ref": "#/$defs/options"
},
{ {
"$ref": "#/$defs/nextflow input-output arguments" "$ref": "#/$defs/nextflow input-output arguments"
} }

View File

@@ -1,6 +1,6 @@
name: "umap" name: "umap"
namespace: "dimred" namespace: "dimred"
version: "v4.0.3" version: "v4.1.0"
authors: authors:
- name: "Dries De Maeyer" - name: "Dries De Maeyer"
roles: roles:
@@ -78,8 +78,8 @@ argument_groups:
multiple_sep: ";" multiple_sep: ";"
- type: "string" - type: "string"
name: "--output_compression" name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\ description: "Compression format to use for the output AnnData and/or Mudata H5\
By default no compression is applied.\n" \ files.\nBy default no compression is applied.\n"
info: null info: null
example: example:
- "gzip" - "gzip"
@@ -294,7 +294,7 @@ engines:
id: "docker" id: "docker"
image: "python:3.12-slim" image: "python:3.12-slim"
target_registry: "images.viash-hub.com" target_registry: "images.viash-hub.com"
target_tag: "v4.0.3" target_tag: "v4.1.0"
namespace_separator: "/" namespace_separator: "/"
setup: setup:
- type: "apt" - type: "apt"
@@ -304,9 +304,10 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "anndata~=0.12.7" - "anndata~=0.12.16"
- "awkward" - "awkward"
- "mudata~=0.3.2" - "scipy~=1.17.1"
- "mudata~=0.3.8"
- "scanpy~=1.11.4" - "scanpy~=1.11.4"
script: script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\ - "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
@@ -317,7 +318,7 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.8.0" - "viashpy==0.10.0"
upgrade: true upgrade: true
entrypoint: [] entrypoint: []
cmd: null cmd: null
@@ -329,12 +330,12 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/nextflow/dimred/umap" output: "target/nextflow/dimred/umap"
executable: "target/nextflow/dimred/umap/main.nf" executable: "target/nextflow/dimred/umap/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206" git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline" git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config: package_config:
name: "openpipeline" name: "openpipeline"
version: "v4.0.3" version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n" summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\ description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\ \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -355,7 +356,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config" - path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\ description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI." \ Hub CI."
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:
@@ -364,7 +365,7 @@ package_config:
)'" )'"
- ".engines += { type: \"native\" }" - ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'" - ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords: keywords:
- "single-cell" - "single-cell"
- "multimodal" - "multimodal"

View File

@@ -1,6 +1,6 @@
// umap v4.0.3 // umap v4.1.0
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1709,10 +1704,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path addding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{ "config": processConfig(readJsonBlob('''{
"name" : "umap", "name" : "umap",
"namespace" : "dimred", "namespace" : "dimred",
"version" : "v4.0.3", "version" : "v4.1.0",
"authors" : [ "authors" : [
{ {
"name" : "Dries De Maeyer", "name" : "Dries De Maeyer",
@@ -3138,7 +3148,7 @@ meta = [
{ {
"type" : "string", "type" : "string",
"name" : "--output_compression", "name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [ "example" : [
"gzip" "gzip"
], ],
@@ -3382,7 +3392,7 @@ meta = [
"id" : "docker", "id" : "docker",
"image" : "python:3.12-slim", "image" : "python:3.12-slim",
"target_registry" : "images.viash-hub.com", "target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3", "target_tag" : "v4.1.0",
"namespace_separator" : "/", "namespace_separator" : "/",
"setup" : [ "setup" : [
{ {
@@ -3396,9 +3406,10 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"anndata~=0.12.7", "anndata~=0.12.16",
"awkward", "awkward",
"mudata~=0.3.2", "scipy~=1.17.1",
"mudata~=0.3.8",
"scanpy~=1.11.4" "scanpy~=1.11.4"
], ],
"script" : [ "script" : [
@@ -3412,7 +3423,7 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"viashpy==0.8.0" "viashpy==0.10.0"
], ],
"upgrade" : true "upgrade" : true
} }
@@ -3428,13 +3439,13 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "docker|native", "engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/dimred/umap", "output" : "/workdir/root/repo/target/nextflow/dimred/umap",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206", "git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline" "git_remote" : "https://github.com/openpipelines-bio/openpipeline"
}, },
"package_config" : { "package_config" : {
"name" : "openpipeline", "name" : "openpipeline",
"version" : "v4.0.3", "version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n", "summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n", "description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : { "info" : {
@@ -3452,14 +3463,14 @@ meta = [
} }
] ]
}, },
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }", ".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'" ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
], ],
"keywords" : [ "keywords" : [
"single-cell", "single-cell",
@@ -3971,7 +3982,7 @@ meta["defaults"] = [
"container" : { "container" : {
"registry" : "images.viash-hub.com", "registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/dimred/umap", "image" : "vsh/openpipeline/dimred/umap",
"tag" : "v4.0.3" "tag" : "v4.1.0"
}, },
"label" : [ "label" : [
"highcpu", "highcpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'dimred/umap' name = 'dimred/umap'
mainScript = 'main.nf' mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge' nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3' version = 'v4.1.0'
description = 'UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to be faster than tSNE, it optimizes the embedding such that it best reflects the topology of the data, which we represent throughout Scanpy using a neighborhood graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of umap-learn [McInnes18]. For a few comparisons of UMAP with tSNE, see this preprint.\n' description = 'UMAP (Uniform Manifold Approximation and Projection) is a manifold learning technique suitable for visualizing high-dimensional data. Besides tending to be faster than tSNE, it optimizes the embedding such that it best reflects the topology of the data, which we represent throughout Scanpy using a neighborhood graph. tSNE, by contrast, optimizes the distribution of nearest-neighbor distances in the embedding such that these best match the distribution of distances in the high-dimensional space. We use the implementation of umap-learn [McInnes18]. For a few comparisons of UMAP with tSNE, see this preprint.\n'
author = 'Dries De Maeyer' author = 'Dries De Maeyer'
} }

View File

@@ -50,7 +50,7 @@
}, },
"output_compression": { "output_compression": {
"type": "string", "type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ", "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [ "enum": [
"gzip", "gzip",

View File

@@ -1,6 +1,6 @@
name: "highly_variable_features_scanpy" name: "highly_variable_features_scanpy"
namespace: "feature_annotation" namespace: "feature_annotation"
version: "v4.0.3" version: "v4.1.0"
authors: authors:
- name: "Dries De Maeyer" - name: "Dries De Maeyer"
roles: roles:
@@ -75,6 +75,20 @@ argument_groups:
direction: "input" direction: "input"
multiple: false multiple: false
multiple_sep: ";" multiple_sep: ";"
- type: "string"
name: "--features_to_exclude"
description: "User-defined list of feature names to exclude before HVG calculation.\
\ \nThese features will be excluded from HVG selection but will remain in the\
\ output data.\n"
info: null
example:
- "MT-CO1"
- "MT-CO2"
- "MT-ND1"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file" - type: "file"
name: "--output" name: "--output"
description: "Output h5mu file." description: "Output h5mu file."
@@ -214,8 +228,8 @@ argument_groups:
multiple_sep: ";" multiple_sep: ";"
- type: "string" - type: "string"
name: "--output_compression" name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\ description: "Compression format to use for the output AnnData and/or Mudata H5\
By default no compression is applied.\n" \ files.\nBy default no compression is applied.\n"
info: null info: null
example: example:
- "gzip" - "gzip"
@@ -342,15 +356,16 @@ engines:
id: "docker" id: "docker"
image: "python:3.12" image: "python:3.12"
target_registry: "images.viash-hub.com" target_registry: "images.viash-hub.com"
target_tag: "v4.0.3" target_tag: "v4.1.0"
namespace_separator: "/" namespace_separator: "/"
setup: setup:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "anndata~=0.12.7" - "anndata~=0.12.16"
- "awkward" - "awkward"
- "mudata~=0.3.2" - "scipy~=1.17.1"
- "mudata~=0.3.8"
- "scanpy~=1.11.4" - "scanpy~=1.11.4"
- "scikit-misc" - "scikit-misc"
script: script:
@@ -366,7 +381,7 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.8.0" - "viashpy==0.10.0"
github: github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true upgrade: true
@@ -380,12 +395,12 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/nextflow/feature_annotation/highly_variable_features_scanpy" output: "target/nextflow/feature_annotation/highly_variable_features_scanpy"
executable: "target/nextflow/feature_annotation/highly_variable_features_scanpy/main.nf" executable: "target/nextflow/feature_annotation/highly_variable_features_scanpy/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206" git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline" git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config: package_config:
name: "openpipeline" name: "openpipeline"
version: "v4.0.3" version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n" summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\ description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\ \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -406,7 +421,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config" - path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\ description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI." \ Hub CI."
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:
@@ -415,7 +430,7 @@ package_config:
)'" )'"
- ".engines += { type: \"native\" }" - ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'" - ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords: keywords:
- "single-cell" - "single-cell"
- "multimodal" - "multimodal"

View File

@@ -1,6 +1,6 @@
// highly_variable_features_scanpy v4.0.3 // highly_variable_features_scanpy v4.1.0
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1354,47 +1354,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1710,10 +1705,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
From source arguments, this is useful when a source argument may have a trailing slash
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3036,7 +3046,7 @@ meta = [
"config": processConfig(readJsonBlob('''{ "config": processConfig(readJsonBlob('''{
"name" : "highly_variable_features_scanpy", "name" : "highly_variable_features_scanpy",
"namespace" : "feature_annotation", "namespace" : "feature_annotation",
"version" : "v4.0.3", "version" : "v4.1.0",
"authors" : [ "authors" : [
{ {
"name" : "Dries De Maeyer", "name" : "Dries De Maeyer",
@@ -3136,6 +3146,20 @@ meta = [
"multiple" : false, "multiple" : false,
"multiple_sep" : ";" "multiple_sep" : ";"
}, },
{
"type" : "string",
"name" : "--features_to_exclude",
"description" : "User-defined list of feature names to exclude before HVG calculation. \nThese features will be excluded from HVG selection but will remain in the output data.\n",
"example" : [
"MT-CO1",
"MT-CO2",
"MT-ND1"
],
"required" : false,
"direction" : "input",
"multiple" : true,
"multiple_sep" : ";"
},
{ {
"type" : "file", "type" : "file",
"name" : "--output", "name" : "--output",
@@ -3281,7 +3305,7 @@ meta = [
{ {
"type" : "string", "type" : "string",
"name" : "--output_compression", "name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [ "example" : [
"gzip" "gzip"
], ],
@@ -3430,16 +3454,17 @@ meta = [
"id" : "docker", "id" : "docker",
"image" : "python:3.12", "image" : "python:3.12",
"target_registry" : "images.viash-hub.com", "target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3", "target_tag" : "v4.1.0",
"namespace_separator" : "/", "namespace_separator" : "/",
"setup" : [ "setup" : [
{ {
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"anndata~=0.12.7", "anndata~=0.12.16",
"awkward", "awkward",
"mudata~=0.3.2", "scipy~=1.17.1",
"mudata~=0.3.8",
"scanpy~=1.11.4", "scanpy~=1.11.4",
"scikit-misc" "scikit-misc"
], ],
@@ -3461,7 +3486,7 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"viashpy==0.8.0" "viashpy==0.10.0"
], ],
"github" : [ "github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3480,13 +3505,13 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "docker|native", "engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/feature_annotation/highly_variable_features_scanpy", "output" : "/workdir/root/repo/target/nextflow/feature_annotation/highly_variable_features_scanpy",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206", "git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline" "git_remote" : "https://github.com/openpipelines-bio/openpipeline"
}, },
"package_config" : { "package_config" : {
"name" : "openpipeline", "name" : "openpipeline",
"version" : "v4.0.3", "version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n", "summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n", "description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : { "info" : {
@@ -3504,14 +3529,14 @@ meta = [
} }
] ]
}, },
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }", ".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'" ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
], ],
"keywords" : [ "keywords" : [
"single-cell", "single-cell",
@@ -3553,6 +3578,7 @@ par = {
'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'features_to_exclude': $( if [ ! -z ${VIASH_PAR_FEATURES_TO_EXCLUDE+x} ]; then echo "r'${VIASH_PAR_FEATURES_TO_EXCLUDE//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ),
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'var_name_filter': $( if [ ! -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'var_name_filter': $( if [ ! -z ${VIASH_PAR_VAR_NAME_FILTER+x} ]; then echo "r'${VIASH_PAR_VAR_NAME_FILTER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'varm_name': $( if [ ! -z ${VIASH_PAR_VARM_NAME+x} ]; then echo "r'${VIASH_PAR_VARM_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'varm_name': $( if [ ! -z ${VIASH_PAR_VARM_NAME+x} ]; then echo "r'${VIASH_PAR_VARM_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
@@ -3651,6 +3677,38 @@ if par["var_input"]:
input_anndata.var[par["var_input"]] = data.var[par["var_input"]] input_anndata.var[par["var_input"]] = data.var[par["var_input"]]
input_anndata = subset_vars(input_anndata, par["var_input"]) input_anndata = subset_vars(input_anndata, par["var_input"])
# Exclude user-specified features from HVG calculation
excluded_features_mask = None
if par.get("features_to_exclude"):
features_to_exclude = set(par["features_to_exclude"])
logger.info(
"\\\\tExcluding %d specified features from HVG calculation",
len(features_to_exclude),
)
excluded_features_mask = input_anndata.var_names.isin(features_to_exclude)
n_excluded = excluded_features_mask.sum()
n_not_found = len(features_to_exclude) - n_excluded
if n_not_found > 0:
not_found = features_to_exclude - set(
input_anndata.var_names[excluded_features_mask]
)
logger.warning(
"\\\\t%d features to exclude were not found in the data: %s",
n_not_found,
list(not_found)[:10],
)
logger.info("\\\\tExcluding %d features from HVG calculation", n_excluded)
if n_excluded == input_anndata.n_vars:
raise ValueError(
f"All features ({n_excluded}) are in the exclusion list. "
"Please check your --features_to_exclude list."
)
# Store original var_names for later reindexing
original_var_names = input_anndata.var_names.copy()
# Subset to non-excluded features for HVG calculation using subset_vars
input_anndata = subset_vars(input_anndata, ~excluded_features_mask)
logger.info("\\\\t%d features remaining for HVG calculation", input_anndata.n_vars)
logger.info("\\\\tUnfiltered data: %s", data) logger.info("\\\\tUnfiltered data: %s", data)
logger.info("\\\\tComputing hvg") logger.info("\\\\tComputing hvg")
@@ -3695,6 +3753,17 @@ try:
assert (out.index == data.var.index).all(), ( assert (out.index == data.var.index).all(), (
"Expected output index values to be equivalent to the input index" "Expected output index values to be equivalent to the input index"
) )
elif par.get("features_to_exclude") is not None:
# Reindex to include excluded features, marking them as non-HVG
out.index = input_anndata.var.index
out = out.reindex(index=original_var_names, method=None)
out.highly_variable = out.highly_variable.fillna(False)
# Further reindex to match data.var.index (for consistency with var_input path)
out = out.reindex(index=data.var.index, method=None)
out.highly_variable = out.highly_variable.fillna(False)
assert (out.index == data.var.index).all(), (
"Expected output index values to be equivalent to the input index"
)
elif par["obs_batch_key"] is not None: elif par["obs_batch_key"] is not None:
out = out.reindex(index=data.var.index, method=None) out = out.reindex(index=data.var.index, method=None)
assert (out.index == data.var.index).all(), ( assert (out.index == data.var.index).all(), (
@@ -4110,7 +4179,7 @@ meta["defaults"] = [
"container" : { "container" : {
"registry" : "images.viash-hub.com", "registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/feature_annotation/highly_variable_features_scanpy", "image" : "vsh/openpipeline/feature_annotation/highly_variable_features_scanpy",
"tag" : "v4.0.3" "tag" : "v4.1.0"
}, },
"label" : [ "label" : [
"singlecpu", "singlecpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'feature_annotation/highly_variable_features_scanpy' name = 'feature_annotation/highly_variable_features_scanpy'
mainScript = 'main.nf' mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge' nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3' version = 'v4.1.0'
description = 'Annotate highly variable features [Satija15] [Zheng17] [Stuart19].\n\nExpects logarithmized data, except when flavor=\'seurat_v3\' in which count data is expected.\n\nDepending on flavor, this reproduces the R-implementations of Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for features falling into a given bin for mean expression of features. This means that for each bin of mean expression, highly variable features are selected.\n\nFor [Stuart19], a normalized variance for each feature is computed. First, the data are standardized (i.e., z-score normalization per feature) with a regularized standard deviation. Next, the normalized variance is computed as the variance of each feature after the transformation. Features are ranked by the normalized variance.\n' description = 'Annotate highly variable features [Satija15] [Zheng17] [Stuart19].\n\nExpects logarithmized data, except when flavor=\'seurat_v3\' in which count data is expected.\n\nDepending on flavor, this reproduces the R-implementations of Seurat [Satija15], Cell Ranger [Zheng17], and Seurat v3 [Stuart19].\n\nFor the dispersion-based methods ([Satija15] and [Zheng17]), the normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for features falling into a given bin for mean expression of features. This means that for each bin of mean expression, highly variable features are selected.\n\nFor [Stuart19], a normalized variance for each feature is computed. First, the data are standardized (i.e., z-score normalization per feature) with a regularized standard deviation. Next, the normalized variance is computed as the variance of each feature after the transformation. Features are ranked by the normalized variance.\n'
author = 'Dries De Maeyer, Robrecht Cannoodt' author = 'Dries De Maeyer, Robrecht Cannoodt'
} }

View File

@@ -32,6 +32,14 @@
"description": "If specified, use boolean array in adata.var[var_input] to calculate hvg on subset of vars.\n", "description": "If specified, use boolean array in adata.var[var_input] to calculate hvg on subset of vars.\n",
"help_text": "Type: `string`, multiple: `False`. " "help_text": "Type: `string`, multiple: `False`. "
}, },
"features_to_exclude": {
"type": "array",
"items": {
"type": "string"
},
"description": "User-defined list of feature names to exclude before HVG calculation",
"help_text": "Type: `string`, multiple: `True`, example: `[\"MT-CO1\";\"MT-CO2\";\"MT-ND1\"]`. "
},
"output": { "output": {
"type": "string", "type": "string",
"format": "path", "format": "path",
@@ -109,7 +117,7 @@
}, },
"output_compression": { "output_compression": {
"type": "string", "type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ", "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [ "enum": [
"gzip", "gzip",

View File

@@ -0,0 +1,49 @@
def subset_vars(adata, subset_col):
    """
    Subset an AnnData object to the features selected by a boolean mask.

    Parameters
    ----------
    adata : AnnData
        Annotated data object
    subset_col : str, pd.Series, pd.Index, np.ndarray, or list
        Name of the boolean column in `adata.var` that contains the information if features should be used or not,
        or a boolean mask (same length as adata.var)

    Returns
    -------
    AnnData
        Copy of `adata` with subsetted features

    Raises
    ------
    TypeError
        If `subset_col` is not a string, Series, Index, ndarray or list.
    ValueError
        If the requested column does not exist in `adata.var`, or if the mask
        is not boolean, contains missing values, or does not have exactly one
        entry per feature.
    """
    import pandas as pd
    import numpy as np

    def _length_mismatch(n_entries):
        # Single place that builds the length-mismatch error so every input
        # type reports the same message.
        return ValueError(
            f"Mask length {n_entries} does not match number of variables {adata.n_vars}."
        )

    # Convert all input types to a pandas Series
    if isinstance(subset_col, str):
        if subset_col not in adata.var.columns:
            raise ValueError(
                f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
            )
        mask = adata.var[subset_col]
    elif isinstance(subset_col, pd.Series):
        mask = subset_col
    elif isinstance(subset_col, (pd.Index, np.ndarray, list)):
        # Check the length before wrapping: pd.Series(..., index=...) rejects a
        # wrongly sized input with a generic pandas error, which would hide the
        # dedicated message below. Arrays that are not one-dimensional are left
        # to pandas to handle.
        if getattr(subset_col, "ndim", 1) == 1 and len(subset_col) != adata.n_vars:
            raise _length_mismatch(len(subset_col))
        mask = pd.Series(subset_col, index=adata.var.index)
    else:
        raise TypeError(
            "subset_col must be a string (column name) or a boolean mask (Series, Index, ndarray, or list)."
        )

    # Validate mask
    if not pd.api.types.is_bool_dtype(mask):
        raise ValueError(
            f"Expected mask to be boolean, but found {mask.dtype}. Can not subset data."
        )
    if mask.isna().any():
        raise ValueError("Mask contains NaN values. Can not subset data.")
    # Still needed for Series input, which is not length-checked above.
    if len(mask) != adata.n_vars:
        raise _length_mismatch(len(mask))

    return adata[:, mask].copy()

View File

@@ -1,6 +1,6 @@
name: "delimit_fraction" name: "delimit_fraction"
namespace: "filter" namespace: "filter"
version: "v4.0.3" version: "v4.1.0"
authors: authors:
- name: "Dries Schaumont" - name: "Dries Schaumont"
roles: roles:
@@ -78,8 +78,8 @@ argument_groups:
multiple_sep: ";" multiple_sep: ";"
- type: "string" - type: "string"
name: "--output_compression" name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\ description: "Compression format to use for the output AnnData and/or Mudata H5\
By default no compression is applied.\n" \ files.\nBy default no compression is applied.\n"
info: null info: null
example: example:
- "gzip" - "gzip"
@@ -219,7 +219,7 @@ engines:
id: "docker" id: "docker"
image: "python:3.12-slim" image: "python:3.12-slim"
target_registry: "images.viash-hub.com" target_registry: "images.viash-hub.com"
target_tag: "v4.0.3" target_tag: "v4.1.0"
namespace_separator: "/" namespace_separator: "/"
setup: setup:
- type: "apt" - type: "apt"
@@ -229,9 +229,10 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "anndata~=0.12.7" - "anndata~=0.12.16"
- "awkward" - "awkward"
- "mudata~=0.3.2" - "scipy~=1.17.1"
- "mudata~=0.3.8"
script: script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\ - "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\ \ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -245,7 +246,7 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.8.0" - "viashpy==0.10.0"
github: github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
upgrade: true upgrade: true
@@ -259,12 +260,12 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/nextflow/filter/delimit_fraction" output: "target/nextflow/filter/delimit_fraction"
executable: "target/nextflow/filter/delimit_fraction/main.nf" executable: "target/nextflow/filter/delimit_fraction/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206" git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline" git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config: package_config:
name: "openpipeline" name: "openpipeline"
version: "v4.0.3" version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n" summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\ description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\ \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -285,7 +286,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config" - path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\ description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI." \ Hub CI."
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:
@@ -294,7 +295,7 @@ package_config:
)'" )'"
- ".engines += { type: \"native\" }" - ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'" - ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords: keywords:
- "single-cell" - "single-cell"
- "multimodal" - "multimodal"

View File

@@ -1,6 +1,6 @@
// delimit_fraction v4.0.3 // delimit_fraction v4.1.0
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1709,10 +1704,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
For source arguments, this matters when the argument has a trailing slash
and names a symbolic link to a directory: without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{ "config": processConfig(readJsonBlob('''{
"name" : "delimit_fraction", "name" : "delimit_fraction",
"namespace" : "filter", "namespace" : "filter",
"version" : "v4.0.3", "version" : "v4.1.0",
"authors" : [ "authors" : [
{ {
"name" : "Dries Schaumont", "name" : "Dries Schaumont",
@@ -3134,7 +3144,7 @@ meta = [
{ {
"type" : "string", "type" : "string",
"name" : "--output_compression", "name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [ "example" : [
"gzip" "gzip"
], ],
@@ -3312,7 +3322,7 @@ meta = [
"id" : "docker", "id" : "docker",
"image" : "python:3.12-slim", "image" : "python:3.12-slim",
"target_registry" : "images.viash-hub.com", "target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3", "target_tag" : "v4.1.0",
"namespace_separator" : "/", "namespace_separator" : "/",
"setup" : [ "setup" : [
{ {
@@ -3326,9 +3336,10 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"anndata~=0.12.7", "anndata~=0.12.16",
"awkward", "awkward",
"mudata~=0.3.2" "scipy~=1.17.1",
"mudata~=0.3.8"
], ],
"script" : [ "script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")" "exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3348,7 +3359,7 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"viashpy==0.8.0" "viashpy==0.10.0"
], ],
"github" : [ "github" : [
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
@@ -3367,13 +3378,13 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "docker|native", "engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/filter/delimit_fraction", "output" : "/workdir/root/repo/target/nextflow/filter/delimit_fraction",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206", "git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline" "git_remote" : "https://github.com/openpipelines-bio/openpipeline"
}, },
"package_config" : { "package_config" : {
"name" : "openpipeline", "name" : "openpipeline",
"version" : "v4.0.3", "version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n", "summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n", "description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : { "info" : {
@@ -3391,14 +3402,14 @@ meta = [
} }
] ]
}, },
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }", ".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'" ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
], ],
"keywords" : [ "keywords" : [
"single-cell", "single-cell",
@@ -3928,7 +3939,7 @@ meta["defaults"] = [
"container" : { "container" : {
"registry" : "images.viash-hub.com", "registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/filter/delimit_fraction", "image" : "vsh/openpipeline/filter/delimit_fraction",
"tag" : "v4.0.3" "tag" : "v4.1.0"
}, },
"label" : [ "label" : [
"singlecpu", "singlecpu",

View File

@@ -2,7 +2,7 @@ manifest {
name = 'filter/delimit_fraction' name = 'filter/delimit_fraction'
mainScript = 'main.nf' mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge' nextflowVersion = '!>=20.12.1-edge'
version = 'v4.0.3' version = 'v4.1.0'
description = 'Turns a column containing values between 0 and 1 into a boolean column based on thresholds.\n' description = 'Turns a column containing values between 0 and 1 into a boolean column based on thresholds.\n'
author = 'Dries Schaumont' author = 'Dries Schaumont'
} }

View File

@@ -48,7 +48,7 @@
}, },
"output_compression": { "output_compression": {
"type": "string", "type": "string",
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ", "help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
"enum": [ "enum": [
"gzip", "gzip",

View File

@@ -1,6 +1,6 @@
name: "do_filter" name: "do_filter"
namespace: "filter" namespace: "filter"
version: "v4.0.3" version: "v4.1.0"
authors: authors:
- name: "Robrecht Cannoodt" - name: "Robrecht Cannoodt"
roles: roles:
@@ -79,8 +79,8 @@ argument_groups:
multiple_sep: ";" multiple_sep: ";"
- type: "string" - type: "string"
name: "--output_compression" name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\ description: "Compression format to use for the output AnnData and/or Mudata H5\
By default no compression is applied.\n" \ files.\nBy default no compression is applied.\n"
info: null info: null
example: example:
- "gzip" - "gzip"
@@ -193,7 +193,7 @@ engines:
id: "docker" id: "docker"
image: "python:3.12-slim" image: "python:3.12-slim"
target_registry: "images.viash-hub.com" target_registry: "images.viash-hub.com"
target_tag: "v4.0.3" target_tag: "v4.1.0"
namespace_separator: "/" namespace_separator: "/"
setup: setup:
- type: "apt" - type: "apt"
@@ -203,9 +203,10 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "anndata~=0.12.7" - "anndata~=0.12.16"
- "awkward" - "awkward"
- "mudata~=0.3.2" - "scipy~=1.17.1"
- "mudata~=0.3.8"
script: script:
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\ - "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\ \ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
@@ -215,7 +216,7 @@ engines:
- type: "python" - type: "python"
user: false user: false
packages: packages:
- "viashpy==0.8.0" - "viashpy==0.10.0"
upgrade: true upgrade: true
entrypoint: [] entrypoint: []
cmd: null cmd: null
@@ -227,12 +228,12 @@ build_info:
engine: "docker|native" engine: "docker|native"
output: "target/nextflow/filter/do_filter" output: "target/nextflow/filter/do_filter"
executable: "target/nextflow/filter/do_filter/main.nf" executable: "target/nextflow/filter/do_filter/main.nf"
viash_version: "0.9.4" viash_version: "0.9.7"
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206" git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
git_remote: "https://github.com/openpipelines-bio/openpipeline" git_remote: "https://github.com/openpipelines-bio/openpipeline"
package_config: package_config:
name: "openpipeline" name: "openpipeline"
version: "v4.0.3" version: "v4.1.0"
summary: "Best-practice workflows for single-cell multi-omics analyses.\n" summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\ description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\ \ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
@@ -253,7 +254,7 @@ package_config:
- path: "src/workflows/utils/labels_ci.config" - path: "src/workflows/utils/labels_ci.config"
description: "Adds the correct memory and CPU labels when running on the Viash\ description: "Adds the correct memory and CPU labels when running on the Viash\
\ Hub CI." \ Hub CI."
viash_version: "0.9.4" viash_version: "0.9.7"
source: "src" source: "src"
target: "target" target: "target"
config_mods: config_mods:
@@ -262,7 +263,7 @@ package_config:
)'" )'"
- ".engines += { type: \"native\" }" - ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'" - ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
keywords: keywords:
- "single-cell" - "single-cell"
- "multimodal" - "multimodal"

View File

@@ -1,6 +1,6 @@
// do_filter v4.0.3 // do_filter v4.1.0
// //
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
// Intuitive. // Intuitive.
// //
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
def br = java.nio.file.Files.newBufferedReader(inputFile) java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
def row = 0
def header = null
def line
def row = -1 while (header == null && (line = br.readLine()) != null) {
def header = null if (!line.startsWith("#")) {
while (br.ready() && header == null) { header = splitRegex.split(line, -1).collect { field ->
def line = br.readLine() def m = removeQuote.matcher(field)
row++ m.find() ? m.replaceFirst('$1') : field
if (!line.startsWith("#")) {
header = splitRegex.split(line, -1).collect{field ->
m = removeQuote.matcher(field)
m.find() ? m.replaceFirst('$1') : field
}
}
}
assert header != null: "CSV file should contain a header"
while (br.ready()) {
def line = br.readLine()
row++
if (line == null) {
br.close()
break
}
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect{field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
} }
} }
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" row++
}
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} assert header != null : "CSV file should contain a header"
output.add(dataMap)
while ((line = br.readLine()) != null) {
row++
if (!line.startsWith("#")) {
def predata = splitRegex.split(line, -1)
def data = predata.collect { field ->
if (field == "") {
return null
}
def m = removeQuote.matcher(field)
if (m.find()) {
return m.replaceFirst('$1')
} else {
return field
}
}
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
output.add(dataMap)
}
} }
} }
@@ -1709,10 +1704,25 @@ process publishFilesProc {
] ]
.transpose() .transpose()
.collectMany{infile, outfile -> .collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) { def infileString = infile.toString()
def outfileString = outfile.toString()
if (infileString != outfileString) {
/* Trailing slashes are removed from both the source and destination arguments.
For source arguments, this matters when the argument has a trailing slash
and names a symbolic link to a directory: without removing the slash, cp will dereference
the symbolic link.
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
For the destination path, adding a trailing slash is a problem when publishing directories:
it requires the destination directory to exist. This fails because we only create the parent
directories first.
*/
def regexTrailingSlashes = ~/\/+$/
def infileNoTrailingSlash = infileString - regexTrailingSlashes
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
[ [
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", "[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'" "cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
] ]
} else { } else {
// no need to copy if infile is the same as outfile // no need to copy if infile is the same as outfile
@@ -3035,7 +3045,7 @@ meta = [
"config": processConfig(readJsonBlob('''{ "config": processConfig(readJsonBlob('''{
"name" : "do_filter", "name" : "do_filter",
"namespace" : "filter", "namespace" : "filter",
"version" : "v4.0.3", "version" : "v4.1.0",
"authors" : [ "authors" : [
{ {
"name" : "Robrecht Cannoodt", "name" : "Robrecht Cannoodt",
@@ -3137,7 +3147,7 @@ meta = [
{ {
"type" : "string", "type" : "string",
"name" : "--output_compression", "name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", "description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
"example" : [ "example" : [
"gzip" "gzip"
], ],
@@ -3282,7 +3292,7 @@ meta = [
"id" : "docker", "id" : "docker",
"image" : "python:3.12-slim", "image" : "python:3.12-slim",
"target_registry" : "images.viash-hub.com", "target_registry" : "images.viash-hub.com",
"target_tag" : "v4.0.3", "target_tag" : "v4.1.0",
"namespace_separator" : "/", "namespace_separator" : "/",
"setup" : [ "setup" : [
{ {
@@ -3296,9 +3306,10 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"anndata~=0.12.7", "anndata~=0.12.16",
"awkward", "awkward",
"mudata~=0.3.2" "scipy~=1.17.1",
"mudata~=0.3.8"
], ],
"script" : [ "script" : [
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")" "exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
@@ -3311,7 +3322,7 @@ meta = [
"type" : "python", "type" : "python",
"user" : false, "user" : false,
"packages" : [ "packages" : [
"viashpy==0.8.0" "viashpy==0.10.0"
], ],
"upgrade" : true "upgrade" : true
} }
@@ -3327,13 +3338,13 @@ meta = [
"runner" : "nextflow", "runner" : "nextflow",
"engine" : "docker|native", "engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/filter/do_filter", "output" : "/workdir/root/repo/target/nextflow/filter/do_filter",
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206", "git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline" "git_remote" : "https://github.com/openpipelines-bio/openpipeline"
}, },
"package_config" : { "package_config" : {
"name" : "openpipeline", "name" : "openpipeline",
"version" : "v4.0.3", "version" : "v4.1.0",
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n", "summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n", "description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
"info" : { "info" : {
@@ -3351,14 +3362,14 @@ meta = [
} }
] ]
}, },
"viash_version" : "0.9.4", "viash_version" : "0.9.7",
"source" : "/workdir/root/repo/src", "source" : "/workdir/root/repo/src",
"target" : "/workdir/root/repo/target", "target" : "/workdir/root/repo/target",
"config_mods" : [ "config_mods" : [
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
".engines += { type: \\"native\\" }", ".engines += { type: \\"native\\" }",
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
".engines[.type == 'docker'].target_tag := 'v4.0.3'" ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
], ],
"keywords" : [ "keywords" : [
"single-cell", "single-cell",
@@ -3851,7 +3862,7 @@ meta["defaults"] = [
"container" : { "container" : {
"registry" : "images.viash-hub.com", "registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline/filter/do_filter", "image" : "vsh/openpipeline/filter/do_filter",
"tag" : "v4.0.3" "tag" : "v4.1.0"
}, },
"label" : [ "label" : [
"singlecpu", "singlecpu",

Some files were not shown because too many files have changed in this diff Show More