Build branch openpipeline_spatial/niche-compass with version niche-compass to openpipeline_spatial on branch niche-compass (3edcea0)
Build pipeline: openpipelines-bio.openpipeline-spatial.niche-compass-sxfbq
Source commit: 3edcea085b
Source message: merge edits
This commit is contained in:
48
CHANGELOG.md
48
CHANGELOG.md
@@ -1,3 +1,45 @@
|
||||
# openpipeline_spatial (unreleased)
|
||||
|
||||
* `nichecompass/nichecompass`: Component to train a NicheCompass model and project latent space embeddings (PR #28).
|
||||
|
||||
* `workflows/niche/nichecompass_leiden`: Workflow to perform niche analysis using NicheCompass, including spatial neighborhood calculation, NicheCompass analysis and Leiden clustering (PR #28).
|
||||
|
||||
# openpipeline_spatial 0.5.0
|
||||
|
||||
## NEW FUNCTIONALITY
|
||||
|
||||
* `dataflow/concatenate_spatialdata`: Concatenate two or more SpatialData objects (PR #49).
|
||||
|
||||
* `dataflow/move_layer_spatialdata`: Move/rename a layer in a SpatialData object (PR #50).
|
||||
|
||||
* `convert/from_h5mu_to_seurat_with_fov`: Added converter component for H5MU data to Seurat objects with spatial FOV (PR #51).
|
||||
|
||||
## MAJOR CHANGES
|
||||
|
||||
* Pin OpenPipeline dependency to v4.1.0 (PR #56).
|
||||
|
||||
## MINOR CHANGES
|
||||
|
||||
* Pin ome-zarr to 0.13.0 to avoid a chunk shape incompatibility with zarr 3.x (PR #48).
|
||||
|
||||
* Bump Viash to 0.9.7 (PR #57).
|
||||
|
||||
* Bump anndata to 0.12.16 (PR #57).
|
||||
|
||||
* Bump mudata to 0.3.8 (PR #57).
|
||||
|
||||
* Bump scanpy to 1.11.4 (PR #57).
|
||||
|
||||
* Testing: bump viashpy to 0.10.0 (PR #57).
|
||||
|
||||
## BUG FIXES
|
||||
|
||||
* `convert/from_h5mu_to_spatialdata`: Make sure the AnnData table is properly parsed before inserting into the new SpatialData object (PR #53).
|
||||
|
||||
* `convert/from_h5mu_to_spatialexperiment`: Add `libuv1-dev` to the test image so reticulate imports successfully (PR #54).
|
||||
|
||||
* `nichecompass/gene_program_mask`: Bump container base image to `nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04` and pin `decoupler>=2.1.6` to pick up the updated HCOP orthology download URL (PR #55).
|
||||
|
||||
# openpipeline_spatial 0.4.0
|
||||
|
||||
## NEW FUNCTIONALITY
|
||||
@@ -14,6 +56,8 @@
|
||||
|
||||
* `convert/from_h5mu_to_spatialdata`: Added a converter component to convert from H5MU to SpatialData (PR #40)
|
||||
|
||||
* `nichecompass/gene_program_mask`: Added a component to create a prior knowledge gene program mask for NicheCompass analysis (PR #27).
|
||||
|
||||
## MINOR CHANGES
|
||||
|
||||
* Bump squidpy to 1.8.1 and spatialdata to 0.7.2 (PR #41).
|
||||
@@ -30,10 +74,6 @@
|
||||
|
||||
* `workflows/ingestion/spaceranger_mapping`: Added a workflow to ingest Visium data using Spaceranger and convert the count matrix to an H5MU file (PR #33).
|
||||
|
||||
* `nichecompass/nichecompass`: Component to train a NicheCompass model and project latent space embeddings (PR #28).
|
||||
|
||||
* `workflows/niche/nichecompass_leiden`: Workflow to perform niche analysis using NicheCompass, including spatialneighborhood calculation, NicheCompass analysis and Leiden clustering (PR #28)
|
||||
|
||||
## MINOR CHANGES
|
||||
|
||||
* Add `scope` to component and workflow configurations (PR #22).
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
viash_version: 0.9.4
|
||||
viash_version: 0.9.7
|
||||
source: src
|
||||
target: target
|
||||
name: openpipeline_spatial
|
||||
@@ -10,7 +10,7 @@ repositories:
|
||||
- name: openpipeline
|
||||
repo: openpipeline
|
||||
type: vsh
|
||||
tag: v4.0.3
|
||||
tag: v4.1.0
|
||||
info:
|
||||
test_resources:
|
||||
- type: s3
|
||||
|
||||
17
resources_test_scripts/niche_analysis.sh
Normal file → Executable file
17
resources_test_scripts/niche_analysis.sh
Normal file → Executable file
@@ -19,28 +19,37 @@ function clean_up {
|
||||
}
|
||||
trap clean_up EXIT
|
||||
|
||||
# Define file names
|
||||
orthologue_file="human_mouse_gene_orthologs.csv"
|
||||
enzymes_file="mouse_metabolite_enzymes.tsv"
|
||||
sensors_file="mouse_metabolite_sensors.tsv"
|
||||
|
||||
if [ ! -d "$DIR" ]; then
|
||||
mkdir -p "$DIR"
|
||||
|
||||
orthologue_url="https://raw.githubusercontent.com/Lotfollahi-lab/nichecompass/refs/tags/0.3.2/data/gene_annotations/human_mouse_gene_orthologs.csv"
|
||||
orthologue_file="human_mouse_gene_orthologs.csv"
|
||||
wget "$orthologue_url" -O "$DIR/$orthologue_file"
|
||||
|
||||
enzymes_url="https://raw.githubusercontent.com/Lotfollahi-lab/nichecompass/refs/tags/0.3.2/data/gene_programs/metabolite_enzyme_sensor_gps/mouse_metabolite_enzymes.tsv"
|
||||
enzymes_file="mouse_metabolite_enzymes.tsv"
|
||||
wget "$enzymes_url" -O "$DIR/$enzymes_file"
|
||||
|
||||
sensors_url="https://raw.githubusercontent.com/Lotfollahi-lab/nichecompass/refs/tags/0.3.2/data/gene_programs/metabolite_enzyme_sensor_gps/mouse_metabolite_sensors.tsv"
|
||||
sensors_file="mouse_metabolite_sensors.tsv"
|
||||
wget "$sensors_url" -O "$DIR/$sensors_file"
|
||||
fi
|
||||
|
||||
# Generate omnipath and collectri network files for testing
|
||||
# These files are used to avoid API rate limits when running tests in parallel
|
||||
omnipath_file="omnipath_lr_network.csv"
|
||||
collectri_file="collectri_tf_network.csv"
|
||||
|
||||
gp_mask="prior_knowledge_gp_mask.json"
|
||||
viash run src/nichecompass/gene_program_mask/config.vsh.yaml -- \
|
||||
--input_gene_orthologs_mapping_file "$DIR/$orthologue_file" \
|
||||
--input_metabolite_enzymes "$DIR/$enzymes_file" \
|
||||
--input_metabolite_sensors "$DIR/$sensors_file" \
|
||||
--output "${DIR}/${gp_mask}"
|
||||
--output "${DIR}/${gp_mask}" \
|
||||
--output_omnipath_lr_network "${DIR}/${omnipath_file}" \
|
||||
--output_collectri_tf_network "${DIR}/${collectri_file}"
|
||||
|
||||
# Sync to S3
|
||||
aws s3 sync \
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
packages:
|
||||
- anndata~=0.12.7
|
||||
- awkward
|
||||
- anndata~=0.12.16
|
||||
- awkward
|
||||
- scipy~=1.17.1 # Exclude scipy 1.17.0 because https://github.com/scverse/anndata/issues/339
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
__merge__: [/src/base/requirements/anndata.yaml, .]
|
||||
packages:
|
||||
- mudata~=0.3.2
|
||||
- mudata~=0.3.8
|
||||
script: |
|
||||
exec("try:\n import zarr; from importlib.metadata import version\nexcept ModuleNotFoundError:\n exit(0)\nelse: assert int(version(\"zarr\").partition(\".\")[0]) > 2")
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
packages:
|
||||
- scanpy~=1.10.4
|
||||
- scanpy~=1.11.4
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
packages:
|
||||
- spatialdata~=0.7.2
|
||||
- ome-zarr~=0.12.2
|
||||
- pyarrow~=18.0.0
|
||||
script: |
|
||||
exec("try:\n import zarr; from importlib.metadata import version\nexcept ModuleNotFoundError:\n exit(0)\nelse: assert int(version(\"zarr\").partition(\".\")[0]) > 2")
|
||||
- ome-zarr~=0.13.0
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
packages:
|
||||
- viashpy==0.9.0
|
||||
- viashpy==0.10.0
|
||||
|
||||
86
src/convert/from_h5mu_to_seurat_with_fov/config.vsh.yaml
Normal file
86
src/convert/from_h5mu_to_seurat_with_fov/config.vsh.yaml
Normal file
@@ -0,0 +1,86 @@
|
||||
name: "from_h5mu_to_seurat_with_fov"
|
||||
namespace: "convert"
|
||||
scope: "public"
|
||||
description: |
|
||||
Converts a single modality of an h5mu file into a Seurat object with a Field of View (FOV)
|
||||
built from centroid coordinates of spatially-resolved single cells.
|
||||
authors:
|
||||
- __merge__: /src/authors/dorien_roosen.yaml
|
||||
roles: [ author, maintainer ]
|
||||
argument_groups:
|
||||
- name: Inputs
|
||||
arguments:
|
||||
- name: "--input"
|
||||
alternatives: ["-i"]
|
||||
type: file
|
||||
description: Input h5mu file
|
||||
direction: input
|
||||
required: true
|
||||
example: input.h5mu
|
||||
- name: "--modality"
|
||||
type: string
|
||||
default: "rna"
|
||||
description: Modality of the h5mu file to convert.
|
||||
- name: "--obsm_centroid_coordinates"
|
||||
type: string
|
||||
default: spatial
|
||||
description: |
|
||||
Key name of the .obsm slot in the input file that contains the spatial (centroid) coordinates.
|
||||
- name: Centroid arguments
|
||||
arguments:
|
||||
- name: "--centroid_nsides"
|
||||
type: integer
|
||||
required: false
|
||||
description: |
|
||||
Number of sides of the polygon to be created around each centroid coordinate to represent the cell shape.
|
||||
If not provided, circles will be created.
|
||||
- name: "--centroid_radius"
|
||||
type: double
|
||||
required: false
|
||||
description: |
|
||||
Radius of the shape around each centroid coordinate to represent the cell shape.
|
||||
If not provided, Seurat computes a default based on the centroid coordinates.
|
||||
- name: "--centroid_theta"
|
||||
type: double
|
||||
required: false
|
||||
description: |
|
||||
Angle to adjust the shapes around each centroid when plotting.
|
||||
If not provided, no adjustment will be made (theta = 0)
|
||||
- name: Outputs
|
||||
arguments:
|
||||
- name: "--output"
|
||||
alternatives: ["-o"]
|
||||
type: file
|
||||
description: Output Seurat file
|
||||
direction: output
|
||||
required: true
|
||||
example: output.rds
|
||||
- name: "--assay"
|
||||
type: string
|
||||
default: "RNA"
|
||||
description: Name of the assay to be created.
|
||||
resources:
|
||||
- type: r_script
|
||||
path: script.R
|
||||
test_resources:
|
||||
- type: r_script
|
||||
path: test.R
|
||||
- path: /resources_test/cosmx/Lung5_Rep2_tiny.h5mu
|
||||
- path: /resources_test/xenium/xenium_tiny.h5mu
|
||||
engines:
|
||||
- type: docker
|
||||
image: rocker/r2u:24.04
|
||||
setup:
|
||||
- type: apt
|
||||
packages: [ libhdf5-dev, libgeos-dev, hdf5-tools ]
|
||||
- type: r
|
||||
cran: [ anndata, hdf5r, Seurat, SeuratObject ]
|
||||
github: scverse/anndataR@36f3caad9a7f360165c1510bbe0c62657580415a
|
||||
test_setup:
|
||||
- type: r
|
||||
cran: [ testthat ]
|
||||
runners:
|
||||
- type: executable
|
||||
- type: nextflow
|
||||
directives:
|
||||
label: [lowmem, singlecpu]
|
||||
106
src/convert/from_h5mu_to_seurat_with_fov/script.R
Normal file
106
src/convert/from_h5mu_to_seurat_with_fov/script.R
Normal file
@@ -0,0 +1,106 @@
|
||||
library(anndataR)
|
||||
library(hdf5r)
|
||||
library(Seurat)
|
||||
|
||||
### VIASH START
|
||||
par <- list(
|
||||
input = "resources_test/xenium/xenium_tiny.h5mu",
|
||||
output = "test.rds",
|
||||
obsm_centroid_coordinates = "spatial",
|
||||
assay = "RNA",
|
||||
centroid_nsides = 8,
|
||||
centroid_radius = 3,
|
||||
centroid_theta = 0.1,
|
||||
modality = "rna"
|
||||
)
|
||||
### VIASH END
|
||||
|
||||
|
||||
# Extract one modality of an H5MU file into a temporary, standalone H5AD file.
#
# Every child object of the HDF5 group `mod/<modality_name>` is copied to the
# root of a freshly created temporary file, after which the attributes found
# on the root of the source file are replicated on the root of the new file.
#
# @param h5mu_path Path to the input H5MU file (opened read-only).
# @param modality_name Name of the modality group below `mod/` to extract.
# @return Path to the temporary `.h5ad` file that was written.
h5mu_to_h5ad <- function(h5mu_path, modality_name) {
  tmp_path <- tempfile(fileext = ".h5ad")
  mod_location <- paste("mod", modality_name, sep = "/")

  # Register the clean-up right after each handle is opened so that both files
  # are released even when one of the copy steps below raises an error.
  h5src <- hdf5r::H5File$new(h5mu_path, "r")
  on.exit(h5src$close_all(), add = TRUE)
  h5dest <- hdf5r::H5File$new(tmp_path, "w")
  on.exit(h5dest$close_all(), add = TRUE)

  # Copy over the direct children of the modality group to the new root
  children <- hdf5r::list.objects(h5src,
    path = mod_location,
    full.names = FALSE, recursive = FALSE
  )
  for (child in children) {
    h5dest$obj_copy_from(
      h5src, paste(mod_location, child, sep = "/"),
      paste0("/", child)
    )
  }

  # Also copy the root attributes.
  # NOTE(review): these are read from the root of the source file, not from
  # the modality group itself - confirm that this is the intended source.
  root_attrs <- hdf5r::h5attr_names(x = h5src)
  for (attr in root_attrs) {
    h5a <- h5src$attr_open(attr_name = attr)
    robj <- h5a$read()
    h5dest$create_attr_by_name(
      attr_name = attr,
      obj_name = ".",
      robj = robj,
      space = h5a$get_space(),
      dtype = h5a$get_type()
    )
  }

  # The handlers registered with on.exit() close both files before the caller
  # receives the path.
  tmp_path
}
|
||||
|
||||
# Read in H5AD
|
||||
h5ad_path <- h5mu_to_h5ad(par$input, par$modality)
|
||||
|
||||
# Convert to Seurat
|
||||
seurat_obj <- read_h5ad(
|
||||
h5ad_path,
|
||||
mode = "r",
|
||||
as = "Seurat",
|
||||
assay_name = par$assay
|
||||
)
|
||||
|
||||
# Look up centroid coordinates in .obsm
|
||||
available_reductions <- names(seurat_obj@reductions)
|
||||
if (!par$obsm_centroid_coordinates %in% available_reductions) {
|
||||
stop(
|
||||
"Centroid coordinates '", par$obsm_centroid_coordinates,
|
||||
"' not found in .obsm. Available keys: ",
|
||||
paste(available_reductions, collapse = ", ")
|
||||
)
|
||||
}
|
||||
|
||||
reductions <- seurat_obj@reductions[[par$obsm_centroid_coordinates]]
|
||||
spatial_coords <- as.data.frame(reductions@cell.embeddings)
|
||||
if (ncol(spatial_coords) != 2) {
|
||||
stop(
|
||||
"Centroid coordinates must have 2 columns, but found ",
|
||||
ncol(spatial_coords), " columns"
|
||||
)
|
||||
}
|
||||
colnames(spatial_coords) <- c("x_coord", "y_coord")
|
||||
|
||||
if (is.null(par$centroid_nsides)) {
|
||||
par$centroid_nsides <- Inf
|
||||
}
|
||||
|
||||
if (is.null(par$centroid_theta)) {
|
||||
par$centroid_theta <- 0
|
||||
}
|
||||
|
||||
# Create Centroids object
|
||||
centroids <- CreateCentroids(
|
||||
coords = spatial_coords,
|
||||
nsides = par$centroid_nsides,
|
||||
radius = par$centroid_radius,
|
||||
theta = par$centroid_theta
|
||||
)
|
||||
|
||||
# Create FOV object
|
||||
fov <- CreateFOV(coords = centroids, assay = par$assay)
|
||||
seurat_obj[["fov"]] <- fov
|
||||
seurat_obj@reductions[[par$obsm_centroid_coordinates]] <- NULL
|
||||
|
||||
saveRDS(seurat_obj, file = par$output)
|
||||
182
src/convert/from_h5mu_to_seurat_with_fov/test.R
Normal file
182
src/convert/from_h5mu_to_seurat_with_fov/test.R
Normal file
@@ -0,0 +1,182 @@
|
||||
library(testthat, warn.conflicts = FALSE)
|
||||
library(hdf5r)
|
||||
library(Seurat)
|
||||
|
||||
## VIASH START
|
||||
meta <- list(
|
||||
executable = "target/executable/convert/from_h5mu_to_seurat_with_fov",
|
||||
resources_dir = "resources_test",
|
||||
name = "from_h5mu_to_seurat_with_fov"
|
||||
)
|
||||
## VIASH END
|
||||
|
||||
|
||||
# # ---- Xenium ----------------------------------------------------------
|
||||
cat("> Test conversion Xenium\n")
|
||||
|
||||
in_h5mu <- paste0(
|
||||
meta[["resources_dir"]],
|
||||
"/xenium_tiny.h5mu"
|
||||
)
|
||||
out_rds <- "output.rds"
|
||||
|
||||
cat("> Running ", meta[["name"]], "\n", sep = "")
|
||||
out <- processx::run(
|
||||
meta[["executable"]],
|
||||
c(
|
||||
"--input", in_h5mu,
|
||||
"--output", out_rds,
|
||||
"--assay", "Xenium"
|
||||
)
|
||||
)
|
||||
|
||||
cat("> Checking whether output file exists\n")
|
||||
expect_equal(out$status, 0)
|
||||
expect_true(file.exists(out_rds))
|
||||
|
||||
cat("> Reading output file\n")
|
||||
obj <- readRDS(file = out_rds)
|
||||
adata <- H5File$new(in_h5mu, mode = "r")[["/mod/rna/X"]]
|
||||
|
||||
cat("> Checking whether Seurat object is in the right format\n")
|
||||
expect_equal(Assays(obj), "Xenium")
|
||||
expect_true(all(Layers(obj) == c("counts")))
|
||||
|
||||
dim_rds <- dim(obj)
|
||||
dim_ad <- adata$attr_open("shape")$read()
|
||||
|
||||
expect_equal(dim_rds[1], dim_ad[2])
|
||||
expect_equal(dim_rds[2], dim_ad[1])
|
||||
|
||||
cat("> Checking FOV object\n")
|
||||
expect_true("fov" %in% names(obj))
|
||||
expect_true("fov" %in% Images(obj))
|
||||
|
||||
fov <- obj[["fov"]]
|
||||
expect_equal(fov@assay, "Xenium")
|
||||
expect_equal(fov@key, "Xenium_")
|
||||
|
||||
centroids <- fov@boundaries$centroids
|
||||
expect_equal(nrow(centroids@coords), dim_rds[2])
|
||||
|
||||
centroid_coords <- centroids@coords
|
||||
expect_true(is.numeric(centroid_coords[, 1]))
|
||||
expect_true(is.numeric(centroid_coords[, 2]))
|
||||
expect_false(any(is.na(centroid_coords)))
|
||||
|
||||
# # ---- Xenium with args-------------------------------------------------
|
||||
cat("> Test conversion Xenium with centroid arguments\n")
|
||||
|
||||
in_h5mu <- paste0(
|
||||
meta[["resources_dir"]],
|
||||
"/xenium_tiny.h5mu"
|
||||
)
|
||||
out_rds <- "output.rds"
|
||||
|
||||
cat("> Running ", meta[["name"]], "\n", sep = "")
|
||||
out <- processx::run(
|
||||
meta[["executable"]],
|
||||
c(
|
||||
"--input", in_h5mu,
|
||||
"--output", out_rds,
|
||||
"--assay", "Xenium",
|
||||
"--centroid_nsides", "8",
|
||||
"--centroid_radius", "3",
|
||||
"--centroid_theta", "0.1"
|
||||
)
|
||||
)
|
||||
|
||||
cat("> Checking whether output file exists\n")
|
||||
expect_equal(out$status, 0)
|
||||
expect_true(file.exists(out_rds))
|
||||
|
||||
cat("> Reading output file\n")
|
||||
obj <- readRDS(file = out_rds)
|
||||
adata <- H5File$new(in_h5mu, mode = "r")[["/mod/rna/X"]]
|
||||
|
||||
cat("> Checking FOV object\n")
|
||||
fov <- obj[["fov"]]
|
||||
centroids <- fov@boundaries$centroids
|
||||
expect_equal(centroids@nsides, 8)
|
||||
expect_equal(centroids@radius, 3)
|
||||
expect_equal(centroids@theta, 0.1)
|
||||
|
||||
|
||||
# ---- CosMx ----------------------------------------------------------
|
||||
|
||||
cat("> Test conversion CosMx\n")
|
||||
|
||||
in_h5mu <- paste0(
|
||||
meta[["resources_dir"]],
|
||||
"/Lung5_Rep2_tiny.h5mu"
|
||||
)
|
||||
out_rds <- "output.rds"
|
||||
|
||||
cat("> Running ", meta[["name"]], "\n", sep = "")
|
||||
out <- processx::run(
|
||||
meta[["executable"]],
|
||||
c(
|
||||
"--input", in_h5mu,
|
||||
"--output", out_rds,
|
||||
"--assay", "CosMx"
|
||||
)
|
||||
)
|
||||
|
||||
cat("> Checking whether output file exists\n")
|
||||
expect_equal(out$status, 0)
|
||||
expect_true(file.exists(out_rds))
|
||||
|
||||
cat("> Reading output file\n")
|
||||
obj <- readRDS(file = out_rds)
|
||||
adata <- H5File$new(in_h5mu, mode = "r")[["/mod/rna/X"]]
|
||||
|
||||
cat("> Checking whether Seurat object is in the right format\n")
|
||||
expect_equal(Assays(obj), "CosMx")
|
||||
expect_true(all(Layers(obj) == c("counts")))
|
||||
|
||||
dim_rds <- dim(obj)
|
||||
dim_ad <- adata$attr_open("shape")$read()
|
||||
|
||||
expect_equal(dim_rds[1], dim_ad[2])
|
||||
expect_equal(dim_rds[2], dim_ad[1])
|
||||
|
||||
cat("> Checking FOV object\n")
|
||||
expect_true("fov" %in% names(obj))
|
||||
expect_true("fov" %in% Images(obj))
|
||||
|
||||
fov <- obj[["fov"]]
|
||||
expect_equal(fov@assay, "CosMx")
|
||||
expect_equal(fov@key, "CosMx_")
|
||||
|
||||
centroids <- fov@boundaries$centroids
|
||||
expect_equal(nrow(centroids@coords), dim_rds[2])
|
||||
|
||||
centroid_coords <- centroids@coords
|
||||
expect_true(is.numeric(centroid_coords[, 1]))
|
||||
expect_true(is.numeric(centroid_coords[, 2]))
|
||||
expect_false(any(is.na(centroid_coords)))
|
||||
|
||||
|
||||
# ---- Missing obsm key ----------------------------------------------
|
||||
|
||||
cat("> Test that a missing obsm centroid key fails cleanly\n")
|
||||
|
||||
in_h5mu <- paste0(
|
||||
meta[["resources_dir"]],
|
||||
"/xenium_tiny.h5mu"
|
||||
)
|
||||
out_rds <- "output.rds"
|
||||
|
||||
out <- processx::run(
|
||||
meta[["executable"]],
|
||||
c(
|
||||
"--input", in_h5mu,
|
||||
"--output", out_rds,
|
||||
"--assay", "Xenium",
|
||||
"--obsm_centroid_coordinates", "does_not_exist"
|
||||
),
|
||||
error_on_status = FALSE
|
||||
)
|
||||
|
||||
expect_false(out$status == 0)
|
||||
expect_match(out$stderr, "does_not_exist", fixed = TRUE)
|
||||
@@ -49,6 +49,19 @@ logger.info("Creating SpatialData object...")
|
||||
if par.get("input_spatialdata", None) is not None:
|
||||
logger.info("Using existing SpatialData...")
|
||||
sdata = sdata_existing
|
||||
|
||||
# Make sure mod is a compatible SpatialData table
|
||||
attrs = mod.uns["spatialdata_attrs"]
|
||||
mod = sd.models.TableModel.parse(
|
||||
mod,
|
||||
region=attrs["region"].tolist()
|
||||
if hasattr(attrs["region"], "tolist")
|
||||
else attrs["region"],
|
||||
region_key=attrs["region_key"],
|
||||
instance_key=attrs["instance_key"],
|
||||
overwrite_metadata=True,
|
||||
)
|
||||
|
||||
sdata["table"] = mod
|
||||
else:
|
||||
logger.info("Creating new SpatialData...")
|
||||
|
||||
@@ -2,6 +2,7 @@ import os
|
||||
import sys
|
||||
|
||||
import mudata as mu
|
||||
import numpy as np
|
||||
import pytest
|
||||
import spatialdata as sd
|
||||
|
||||
@@ -139,5 +140,50 @@ def test_execution_without_input_spatialdata(run_component, tmp_path):
|
||||
)
|
||||
|
||||
|
||||
def test_execution_with_numpy_region(run_component, tmp_path):
    """Check that a numpy-array `region` in `spatialdata_attrs` is accepted.

    The `rna` modality of the test H5MU is rewritten so that its
    `spatialdata_attrs["region"]` is a numpy object array, the component is
    run against the modified file together with the existing SpatialData
    store, and the resulting table is compared with the modality.
    """
    resources_dir = meta["resources_dir"]
    input_original = resources_dir + "/xenium_tiny.h5mu"
    input_spatialdata = resources_dir + "/xenium_tiny.zarr"
    output = tmp_path / "output_numpy_region.zarr"

    # Load the modality and normalise the stored region to a plain Python
    # value before wrapping it in a numpy object array.
    mdata = mu.read_h5mu(input_original)
    mod = mdata.mod["rna"]

    region = mod.uns["spatialdata_attrs"]["region"]
    if hasattr(region, "tolist"):
        region_list = region.tolist()
    elif isinstance(region, list):
        region_list = region
    else:
        region_list = [region]
    mod.uns["spatialdata_attrs"]["region"] = np.array(region_list, dtype=object)

    input_modified = tmp_path / "modified_numpy_region.h5mu"
    mdata.write_h5mu(str(input_modified))

    cli_args = [
        "--input",
        str(input_modified),
        "--input_spatialdata",
        input_spatialdata,
        "--output",
        str(output),
    ]
    run_component(cli_args)
    assert os.path.exists(output), "output zarr was not created"

    table = sd.read_zarr(output)["table"]

    assert table.n_obs == mod.n_obs, (
        "The number of observations in the SpatialData table does not match the selected modality."
    )
    assert table.obs_names.equals(mod.obs_names), (
        "The observation names in the SpatialData table do not match the selected modality."
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(pytest.main([__file__]))
|
||||
|
||||
@@ -64,7 +64,7 @@ engines:
|
||||
- python3-pip
|
||||
- python3-dev
|
||||
- python-is-python3
|
||||
- cmake
|
||||
- libuv1-dev
|
||||
- type: r
|
||||
cran: [ reticulate, testthat ]
|
||||
- type: python
|
||||
|
||||
127
src/dataflow/concatenate_spatialdata/config.vsh.yaml
Normal file
127
src/dataflow/concatenate_spatialdata/config.vsh.yaml
Normal file
@@ -0,0 +1,127 @@
|
||||
name: concatenate_spatialdata
|
||||
namespace: dataflow
|
||||
scope: public
|
||||
description: |
|
||||
Concatenate multiple SpatialData objects into one SpatialData output.
|
||||
|
||||
authors:
|
||||
- __merge__: /src/authors/dorien_roosen.yaml
|
||||
roles: [ maintainer ]
|
||||
- __merge__: /src/authors/luke_zappia.yaml
|
||||
roles: [ author ]
|
||||
|
||||
argument_groups:
|
||||
- name: Inputs
|
||||
arguments:
|
||||
- name: --inputs
|
||||
type: file
|
||||
multiple: true
|
||||
multiple_sep: ","
|
||||
required: true
|
||||
description: |
|
||||
Paths to Zarr stores containing the SpatialData objects to concatenate.
|
||||
|
||||
- name: Outputs
|
||||
arguments:
|
||||
- name: --output
|
||||
type: file
|
||||
direction: output
|
||||
default: output.zarr
|
||||
required: true
|
||||
description: |
|
||||
The output Zarr store containing the concatenated SpatialData object.
|
||||
|
||||
- name: SpatialData options
|
||||
description: |
|
||||
Options for how to concatenate the SpatialData objects, passed to
|
||||
`spatialdata.concatenate`.
|
||||
arguments:
|
||||
- name: --attrs_merge
|
||||
type: string
|
||||
choices: [same, unique, first, only]
|
||||
description: |
|
||||
The method to use for merging elements in `attrs` when concatenating
|
||||
the SpatialData objects. See `anndata_merge` for options and details.
|
||||
|
||||
- name: AnnData options
|
||||
description: |
|
||||
Options for how to concatenate the AnnData objects when concatenating the
|
||||
main tables of the SpatialData objects using `anndata.concat`.
|
||||
arguments:
|
||||
- name: --anndata_join
|
||||
type: string
|
||||
choices: [inner, outer]
|
||||
default: inner
|
||||
description: |
|
||||
The type of join to perform on the AnnData objects when concatenating
|
||||
the main tables of the SpatialData objects:
|
||||
|
||||
- "inner": Only variables (genes) that are present in all objects are
|
||||
kept.
|
||||
- "outer": All variables (genes) that are present in any object are
|
||||
kept. Sparse arrays are padded with zeros, other objects are filled
|
||||
with missing values.
|
||||
|
||||
See `anndata.concat` for details.
|
||||
- name: --anndata_merge
|
||||
type: string
|
||||
choices: [same, unique, first, only]
|
||||
description: |
|
||||
The method to use for merging elements that are not aligned with `obs`
|
||||
when concatenating the main tables of the SpatialData objects:
|
||||
|
||||
- None: No elements are kept.
|
||||
- "same": Only elements that are the same across all objects are kept.
|
||||
- "unique": Elements for which there is only one possible value are
|
||||
kept.
|
||||
- "first": The first element at each position is kept.
|
||||
- "only": Elements that are only in one object are kept.
|
||||
|
||||
See `anndata.concat` for details.
|
||||
- name: --anndata_uns_merge
|
||||
type: string
|
||||
choices: [same, unique, first, only]
|
||||
description: |
|
||||
The method to use for merging elements in `uns` when concatenating the
|
||||
main tables of the SpatialData objects. See `anndata_merge` for
|
||||
options and details. Merging is applied recursively.
|
||||
- name: --anndata_label
|
||||
type: string
|
||||
example: sample
|
||||
description: |
|
||||
The name of the column in `obs` of the concatenated main table to
|
||||
store batch label information. If not provided, no batch label column
|
||||
is added.
|
||||
- name: --anndata_pairwise
|
||||
type: boolean
|
||||
default: false
|
||||
description: |
|
||||
Whether pairwise elements (`obsp`/`varp`) are concatenated.
|
||||
|
||||
resources:
|
||||
- type: python_script
|
||||
path: script.py
|
||||
- path: /src/utils/setup_logger.py
|
||||
|
||||
test_resources:
|
||||
- type: python_script
|
||||
path: test.py
|
||||
- path: /resources_test/xenium/xenium_tiny.zarr
|
||||
|
||||
engines:
|
||||
- type: docker
|
||||
image: python:3.12-slim
|
||||
setup:
|
||||
- type: apt
|
||||
packages:
|
||||
- procps
|
||||
- type: python
|
||||
__merge__: [/src/base/requirements/spatialdata.yaml, /src/base/requirements/anndata.yaml]
|
||||
__merge__: [/src/base/requirements/python_test_setup.yaml, .]
|
||||
- type: native
|
||||
|
||||
runners:
|
||||
- type: executable
|
||||
- type: nextflow
|
||||
directives:
|
||||
label: [singlecpu, lowmem, lowdisk]
|
||||
92
src/dataflow/concatenate_spatialdata/script.py
Normal file
92
src/dataflow/concatenate_spatialdata/script.py
Normal file
@@ -0,0 +1,92 @@
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import anndata as ad
|
||||
import spatialdata as sd
|
||||
|
||||
## VIASH START
|
||||
par = {
|
||||
"inputs": ["input1.zarr", "input2.zarr"],
|
||||
"output": "output.zarr",
|
||||
"attrs_merge": None,
|
||||
"anndata_join": "inner",
|
||||
"anndata_merge": None,
|
||||
"anndata_uns_merge": None,
|
||||
"anndata_label": None,
|
||||
"anndata_pairwise": False,
|
||||
}
|
||||
## VIASH END
|
||||
|
||||
sys.path.insert(0, meta["resources_dir"])
|
||||
from setup_logger import setup_logger # noqa: E402
|
||||
|
||||
|
||||
def get_unique_names_from_inputs(inputs):
    """Derive one unique dataset name per input path.

    The name of an input is the stem of its path (the final path component
    without its last suffix). When a stem has already been handed out, a
    numeric suffix is appended (``_2``, ``_3``, ...) and incremented until
    the candidate does not clash with any name produced so far, including
    names that were themselves the result of disambiguation.

    Args:
        inputs: Iterable of paths (anything accepted by ``pathlib.Path``).

    Returns:
        A list of unique names, in the same order as ``inputs``.
    """
    names = []
    names_seen = set()
    for input_path in inputs:
        stem = Path(input_path).stem
        name = stem
        # Start at 2 so the first duplicate of "x" becomes "x_2".
        count = 2
        while name in names_seen:
            name = f"{stem}_{count}"
            count += 1
        names.append(name)
        names_seen.add(name)
    return names
|
||||
|
||||
|
||||
def main(par):
    """Concatenate the SpatialData stores listed in ``par["inputs"]``.

    With a single input the store is read and written back to
    ``par["output"]`` without concatenation. Otherwise every input is read
    under a unique name derived from its path, the objects are combined with
    ``spatialdata.concatenate`` (tables excluded), and the ``"table"`` of each
    input is concatenated separately with ``anndata.concat`` and stored as
    ``"table"`` in the result before writing.

    Args:
        par: Component parameters; reads ``inputs``, ``output``,
            ``attrs_merge``, ``anndata_join``, ``anndata_merge``,
            ``anndata_uns_merge``, ``anndata_label`` and ``anndata_pairwise``.

    Returns:
        ``0`` on success, suitable for passing to ``sys.exit``.

    Raises:
        KeyError: If one of several inputs has no ``"table"`` element.
    """
    logger = setup_logger()
    logger.info("Concatenate SpatialData (spatialdata v%s)", sd.__version__)

    inputs = par["inputs"]
    output = par["output"]

    if len(inputs) == 1:
        logger.warning(
            "Only one input provided – writing to output without concatenation."
        )
        logger.info("Reading single input '%s'", inputs[0])
        sdata = sd.read_zarr(inputs[0])
        logger.info("Writing SpatialData to '%s'", output)
        sdata.write(output)
        logger.info("Done.")
        return 0

    logger.info("Reading %d SpatialData objects…", len(inputs))
    names = get_unique_names_from_inputs(inputs)
    sdatas = {}
    for name, input_path in zip(names, inputs):
        logger.info(" %s ← %s", name, input_path)
        sdatas[name] = sd.read_zarr(input_path)

    # Fail before the spatial elements are concatenated, and name the
    # offending inputs, instead of surfacing a bare lookup error later on.
    without_table = [
        name for name, sdata in sdatas.items() if "table" not in sdata.tables
    ]
    if without_table:
        raise KeyError(
            "No 'table' element found in input(s): " + ", ".join(without_table)
        )

    logger.info("Concatenating SpatialData objects…")
    concatenated = sd.concatenate(
        sdatas,
        concatenate_tables=False,
        attrs_merge=par["attrs_merge"],
    )

    logger.info("Concatenating main tables…")
    tables = {name: sdata["table"] for name, sdata in sdatas.items()}
    concatenated["table"] = ad.concat(
        tables,
        join=par["anndata_join"],
        merge=par["anndata_merge"],
        uns_merge=par["anndata_uns_merge"],
        label=par["anndata_label"],
        index_unique="-",
        pairwise=par["anndata_pairwise"],
    )

    logger.info("Writing concatenated SpatialData to '%s'…", output)
    concatenated.write(output)
    logger.info("Done.")
    # Return the same explicit status as the single-input path.
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(par))
|
||||
111
src/dataflow/concatenate_spatialdata/test.py
Normal file
111
src/dataflow/concatenate_spatialdata/test.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
import spatialdata as sd
|
||||
|
||||
|
||||
def test_two_inputs(run_component, tmp_path):
    """Concatenating the same store twice keeps both copies of each element.

    The component is run with the test Zarr store passed twice; every spatial
    element of the source is expected under both dataset names and the main
    table is expected to hold twice the observations with unchanged variables.
    """
    zarr_path = meta["resources_dir"] + "/xenium_tiny.zarr"
    output = tmp_path / "output.zarr"

    cli_args = ["--inputs", zarr_path, "--inputs", zarr_path, "--output", str(output)]
    run_component(cli_args)

    assert output.is_dir(), "Output Zarr store was not created"

    result = sd.read_zarr(output)
    source = sd.read_zarr(zarr_path)

    # Each spatial element should appear twice, once per dataset name
    # (xenium_tiny and xenium_tiny_2)
    dataset_names = ("xenium_tiny", "xenium_tiny_2")
    for element_type in ("images", "labels", "points", "shapes"):
        result_keys = set(getattr(result, element_type).keys())
        for key in getattr(source, element_type).keys():
            for dataset_name in dataset_names:
                assert f"{key}-{dataset_name}" in result_keys, (
                    f"Expected '{key}-{dataset_name}' in {element_type}, got {result_keys}"
                )

    # Table should have 2× the original obs count
    assert "table" in result.tables, "Main table not found in output"

    expected_n_obs = source["table"].n_obs * 2
    assert result["table"].n_obs == expected_n_obs, (
        f"Expected {expected_n_obs} observations, got {result['table'].n_obs}"
    )

    expected_n_vars = source["table"].n_vars
    assert result["table"].n_vars == expected_n_vars, (
        f"Expected {expected_n_vars} variables, got {result['table'].n_vars}"
    )
|
||||
|
||||
|
||||
def test_anndata_label(run_component, tmp_path):
    """--anndata_label should add a batch column with one value per input."""
    zarr_path = meta["resources_dir"] + "/xenium_tiny.zarr"
    output = tmp_path / "output.zarr"

    cli_args = [
        "--inputs",
        zarr_path,
        "--inputs",
        zarr_path,
        "--output",
        str(output),
        "--anndata_label",
        "batch",
    ]
    run_component(cli_args)

    assert output.is_dir(), "Output Zarr store was not created"

    table = sd.read_zarr(output)["table"]

    assert "batch" in table.obs.columns, "Batch label column not found in obs"

    # Two inputs were passed, so two distinct labels are expected.
    batch_values = table.obs["batch"].unique()
    assert len(batch_values) == 2, f"Expected 2 batch values, got {batch_values}"
|
||||
|
||||
|
||||
def test_single_input_passthrough(run_component, tmp_path):
    """Running the component on one input must keep the table dimensions."""
    zarr_path = meta["resources_dir"] + "/xenium_tiny.zarr"
    output = tmp_path / "output.zarr"

    run_component(["--inputs", zarr_path, "--output", str(output)])

    assert output.is_dir(), "Output Zarr store was not created"

    expected_table = sd.read_zarr(zarr_path)["table"]
    actual_table = sd.read_zarr(output)["table"]

    # Only the table shape is compared; element contents are not inspected here.
    checks = (
        ("n_obs", "Single-input passthrough changed the table obs count"),
        ("n_vars", "Single-input passthrough changed the table var count"),
    )
    for attribute, message in checks:
        assert getattr(actual_table, attribute) == getattr(expected_table, attribute), (
            message
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Direct execution: run pytest on this file and propagate its exit status.
    exit_status = pytest.main([__file__])
    sys.exit(exit_status)
|
||||
79
src/dataflow/move_layer_spatialdata/config.vsh.yaml
Normal file
79
src/dataflow/move_layer_spatialdata/config.vsh.yaml
Normal file
@@ -0,0 +1,79 @@
|
||||
name: move_layer_spatialdata
|
||||
namespace: dataflow
|
||||
label: Move layer (SpatialData)
|
||||
summary: Move a layer within a SpatialData object to a new name
|
||||
|
||||
authors:
|
||||
- __merge__: /src/authors/dorien_roosen.yaml
|
||||
roles: [ maintainer ]
|
||||
- __merge__: /src/authors/luke_zappia.yaml
|
||||
roles: [ author ]
|
||||
|
||||
argument_groups:
|
||||
- name: Inputs
|
||||
arguments:
|
||||
- name: --input
|
||||
type: file
|
||||
label: Input SpatialData
|
||||
description: Path to the input SpatialData Zarr store
|
||||
example: input.zarr
|
||||
required: true
|
||||
- name: --input_layer
|
||||
type: string
|
||||
label: Input layer name
|
||||
description: |
|
||||
Name of the layer to move. If not provided, X will be moved.
|
||||
example: old_layer
|
||||
|
||||
- name: Outputs
|
||||
arguments:
|
||||
- name: --output
|
||||
type: file
|
||||
direction: output
|
||||
default: output.zarr
|
||||
label: Output SpatialData
|
||||
description: Path to the output SpatialData Zarr store
|
||||
- name: --output_layer
|
||||
type: string
|
||||
label: Output layer name
|
||||
description: Name of the new layer to create with the moved data. If not provided, input layer will be moved to X.
|
||||
example: new_layer
|
||||
|
||||
- name: Options
|
||||
arguments:
|
||||
- name: --delete_input_layer
|
||||
type: boolean
|
||||
default: true
|
||||
description: |
|
||||
Whether to delete the input layer after moving it to the new layer.
|
||||
If true (default), the input layer is moved to the new location, if
|
||||
false it is copied to the new location and the original layer is
|
||||
retained.
|
||||
|
||||
resources:
|
||||
- type: python_script
|
||||
path: script.py
|
||||
- path: /src/utils/setup_logger.py
|
||||
|
||||
test_resources:
|
||||
- type: python_script
|
||||
path: test.py
|
||||
- path: /resources_test/xenium/xenium_tiny.zarr
|
||||
|
||||
engines:
|
||||
- type: docker
|
||||
image: python:3.13-slim
|
||||
setup:
|
||||
- type: apt
|
||||
packages:
|
||||
- procps
|
||||
- type: python
|
||||
__merge__: [/src/base/requirements/spatialdata.yaml]
|
||||
__merge__: [/src/base/requirements/python_test_setup.yaml, .]
|
||||
- type: native
|
||||
|
||||
runners:
|
||||
- type: executable
|
||||
- type: nextflow
|
||||
directives:
|
||||
label: [singlecpu, lowmem, lowdisk]
|
||||
73
src/dataflow/move_layer_spatialdata/script.py
Normal file
73
src/dataflow/move_layer_spatialdata/script.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import sys
|
||||
import spatialdata as sd
|
||||
|
||||
## VIASH START
|
||||
par = {
|
||||
"input": "input.zarr",
|
||||
"input_layer": None,
|
||||
"output": "output.zarr",
|
||||
"output_layer": None,
|
||||
"delete_input_layer": True,
|
||||
}
|
||||
meta = {"resources_dir": "src/utils"}
|
||||
## VIASH END
|
||||
|
||||
sys.path.append(meta["resources_dir"])
|
||||
from setup_logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def move_layer(sdata, input_layer, output_layer, delete_input):
    """Relocate one matrix of the SpatialData ``"table"``, modifying it in place.

    The source is ``layers[input_layer]``, or ``X`` when ``input_layer`` is
    falsy. The destination is ``layers[output_layer]``, or ``X`` when
    ``output_layer`` is falsy. When ``delete_input`` is truthy the source slot
    is cleared (layer deleted, or ``X`` set to ``None``) before the destination
    is written. Nothing is returned.
    """
    table = sdata["table"]
    source_is_x = not input_layer

    # Grab a reference to the source matrix before it is (optionally) removed.
    if source_is_x:
        matrix = table.X
    else:
        matrix = table.layers[input_layer]

    if delete_input:
        if source_is_x:
            logger.info("Deleting input X matrix...")
            table.X = None
        else:
            logger.info(f"Deleting input layer '{input_layer}'...")
            del table.layers[input_layer]

    if not output_layer:
        table.X = matrix
        return
    table.layers[output_layer] = matrix
|
||||
|
||||
|
||||
# Script body: load the SpatialData store, validate the requested move,
# apply it to the "table" element and write the result.
logger.info(f"Move layer (spatialdata v{sd.__version__})")

logger.info(f"Loading SpatialData from '{par['input']}'...")
sdata = sd.read_zarr(par["input"])
logger.info(f"SpatialData: {sdata}")
logger.info(f"Table: {sdata['table']}")

# "X" is used as the display name whenever a layer argument is left unset.
input_layer_name = par["input_layer"] if par["input_layer"] else "X"
output_layer_name = par["output_layer"] if par["output_layer"] else "X"

if input_layer_name == output_layer_name:
    raise ValueError(
        f"Input layer '{input_layer_name}' and output layer '{output_layer_name}' are the same, aborting"
    )

if par["input_layer"] and input_layer_name not in sdata["table"].layers:
    raise ValueError(
        f"Input layer '{input_layer_name}' not found in SpatialData. Available layers: {list(sdata['table'].layers.keys())}"
    )

# Fail early with a clear message instead of trying to store a missing matrix
# as a layer when X was requested as the source but the table has no X.
if not par["input_layer"] and sdata["table"].X is None:
    raise ValueError("Input X matrix is empty (None), there is nothing to move")

if par["output_layer"] and output_layer_name in sdata["table"].layers:
    logger.warning(
        f"Output layer '{output_layer_name}' already exists and will be overwritten"
    )

logger.info(f"Moving layer '{input_layer_name}' to '{output_layer_name}'...")
move_layer(sdata, par["input_layer"], par["output_layer"], par["delete_input_layer"])
logger.info(f"SpatialData: {sdata}")
logger.info(f"Table: {sdata['table']}")

logger.info(f"Writing output to '{par['output']}'...")
sdata.write(par["output"], overwrite=True)

logger.info("Done!")
|
||||
131
src/dataflow/move_layer_spatialdata/test.py
Normal file
131
src/dataflow/move_layer_spatialdata/test.py
Normal file
@@ -0,0 +1,131 @@
|
||||
import pytest
|
||||
import sys
|
||||
import spatialdata as sd
|
||||
|
||||
|
||||
def test_move_x_to_layer(run_component, tmp_path):
    """Moving X to a named layer should leave X empty and populate that layer."""
    input_path = meta["resources_dir"] + "/xenium_tiny.zarr"
    output_path = tmp_path / "output.zarr"

    run_component(
        [
            "--input",
            input_path,
            "--output",
            # Pass a plain string, as the other tests in this file do.
            str(output_path),
            "--output_layer",
            "layer_1",
        ]
    )

    # Verify output
    assert output_path.exists(), "output zarr was not created"
    result = sd.read_zarr(str(output_path))

    assert result["table"].X is None, "X matrix should be None after moving"
    assert "layer_1" in result["table"].layers, "layer_1 should exist"
    assert result["table"].layers["layer_1"] is not None, "layer_1 should have data"
|
||||
|
||||
|
||||
def test_move_named_layer_to_x(run_component, tmp_path):
    """A named layer moved without --output_layer should end up in X."""
    input_path = meta["resources_dir"] + "/xenium_tiny.zarr"
    output_path = tmp_path / "output.zarr"
    tmp_input = tmp_path / "input_with_layer.zarr"

    # Build a fixture whose matrix lives only in "test_layer" (X is emptied).
    fixture = sd.read_zarr(input_path)
    fixture_table = fixture["table"]
    fixture_table.layers["test_layer"] = fixture_table.X.copy()
    fixture_table.X = None
    fixture.write(str(tmp_input))

    cli_args = ["--input", str(tmp_input), "--output", str(output_path)]
    cli_args.extend(["--input_layer", "test_layer"])
    run_component(cli_args)

    # Verify output
    assert output_path.exists(), "output zarr was not created"
    result_table = sd.read_zarr(str(output_path))["table"]

    assert "test_layer" not in result_table.layers, "test_layer should be removed"
    assert result_table.X is not None, "X matrix should have data"
|
||||
|
||||
|
||||
def test_move_layer_to_layer(run_component, tmp_path):
    """Moving one named layer to another name should drop the old name."""
    input_path = meta["resources_dir"] + "/xenium_tiny.zarr"
    output_path = tmp_path / "output.zarr"
    tmp_input = tmp_path / "input_with_layer.zarr"

    # Fixture: the test store plus a "test_layer" copy of X.
    fixture = sd.read_zarr(input_path)
    fixture["table"].layers["test_layer"] = fixture["table"].X.copy()
    fixture.write(str(tmp_input))

    # Insertion order of the mapping is the order of the CLI tokens.
    cli_options = {
        "--input": str(tmp_input),
        "--output": str(output_path),
        "--input_layer": "test_layer",
        "--output_layer": "layer_2",
    }
    run_component([token for option in cli_options.items() for token in option])

    # Verify output
    assert output_path.exists(), "output zarr was not created"
    result = sd.read_zarr(str(output_path))
    layers = result["table"].layers

    assert "test_layer" not in layers, "test_layer should be removed"
    assert "layer_2" in layers, "layer_2 should exist"
    assert layers["layer_2"] is not None, "layer_2 should have data"
|
||||
|
||||
|
||||
def test_copy_layer(run_component, tmp_path):
    """With --delete_input_layer false, both source and target layers must exist."""
    input_path = meta["resources_dir"] + "/xenium_tiny.zarr"
    output_path = tmp_path / "output.zarr"
    tmp_input = tmp_path / "input_with_layer.zarr"

    # Fixture: the test store plus a "test_layer" copy of X.
    fixture = sd.read_zarr(input_path)
    fixture["table"].layers["test_layer"] = fixture["table"].X.copy()
    fixture.write(str(tmp_input))

    cli_args = ["--input", str(tmp_input), "--output", str(output_path)]
    cli_args += ["--input_layer", "test_layer", "--output_layer", "layer_2"]
    cli_args += ["--delete_input_layer", "false"]
    run_component(cli_args)

    # Verify output
    assert output_path.exists(), "output zarr was not created"
    result = sd.read_zarr(str(output_path))
    layers = result["table"].layers

    assert "test_layer" in layers, "test_layer should still exist"
    assert "layer_2" in layers, "layer_2 should exist"
    assert layers["layer_2"] is not None, "layer_2 should have data"
    assert layers["test_layer"] is not None, "test_layer should still have data"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Direct execution: run pytest on this file and propagate its exit status.
    exit_status = pytest.main([__file__])
    sys.exit(exit_status)
|
||||
237
src/nichecompass/gene_program_mask/config.vsh.yaml
Normal file
237
src/nichecompass/gene_program_mask/config.vsh.yaml
Normal file
@@ -0,0 +1,237 @@
|
||||
name: gene_program_mask
|
||||
namespace: nichecompass
|
||||
scope: public
|
||||
description: Generation of a prior knowledge gene program mask for NicheCompass.
|
||||
|
||||
authors:
|
||||
- __merge__: /src/authors/dorien_roosen.yaml
|
||||
roles: [ maintainer ]
|
||||
- __merge__: /src/authors/jakub_majercik.yaml
|
||||
roles: [ maintainer ]
|
||||
- __merge__: /src/authors/weiwei_schultz.yaml
|
||||
roles: [ contributor ]
|
||||
argument_groups:
|
||||
- name: Inputs
|
||||
arguments:
|
||||
- name: "--input_gene_orthologs_mapping_file"
|
||||
type: file
|
||||
required: false
|
||||
description: |
|
||||
Path to a CSV file mapping human genes to mouse orthologs.
|
||||
Required for the OmniPath and NicheNet masks if `--species mouse`.
|
||||
- name: "--input_metabolite_enzymes"
|
||||
type: file
|
||||
required: false
|
||||
description: |
|
||||
Path to the MeBocost metabolite-enzymes TSV file.
|
||||
Required for generating the MeBocost gene program mask.
|
||||
- name: "--input_metabolite_sensors"
|
||||
type: file
|
||||
required: false
|
||||
description: |
|
||||
Path to the MeBocost metabolite-sensors TSV file.
|
||||
Required for generating the MeBocost gene program mask.
|
||||
- name: "--input_omnipath_lr_network"
|
||||
type: file
|
||||
required: false
|
||||
description: |
|
||||
Path to the OmniPath ligand-receptor network CSV file.
|
||||
If provided, the network will be loaded from this file instead of querying the OmniPath API.
|
||||
If `--output_omnipath_lr_network` is also provided, the input file is used and the output network is not written.
|
||||
- name: "--input_nichenet_ligand_target_matrix"
|
||||
type: file
|
||||
required: false
|
||||
description: |
|
||||
Path to the NicheNet ligand-target gene regulatory potential matrix file.
|
||||
If provided together with `--input_nichenet_lrt_network`, the matrix will be loaded from this file instead of querying the NicheNet API.
|
||||
example: nichenet_ligand_target_matrix.csv
|
||||
- name: "--input_nichenet_lrt_network"
|
||||
type: file
|
||||
required: false
|
||||
description: |
|
||||
Path to the NicheNet ligand-receptor network CSV file.
|
||||
If provided together with `--input_nichenet_ligand_target_matrix`, the network will be loaded from this file instead of querying the NicheNet API.
|
||||
- name: "--input_collectri_tf_network"
|
||||
type: file
|
||||
required: false
|
||||
description: |
|
||||
Path to the CollecTRI TF-target gene regulatory network CSV file.
|
||||
If provided, the network will be loaded from this file instead of querying the CollecTRI API.
|
||||
|
||||
- name: Parameters
|
||||
arguments:
|
||||
- name: "--species"
|
||||
type: string
|
||||
choices: ["human", "mouse"]
|
||||
default: "human"
|
||||
description: Species of the organism (human or mouse).
|
||||
- name: "--create_omnipath_gene_program_mask"
|
||||
type: boolean
|
||||
default: true
|
||||
description: Whether to create the OmniPath gene program mask.
|
||||
- name: "--create_nichenet_gene_program_mask"
|
||||
type: boolean
|
||||
default: true
|
||||
description: Whether to create the NicheNet gene program mask.
|
||||
- name: "--create_mebocost_gene_program_mask"
|
||||
type: boolean
|
||||
default: true
|
||||
description: Whether to create the MeBocost gene program mask.
|
||||
- name: "--create_collectri_tf_gene_program_mask"
|
||||
type: boolean
|
||||
default: true
|
||||
description: Whether to create the CollecTRI TF gene program mask.
|
||||
- name: "--overlap_thresh_target_genes"
|
||||
type: double
|
||||
default: 1.0
|
||||
min: 0.0
|
||||
max: 1.0
|
||||
description: |
|
||||
The minimum ratio of target genes that need to overlap between a GP without source genes and another GP for the GP to be dropped.
|
||||
Gene programs with different source genes are never combined or dropped.
|
||||
|
||||
- name: Omnipath Parameters
|
||||
arguments:
|
||||
- name: "--omnipath_min_curation_effort"
|
||||
type: integer
|
||||
default: 2
|
||||
description: Minimum number of times an interaction has to be described in a paper and mentioned in a database to be included in the OmniPath gene programs.
|
||||
|
||||
- name: NicheNet Parameters
|
||||
arguments:
|
||||
- name: "--nichenet_version"
|
||||
type: string
|
||||
choices: ["v1", "v2"]
|
||||
default: "v2"
|
||||
description: |
|
||||
Version of the NicheNet ligand receptor network and ligand target gene regulatory potential matrix.
|
||||
`v2` is an improved version of `v1`, and has separate files for mouse and human.
|
||||
- name: "--nichenet_keep_target_genes_ratio"
|
||||
type: double
|
||||
default: 1.0
|
||||
description: |
|
||||
Ratio of target genes that are kept compared to total target genes.
|
||||
This ratio is applied over the entire matrix (not on gene program level), and determines the `all_gps_score_keep_threshold`, which will be used to filter target genes according to their regulatory potential scores.
|
||||
- name: "--nichenet_max_n_target_genes_per_gp"
|
||||
type: integer
|
||||
default: 250
|
||||
description: |
|
||||
Maximum number of target genes per gene program. If a gene program has more target genes than `max_n_target_genes_per_gp`, only the `max_n_target_genes_per_gp` target genes with the highest regulatory potential scores will be kept.
|
||||
Default value is chosen based on MultiNicheNet specification (see Browaeys, R. et al. MultiNicheNet: a flexible framework for differential cell-cell communication analysis from multi-sample multi-condition single-cell transcriptomics data. bioRxiv (2023) doi:10.1101/2023.06.13.544751).
|
||||
|
||||
- name: Outputs
|
||||
arguments:
|
||||
- name: "--output"
|
||||
type: file
|
||||
direction: output
|
||||
required: true
|
||||
description: Path to the output gene program mask JSON file.
|
||||
example: gp_mask.json
|
||||
- name: "--output_omnipath_lr_network"
|
||||
type: file
|
||||
direction: output
|
||||
required: false
|
||||
description: |
|
||||
Path to the output OmniPath ligand-receptor network CSV file.
|
||||
If `--input_omnipath_lr_network` is not provided, the network will be saved to this file after querying the OmniPath API.
|
||||
example: omnipath_lr_network.csv
|
||||
- name: "--output_nichenet_lrt_network"
|
||||
type: file
|
||||
direction: output
|
||||
required: false
|
||||
description: |
|
||||
Path to the output NicheNet ligand-receptor network CSV file.
|
||||
If `--input_nichenet_lrt_network` is not provided, the network will be saved to this file after querying the NicheNet API.
|
||||
example: nichenet_lrt_network.csv
|
||||
- name: "--output_nichenet_ligand_target_matrix"
|
||||
type: file
|
||||
direction: output
|
||||
required: false
|
||||
description: |
|
||||
Path to the output NicheNet ligand-target gene regulatory potential matrix file.
|
||||
If `--input_nichenet_ligand_target_matrix` is not provided, the matrix will be saved to this file after querying the NicheNet API.
|
||||
example: nichenet_ligand_target_matrix.csv
|
||||
- name: "--output_collectri_tf_network"
|
||||
type: file
|
||||
direction: output
|
||||
required: false
|
||||
description: |
|
||||
Path to the output CollecTRI TF-target gene regulatory potential network CSV file.
|
||||
If `--input_collectri_tf_network` is not provided, the network will be saved to this file after querying the CollecTRI API.
|
||||
example: collectri_tf_network.csv
|
||||
- name: "--output_omnipath_gp_gene_count_distributions"
|
||||
type: file
|
||||
direction: output
|
||||
required: false
|
||||
description: Path to save the OmniPath gene program gene count distributions plot.
|
||||
example: omnipath_gp_gene_count_distributions.svg
|
||||
- name: "--output_nichenet_gp_gene_count_distributions"
|
||||
type: file
|
||||
direction: output
|
||||
required: false
|
||||
description: Path to save the NicheNet gene program gene count distributions plot.
|
||||
example: nichenet_gp_gene_count_distributions.svg
|
||||
- name: "--output_mebocost_gp_gene_count_distributions"
|
||||
type: file
|
||||
direction: output
|
||||
required: false
|
||||
description: Path to save the MeBocost gene program gene count distributions plot.
|
||||
example: mebocost_gp_gene_count_distributions.svg
|
||||
- name: "--output_collectri_tf_gp_gene_count_distributions"
|
||||
type: file
|
||||
direction: output
|
||||
required: false
|
||||
description: Path to save the CollecTRI TF gene program gene count distributions plot.
|
||||
example: collectri_tf_gp_gene_count_distributions.svg
|
||||
|
||||
resources:
|
||||
- type: python_script
|
||||
path: script.py
|
||||
- path: /src/utils/setup_logger.py
|
||||
|
||||
test_resources:
|
||||
- type: python_script
|
||||
path: test.py
|
||||
- path: /resources_test/niche/
|
||||
|
||||
engines:
|
||||
- type: docker
|
||||
image: nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04
|
||||
setup:
|
||||
- type: docker
|
||||
env:
|
||||
- PIP_BREAK_SYSTEM_PACKAGES=1
|
||||
- type: apt
|
||||
packages:
|
||||
- python3-pip
|
||||
- python-is-python3
|
||||
- type: docker
|
||||
run: pip install --upgrade --no-cache-dir --ignore-installed pip
|
||||
- type: docker
|
||||
run: |
|
||||
pip install torch --index-url https://download.pytorch.org/whl/cu124 \
|
||||
&& pip install pyg_lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.6.0+cu124.html
|
||||
- type: python
|
||||
packages:
|
||||
- numpy<2
|
||||
- nichecompass
|
||||
- decoupler~=2.1.6
|
||||
- type: docker
|
||||
run:
|
||||
- |
|
||||
cat > /NOTICE <<'EOF'
|
||||
This image contains software provided by NVIDIA Corporation
|
||||
and is governed by the NVIDIA Deep Learning Container License:
|
||||
https://developer.download.nvidia.com/licenses/NVIDIA_Deep_Learning_Container_License.pdf
|
||||
|
||||
The source code for this project is licensed under MIT:
|
||||
https://github.com/openpipelines-bio/openpipeline_spatial
|
||||
EOF
|
||||
__merge__: [ /src/base/requirements/python_test_setup.yaml, .]
|
||||
|
||||
|
||||
runners:
|
||||
- type: executable
|
||||
- type: nextflow
|
||||
directives:
|
||||
label: [lowcpu, lowmem, lowdisk]
|
||||
385
src/nichecompass/gene_program_mask/script.py
Normal file
385
src/nichecompass/gene_program_mask/script.py
Normal file
@@ -0,0 +1,385 @@
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from nichecompass.utils import (
|
||||
extract_gp_dict_from_mebocost_ms_interactions,
|
||||
extract_gp_dict_from_nichenet_lrt_interactions,
|
||||
extract_gp_dict_from_omnipath_lr_interactions,
|
||||
filter_and_combine_gp_dict_gps_v2,
|
||||
extract_gp_dict_from_collectri_tf_network,
|
||||
)
|
||||
|
||||
|
||||
## VIASH START
|
||||
par = {
|
||||
"species": "mouse",
|
||||
"create_omnipath_gene_program_mask": False,
|
||||
"create_nichenet_gene_program_mask": False,
|
||||
"create_mebocost_gene_program_mask": False,
|
||||
"create_collectri_tf_gene_program_mask": True,
|
||||
# omnipath params
|
||||
"input_gene_orthologs_mapping_file": "resources_test/niche/human_mouse_gene_orthologs.csv",
|
||||
"omnipath_min_curation_effort": 2,
|
||||
"input_omnipath_lr_network": "resources_test/niche/omnipath_lr_network.csv",
|
||||
# "input_omnipath_lr_network": None,
|
||||
# nichenet params
|
||||
"input_nichenet_lrt_network": "resources_test/niche/nichenet_lr_network.csv",
|
||||
"input_nichenet_ligand_target_matrix": "resources_test/niche/nichenet_ligand_target_matrix_v2_mouse.csv",
|
||||
"nichenet_version": "v2",
|
||||
"nichenet_keep_target_genes_ratio": 1.0,
|
||||
"nichenet_max_n_target_genes_per_gp": 250,
|
||||
# mebocost_gene_program_mask
|
||||
"input_metabolite_enzymes": "resources_test/niche/mouse_metabolite_enzymes.tsv",
|
||||
"input_metabolite_sensors": "resources_test/niche/mouse_metabolite_sensors.tsv",
|
||||
# collectri params
|
||||
"input_collectri_tf_network": "resources_test/niche/collectri_tf_network.csv",
|
||||
# filter and combine programs
|
||||
"overlap_thresh_target_genes": 1.0,
|
||||
# output paths
|
||||
"output": "collectri_gp_mask.json",
|
||||
"output_omnipath_lr_network": "resources_test/niche/omnipath_lr_network.csv",
|
||||
"output_nichenet_lrt_network": "resources_test/niche/nichenet_lr_network.csv",
|
||||
"output_nichenet_ligand_target_matrix": "nichenet_ligand_target_matrix_v2_mouse.csv",
|
||||
"output_collectri_tf_network": "collectri_tf_network.csv",
|
||||
"output_omnipath_gp_gene_count_distributions": "omnipath_gp_gene_count_distributions.svg",
|
||||
"output_nichenet_gp_gene_count_distributions": "nichenet_gp_gene_count_distributions.svg",
|
||||
"output_mebocost_gp_gene_count_distributions": "mebocost_gp_gene_count_distributions.svg",
|
||||
"output_collectri_tf_gp_gene_count_distributions": "collectri_tf_gp_gene_count_distributions.svg",
|
||||
}
|
||||
|
||||
meta = {"temp_dir": "tmp/", "resources_dir": "src/utils/"}
|
||||
## VIASH END
|
||||
sys.path.append(meta["resources_dir"])
|
||||
from setup_logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def _sanitize_omnipath_csv(file_path: str) -> None:
|
||||
"""
|
||||
Sanitize the OmniPath CSV file format for compatibility with NicheCompass
|
||||
`load_from_disk`.
|
||||
|
||||
Bug 1 - CSV index inconsistency:
|
||||
The library saves with `to_csv(path, index=False)` but loads with
|
||||
`read_csv(path, index_col=0)`. This causes the first data column to be
|
||||
incorrectly treated as the index when loading.
|
||||
|
||||
Bug 2 - Missing NaN handling:
|
||||
OmniPath contains proteins (TrEMBL/unreviewed entries like A0A2R8YE73) that
|
||||
don't have gene symbol mappings. These appear as NaN in genesymbol columns.
|
||||
The `resolve_protein_complexes` function doesn't handle NaN, causing
|
||||
`TypeError`. When fetching from the API, groupby operations implicitly
|
||||
filter some NaN rows, but `load_from_disk` doesn't have this filtering.
|
||||
|
||||
This function sanitizes the loaded CSV by:
|
||||
1. Dropping rows with NaN in genesymbol columns
|
||||
(unusable for gene programs)
|
||||
2. Rewriting with an index column (to fix the load inconsistency)
|
||||
|
||||
See:
|
||||
https://github.com/Lotfollahi-lab/nichecompass/blob/main/src/nichecompass/utils/gene_programs.py
|
||||
|
||||
Args:
|
||||
file_path: Path to the OmniPath CSV file to fix.
|
||||
"""
|
||||
if not file_path or not os.path.exists(file_path):
|
||||
return
|
||||
|
||||
df = pd.read_csv(file_path)
|
||||
|
||||
# Find all genesymbol columns dynamically and drop rows where any are NaN.
|
||||
# These proteins don't have gene mappings and cannot be used in gene programs.
|
||||
genesymbol_cols = df.columns.str.lower().str.contains("genesymbol")
|
||||
if genesymbol_cols.any():
|
||||
df = df.dropna(subset=df.columns[genesymbol_cols])
|
||||
|
||||
# Rewrite with an index column so downstream `read_csv(..., index_col=0)` is stable.
|
||||
os.makedirs(meta["temp_dir"], exist_ok=True)
|
||||
|
||||
lr_network_file_path = os.path.join(
|
||||
meta["temp_dir"],
|
||||
"input_omnipath_lr_network_sanitized.csv",
|
||||
)
|
||||
df.to_csv(lr_network_file_path, index=True)
|
||||
return lr_network_file_path
|
||||
|
||||
|
||||
def create_omnipath_gene_program_mask(
    output_lr_network: str | None,
    output_count_distr: str | None,
    input_lr_network: str | None,
    input_orthologs: str | None,
) -> dict:
    """Build the OmniPath gene program dictionary.

    When ``input_lr_network`` is given, a sanitized copy of it is loaded from
    disk and nothing is saved. Otherwise ``output_lr_network`` (if any) is
    handed over as the save location. The count distribution plot is requested
    only when ``output_count_distr`` is given.

    Returns:
        The value returned by `extract_gp_dict_from_omnipath_lr_interactions`.
    """
    use_local_network = bool(input_lr_network)

    # Warn if both input and output are provided
    if input_lr_network and output_lr_network:
        logger.warning(
            "Both Omnipath input and output paths are provided. "
            "Using input file; output will not be saved."
        )

    if use_local_network:
        # Loading takes precedence, so saving is switched off.
        network_path = _sanitize_omnipath_csv(input_lr_network)
        write_network = False
    else:
        network_path = output_lr_network
        write_network = bool(output_lr_network)

    return extract_gp_dict_from_omnipath_lr_interactions(
        species=par["species"],
        min_curation_effort=par["omnipath_min_curation_effort"],
        load_from_disk=use_local_network,
        save_to_disk=write_network,
        lr_network_file_path=network_path,
        gene_orthologs_mapping_file_path=input_orthologs,
        plot_gp_gene_count_distributions=bool(output_count_distr),
        gp_gene_count_distributions_save_path=output_count_distr,
    )
|
||||
|
||||
|
||||
def create_nichenet_gene_program_mask(
    output_lrt_network: str | None,
    output_lt_matrix: str | None,
    output_count_distr: str | None,
    input_lrt_network: str | None,
    input_lt_matrix: str | None,
    input_orthologs: str | None,
) -> dict:
    """Build the NicheNet gene program dictionary.

    Loading from disk happens only when BOTH ``input_lrt_network`` and
    ``input_lt_matrix`` are given; in that case nothing is saved, even if
    output paths were supplied. Otherwise the output paths (if any) are handed
    over as save locations.

    Args:
        output_lrt_network: Where to save the ligand-receptor network, if requested.
        output_lt_matrix: Where to save the ligand-target matrix, if requested.
        output_count_distr: Where to save the gene count distribution plot;
            the plot is requested only when this is given.
        input_lrt_network: Existing ligand-receptor network file to load.
        input_lt_matrix: Existing ligand-target matrix file to load.
        input_orthologs: Gene orthologs mapping file passed through to NicheCompass.

    Returns:
        The value returned by `extract_gp_dict_from_nichenet_lrt_interactions`.
    """
    plot_gp_gene_count_distributions = bool(output_count_distr)

    # Validate NicheNet I/O.
    load_from_disk = bool(input_lrt_network) and bool(input_lt_matrix)
    any_output_requested = bool(output_lrt_network) or bool(output_lt_matrix)

    # Explicit grouping: `and` binds tighter than `or`, so the ungrouped form
    # kept saving enabled whenever `output_lrt_network` was set.
    save_to_disk = any_output_requested and not load_from_disk

    # Warn if both input and output are provided.
    if load_from_disk and any_output_requested:
        logger.warning(
            "Both NicheNet input and output paths are provided. "
            "Using input files; outputs will not be saved."
        )

    # Use input file path if provided, otherwise use output file path.
    if load_from_disk:
        lr_network_file_path = input_lrt_network
        ligand_target_matrix_file_path = input_lt_matrix
    else:
        lr_network_file_path = output_lrt_network
        ligand_target_matrix_file_path = output_lt_matrix

    nichenet_gp_dict = extract_gp_dict_from_nichenet_lrt_interactions(
        species=par["species"],
        version=par["nichenet_version"],
        keep_target_genes_ratio=par["nichenet_keep_target_genes_ratio"],
        max_n_target_genes_per_gp=par["nichenet_max_n_target_genes_per_gp"],
        load_from_disk=load_from_disk,
        save_to_disk=save_to_disk,
        lr_network_file_path=lr_network_file_path,
        ligand_target_matrix_file_path=ligand_target_matrix_file_path,
        gene_orthologs_mapping_file_path=input_orthologs,
        plot_gp_gene_count_distributions=plot_gp_gene_count_distributions,
        gp_gene_count_distributions_save_path=output_count_distr,
    )
    return nichenet_gp_dict
|
||||
|
||||
|
||||
def create_mebocost_gene_program_mask(
    output_count_distr: str | None,
    input_metabolite_enzymes: str,
    input_metabolite_sensors: str,
) -> dict:
    """Build the MeBocost gene program dictionary.

    Both input files are copied into ``meta["temp_dir"]`` under
    ``<species>_metabolite_enzymes.tsv`` / ``<species>_metabolite_sensors.tsv``
    and that directory is passed to NicheCompass as ``dir_path``.

    Returns:
        The value returned by `extract_gp_dict_from_mebocost_ms_interactions`.
    """
    staging_dir = meta["temp_dir"]
    os.makedirs(staging_dir, exist_ok=True)

    species = par["species"]
    # NOTE(review): presumably the extractor looks the files up by these
    # species-prefixed names inside `dir_path` - confirm against NicheCompass.
    staged_files = (
        (input_metabolite_enzymes, f"{species}_metabolite_enzymes.tsv"),
        (input_metabolite_sensors, f"{species}_metabolite_sensors.tsv"),
    )
    for source_file, staged_name in staged_files:
        shutil.copy2(source_file, os.path.join(staging_dir, staged_name))

    return extract_gp_dict_from_mebocost_ms_interactions(
        dir_path=staging_dir,
        species=species,
        plot_gp_gene_count_distributions=bool(output_count_distr),
        gp_gene_count_distributions_save_path=output_count_distr,
    )
|
||||
|
||||
|
||||
def create_collectri_tf_gene_program_mask(
    output_count_distr: str | None,
    output_tf_network: str | None,
    input_tf_network: str | None,
) -> dict:
    """Build the CollecTRI TF gene program dictionary.

    When ``input_tf_network`` is given it is loaded from disk and nothing is
    saved. Otherwise ``output_tf_network`` (if any) is handed over as the save
    location. The count distribution plot is requested only when
    ``output_count_distr`` is given.

    Returns:
        The value returned by `extract_gp_dict_from_collectri_tf_network`.
    """
    # Warn if both input and output are provided
    if input_tf_network and output_tf_network:
        logger.warning(
            "Both CollecTRI input and output paths are provided. "
            "Using input file; output will not be saved."
        )

    if input_tf_network:
        # Loading takes precedence, so saving is switched off.
        read_local = True
        write_network = False
        network_path = input_tf_network
    else:
        read_local = False
        write_network = bool(output_tf_network)
        network_path = output_tf_network

    return extract_gp_dict_from_collectri_tf_network(
        species=par["species"],
        load_from_disk=read_local,
        save_to_disk=write_network,
        tf_network_file_path=network_path,
        plot_gp_gene_count_distributions=bool(output_count_distr),
        gp_gene_count_distributions_save_path=output_count_distr,
    )
|
||||
|
||||
|
||||
def main():
    """Validate parameters, build the requested masks and save the result.

    Each enabled ``create_*_gene_program_mask`` flag in ``par`` triggers the
    matching builder; the resulting dictionaries are filtered and combined and
    written as JSON to ``par["output"]``.

    Raises:
        ValueError: If no mask is enabled, if a mouse Omnipath or NicheNet
            mask is requested without an ortholog mapping file, or if the
            MeBocost mask is requested without both metabolite files.
    """
    # ---- Parameter validation ----
    requested = [
        par["create_omnipath_gene_program_mask"],
        par["create_nichenet_gene_program_mask"],
        par["create_mebocost_gene_program_mask"],
        par["create_collectri_tf_gene_program_mask"],
    ]
    if not any(requested):
        raise ValueError("At least one gene program mask must be set to True")

    if par["create_omnipath_gene_program_mask"]:
        if par["species"] == "mouse" and not par["input_gene_orthologs_mapping_file"]:
            raise ValueError(
                "Mouse species requires --input_gene_orthologs_mapping_file "
                "to generate the Omnipath mask."
            )

    if par["create_nichenet_gene_program_mask"]:
        if par["species"] == "mouse" and not par["input_gene_orthologs_mapping_file"]:
            raise ValueError(
                "Mouse species requires --input_gene_orthologs_mapping_file "
                "to generate the NicheNet mask."
            )

    if par["create_mebocost_gene_program_mask"]:
        if not (par["input_metabolite_enzymes"] and par["input_metabolite_sensors"]):
            raise ValueError(
                "MeBocost mask requires --input_metabolite_enzymes "
                "and --input_metabolite_sensors."
            )

    # ---- Mask specifications ----
    # One entry per mask: (enabling flag, display name, builder, positional
    # arguments in the order the builder expects them).
    mask_specs = [
        (
            "create_omnipath_gene_program_mask",
            "Omnipath",
            create_omnipath_gene_program_mask,
            (
                par["output_omnipath_lr_network"],
                par["output_omnipath_gp_gene_count_distributions"],
                par["input_omnipath_lr_network"],
                par["input_gene_orthologs_mapping_file"],
            ),
        ),
        (
            "create_nichenet_gene_program_mask",
            "NicheNet",
            create_nichenet_gene_program_mask,
            (
                par["output_nichenet_lrt_network"],
                par["output_nichenet_ligand_target_matrix"],
                par["output_nichenet_gp_gene_count_distributions"],
                par["input_nichenet_lrt_network"],
                par["input_nichenet_ligand_target_matrix"],
                par["input_gene_orthologs_mapping_file"],
            ),
        ),
        (
            "create_mebocost_gene_program_mask",
            "MeBocost",
            create_mebocost_gene_program_mask,
            (
                par["output_mebocost_gp_gene_count_distributions"],
                par["input_metabolite_enzymes"],
                par["input_metabolite_sensors"],
            ),
        ),
        (
            "create_collectri_tf_gene_program_mask",
            "CollecTRI TF",
            create_collectri_tf_gene_program_mask,
            (
                par["output_collectri_tf_gp_gene_count_distributions"],
                par["output_collectri_tf_network"],
                par["input_collectri_tf_network"],
            ),
        ),
    ]

    # ---- Build every enabled mask ----
    gp_dicts = []
    for flag, mask_name, build_mask, build_args in mask_specs:
        if not par[flag]:
            continue
        logger.info(f"Generating {mask_name} gene program mask...")
        gp_dicts.append(build_mask(*build_args))

    # ---- Filter and combine ----
    assert len(gp_dicts) > 0, "No gene program dictionaries were created."

    combined_gp_dict = filter_and_combine_gp_dict_gps_v2(
        gp_dicts,
        overlap_thresh_target_genes=par["overlap_thresh_target_genes"],
        verbose=True,
    )

    logger.info("Gene program mask generation completed.")
    logger.info(
        "Number of gene programs after filtering and combining: %s.",
        len(combined_gp_dict),
    )

    # ---- Write the combined mask ----
    output_path = Path(par["output"])
    # Create the destination directory if it does not exist yet.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    logger.info("Saving combined gene program mask to: %s", str(output_path))
    with output_path.open("w", encoding="utf-8") as handle:
        json.dump(combined_gp_dict, handle)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
333
src/nichecompass/gene_program_mask/test.py
Normal file
333
src/nichecompass/gene_program_mask/test.py
Normal file
@@ -0,0 +1,333 @@
|
||||
import pytest
|
||||
import json
|
||||
import subprocess
|
||||
import re
|
||||
|
||||
## VIASH START
|
||||
meta = {
|
||||
"executable": "./target/executable/nichecompass/gene_program_mask/gene_program_mask",
|
||||
"resources_dir": "./resources_test/niche/",
|
||||
}
|
||||
## VIASH END
|
||||
|
||||
import sys
|
||||
|
||||
sys.path.append(meta["resources_dir"])
|
||||
|
||||
ortholog_file = f"{meta['resources_dir']}/niche/human_mouse_gene_orthologs.csv"
|
||||
enzymes_file = f"{meta['resources_dir']}/niche/mouse_metabolite_enzymes.tsv"
|
||||
sensors_file = f"{meta['resources_dir']}/niche/mouse_metabolite_sensors.tsv"
|
||||
omnipath_lr_network_file = f"{meta['resources_dir']}/niche/omnipath_lr_network.csv"
|
||||
nichenet_lr_network_file = f"{meta['resources_dir']}/niche/nichenet_lr_network.csv"
|
||||
nichenet_matrix_file = (
|
||||
f"{meta['resources_dir']}/niche/nichenet_ligand_target_matrix_v2_mouse.csv"
|
||||
)
|
||||
collectri_tf_network_file = f"{meta['resources_dir']}/niche/collectri_tf_network.csv"
|
||||
|
||||
|
||||
@pytest.fixture
def nichenet_io(tmp_path):
    """Flag/value mapping that enables only the NicheNet mask.

    The gene count distribution plot is written into ``tmp_path``.
    """
    flags = {
        "--create_nichenet_gene_program_mask": "True",
        "--create_omnipath_gene_program_mask": "False",
        "--create_mebocost_gene_program_mask": "False",
        "--create_collectri_tf_gene_program_mask": "False",
        "--input_gene_orthologs_mapping_file": ortholog_file,
        "--input_nichenet_lrt_network": nichenet_lr_network_file,
        "--input_nichenet_ligand_target_matrix": nichenet_matrix_file,
        "--output_nichenet_gp_gene_count_distributions": tmp_path
        / "nichenet_distr.svg",
    }
    return flags
|
||||
|
||||
|
||||
@pytest.fixture
def omnipath_io(tmp_path):
    """Flag/value mapping that enables only the Omnipath mask.

    The gene count distribution plot is written into ``tmp_path``.
    """
    flags = {
        "--create_nichenet_gene_program_mask": "False",
        "--create_omnipath_gene_program_mask": "True",
        "--create_mebocost_gene_program_mask": "False",
        "--create_collectri_tf_gene_program_mask": "False",
        "--input_gene_orthologs_mapping_file": ortholog_file,
        "--input_omnipath_lr_network": omnipath_lr_network_file,
        "--output_omnipath_gp_gene_count_distributions": tmp_path
        / "omnipath_distr.svg",
    }
    return flags
|
||||
|
||||
|
||||
@pytest.fixture
def mebocost_io(tmp_path):
    """Flag/value mapping that enables only the MeBocost mask.

    The gene count distribution plot is written into ``tmp_path``.
    """
    flags = {
        "--create_nichenet_gene_program_mask": "False",
        "--create_omnipath_gene_program_mask": "False",
        "--create_mebocost_gene_program_mask": "True",
        "--create_collectri_tf_gene_program_mask": "False",
        "--input_metabolite_enzymes": enzymes_file,
        "--input_metabolite_sensors": sensors_file,
        "--output_mebocost_gp_gene_count_distributions": tmp_path
        / "mebocost_distr.svg",
    }
    return flags
|
||||
|
||||
|
||||
@pytest.fixture
def collectri_io(tmp_path):
    """Flag/value mapping that enables only the CollecTRI TF mask.

    The gene count distribution plot is written into ``tmp_path``.
    """
    flags = {
        "--create_nichenet_gene_program_mask": "False",
        "--create_omnipath_gene_program_mask": "False",
        "--create_mebocost_gene_program_mask": "False",
        "--create_collectri_tf_gene_program_mask": "True",
        "--input_gene_orthologs_mapping_file": ortholog_file,
        "--input_collectri_tf_network": collectri_tf_network_file,
        # NOTE(review): the component's main() does not appear to read an
        # `input_collectri_ligand_target_matrix` parameter - confirm this flag
        # is declared in the component config.
        "--input_collectri_ligand_target_matrix": collectri_tf_network_file,
        "--output_collectri_tf_gp_gene_count_distributions": tmp_path
        / "collectri_distr.svg",
    }
    return flags
|
||||
|
||||
|
||||
@pytest.fixture
def gene_program_io(nichenet_io, omnipath_io, mebocost_io, collectri_io):
    """Map each mask type name to its per-mask flag/value fixture."""
    names = ("nichenet", "omnipath", "mebocost", "collectri")
    fixtures = (nichenet_io, omnipath_io, mebocost_io, collectri_io)
    return dict(zip(names, fixtures))
|
||||
|
||||
|
||||
def test_api_execution(run_component, tmp_path):
    """Run the component with all four masks enabled for mouse.

    Checks that the output JSON exists, that at least one gene program of
    each expected kind is present, and that those programs have the expected
    keys.
    """
    output = tmp_path / "output.json"

    run_component(
        [
            "--input_gene_orthologs_mapping_file",
            ortholog_file,
            "--input_metabolite_enzymes",
            enzymes_file,
            "--input_metabolite_sensors",
            sensors_file,
            "--create_omnipath_gene_program_mask",
            "True",
            "--create_nichenet_gene_program_mask",
            "True",
            "--create_mebocost_gene_program_mask",
            "True",
            "--create_collectri_tf_gene_program_mask",
            "True",
            "--species",
            "mouse",
            "--output",
            output,
        ]
    )

    # The combined mask must have been written.
    assert output.is_file(), "Output file does not exist"

    with open(output, "r") as handle:
        gp_mask = json.load(handle)

    expected_gp_keys = [
        "ligand_receptor_GP",  # omnipath
        "ligand_receptor_target_gene_GP",  # nichenet
        "metabolite_enzyme_sensor_GP",  # mebocost
        "TF_target_genes_GP",  # collectri
        "combined_GP",
    ]

    # Pick the first gene program whose name contains each expected substring.
    matching_gp = []
    for key in expected_gp_keys:
        hits = [gp for gp in gp_mask if key in gp]
        assert hits, f"No gene programs containing '{key}' found"
        matching_gp.append(hits[0])

    # Every selected gene program must expose these entries.
    expected_keys = [
        "sources",
        "targets",
        "sources_categories",
        "targets_categories",
    ]
    for gp in matching_gp:
        absent = [name for name in expected_keys if name not in gp_mask[gp]]
        assert not absent, f"Gene program {gp} is missing expected keys"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "mask_type,expected_gp_keys",
    [
        ("nichenet", ["ligand_receptor_target_gene_GP"]),
        ("omnipath", ["ligand_receptor_GP", "combined_GP"]),
        ("mebocost", ["metabolite_enzyme_sensor_GP", "combined_GP"]),
        ("collectri", ["TF_target_genes_GP"]),
    ],
)
def test_io_gene_program_mask(
    run_component, tmp_path, mask_type, expected_gp_keys, gene_program_io
):
    """Run the component with one mask enabled and its own input files.

    Checks that the combined mask and every ``--output*`` file passed on the
    command line exist, and that the expected gene programs have the
    expected keys.
    """
    output = tmp_path / "output.json"
    io_args = gene_program_io[mask_type]

    # Base arguments followed by the flattened flag/value pairs for this mask.
    args = ["--species", "mouse", "--output", output]
    args += [item for pair in io_args.items() for item in pair]

    run_component(args)

    # The main output plus every per-mask output path passed on the command line.
    expected_outputs = [output] + [
        value for flag, value in io_args.items() if flag.startswith("--output")
    ]
    for output_file in expected_outputs:
        assert output_file.is_file(), (
            f"Expected output file {output_file} does not exist"
        )

    with open(output, "r") as handle:
        gp_mask = json.load(handle)

    # Pick the first gene program whose name contains each expected substring.
    matching_gp = []
    for key in expected_gp_keys:
        hits = [gp for gp in gp_mask if key in gp]
        assert hits, f"No gene programs containing '{key}' found"
        matching_gp.append(hits[0])

    # Every selected gene program must expose these entries.
    expected_keys = [
        "sources",
        "targets",
        "sources_categories",
        "targets_categories",
    ]
    for gp in matching_gp:
        absent = [name for name in expected_keys if name not in gp_mask[gp]]
        assert not absent, f"Gene program {gp} is missing expected keys"
|
||||
|
||||
|
||||
def test_fail_no_masks(run_component, tmp_path):
    """The component must fail when every mask flag is set to False."""
    output = tmp_path / "output.json"

    args = [
        "--input_gene_orthologs_mapping_file",
        ortholog_file,
        "--input_metabolite_enzymes",
        enzymes_file,
        "--input_metabolite_sensors",
        sensors_file,
        "--create_omnipath_gene_program_mask",
        "False",
        "--create_nichenet_gene_program_mask",
        "False",
        "--create_mebocost_gene_program_mask",
        "False",
        "--create_collectri_tf_gene_program_mask",
        "False",
        "--species",
        "mouse",
        "--output",
        output,
    ]

    # fails because no gene program mask is enabled
    with pytest.raises(subprocess.CalledProcessError) as err:
        run_component(args)

    captured = err.value.stdout.decode("utf-8")
    assert re.search(
        r"At least one gene program mask must be set to True",
        captured,
    )
|
||||
|
||||
|
||||
def test_fail_missing_omnipath_orthologs(run_component, tmp_path):
    """A mouse Omnipath mask without an ortholog mapping file must fail."""
    output = tmp_path / "output.json"

    args = [
        "--create_omnipath_gene_program_mask",
        "True",
        "--create_nichenet_gene_program_mask",
        "False",
        "--create_mebocost_gene_program_mask",
        "False",
        "--create_collectri_tf_gene_program_mask",
        "False",
        "--species",
        "mouse",
        "--output",
        output,
    ]

    # fails because omnipath mask creation requires gene ortholog mapping
    with pytest.raises(subprocess.CalledProcessError) as err:
        run_component(args)

    captured = err.value.stdout.decode("utf-8")
    assert re.search(
        r"Mouse species requires --input_gene_orthologs_mapping_file to generate the Omnipath mask.",
        captured,
    )
|
||||
|
||||
|
||||
def test_fail_missing_nichenet_orthologs(run_component, tmp_path):
    """A mouse NicheNet mask without an ortholog mapping file must fail."""
    output = tmp_path / "output.json"

    args = [
        "--create_omnipath_gene_program_mask",
        "False",
        "--create_nichenet_gene_program_mask",
        "True",
        "--create_mebocost_gene_program_mask",
        "False",
        "--create_collectri_tf_gene_program_mask",
        "False",
        "--species",
        "mouse",
        "--output",
        output,
    ]

    # fails because nichenet mask creation requires gene ortholog mapping
    with pytest.raises(subprocess.CalledProcessError) as err:
        run_component(args)

    captured = err.value.stdout.decode("utf-8")
    assert re.search(
        r"Mouse species requires --input_gene_orthologs_mapping_file to generate the NicheNet mask.",
        captured,
    )
|
||||
|
||||
|
||||
def test_fail_missing_mebocost_metabolites(run_component, tmp_path):
    """A MeBocost mask without the metabolite input files must fail."""
    output = tmp_path / "output.json"

    args = [
        "--create_omnipath_gene_program_mask",
        "False",
        "--create_nichenet_gene_program_mask",
        "False",
        "--create_mebocost_gene_program_mask",
        "True",
        "--create_collectri_tf_gene_program_mask",
        "False",
        "--species",
        "mouse",
        "--output",
        output,
    ]

    # fails because mebocost mask creation requires metabolite files
    with pytest.raises(subprocess.CalledProcessError) as err:
        run_component(args)

    captured = err.value.stdout.decode("utf-8")
    assert re.search(
        r"MeBocost mask requires --input_metabolite_enzymes and --input_metabolite_sensors.",
        captured,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(pytest.main([__file__]))
|
||||
@@ -144,6 +144,15 @@ argument_groups:
|
||||
type: integer
|
||||
description: Minimum of non-zero values per protein.
|
||||
|
||||
- name: "Cross-modality filtering"
|
||||
arguments:
|
||||
- name: "--intersect_obs"
|
||||
type: boolean_true
|
||||
description: |
|
||||
After per-modality filtering and multisample processing, remove observations
|
||||
that are not present in all processed modalities so that each modality shares
|
||||
the same set of cells.
|
||||
|
||||
- name: "Highly variable features detection"
|
||||
arguments:
|
||||
- name: "--highly_variable_features_var_output"
|
||||
|
||||
@@ -34,6 +34,7 @@ workflow run_wf {
|
||||
"prot_min_proteins_per_cell": state.prot_min_proteins_per_cell,
|
||||
"prot_max_proteins_per_cell": state.prot_max_proteins_per_cell,
|
||||
"prot_min_cells_per_protein": state.prot_min_cells_per_protein,
|
||||
"intersect_obs": state.intersect_obs,
|
||||
"highly_variable_features_var_output": state.highly_variable_features_var_output,
|
||||
"highly_variable_features_obs_batch_key": state.highly_variable_features_obs_batch_key,
|
||||
"var_gene_names": state.var_gene_names,
|
||||
|
||||
@@ -112,7 +112,7 @@ repositories:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
tag: "v4.1.0"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
docker_registry: "ghcr.io"
|
||||
@@ -201,7 +201,7 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "scanpy~=1.10.4"
|
||||
- "scanpy~=1.11.4"
|
||||
- "squidpy~=1.8.1"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
@@ -212,7 +212,7 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.9.0"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
upgrade: true
|
||||
@@ -226,8 +226,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/_private/executable/filter/subset_cosmx"
|
||||
executable: "target/_private/executable/filter/subset_cosmx/subset_cosmx"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "87e62605aafd706d539bf2978ef47ede6fe41926"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "3edcea085b905ea86a471276eda3c691aad66c30"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
package_config:
|
||||
name: "openpipeline_spatial"
|
||||
@@ -241,8 +241,8 @@ package_config:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
viash_version: "0.9.4"
|
||||
tag: "v4.1.0"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# subset_cosmx niche-compass
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -454,13 +454,13 @@ RUN apt-get update && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install --upgrade --no-cache-dir "scanpy~=1.10.4" "squidpy~=1.8.1"
|
||||
pip install --upgrade --no-cache-dir "scanpy~=1.11.4" "squidpy~=1.8.1"
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component filter subset_cosmx"
|
||||
LABEL org.opencontainers.image.created="2026-03-25T10:11:21Z"
|
||||
LABEL org.opencontainers.image.created="2026-06-08T11:59:18Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
LABEL org.opencontainers.image.revision="87e62605aafd706d539bf2978ef47ede6fe41926"
|
||||
LABEL org.opencontainers.image.revision="3edcea085b905ea86a471276eda3c691aad66c30"
|
||||
LABEL org.opencontainers.image.version="niche-compass"
|
||||
|
||||
VIASHDOCKER
|
||||
|
||||
@@ -112,7 +112,7 @@ repositories:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
tag: "v4.1.0"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
docker_registry: "ghcr.io"
|
||||
@@ -201,7 +201,7 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "scanpy~=1.10.4"
|
||||
- "scanpy~=1.11.4"
|
||||
- "squidpy~=1.8.1"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
@@ -212,7 +212,7 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.9.0"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
upgrade: true
|
||||
@@ -226,8 +226,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/_private/nextflow/filter/subset_cosmx"
|
||||
executable: "target/_private/nextflow/filter/subset_cosmx/main.nf"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "87e62605aafd706d539bf2978ef47ede6fe41926"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "3edcea085b905ea86a471276eda3c691aad66c30"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
package_config:
|
||||
name: "openpipeline_spatial"
|
||||
@@ -241,8 +241,8 @@ package_config:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
viash_version: "0.9.4"
|
||||
tag: "v4.1.0"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// subset_cosmx niche-compass
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -1354,47 +1354,42 @@ def readCsv(file_path) {
|
||||
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
|
||||
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
|
||||
|
||||
def br = java.nio.file.Files.newBufferedReader(inputFile)
|
||||
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
|
||||
def row = 0
|
||||
def header = null
|
||||
def line
|
||||
|
||||
def row = -1
|
||||
def header = null
|
||||
while (br.ready() && header == null) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect{field ->
|
||||
m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
}
|
||||
assert header != null: "CSV file should contain a header"
|
||||
|
||||
while (br.ready()) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (line == null) {
|
||||
br.close()
|
||||
break
|
||||
}
|
||||
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect{field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
while (header == null && (line = br.readLine()) != null) {
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect { field ->
|
||||
def m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
|
||||
output.add(dataMap)
|
||||
row++
|
||||
}
|
||||
assert header != null : "CSV file should contain a header"
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect { field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
|
||||
output.add(dataMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1710,10 +1705,25 @@ process publishFilesProc {
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
def infileString = infile.toString()
|
||||
def outfileString = outfile.toString()
|
||||
if (infileString != outfileString) {
|
||||
/* Trailing slashes are removed from both the source and destination arguments.
|
||||
From source arguments, this is useful when a source argument may have a trailing slash
|
||||
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
|
||||
the symbolic link.
|
||||
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
|
||||
|
||||
For the destination path, adding a trailing slash is a problem when publishing directories:
|
||||
it requires the destination directory to exist. This fails because we only create the parent
|
||||
directories first.
|
||||
*/
|
||||
def regexTrailingSlashes = ~/\/+$/
|
||||
def infileNoTrailingSlash = infileString - regexTrailingSlashes
|
||||
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
|
||||
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
@@ -3187,7 +3197,7 @@ meta = [
|
||||
"type" : "vsh",
|
||||
"name" : "openpipeline",
|
||||
"repo" : "openpipeline",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
}
|
||||
],
|
||||
"links" : {
|
||||
@@ -3295,7 +3305,7 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"scanpy~=1.10.4",
|
||||
"scanpy~=1.11.4",
|
||||
"squidpy~=1.8.1"
|
||||
],
|
||||
"upgrade" : true
|
||||
@@ -3313,7 +3323,7 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"viashpy==0.9.0"
|
||||
"viashpy==0.10.0"
|
||||
],
|
||||
"github" : [
|
||||
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
@@ -3332,8 +3342,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "/workdir/root/repo/target/_private/nextflow/filter/subset_cosmx",
|
||||
"viash_version" : "0.9.4",
|
||||
"git_commit" : "87e62605aafd706d539bf2978ef47ede6fe41926",
|
||||
"viash_version" : "0.9.7",
|
||||
"git_commit" : "3edcea085b905ea86a471276eda3c691aad66c30",
|
||||
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3353,10 +3363,10 @@ meta = [
|
||||
"type" : "vsh",
|
||||
"name" : "openpipeline",
|
||||
"repo" : "openpipeline",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
}
|
||||
],
|
||||
"viash_version" : "0.9.4",
|
||||
"viash_version" : "0.9.7",
|
||||
"source" : "/workdir/root/repo/src",
|
||||
"target" : "/workdir/root/repo/target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -48,7 +48,7 @@ repositories:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
tag: "v4.1.0"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
docker_registry: "ghcr.io"
|
||||
@@ -135,10 +135,11 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.12.7"
|
||||
- "anndata~=0.12.16"
|
||||
- "awkward"
|
||||
- "mudata~=0.3.2"
|
||||
- "viashpy==0.9.0"
|
||||
- "scipy~=1.17.1"
|
||||
- "mudata~=0.3.8"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
script:
|
||||
@@ -156,8 +157,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/_test/executable/test_workflows/ingestion/spaceranger_mapping_test"
|
||||
executable: "target/_test/executable/test_workflows/ingestion/spaceranger_mapping_test/spaceranger_mapping_test"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "87e62605aafd706d539bf2978ef47ede6fe41926"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "3edcea085b905ea86a471276eda3c691aad66c30"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
package_config:
|
||||
name: "openpipeline_spatial"
|
||||
@@ -171,8 +172,8 @@ package_config:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
viash_version: "0.9.4"
|
||||
tag: "v4.1.0"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# spaceranger_mapping_test niche-compass
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -453,15 +453,15 @@ RUN apt-get update && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install --upgrade --no-cache-dir "anndata~=0.12.7" "awkward" "mudata~=0.3.2" "viashpy==0.9.0" && \
|
||||
pip install --upgrade --no-cache-dir "anndata~=0.12.16" "awkward" "scipy~=1.17.1" "mudata~=0.3.8" "viashpy==0.10.0" && \
|
||||
pip install --upgrade --no-cache-dir "git+https://github.com/openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" && \
|
||||
python -c 'exec("try:\n import zarr; from importlib.metadata import version\nexcept ModuleNotFoundError:\n exit(0)\nelse: assert int(version(\"zarr\").partition(\".\")[0]) > 2")'
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dorien Roosen"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component test_workflows/ingestion spaceranger_mapping_test"
|
||||
LABEL org.opencontainers.image.created="2026-03-25T10:11:23Z"
|
||||
LABEL org.opencontainers.image.created="2026-06-08T11:59:17Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
LABEL org.opencontainers.image.revision="87e62605aafd706d539bf2978ef47ede6fe41926"
|
||||
LABEL org.opencontainers.image.revision="3edcea085b905ea86a471276eda3c691aad66c30"
|
||||
LABEL org.opencontainers.image.version="niche-compass"
|
||||
|
||||
VIASHDOCKER
|
||||
|
||||
@@ -47,7 +47,7 @@ repositories:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
tag: "v4.1.0"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
docker_registry: "ghcr.io"
|
||||
@@ -134,10 +134,11 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.12.7"
|
||||
- "anndata~=0.12.16"
|
||||
- "awkward"
|
||||
- "mudata~=0.3.2"
|
||||
- "viashpy==0.9.0"
|
||||
- "scipy~=1.17.1"
|
||||
- "mudata~=0.3.8"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
script:
|
||||
@@ -155,8 +156,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/_test/executable/test_workflows/niche/nichecompass_leiden_test"
|
||||
executable: "target/_test/executable/test_workflows/niche/nichecompass_leiden_test/nichecompass_leiden_test"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "87e62605aafd706d539bf2978ef47ede6fe41926"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "3edcea085b905ea86a471276eda3c691aad66c30"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
package_config:
|
||||
name: "openpipeline_spatial"
|
||||
@@ -170,8 +171,8 @@ package_config:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
viash_version: "0.9.4"
|
||||
tag: "v4.1.0"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# nichecompass_leiden_test niche-compass
|
||||
#
|
||||
# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
# This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
# Intuitive.
|
||||
#
|
||||
@@ -453,15 +453,15 @@ RUN apt-get update && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install --upgrade pip && \
|
||||
pip install --upgrade --no-cache-dir "anndata~=0.12.7" "awkward" "mudata~=0.3.2" "viashpy==0.9.0" && \
|
||||
pip install --upgrade --no-cache-dir "anndata~=0.12.16" "awkward" "scipy~=1.17.1" "mudata~=0.3.8" "viashpy==0.10.0" && \
|
||||
pip install --upgrade --no-cache-dir "git+https://github.com/openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" && \
|
||||
python -c 'exec("try:\n import zarr; from importlib.metadata import version\nexcept ModuleNotFoundError:\n exit(0)\nelse: assert int(version(\"zarr\").partition(\".\")[0]) > 2")'
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dorien Roosen"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component test_workflows/niche nichecompass_leiden_test"
|
||||
LABEL org.opencontainers.image.created="2026-03-25T10:11:23Z"
|
||||
LABEL org.opencontainers.image.created="2026-06-08T11:59:17Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
LABEL org.opencontainers.image.revision="87e62605aafd706d539bf2978ef47ede6fe41926"
|
||||
LABEL org.opencontainers.image.revision="3edcea085b905ea86a471276eda3c691aad66c30"
|
||||
LABEL org.opencontainers.image.version="niche-compass"
|
||||
|
||||
VIASHDOCKER
|
||||
|
||||
@@ -48,7 +48,7 @@ repositories:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
tag: "v4.1.0"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
docker_registry: "ghcr.io"
|
||||
@@ -135,10 +135,11 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.12.7"
|
||||
- "anndata~=0.12.16"
|
||||
- "awkward"
|
||||
- "mudata~=0.3.2"
|
||||
- "viashpy==0.9.0"
|
||||
- "scipy~=1.17.1"
|
||||
- "mudata~=0.3.8"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
script:
|
||||
@@ -156,8 +157,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test"
|
||||
executable: "target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test/main.nf"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "87e62605aafd706d539bf2978ef47ede6fe41926"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "3edcea085b905ea86a471276eda3c691aad66c30"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
package_config:
|
||||
name: "openpipeline_spatial"
|
||||
@@ -171,8 +172,8 @@ package_config:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
viash_version: "0.9.4"
|
||||
tag: "v4.1.0"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// spaceranger_mapping_test niche-compass
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
|
||||
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
|
||||
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
|
||||
|
||||
def br = java.nio.file.Files.newBufferedReader(inputFile)
|
||||
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
|
||||
def row = 0
|
||||
def header = null
|
||||
def line
|
||||
|
||||
def row = -1
|
||||
def header = null
|
||||
while (br.ready() && header == null) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect{field ->
|
||||
m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
}
|
||||
assert header != null: "CSV file should contain a header"
|
||||
|
||||
while (br.ready()) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (line == null) {
|
||||
br.close()
|
||||
break
|
||||
}
|
||||
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect{field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
while (header == null && (line = br.readLine()) != null) {
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect { field ->
|
||||
def m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
|
||||
output.add(dataMap)
|
||||
row++
|
||||
}
|
||||
assert header != null : "CSV file should contain a header"
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect { field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
|
||||
output.add(dataMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1709,10 +1704,25 @@ process publishFilesProc {
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
def infileString = infile.toString()
|
||||
def outfileString = outfile.toString()
|
||||
if (infileString != outfileString) {
|
||||
/* Trailing slashes are removed from both the source and destination arguments.
|
||||
From source arguments, this is useful when a source argument may have a trailing slash
|
||||
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
|
||||
the symbolic link.
|
||||
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
|
||||
|
||||
For the destination path adding a trailing slash is a problem when publishing directories:
|
||||
it requires the destination directory to exist. This fails because we only create the parent
|
||||
directories first.
|
||||
*/
|
||||
def regexTrailingSlashes = ~/\/+$/
|
||||
def infileNoTrailingSlash = infileString - regexTrailingSlashes
|
||||
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
|
||||
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
@@ -3104,7 +3114,7 @@ meta = [
|
||||
"type" : "vsh",
|
||||
"name" : "openpipeline",
|
||||
"repo" : "openpipeline",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
}
|
||||
],
|
||||
"links" : {
|
||||
@@ -3209,10 +3219,11 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"anndata~=0.12.7",
|
||||
"anndata~=0.12.16",
|
||||
"awkward",
|
||||
"mudata~=0.3.2",
|
||||
"viashpy==0.9.0"
|
||||
"scipy~=1.17.1",
|
||||
"mudata~=0.3.8",
|
||||
"viashpy==0.10.0"
|
||||
],
|
||||
"github" : [
|
||||
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
@@ -3234,8 +3245,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "/workdir/root/repo/target/_test/nextflow/test_workflows/ingestion/spaceranger_mapping_test",
|
||||
"viash_version" : "0.9.4",
|
||||
"git_commit" : "87e62605aafd706d539bf2978ef47ede6fe41926",
|
||||
"viash_version" : "0.9.7",
|
||||
"git_commit" : "3edcea085b905ea86a471276eda3c691aad66c30",
|
||||
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3255,10 +3266,10 @@ meta = [
|
||||
"type" : "vsh",
|
||||
"name" : "openpipeline",
|
||||
"repo" : "openpipeline",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
}
|
||||
],
|
||||
"viash_version" : "0.9.4",
|
||||
"viash_version" : "0.9.7",
|
||||
"source" : "/workdir/root/repo/src",
|
||||
"target" : "/workdir/root/repo/target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -47,7 +47,7 @@ repositories:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
tag: "v4.1.0"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
docker_registry: "ghcr.io"
|
||||
@@ -134,10 +134,11 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.12.7"
|
||||
- "anndata~=0.12.16"
|
||||
- "awkward"
|
||||
- "mudata~=0.3.2"
|
||||
- "viashpy==0.9.0"
|
||||
- "scipy~=1.17.1"
|
||||
- "mudata~=0.3.8"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
script:
|
||||
@@ -155,8 +156,8 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test"
|
||||
executable: "target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/main.nf"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "87e62605aafd706d539bf2978ef47ede6fe41926"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "3edcea085b905ea86a471276eda3c691aad66c30"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
package_config:
|
||||
name: "openpipeline_spatial"
|
||||
@@ -170,8 +171,8 @@ package_config:
|
||||
- type: "vsh"
|
||||
name: "openpipeline"
|
||||
repo: "openpipeline"
|
||||
tag: "v4.0.3"
|
||||
viash_version: "0.9.4"
|
||||
tag: "v4.1.0"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// nichecompass_leiden_test niche-compass
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
|
||||
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
|
||||
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
|
||||
|
||||
def br = java.nio.file.Files.newBufferedReader(inputFile)
|
||||
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
|
||||
def row = 0
|
||||
def header = null
|
||||
def line
|
||||
|
||||
def row = -1
|
||||
def header = null
|
||||
while (br.ready() && header == null) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect{field ->
|
||||
m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
}
|
||||
assert header != null: "CSV file should contain a header"
|
||||
|
||||
while (br.ready()) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (line == null) {
|
||||
br.close()
|
||||
break
|
||||
}
|
||||
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect{field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
while (header == null && (line = br.readLine()) != null) {
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect { field ->
|
||||
def m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
|
||||
output.add(dataMap)
|
||||
row++
|
||||
}
|
||||
assert header != null : "CSV file should contain a header"
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect { field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
|
||||
output.add(dataMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1709,10 +1704,25 @@ process publishFilesProc {
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
def infileString = infile.toString()
|
||||
def outfileString = outfile.toString()
|
||||
if (infileString != outfileString) {
|
||||
/* Trailing slashes are removed from both the source and destination arguments.
|
||||
From source arguments, this is useful when a source argument may have a trailing slash
|
||||
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
|
||||
the symbolic link.
|
||||
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
|
||||
|
||||
For the destination path adding a trailing slash is a problem when publishing directories:
|
||||
it requires the destination directory to exist. This fails because we only create the parent
|
||||
directories first.
|
||||
*/
|
||||
def regexTrailingSlashes = ~/\/+$/
|
||||
def infileNoTrailingSlash = infileString - regexTrailingSlashes
|
||||
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
|
||||
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
@@ -3104,7 +3114,7 @@ meta = [
|
||||
"type" : "vsh",
|
||||
"name" : "openpipeline",
|
||||
"repo" : "openpipeline",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
}
|
||||
],
|
||||
"links" : {
|
||||
@@ -3209,10 +3219,11 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"anndata~=0.12.7",
|
||||
"anndata~=0.12.16",
|
||||
"awkward",
|
||||
"mudata~=0.3.2",
|
||||
"viashpy==0.9.0"
|
||||
"scipy~=1.17.1",
|
||||
"mudata~=0.3.8",
|
||||
"viashpy==0.10.0"
|
||||
],
|
||||
"github" : [
|
||||
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
@@ -3234,8 +3245,8 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "/workdir/root/repo/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test",
|
||||
"viash_version" : "0.9.4",
|
||||
"git_commit" : "87e62605aafd706d539bf2978ef47ede6fe41926",
|
||||
"viash_version" : "0.9.7",
|
||||
"git_commit" : "3edcea085b905ea86a471276eda3c691aad66c30",
|
||||
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
|
||||
},
|
||||
"package_config" : {
|
||||
@@ -3255,10 +3266,10 @@ meta = [
|
||||
"type" : "vsh",
|
||||
"name" : "openpipeline",
|
||||
"repo" : "openpipeline",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
}
|
||||
],
|
||||
"viash_version" : "0.9.4",
|
||||
"viash_version" : "0.9.7",
|
||||
"source" : "/workdir/root/repo/src",
|
||||
"target" : "/workdir/root/repo/target",
|
||||
"config_mods" : [
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
def subset_vars(adata, subset_col):
    """Return a copy of `adata` restricted to the features flagged in a boolean `.var` column.

    The input object is left untouched: a nullable ``boolean`` selection
    column is converted to plain ``bool`` on a temporary mask and on the
    returned copy only.

    Parameters
    ----------
    adata : AnnData
        Annotated data object
    subset_col : str
        Name of the boolean column in `adata.var` that contains the information if features should be used or not

    Returns
    -------
    AnnData
        Copy of `adata` with subsetted features

    Raises
    ------
    ValueError
        If `subset_col` is not a column of `adata.var`.
    AssertionError
        If the column contains missing values or does not have a boolean dtype.
    """
    if subset_col not in adata.var.columns:
        raise ValueError(
            f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
        )

    mask = adata.var[subset_col]

    # Nullable pandas "boolean" columns must be free of missing values before
    # they can be turned into a plain bool mask.
    was_nullable = mask.dtype == "boolean"
    if was_nullable:
        # Raised explicitly (instead of `assert`) so the check survives
        # `python -O`; the exception type is unchanged for callers.
        if mask.isna().sum() != 0:
            raise AssertionError(
                f"The .var column `{subset_col}` contains NaN values. Can not subset data."
            )
        # `astype` returns a new Series, so the caller's `.var` is not modified.
        mask = mask.astype("bool")

    if not (mask.dtype == "bool"):
        raise AssertionError(
            f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {mask.dtype}. Can not subset data."
        )

    subset = adata[:, mask].copy()
    if was_nullable:
        # Keep the previous output contract: the returned object carries a
        # plain `bool` selection column.
        subset.var[subset_col] = subset.var[subset_col].astype("bool")
    return subset
|
||||
@@ -0,0 +1,567 @@
|
||||
name: "process_singlesample_base"
|
||||
namespace: "workflows/multiomics"
|
||||
version: "v4.1.0"
|
||||
authors:
|
||||
- name: "Dorien Roosen"
|
||||
roles:
|
||||
- "author"
|
||||
info:
|
||||
role: "Core Team Member"
|
||||
links:
|
||||
email: "dorien@data-intuitive.com"
|
||||
github: "dorien-er"
|
||||
linkedin: "dorien-roosen"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
argument_groups:
|
||||
- name: "Output State"
|
||||
arguments:
|
||||
- type: "string"
|
||||
name: "--output_modality"
|
||||
info: null
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Inputs"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
alternatives:
|
||||
- "-i"
|
||||
description: "Path to the sample."
|
||||
info: null
|
||||
example:
|
||||
- "input.h5mu"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--rna_layer"
|
||||
description: "Input layer for the gene expression modality. If not specified,\
|
||||
\ .X is used."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--prot_layer"
|
||||
description: "Input layer for the antibody capture modality. If not specified,\
|
||||
\ .X is used."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--gdo_layer"
|
||||
description: "Input layer for the guide-derived oligonucleotide (GDO) data. If\
|
||||
\ not specified, .X is used."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Outputs"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
description: "Destination path to the output."
|
||||
info: null
|
||||
example:
|
||||
- "output.h5mu"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Sample ID options"
|
||||
description: "Options for adding the id to .obs on the MuData object. Having a sample\
|
||||
\ \nid present is a requirement of several components for this pipeline.\n"
|
||||
arguments:
|
||||
- type: "boolean"
|
||||
name: "--add_id_to_obs"
|
||||
description: "Add the value passed with --id to .obs."
|
||||
info: null
|
||||
default:
|
||||
- true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--add_id_obs_output"
|
||||
description: ".Obs column to add the sample IDs to. Required and only used when\
|
||||
\ \n--add_id_to_obs is set to 'true'\n"
|
||||
info: null
|
||||
default:
|
||||
- "sample_id"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean"
|
||||
name: "--add_id_make_observation_keys_unique"
|
||||
description: "Join the id to the .obs index (.obs_names). \nOnly used when --add_id_to_obs\
|
||||
\ is set to 'true'.\n"
|
||||
info: null
|
||||
default:
|
||||
- true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "RNA filtering options"
|
||||
arguments:
|
||||
- type: "integer"
|
||||
name: "--rna_min_counts"
|
||||
description: "Minimum number of counts captured per cell."
|
||||
info: null
|
||||
example:
|
||||
- 200
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--rna_max_counts"
|
||||
description: "Maximum number of counts captured per cell."
|
||||
info: null
|
||||
example:
|
||||
- 5000000
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--rna_min_genes_per_cell"
|
||||
description: "Minimum of non-zero values per cell."
|
||||
info: null
|
||||
example:
|
||||
- 200
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--rna_max_genes_per_cell"
|
||||
description: "Maximum of non-zero values per cell."
|
||||
info: null
|
||||
example:
|
||||
- 1500000
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--rna_min_cells_per_gene"
|
||||
description: "Minimum of non-zero values per gene."
|
||||
info: null
|
||||
example:
|
||||
- 3
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--rna_min_fraction_mito"
|
||||
description: "Minimum fraction of UMIs that are mitochondrial."
|
||||
info: null
|
||||
example:
|
||||
- 0.0
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--rna_max_fraction_mito"
|
||||
description: "Maximum fraction of UMIs that are mitochondrial."
|
||||
info: null
|
||||
example:
|
||||
- 0.2
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--rna_min_fraction_ribo"
|
||||
description: "Minimum fraction of UMIs that are ribosomal."
|
||||
info: null
|
||||
example:
|
||||
- 0.0
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--rna_max_fraction_ribo"
|
||||
description: "Maximum fraction of UMIs that are ribosomal."
|
||||
info: null
|
||||
example:
|
||||
- 0.2
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean_true"
|
||||
name: "--skip_scrublet_doublet_detection"
|
||||
description: "Skip the scrublet doublet detection step."
|
||||
info: null
|
||||
direction: "input"
|
||||
- name: "CITE-seq filtering options"
|
||||
arguments:
|
||||
- type: "integer"
|
||||
name: "--prot_min_counts"
|
||||
description: "Minimum number of counts per cell."
|
||||
info: null
|
||||
example:
|
||||
- 3
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--prot_max_counts"
|
||||
description: "Maximum number of counts per cell."
|
||||
info: null
|
||||
example:
|
||||
- 5000000
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--prot_min_proteins_per_cell"
|
||||
description: "Minimum of non-zero values per cell."
|
||||
info: null
|
||||
example:
|
||||
- 200
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--prot_max_proteins_per_cell"
|
||||
description: "Maximum of non-zero values per cell."
|
||||
info: null
|
||||
example:
|
||||
- 100000000
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--prot_min_cells_per_protein"
|
||||
description: "Minimum of non-zero values per protein."
|
||||
info: null
|
||||
example:
|
||||
- 3
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "GDO filtering options"
|
||||
arguments:
|
||||
- type: "integer"
|
||||
name: "--gdo_min_counts"
|
||||
description: "Minimum number of counts per cell."
|
||||
info: null
|
||||
example:
|
||||
- 3
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--gdo_max_counts"
|
||||
description: "Maximum number of counts per cell."
|
||||
info: null
|
||||
example:
|
||||
- 5000000
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--gdo_min_guides_per_cell"
|
||||
description: "Minimum of non-zero values per cell."
|
||||
info: null
|
||||
example:
|
||||
- 200
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--gdo_max_guides_per_cell"
|
||||
description: "Maximum of non-zero values per cell."
|
||||
info: null
|
||||
example:
|
||||
- 100000000
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--gdo_min_cells_per_guide"
|
||||
description: "Minimum of non-zero values per guide."
|
||||
info: null
|
||||
example:
|
||||
- 3
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Cross-modality filtering"
|
||||
arguments:
|
||||
- type: "boolean_true"
|
||||
name: "--intersect_obs"
|
||||
description: "After per-modality filtering, remove observations that are not present\n\
|
||||
in all processed modalities so that each modality shares the same set of cells.\n"
|
||||
info: null
|
||||
direction: "input"
|
||||
- name: "Mitochondrial & Ribosomal Gene Detection"
|
||||
arguments:
|
||||
- type: "string"
|
||||
name: "--var_gene_names"
|
||||
description: ".var column name to be used to detect mitochondrial/ribosomal genes\
|
||||
\ instead of .var_names (default if not set).\nGene names matching with the\
|
||||
\ regex value from --mitochondrial_gene_regex or --ribosomal_gene_regex will\
|
||||
\ be \nidentified as mitochondrial or ribosomal genes, respectively. \n"
|
||||
info: null
|
||||
example:
|
||||
- "gene_symbol"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--var_name_mitochondrial_genes"
|
||||
description: "In which .var slot to store a boolean array corresponding to the mitochondrial\
|
||||
\ genes.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_name_mitochondrial_fraction"
|
||||
description: "When specified, write the fraction of counts originating from mitochondrial\
|
||||
\ genes \n(based on --mitochondrial_gene_regex) to an .obs column with the specified\
|
||||
\ name.\nRequires --var_name_mitochondrial_genes.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--mitochondrial_gene_regex"
|
||||
description: "Regex string that identifies mitochondrial genes from --var_gene_names.\n\
|
||||
By default will detect human and mouse mitochondrial genes from a gene symbol.\n"
|
||||
info: null
|
||||
default:
|
||||
- "^[mM][tT]-"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--var_name_ribosomal_genes"
|
||||
description: "In which .var slot to store a boolean array corresponding to the ribosomal\
|
||||
\ genes.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_name_ribosomal_fraction"
|
||||
description: "When specified, write the fraction of counts originating from ribosomal\
|
||||
\ genes \n(based on --ribosomal_gene_regex) to an .obs column with the specified\
|
||||
\ name.\nRequires --var_name_ribosomal_genes.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--ribosomal_gene_regex"
|
||||
description: "Regex string that identifies ribosomal genes from --var_gene_names.\n\
|
||||
By default will detect human and mouse ribosomal genes from a gene symbol.\n"
|
||||
info: null
|
||||
default:
|
||||
- "^[Mm]?[Rr][Pp][LlSs]"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "nextflow_script"
|
||||
path: "main.nf"
|
||||
is_executable: true
|
||||
entrypoint: "run_wf"
|
||||
- type: "file"
|
||||
path: "utils"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "A pipeline to analyse a single multiomics sample."
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "private"
|
||||
target: "private"
|
||||
dependencies:
|
||||
- name: "metadata/add_id"
|
||||
repository:
|
||||
type: "local"
|
||||
- name: "workflows/multiomics/split_modalities"
|
||||
alias: "split_modalities_workflow"
|
||||
repository:
|
||||
type: "local"
|
||||
- name: "workflows/rna/rna_singlesample"
|
||||
repository:
|
||||
type: "local"
|
||||
- name: "workflows/prot/prot_singlesample"
|
||||
repository:
|
||||
type: "local"
|
||||
- name: "workflows/gdo/gdo_singlesample"
|
||||
repository:
|
||||
type: "local"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
runners:
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/workflows/multiomics/process_singlesample_base/config.vsh.yaml"
|
||||
runner: "nextflow"
|
||||
engine: "native"
|
||||
output: "target/_private/nextflow/workflows/multiomics/process_singlesample_base"
|
||||
executable: "target/_private/nextflow/workflows/multiomics/process_singlesample_base/main.nf"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
dependencies:
|
||||
- "target/nextflow/metadata/add_id"
|
||||
- "target/_private/nextflow/workflows/multiomics/split_modalities"
|
||||
- "target/nextflow/workflows/rna/rna_singlesample"
|
||||
- "target/nextflow/workflows/prot/prot_singlesample"
|
||||
- "target/nextflow/workflows/gdo/gdo_singlesample"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "v4.1.0"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
\nIn terms of workflows, the following has been made available, but keep in mind\
|
||||
\ that\nindividual tools and functionality can be executed as standalone components\
|
||||
\ as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
|
||||
\ * Ingestion: Read mapping and generating a count matrix.\n * Single sample\
|
||||
\ processing: cell filtering and doublet detection.\n * Multisample processing:\
|
||||
\ Count transformation, normalization, QC metric calculations.\n * Integration:\
|
||||
\ Clustering, integration and batch correction using single and multimodal methods.\n\
|
||||
\ * Downstream analysis workflows\n"
|
||||
info:
|
||||
test_resources:
|
||||
- type: "s3"
|
||||
path: "s3://openpipelines-data"
|
||||
dest: "resources_test"
|
||||
nextflow_labels_ci:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
|
||||
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
|
||||
)'"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
homepage: "https://openpipelines.bio"
|
||||
documentation: "https://openpipelines.bio/fundamentals"
|
||||
issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,126 @@
|
||||
manifest {
|
||||
name = 'workflows/multiomics/process_singlesample_base'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'v4.1.0'
|
||||
description = 'A pipeline to analyse a single multiomics sample.'
|
||||
author = 'Dorien Roosen'
|
||||
}
|
||||
|
||||
process.container = 'nextflow/bash:latest'
|
||||
|
||||
// detect tempdir
|
||||
tempDir = java.nio.file.Paths.get(
|
||||
System.getenv('NXF_TEMP') ?:
|
||||
System.getenv('VIASH_TEMP') ?:
|
||||
System.getenv('TEMPDIR') ?:
|
||||
System.getenv('TMPDIR') ?:
|
||||
'/tmp'
|
||||
).toAbsolutePath()
|
||||
|
||||
profiles {
|
||||
no_publish {
|
||||
process {
|
||||
withName: '.*' {
|
||||
publishDir = [
|
||||
enabled: false
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
mount_temp {
|
||||
docker.temp = tempDir
|
||||
podman.temp = tempDir
|
||||
charliecloud.temp = tempDir
|
||||
}
|
||||
docker {
|
||||
docker.enabled = true
|
||||
// docker.userEmulation = true
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
singularity {
|
||||
singularity.enabled = true
|
||||
singularity.autoMounts = true
|
||||
docker.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
podman {
|
||||
podman.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
shifter.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
shifter {
|
||||
shifter.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
charliecloud.enabled = false
|
||||
}
|
||||
charliecloud {
|
||||
charliecloud.enabled = true
|
||||
docker.enabled = false
|
||||
singularity.enabled = false
|
||||
podman.enabled = false
|
||||
shifter.enabled = false
|
||||
}
|
||||
}
|
||||
|
||||
process{
|
||||
withLabel: mem1gb { memory = 1000000000.B }
|
||||
withLabel: mem2gb { memory = 2000000000.B }
|
||||
withLabel: mem5gb { memory = 5000000000.B }
|
||||
withLabel: mem10gb { memory = 10000000000.B }
|
||||
withLabel: mem20gb { memory = 20000000000.B }
|
||||
withLabel: mem50gb { memory = 50000000000.B }
|
||||
withLabel: mem100gb { memory = 100000000000.B }
|
||||
withLabel: mem200gb { memory = 200000000000.B }
|
||||
withLabel: mem500gb { memory = 500000000000.B }
|
||||
withLabel: mem1tb { memory = 1000000000000.B }
|
||||
withLabel: mem2tb { memory = 2000000000000.B }
|
||||
withLabel: mem5tb { memory = 5000000000000.B }
|
||||
withLabel: mem10tb { memory = 10000000000000.B }
|
||||
withLabel: mem20tb { memory = 20000000000000.B }
|
||||
withLabel: mem50tb { memory = 50000000000000.B }
|
||||
withLabel: mem100tb { memory = 100000000000000.B }
|
||||
withLabel: mem200tb { memory = 200000000000000.B }
|
||||
withLabel: mem500tb { memory = 500000000000000.B }
|
||||
withLabel: mem1gib { memory = 1073741824.B }
|
||||
withLabel: mem2gib { memory = 2147483648.B }
|
||||
withLabel: mem4gib { memory = 4294967296.B }
|
||||
withLabel: mem8gib { memory = 8589934592.B }
|
||||
withLabel: mem16gib { memory = 17179869184.B }
|
||||
withLabel: mem32gib { memory = 34359738368.B }
|
||||
withLabel: mem64gib { memory = 68719476736.B }
|
||||
withLabel: mem128gib { memory = 137438953472.B }
|
||||
withLabel: mem256gib { memory = 274877906944.B }
|
||||
withLabel: mem512gib { memory = 549755813888.B }
|
||||
withLabel: mem1tib { memory = 1099511627776.B }
|
||||
withLabel: mem2tib { memory = 2199023255552.B }
|
||||
withLabel: mem4tib { memory = 4398046511104.B }
|
||||
withLabel: mem8tib { memory = 8796093022208.B }
|
||||
withLabel: mem16tib { memory = 17592186044416.B }
|
||||
withLabel: mem32tib { memory = 35184372088832.B }
|
||||
withLabel: mem64tib { memory = 70368744177664.B }
|
||||
withLabel: mem128tib { memory = 140737488355328.B }
|
||||
withLabel: mem256tib { memory = 281474976710656.B }
|
||||
withLabel: mem512tib { memory = 562949953421312.B }
|
||||
withLabel: cpu1 { cpus = 1 }
|
||||
withLabel: cpu2 { cpus = 2 }
|
||||
withLabel: cpu5 { cpus = 5 }
|
||||
withLabel: cpu10 { cpus = 10 }
|
||||
withLabel: cpu20 { cpus = 20 }
|
||||
withLabel: cpu50 { cpus = 50 }
|
||||
withLabel: cpu100 { cpus = 100 }
|
||||
withLabel: cpu200 { cpus = 200 }
|
||||
withLabel: cpu500 { cpus = 500 }
|
||||
withLabel: cpu1000 { cpus = 1000 }
|
||||
}
|
||||
|
||||
includeConfig("nextflow_labels.config")
|
||||
@@ -1,6 +1,6 @@
|
||||
name: "split_modalities"
|
||||
namespace: "workflows/multiomics"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
@@ -182,14 +182,14 @@ build_info:
|
||||
engine: "native"
|
||||
output: "target/_private/nextflow/workflows/multiomics/split_modalities"
|
||||
executable: "target/_private/nextflow/workflows/multiomics/split_modalities/main.nf"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
dependencies:
|
||||
- "target/nextflow/dataflow/split_modalities"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
@@ -210,7 +210,7 @@ package_config:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
@@ -219,7 +219,7 @@ package_config:
|
||||
)'"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
@@ -1,6 +1,6 @@
|
||||
// split_modalities v4.0.3
|
||||
// split_modalities v4.1.0
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
|
||||
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
|
||||
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
|
||||
|
||||
def br = java.nio.file.Files.newBufferedReader(inputFile)
|
||||
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
|
||||
def row = 0
|
||||
def header = null
|
||||
def line
|
||||
|
||||
def row = -1
|
||||
def header = null
|
||||
while (br.ready() && header == null) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect{field ->
|
||||
m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
}
|
||||
assert header != null: "CSV file should contain a header"
|
||||
|
||||
while (br.ready()) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (line == null) {
|
||||
br.close()
|
||||
break
|
||||
}
|
||||
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect{field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
while (header == null && (line = br.readLine()) != null) {
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect { field ->
|
||||
def m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
|
||||
output.add(dataMap)
|
||||
row++
|
||||
}
|
||||
assert header != null : "CSV file should contain a header"
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect { field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
|
||||
output.add(dataMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1709,10 +1704,25 @@ process publishFilesProc {
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
def infileString = infile.toString()
|
||||
def outfileString = outfile.toString()
|
||||
if (infileString != outfileString) {
|
||||
/* Trailing slashes are removed from both the source and destination arguments.
|
||||
From source arguments, this is useful when a source argument may have a trailing slash
|
||||
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
|
||||
the symbolic link.
|
||||
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
|
||||
|
||||
For the destination path, adding a trailing slash is a problem when publishing directories:
|
||||
it requires the destination directory to exist. This fails because we only create the parent
|
||||
directories first.
|
||||
*/
|
||||
def regexTrailingSlashes = ~/\/+$/
|
||||
def infileNoTrailingSlash = infileString - regexTrailingSlashes
|
||||
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
|
||||
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
@@ -3035,7 +3045,7 @@ meta = [
|
||||
"config": processConfig(readJsonBlob('''{
|
||||
"name" : "split_modalities",
|
||||
"namespace" : "workflows/multiomics",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"authors" : [
|
||||
{
|
||||
"name" : "Dries Schaumont",
|
||||
@@ -3273,13 +3283,13 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "native",
|
||||
"output" : "/workdir/root/repo/target/_private/nextflow/workflows/multiomics/split_modalities",
|
||||
"viash_version" : "0.9.4",
|
||||
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
|
||||
"viash_version" : "0.9.7",
|
||||
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
|
||||
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "openpipeline",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
|
||||
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
|
||||
"info" : {
|
||||
@@ -3297,14 +3307,14 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.4",
|
||||
"viash_version" : "0.9.7",
|
||||
"source" : "/workdir/root/repo/src",
|
||||
"target" : "/workdir/root/repo/target",
|
||||
"config_mods" : [
|
||||
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
|
||||
".engines += { type: \\"native\\" }",
|
||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
],
|
||||
"keywords" : [
|
||||
"single-cell",
|
||||
@@ -2,7 +2,7 @@ manifest {
|
||||
name = 'workflows/multiomics/split_modalities'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'v4.0.3'
|
||||
version = 'v4.1.0'
|
||||
description = 'A pipeline to split a multimodal mudata files into several unimodal mudata files.'
|
||||
author = 'Dries Schaumont'
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
name: "log_normalize"
|
||||
namespace: "workflows/rna"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
@@ -197,8 +197,8 @@ build_info:
|
||||
engine: "native"
|
||||
output: "target/_private/nextflow/workflows/rna/log_normalize"
|
||||
executable: "target/_private/nextflow/workflows/rna/log_normalize/main.nf"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
dependencies:
|
||||
- "target/nextflow/transform/normalize_total"
|
||||
@@ -206,7 +206,7 @@ build_info:
|
||||
- "target/nextflow/transform/delete_layer"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
@@ -227,7 +227,7 @@ package_config:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
@@ -236,7 +236,7 @@ package_config:
|
||||
)'"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
@@ -1,6 +1,6 @@
|
||||
// log_normalize v4.0.3
|
||||
// log_normalize v4.1.0
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
|
||||
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
|
||||
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
|
||||
|
||||
def br = java.nio.file.Files.newBufferedReader(inputFile)
|
||||
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
|
||||
def row = 0
|
||||
def header = null
|
||||
def line
|
||||
|
||||
def row = -1
|
||||
def header = null
|
||||
while (br.ready() && header == null) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect{field ->
|
||||
m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
}
|
||||
assert header != null: "CSV file should contain a header"
|
||||
|
||||
while (br.ready()) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (line == null) {
|
||||
br.close()
|
||||
break
|
||||
}
|
||||
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect{field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
while (header == null && (line = br.readLine()) != null) {
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect { field ->
|
||||
def m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
|
||||
output.add(dataMap)
|
||||
row++
|
||||
}
|
||||
assert header != null : "CSV file should contain a header"
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect { field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
|
||||
output.add(dataMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1709,10 +1704,25 @@ process publishFilesProc {
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
def infileString = infile.toString()
|
||||
def outfileString = outfile.toString()
|
||||
if (infileString != outfileString) {
|
||||
/* Trailing slashes are removed from both the source and destination arguments.
|
||||
From source arguments, this is useful when a source argument may have a trailing slash
|
||||
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
|
||||
the symbolic link.
|
||||
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
|
||||
|
||||
For the destination path, adding a trailing slash is a problem when publishing directories:
|
||||
it requires the destination directory to exist. This fails because we only create the parent
|
||||
directories first.
|
||||
*/
|
||||
def regexTrailingSlashes = ~/\/+$/
|
||||
def infileNoTrailingSlash = infileString - regexTrailingSlashes
|
||||
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
|
||||
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
@@ -3035,7 +3045,7 @@ meta = [
|
||||
"config": processConfig(readJsonBlob('''{
|
||||
"name" : "log_normalize",
|
||||
"namespace" : "workflows/rna",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"authors" : [
|
||||
{
|
||||
"name" : "Dries Schaumont",
|
||||
@@ -3293,13 +3303,13 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "native",
|
||||
"output" : "/workdir/root/repo/target/_private/nextflow/workflows/rna/log_normalize",
|
||||
"viash_version" : "0.9.4",
|
||||
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
|
||||
"viash_version" : "0.9.7",
|
||||
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
|
||||
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "openpipeline",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
|
||||
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
|
||||
"info" : {
|
||||
@@ -3317,14 +3327,14 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.4",
|
||||
"viash_version" : "0.9.7",
|
||||
"source" : "/workdir/root/repo/src",
|
||||
"target" : "/workdir/root/repo/target",
|
||||
"config_mods" : [
|
||||
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
|
||||
".engines += { type: \\"native\\" }",
|
||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
],
|
||||
"keywords" : [
|
||||
"single-cell",
|
||||
@@ -2,7 +2,7 @@ manifest {
|
||||
name = 'workflows/rna/log_normalize'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'v4.0.3'
|
||||
version = 'v4.1.0'
|
||||
description = 'Performs normalization and subsequent log-transformation of raw count data.'
|
||||
author = 'Dries Schaumont'
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
name: "leiden"
|
||||
namespace: "cluster"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
authors:
|
||||
- name: "Dries De Maeyer"
|
||||
roles:
|
||||
@@ -77,6 +77,41 @@ argument_groups:
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--flavor"
|
||||
description: "Which package's implementation to use.\n"
|
||||
info: null
|
||||
default:
|
||||
- "leidenalg"
|
||||
required: false
|
||||
choices:
|
||||
- "leidenalg"
|
||||
- "igraph"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--n_iterations"
|
||||
description: "How many iterations of the Leiden clustering algorithm to perform.\n\
|
||||
When defined, positive values above 2 define the total number of iterations\
|
||||
\ to perform.\nWhen not set, the algorithm will run until it reaches its optimal\
|
||||
\ clustering.\n"
|
||||
info: null
|
||||
required: false
|
||||
min: 1
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--seed"
|
||||
description: "Fix the initialization of the optimization. Can be used to increase\
|
||||
\ reproducibility.\n"
|
||||
info: null
|
||||
required: false
|
||||
min: 0
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--resolution"
|
||||
description: "A parameter value controlling the coarseness of the clustering.\
|
||||
@@ -91,8 +126,8 @@ argument_groups:
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
|
||||
By default no compression is applied.\n"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata H5\
|
||||
\ files.\nBy default no compression is applied.\n"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
@@ -215,7 +250,7 @@ engines:
|
||||
id: "docker"
|
||||
image: "python:3.13-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v4.0.3"
|
||||
target_tag: "v4.1.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
@@ -225,11 +260,12 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.12.7"
|
||||
- "anndata~=0.12.16"
|
||||
- "awkward"
|
||||
- "mudata~=0.3.2"
|
||||
- "scipy~=1.17.1"
|
||||
- "mudata~=0.3.8"
|
||||
- "scanpy~=1.11.4"
|
||||
- "leidenalg~=0.10.0"
|
||||
- "leidenalg~=0.11.0"
|
||||
script:
|
||||
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
|
||||
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
|
||||
@@ -243,7 +279,7 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
upgrade: true
|
||||
@@ -257,12 +293,12 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/cluster/leiden"
|
||||
executable: "target/nextflow/cluster/leiden/main.nf"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
@@ -283,7 +319,7 @@ package_config:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
@@ -292,7 +328,7 @@ package_config:
|
||||
)'"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
@@ -1,6 +1,6 @@
|
||||
// leiden v4.0.3
|
||||
// leiden v4.1.0
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
|
||||
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
|
||||
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
|
||||
|
||||
def br = java.nio.file.Files.newBufferedReader(inputFile)
|
||||
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
|
||||
def row = 0
|
||||
def header = null
|
||||
def line
|
||||
|
||||
def row = -1
|
||||
def header = null
|
||||
while (br.ready() && header == null) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect{field ->
|
||||
m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
}
|
||||
assert header != null: "CSV file should contain a header"
|
||||
|
||||
while (br.ready()) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (line == null) {
|
||||
br.close()
|
||||
break
|
||||
}
|
||||
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect{field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
while (header == null && (line = br.readLine()) != null) {
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect { field ->
|
||||
def m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
|
||||
output.add(dataMap)
|
||||
row++
|
||||
}
|
||||
assert header != null : "CSV file should contain a header"
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect { field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
|
||||
output.add(dataMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1709,10 +1704,25 @@ process publishFilesProc {
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
def infileString = infile.toString()
|
||||
def outfileString = outfile.toString()
|
||||
if (infileString != outfileString) {
|
||||
/* Trailing slashes are removed from both the source and destination arguments.
|
||||
From source arguments, this is useful when a source argument may have a trailing slash
|
||||
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
|
||||
the symbolic link.
|
||||
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
|
||||
|
||||
For the destination path addding a trailing slash is a problem when publishing directories:
|
||||
it requires the destination directory to exist. This fails because we only create the parent
|
||||
directories first.
|
||||
*/
|
||||
def regexTrailingSlashes = ~/\/+$/
|
||||
def infileNoTrailingSlash = infileString - regexTrailingSlashes
|
||||
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
|
||||
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
@@ -3035,7 +3045,7 @@ meta = [
|
||||
"config": processConfig(readJsonBlob('''{
|
||||
"name" : "leiden",
|
||||
"namespace" : "cluster",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"authors" : [
|
||||
{
|
||||
"name" : "Dries De Maeyer",
|
||||
@@ -3133,6 +3143,42 @@ meta = [
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "string",
|
||||
"name" : "--flavor",
|
||||
"description" : "Which package's implementation to use.\n",
|
||||
"default" : [
|
||||
"leidenalg"
|
||||
],
|
||||
"required" : false,
|
||||
"choices" : [
|
||||
"leidenalg",
|
||||
"igraph"
|
||||
],
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "integer",
|
||||
"name" : "--n_iterations",
|
||||
"description" : "How many iterations of the Leiden clustering algorithm to perform.\nWhen defined, positive values above 2 define the total number of iterations to perform.\nWhen not set, the algorithm will run until it reaches its optimal clustering.\n",
|
||||
"required" : false,
|
||||
"min" : 1,
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "integer",
|
||||
"name" : "--seed",
|
||||
"description" : "Fix the initialization of the optimization. Can be used to increase reproducibility.\n",
|
||||
"required" : false,
|
||||
"min" : 0,
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "double",
|
||||
"name" : "--resolution",
|
||||
@@ -3148,7 +3194,7 @@ meta = [
|
||||
{
|
||||
"type" : "string",
|
||||
"name" : "--output_compression",
|
||||
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
|
||||
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
|
||||
"example" : [
|
||||
"gzip"
|
||||
],
|
||||
@@ -3294,7 +3340,7 @@ meta = [
|
||||
"id" : "docker",
|
||||
"image" : "python:3.13-slim",
|
||||
"target_registry" : "images.viash-hub.com",
|
||||
"target_tag" : "v4.0.3",
|
||||
"target_tag" : "v4.1.0",
|
||||
"namespace_separator" : "/",
|
||||
"setup" : [
|
||||
{
|
||||
@@ -3308,11 +3354,12 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"anndata~=0.12.7",
|
||||
"anndata~=0.12.16",
|
||||
"awkward",
|
||||
"mudata~=0.3.2",
|
||||
"scipy~=1.17.1",
|
||||
"mudata~=0.3.8",
|
||||
"scanpy~=1.11.4",
|
||||
"leidenalg~=0.10.0"
|
||||
"leidenalg~=0.11.0"
|
||||
],
|
||||
"script" : [
|
||||
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
|
||||
@@ -3332,7 +3379,7 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"viashpy==0.8.0"
|
||||
"viashpy==0.10.0"
|
||||
],
|
||||
"github" : [
|
||||
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
@@ -3351,13 +3398,13 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "/workdir/root/repo/target/nextflow/cluster/leiden",
|
||||
"viash_version" : "0.9.4",
|
||||
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
|
||||
"viash_version" : "0.9.7",
|
||||
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
|
||||
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "openpipeline",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
|
||||
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
|
||||
"info" : {
|
||||
@@ -3375,14 +3422,14 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.4",
|
||||
"viash_version" : "0.9.7",
|
||||
"source" : "/workdir/root/repo/src",
|
||||
"target" : "/workdir/root/repo/target",
|
||||
"config_mods" : [
|
||||
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
|
||||
".engines += { type: \\"native\\" }",
|
||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
],
|
||||
"keywords" : [
|
||||
"single-cell",
|
||||
@@ -3416,7 +3463,6 @@ import os
|
||||
import time
|
||||
import logging
|
||||
import logging.handlers
|
||||
import warnings
|
||||
import h5py
|
||||
import mudata as mu
|
||||
import pandas as pd
|
||||
@@ -3438,6 +3484,9 @@ par = {
|
||||
'obsp_connectivities': $( if [ ! -z ${VIASH_PAR_OBSP_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_OBSP_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'obsm_name': $( if [ ! -z ${VIASH_PAR_OBSM_NAME+x} ]; then echo "r'${VIASH_PAR_OBSM_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'flavor': $( if [ ! -z ${VIASH_PAR_FLAVOR+x} ]; then echo "r'${VIASH_PAR_FLAVOR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'n_iterations': $( if [ ! -z ${VIASH_PAR_N_ITERATIONS+x} ]; then echo "int(r'${VIASH_PAR_N_ITERATIONS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
|
||||
'seed': $( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "int(r'${VIASH_PAR_SEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
|
||||
'resolution': $( if [ ! -z ${VIASH_PAR_RESOLUTION+x} ]; then echo "list(map(float, r'${VIASH_PAR_RESOLUTION//\\'/\\'\\"\\'\\"r\\'}'.split(';')))"; else echo None; fi ),
|
||||
'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi )
|
||||
}
|
||||
@@ -3473,6 +3522,9 @@ from compress_h5mu import compress_h5mu
|
||||
|
||||
_shared_logger_name = "leiden"
|
||||
|
||||
if not par["n_iterations"]:
|
||||
par["n_iterations"] = -1
|
||||
|
||||
|
||||
# Function to check available space in /dev/shm
|
||||
def get_available_shared_memory():
|
||||
@@ -3571,18 +3623,18 @@ def run_single_resolution(shared_csr_matrix, obs_names, resolution):
|
||||
try:
|
||||
connectivities = shared_csr_matrix.to_csr_matrix()
|
||||
adata = create_empty_anndata_with_connectivities(connectivities, obs_names)
|
||||
with warnings.catch_warnings():
|
||||
# In the future, the default backend for leiden will be igraph instead of leidenalg.
|
||||
warnings.simplefilter(action="ignore", category=FutureWarning)
|
||||
adata_out = sc.tl.leiden(
|
||||
adata,
|
||||
resolution=resolution,
|
||||
key_added=str(resolution),
|
||||
obsp="connectivities",
|
||||
copy=True,
|
||||
)
|
||||
sc.tl.leiden(
|
||||
adata,
|
||||
resolution=resolution,
|
||||
key_added=str(resolution),
|
||||
obsp="connectivities",
|
||||
flavor=par["flavor"],
|
||||
n_iterations=par["n_iterations"],
|
||||
random_state=par["seed"],
|
||||
copy=False, # A copy was already created above
|
||||
)
|
||||
logger.info(f"Returning result for resolution {resolution}")
|
||||
return adata_out.obs[str(resolution)]
|
||||
return adata.obs[str(resolution)]
|
||||
finally:
|
||||
obs_names.shm.close()
|
||||
shared_csr_matrix.close()
|
||||
@@ -4147,7 +4199,7 @@ meta["defaults"] = [
|
||||
"container" : {
|
||||
"registry" : "images.viash-hub.com",
|
||||
"image" : "vsh/openpipeline/cluster/leiden",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
},
|
||||
"label" : [
|
||||
"highcpu",
|
||||
@@ -2,7 +2,7 @@ manifest {
|
||||
name = 'cluster/leiden'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'v4.0.3'
|
||||
version = 'v4.1.0'
|
||||
description = 'Cluster cells using the [Leiden algorithm] [Traag18] implemented in the [Scanpy framework] [Wolf18]. \nLeiden is an improved version of the [Louvain algorithm] [Blondel08]. \nIt has been proposed for single-cell analysis by [Levine15] [Levine15]. \nThis requires having ran `neighbors/find_neighbors` or `neighbors/bbknn` first.\n\n[Blondel08]: Blondel et al. (2008), Fast unfolding of communities in large networks, J. Stat. Mech. \n[Levine15]: Levine et al. (2015), Data-Driven Phenotypic Dissection of AML Reveals Progenitor-like Cells that Correlate with Prognosis, Cell. \n[Traag18]: Traag et al. (2018), From Louvain to Leiden: guaranteeing well-connected communities arXiv. \n[Wolf18]: Wolf et al. (2018), Scanpy: large-scale single-cell gene expression data analysis, Genome Biology. \n'
|
||||
author = 'Dries De Maeyer'
|
||||
}
|
||||
@@ -41,6 +41,26 @@
|
||||
"help_text": "Type: `string`, multiple: `False`, default: `\"leiden\"`. ",
|
||||
"default": "leiden"
|
||||
},
|
||||
"flavor": {
|
||||
"type": "string",
|
||||
"description": "Which package's implementation to use.\n",
|
||||
"help_text": "Type: `string`, multiple: `False`, default: `\"leidenalg\"`, choices: ``leidenalg`, `igraph``. ",
|
||||
"enum": [
|
||||
"leidenalg",
|
||||
"igraph"
|
||||
],
|
||||
"default": "leidenalg"
|
||||
},
|
||||
"n_iterations": {
|
||||
"type": "integer",
|
||||
"description": "How many iterations of the Leiden clustering algorithm to perform.\nWhen defined, positive values above 2 define the total number of iterations to perform.\nWhen not set, the algorithm will run until it reaches its optimal clustering.\n",
|
||||
"help_text": "Type: `integer`, multiple: `False`. "
|
||||
},
|
||||
"seed": {
|
||||
"type": "integer",
|
||||
"description": "Fix the initialization of the optimization",
|
||||
"help_text": "Type: `integer`, multiple: `False`. "
|
||||
},
|
||||
"resolution": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
@@ -54,7 +74,7 @@
|
||||
},
|
||||
"output_compression": {
|
||||
"type": "string",
|
||||
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
|
||||
"description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
|
||||
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
|
||||
"enum": [
|
||||
"gzip",
|
||||
@@ -1,6 +1,6 @@
|
||||
name: "concatenate_h5mu"
|
||||
namespace: "dataflow"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
@@ -136,8 +136,8 @@ argument_groups:
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
|
||||
By default no compression is applied.\n"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata H5\
|
||||
\ files.\nBy default no compression is applied.\n"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
@@ -253,7 +253,7 @@ engines:
|
||||
id: "docker"
|
||||
image: "python:3.13-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v4.0.3"
|
||||
target_tag: "v4.1.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
@@ -263,9 +263,10 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.12.7"
|
||||
- "anndata~=0.12.16"
|
||||
- "awkward"
|
||||
- "mudata~=0.3.2"
|
||||
- "scipy~=1.17.1"
|
||||
- "mudata~=0.3.8"
|
||||
script:
|
||||
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
|
||||
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
|
||||
@@ -279,14 +280,14 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
upgrade: true
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
- "viashpy==0.10.0"
|
||||
- "pytest-benchmark"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
@@ -299,12 +300,12 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/dataflow/concatenate_h5mu"
|
||||
executable: "target/nextflow/dataflow/concatenate_h5mu/main.nf"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
@@ -325,7 +326,7 @@ package_config:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
@@ -334,7 +335,7 @@ package_config:
|
||||
)'"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
@@ -1,6 +1,6 @@
|
||||
// concatenate_h5mu v4.0.3
|
||||
// concatenate_h5mu v4.1.0
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
|
||||
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
|
||||
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
|
||||
|
||||
def br = java.nio.file.Files.newBufferedReader(inputFile)
|
||||
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
|
||||
def row = 0
|
||||
def header = null
|
||||
def line
|
||||
|
||||
def row = -1
|
||||
def header = null
|
||||
while (br.ready() && header == null) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect{field ->
|
||||
m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
}
|
||||
assert header != null: "CSV file should contain a header"
|
||||
|
||||
while (br.ready()) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (line == null) {
|
||||
br.close()
|
||||
break
|
||||
}
|
||||
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect{field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
while (header == null && (line = br.readLine()) != null) {
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect { field ->
|
||||
def m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
|
||||
output.add(dataMap)
|
||||
row++
|
||||
}
|
||||
assert header != null : "CSV file should contain a header"
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect { field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
|
||||
output.add(dataMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1709,10 +1704,25 @@ process publishFilesProc {
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
def infileString = infile.toString()
|
||||
def outfileString = outfile.toString()
|
||||
if (infileString != outfileString) {
|
||||
/* Trailing slashes are removed from both the source and destination arguments.
|
||||
From source arguments, this is useful when a source argument may have a trailing slash
|
||||
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
|
||||
the symbolic link.
|
||||
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
|
||||
|
||||
For the destination path addding a trailing slash is a problem when publishing directories:
|
||||
it requires the destination directory to exist. This fails because we only create the parent
|
||||
directories first.
|
||||
*/
|
||||
def regexTrailingSlashes = ~/\/+$/
|
||||
def infileNoTrailingSlash = infileString - regexTrailingSlashes
|
||||
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
|
||||
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
@@ -3035,7 +3045,7 @@ meta = [
|
||||
"config": processConfig(readJsonBlob('''{
|
||||
"name" : "concatenate_h5mu",
|
||||
"namespace" : "dataflow",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"authors" : [
|
||||
{
|
||||
"name" : "Dries Schaumont",
|
||||
@@ -3179,7 +3189,7 @@ meta = [
|
||||
{
|
||||
"type" : "string",
|
||||
"name" : "--output_compression",
|
||||
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
|
||||
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
|
||||
"example" : [
|
||||
"gzip"
|
||||
],
|
||||
@@ -3328,7 +3338,7 @@ meta = [
|
||||
"id" : "docker",
|
||||
"image" : "python:3.13-slim",
|
||||
"target_registry" : "images.viash-hub.com",
|
||||
"target_tag" : "v4.0.3",
|
||||
"target_tag" : "v4.1.0",
|
||||
"namespace_separator" : "/",
|
||||
"setup" : [
|
||||
{
|
||||
@@ -3342,9 +3352,10 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"anndata~=0.12.7",
|
||||
"anndata~=0.12.16",
|
||||
"awkward",
|
||||
"mudata~=0.3.2"
|
||||
"scipy~=1.17.1",
|
||||
"mudata~=0.3.8"
|
||||
],
|
||||
"script" : [
|
||||
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
|
||||
@@ -3364,7 +3375,7 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"viashpy==0.8.0"
|
||||
"viashpy==0.10.0"
|
||||
],
|
||||
"github" : [
|
||||
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
@@ -3375,7 +3386,7 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"viashpy==0.8.0",
|
||||
"viashpy==0.10.0",
|
||||
"pytest-benchmark"
|
||||
],
|
||||
"upgrade" : true
|
||||
@@ -3392,13 +3403,13 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "/workdir/root/repo/target/nextflow/dataflow/concatenate_h5mu",
|
||||
"viash_version" : "0.9.4",
|
||||
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
|
||||
"viash_version" : "0.9.7",
|
||||
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
|
||||
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "openpipeline",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
|
||||
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
|
||||
"info" : {
|
||||
@@ -3416,14 +3427,14 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.4",
|
||||
"viash_version" : "0.9.7",
|
||||
"source" : "/workdir/root/repo/src",
|
||||
"target" : "/workdir/root/repo/target",
|
||||
"config_mods" : [
|
||||
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
|
||||
".engines += { type: \\"native\\" }",
|
||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
],
|
||||
"keywords" : [
|
||||
"single-cell",
|
||||
@@ -4266,7 +4277,7 @@ meta["defaults"] = [
|
||||
"container" : {
|
||||
"registry" : "images.viash-hub.com",
|
||||
"image" : "vsh/openpipeline/dataflow/concatenate_h5mu",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
},
|
||||
"label" : [
|
||||
"midcpu",
|
||||
@@ -2,7 +2,7 @@ manifest {
|
||||
name = 'dataflow/concatenate_h5mu'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'v4.0.3'
|
||||
version = 'v4.1.0'
|
||||
description = 'Concatenate observations from samples in several (uni- and/or multi-modal) MuData files into a single file.\n'
|
||||
author = 'Dries Schaumont'
|
||||
}
|
||||
@@ -85,7 +85,7 @@
|
||||
},
|
||||
"output_compression": {
|
||||
"type": "string",
|
||||
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
|
||||
"description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
|
||||
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
|
||||
"enum": [
|
||||
"gzip",
|
||||
@@ -1,6 +1,6 @@
|
||||
name: "merge"
|
||||
namespace: "dataflow"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
@@ -25,6 +25,9 @@ argument_groups:
|
||||
- "-i"
|
||||
description: "Paths to the single-modality .h5mu files that need to be combined"
|
||||
info: null
|
||||
example:
|
||||
- "/path/to/modality_1.h5mu"
|
||||
- "/path/to/modality_2.h5mu"
|
||||
default:
|
||||
- "sample_paths"
|
||||
must_exist: true
|
||||
@@ -163,7 +166,7 @@ engines:
|
||||
id: "docker"
|
||||
image: "python:3.12-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v4.0.3"
|
||||
target_tag: "v4.1.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
@@ -173,9 +176,10 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.12.7"
|
||||
- "anndata~=0.12.16"
|
||||
- "awkward"
|
||||
- "mudata~=0.3.2"
|
||||
- "scipy~=1.17.1"
|
||||
- "mudata~=0.3.8"
|
||||
script:
|
||||
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
|
||||
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
|
||||
@@ -189,7 +193,7 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
upgrade: true
|
||||
@@ -203,12 +207,12 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/dataflow/merge"
|
||||
executable: "target/nextflow/dataflow/merge/main.nf"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
@@ -229,7 +233,7 @@ package_config:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
@@ -238,7 +242,7 @@ package_config:
|
||||
)'"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
@@ -1,6 +1,6 @@
|
||||
// merge v4.0.3
|
||||
// merge v4.1.0
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
|
||||
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
|
||||
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
|
||||
|
||||
def br = java.nio.file.Files.newBufferedReader(inputFile)
|
||||
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
|
||||
def row = 0
|
||||
def header = null
|
||||
def line
|
||||
|
||||
def row = -1
|
||||
def header = null
|
||||
while (br.ready() && header == null) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect{field ->
|
||||
m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
}
|
||||
assert header != null: "CSV file should contain a header"
|
||||
|
||||
while (br.ready()) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (line == null) {
|
||||
br.close()
|
||||
break
|
||||
}
|
||||
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect{field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
while (header == null && (line = br.readLine()) != null) {
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect { field ->
|
||||
def m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
|
||||
output.add(dataMap)
|
||||
row++
|
||||
}
|
||||
assert header != null : "CSV file should contain a header"
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect { field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
|
||||
output.add(dataMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1709,10 +1704,25 @@ process publishFilesProc {
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
def infileString = infile.toString()
|
||||
def outfileString = outfile.toString()
|
||||
if (infileString != outfileString) {
|
||||
/* Trailing slashes are removed from both the source and destination arguments.
|
||||
From source arguments, this is useful when a source argument may have a trailing slash
|
||||
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
|
||||
the symbolic link.
|
||||
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
|
||||
|
||||
For the destination path, adding a trailing slash is a problem when publishing directories:
|
||||
it requires the destination directory to exist. This fails because we only create the parent
|
||||
directories first.
|
||||
*/
|
||||
def regexTrailingSlashes = ~/\/+$/
|
||||
def infileNoTrailingSlash = infileString - regexTrailingSlashes
|
||||
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
|
||||
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
@@ -3035,7 +3045,7 @@ meta = [
|
||||
"config": processConfig(readJsonBlob('''{
|
||||
"name" : "merge",
|
||||
"namespace" : "dataflow",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"authors" : [
|
||||
{
|
||||
"name" : "Dries Schaumont",
|
||||
@@ -3071,6 +3081,10 @@ meta = [
|
||||
"-i"
|
||||
],
|
||||
"description" : "Paths to the single-modality .h5mu files that need to be combined",
|
||||
"example" : [
|
||||
"/path/to/modality_1.h5mu",
|
||||
"/path/to/modality_2.h5mu"
|
||||
],
|
||||
"default" : [
|
||||
"sample_paths"
|
||||
],
|
||||
@@ -3246,7 +3260,7 @@ meta = [
|
||||
"id" : "docker",
|
||||
"image" : "python:3.12-slim",
|
||||
"target_registry" : "images.viash-hub.com",
|
||||
"target_tag" : "v4.0.3",
|
||||
"target_tag" : "v4.1.0",
|
||||
"namespace_separator" : "/",
|
||||
"setup" : [
|
||||
{
|
||||
@@ -3260,9 +3274,10 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"anndata~=0.12.7",
|
||||
"anndata~=0.12.16",
|
||||
"awkward",
|
||||
"mudata~=0.3.2"
|
||||
"scipy~=1.17.1",
|
||||
"mudata~=0.3.8"
|
||||
],
|
||||
"script" : [
|
||||
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
|
||||
@@ -3282,7 +3297,7 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"viashpy==0.8.0"
|
||||
"viashpy==0.10.0"
|
||||
],
|
||||
"github" : [
|
||||
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
@@ -3301,13 +3316,13 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "/workdir/root/repo/target/nextflow/dataflow/merge",
|
||||
"viash_version" : "0.9.4",
|
||||
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
|
||||
"viash_version" : "0.9.7",
|
||||
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
|
||||
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "openpipeline",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
|
||||
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
|
||||
"info" : {
|
||||
@@ -3325,14 +3340,14 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.4",
|
||||
"viash_version" : "0.9.7",
|
||||
"source" : "/workdir/root/repo/src",
|
||||
"target" : "/workdir/root/repo/target",
|
||||
"config_mods" : [
|
||||
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
|
||||
".engines += { type: \\"native\\" }",
|
||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
],
|
||||
"keywords" : [
|
||||
"single-cell",
|
||||
@@ -3847,7 +3862,7 @@ meta["defaults"] = [
|
||||
"container" : {
|
||||
"registry" : "images.viash-hub.com",
|
||||
"image" : "vsh/openpipeline/dataflow/merge",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
},
|
||||
"label" : [
|
||||
"singlecpu",
|
||||
@@ -2,7 +2,7 @@ manifest {
|
||||
name = 'dataflow/merge'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'v4.0.3'
|
||||
version = 'v4.1.0'
|
||||
description = 'Combine one or more single-modality .h5mu files together into one .h5mu file.\n'
|
||||
author = 'Dries Schaumont'
|
||||
}
|
||||
@@ -17,7 +17,7 @@
|
||||
"format": "path",
|
||||
"exists": true,
|
||||
"description": "Paths to the single-modality .h5mu files that need to be combined",
|
||||
"help_text": "Type: `file`, multiple: `True`, required, default: `[\"sample_paths\"]`, direction: `input`. ",
|
||||
"help_text": "Type: `file`, multiple: `True`, required, default: `[\"sample_paths\"]`, direction: `input`, example: `[\"/path/to/modality_1.h5mu\";\"/path/to/modality_2.h5mu\"]`. ",
|
||||
"default": [
|
||||
"sample_paths"
|
||||
]
|
||||
@@ -1,6 +1,6 @@
|
||||
name: "split_h5mu"
|
||||
namespace: "dataflow"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
authors:
|
||||
- name: "Dorien Roosen"
|
||||
roles:
|
||||
@@ -23,6 +23,8 @@ argument_groups:
|
||||
name: "--input"
|
||||
description: "Path to a single .h5mu file."
|
||||
info: null
|
||||
example:
|
||||
- "input.h5mu"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
@@ -90,8 +92,8 @@ argument_groups:
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
|
||||
By default no compression is applied.\n"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata H5\
|
||||
\ files.\nBy default no compression is applied.\n"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
@@ -201,9 +203,9 @@ runners:
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.12-slim"
|
||||
image: "python:3.13-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v4.0.3"
|
||||
target_tag: "v4.1.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
@@ -213,9 +215,10 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.12.7"
|
||||
- "anndata~=0.12.16"
|
||||
- "awkward"
|
||||
- "mudata~=0.3.2"
|
||||
- "scipy~=1.17.1"
|
||||
- "mudata~=0.3.8"
|
||||
script:
|
||||
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
|
||||
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
|
||||
@@ -229,7 +232,7 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
upgrade: true
|
||||
@@ -243,12 +246,12 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/dataflow/split_h5mu"
|
||||
executable: "target/nextflow/dataflow/split_h5mu/main.nf"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
@@ -269,7 +272,7 @@ package_config:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
@@ -278,7 +281,7 @@ package_config:
|
||||
)'"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
@@ -1,6 +1,6 @@
|
||||
// split_h5mu v4.0.3
|
||||
// split_h5mu v4.1.0
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
|
||||
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
|
||||
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
|
||||
|
||||
def br = java.nio.file.Files.newBufferedReader(inputFile)
|
||||
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
|
||||
def row = 0
|
||||
def header = null
|
||||
def line
|
||||
|
||||
def row = -1
|
||||
def header = null
|
||||
while (br.ready() && header == null) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect{field ->
|
||||
m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
}
|
||||
assert header != null: "CSV file should contain a header"
|
||||
|
||||
while (br.ready()) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (line == null) {
|
||||
br.close()
|
||||
break
|
||||
}
|
||||
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect{field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
while (header == null && (line = br.readLine()) != null) {
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect { field ->
|
||||
def m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
|
||||
output.add(dataMap)
|
||||
row++
|
||||
}
|
||||
assert header != null : "CSV file should contain a header"
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect { field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
|
||||
output.add(dataMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1709,10 +1704,25 @@ process publishFilesProc {
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
def infileString = infile.toString()
|
||||
def outfileString = outfile.toString()
|
||||
if (infileString != outfileString) {
|
||||
/* Trailing slashes are removed from both the source and destination arguments.
|
||||
From source arguments, this is useful when a source argument may have a trailing slash
|
||||
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
|
||||
the symbolic link.
|
||||
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
|
||||
|
||||
For the destination path, adding a trailing slash is a problem when publishing directories:
|
||||
it requires the destination directory to exist. This fails because we only create the parent
|
||||
directories first.
|
||||
*/
|
||||
def regexTrailingSlashes = ~/\/+$/
|
||||
def infileNoTrailingSlash = infileString - regexTrailingSlashes
|
||||
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
|
||||
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
@@ -3035,7 +3045,7 @@ meta = [
|
||||
"config": processConfig(readJsonBlob('''{
|
||||
"name" : "split_h5mu",
|
||||
"namespace" : "dataflow",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"authors" : [
|
||||
{
|
||||
"name" : "Dorien Roosen",
|
||||
@@ -3068,6 +3078,9 @@ meta = [
|
||||
"type" : "file",
|
||||
"name" : "--input",
|
||||
"description" : "Path to a single .h5mu file.",
|
||||
"example" : [
|
||||
"input.h5mu"
|
||||
],
|
||||
"must_exist" : true,
|
||||
"create_parent" : true,
|
||||
"required" : true,
|
||||
@@ -3147,7 +3160,7 @@ meta = [
|
||||
{
|
||||
"type" : "string",
|
||||
"name" : "--output_compression",
|
||||
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
|
||||
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
|
||||
"example" : [
|
||||
"gzip"
|
||||
],
|
||||
@@ -3283,9 +3296,9 @@ meta = [
|
||||
{
|
||||
"type" : "docker",
|
||||
"id" : "docker",
|
||||
"image" : "python:3.12-slim",
|
||||
"image" : "python:3.13-slim",
|
||||
"target_registry" : "images.viash-hub.com",
|
||||
"target_tag" : "v4.0.3",
|
||||
"target_tag" : "v4.1.0",
|
||||
"namespace_separator" : "/",
|
||||
"setup" : [
|
||||
{
|
||||
@@ -3299,9 +3312,10 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"anndata~=0.12.7",
|
||||
"anndata~=0.12.16",
|
||||
"awkward",
|
||||
"mudata~=0.3.2"
|
||||
"scipy~=1.17.1",
|
||||
"mudata~=0.3.8"
|
||||
],
|
||||
"script" : [
|
||||
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
|
||||
@@ -3321,7 +3335,7 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"viashpy==0.8.0"
|
||||
"viashpy==0.10.0"
|
||||
],
|
||||
"github" : [
|
||||
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
@@ -3340,13 +3354,13 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "/workdir/root/repo/target/nextflow/dataflow/split_h5mu",
|
||||
"viash_version" : "0.9.4",
|
||||
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
|
||||
"viash_version" : "0.9.7",
|
||||
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
|
||||
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "openpipeline",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
|
||||
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
|
||||
"info" : {
|
||||
@@ -3364,14 +3378,14 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.4",
|
||||
"viash_version" : "0.9.7",
|
||||
"source" : "/workdir/root/repo/src",
|
||||
"target" : "/workdir/root/repo/target",
|
||||
"config_mods" : [
|
||||
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
|
||||
".engines += { type: \\"native\\" }",
|
||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
],
|
||||
"keywords" : [
|
||||
"single-cell",
|
||||
@@ -3910,7 +3924,7 @@ meta["defaults"] = [
|
||||
"container" : {
|
||||
"registry" : "images.viash-hub.com",
|
||||
"image" : "vsh/openpipeline/dataflow/split_h5mu",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
},
|
||||
"label" : [
|
||||
"lowcpu",
|
||||
@@ -2,7 +2,7 @@ manifest {
|
||||
name = 'dataflow/split_h5mu'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'v4.0.3'
|
||||
version = 'v4.1.0'
|
||||
description = 'Split the samples of a single modality from a .h5mu (multimodal) sample into seperate .h5mu files based on the values of an .obs column of this modality. \n'
|
||||
author = 'Dorien Roosen'
|
||||
}
|
||||
@@ -25,7 +25,7 @@
|
||||
},
|
||||
"output_compression": {
|
||||
"type": "string",
|
||||
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
|
||||
"description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
|
||||
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
|
||||
"enum": [
|
||||
"gzip",
|
||||
@@ -44,7 +44,7 @@
|
||||
"format": "path",
|
||||
"exists": true,
|
||||
"description": "Path to a single .h5mu file.",
|
||||
"help_text": "Type: `file`, multiple: `False`, required, direction: `input`. "
|
||||
"help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. "
|
||||
},
|
||||
"modality": {
|
||||
"type": "string",
|
||||
@@ -1,6 +1,6 @@
|
||||
name: "split_modalities"
|
||||
namespace: "dataflow"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
@@ -42,6 +42,8 @@ argument_groups:
|
||||
- "-i"
|
||||
description: "Path to a single .h5mu file."
|
||||
info: null
|
||||
example:
|
||||
- "input.h5mu"
|
||||
default:
|
||||
- "sample_path"
|
||||
must_exist: true
|
||||
@@ -79,8 +81,8 @@ argument_groups:
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
|
||||
By default no compression is applied.\n"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata H5\
|
||||
\ files.\nBy default no compression is applied.\n"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
@@ -97,6 +99,8 @@ resources:
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "setup_logger.py"
|
||||
- type: "file"
|
||||
path: "mudata_opener.py"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
@@ -190,7 +194,7 @@ engines:
|
||||
id: "docker"
|
||||
image: "python:3.12-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v4.0.3"
|
||||
target_tag: "v4.1.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
@@ -200,9 +204,10 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.12.7"
|
||||
- "anndata~=0.12.16"
|
||||
- "awkward"
|
||||
- "mudata~=0.3.2"
|
||||
- "scipy~=1.17.1"
|
||||
- "mudata~=0.3.8"
|
||||
script:
|
||||
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
|
||||
\ ModuleNotFoundError:\\n exit(0)\\nelse: assert int(version(\\\"zarr\\\"\
|
||||
@@ -216,7 +221,7 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
upgrade: true
|
||||
@@ -230,12 +235,12 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/dataflow/split_modalities"
|
||||
executable: "target/nextflow/dataflow/split_modalities/main.nf"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
@@ -256,7 +261,7 @@ package_config:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
@@ -265,7 +270,7 @@ package_config:
|
||||
)'"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
@@ -1,6 +1,6 @@
|
||||
// split_modalities v4.0.3
|
||||
// split_modalities v4.1.0
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -1354,47 +1354,42 @@ def readCsv(file_path) {
|
||||
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
|
||||
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
|
||||
|
||||
def br = java.nio.file.Files.newBufferedReader(inputFile)
|
||||
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
|
||||
def row = 0
|
||||
def header = null
|
||||
def line
|
||||
|
||||
def row = -1
|
||||
def header = null
|
||||
while (br.ready() && header == null) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect{field ->
|
||||
m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
}
|
||||
assert header != null: "CSV file should contain a header"
|
||||
|
||||
while (br.ready()) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (line == null) {
|
||||
br.close()
|
||||
break
|
||||
}
|
||||
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect{field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
while (header == null && (line = br.readLine()) != null) {
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect { field ->
|
||||
def m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
|
||||
output.add(dataMap)
|
||||
row++
|
||||
}
|
||||
assert header != null : "CSV file should contain a header"
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect { field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
|
||||
output.add(dataMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1710,10 +1705,25 @@ process publishFilesProc {
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
def infileString = infile.toString()
|
||||
def outfileString = outfile.toString()
|
||||
if (infileString != outfileString) {
|
||||
/* Trailing slashes are removed from both the source and destination arguments.
|
||||
From source arguments, this is useful when a source argument may have a trailing slash
|
||||
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
|
||||
the symbolic link.
|
||||
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
|
||||
|
||||
For the destination path, adding a trailing slash is a problem when publishing directories:
|
||||
it requires the destination directory to exist. This fails because we only create the parent
|
||||
directories first.
|
||||
*/
|
||||
def regexTrailingSlashes = ~/\/+$/
|
||||
def infileNoTrailingSlash = infileString - regexTrailingSlashes
|
||||
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
|
||||
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
@@ -3036,7 +3046,7 @@ meta = [
|
||||
"config": processConfig(readJsonBlob('''{
|
||||
"name" : "split_modalities",
|
||||
"namespace" : "dataflow",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"authors" : [
|
||||
{
|
||||
"name" : "Dries Schaumont",
|
||||
@@ -3099,6 +3109,9 @@ meta = [
|
||||
"-i"
|
||||
],
|
||||
"description" : "Path to a single .h5mu file.",
|
||||
"example" : [
|
||||
"input.h5mu"
|
||||
],
|
||||
"default" : [
|
||||
"sample_path"
|
||||
],
|
||||
@@ -3143,7 +3156,7 @@ meta = [
|
||||
{
|
||||
"type" : "string",
|
||||
"name" : "--output_compression",
|
||||
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
|
||||
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
|
||||
"example" : [
|
||||
"gzip"
|
||||
],
|
||||
@@ -3169,6 +3182,10 @@ meta = [
|
||||
"type" : "file",
|
||||
"path" : "/src/utils/setup_logger.py"
|
||||
},
|
||||
{
|
||||
"type" : "file",
|
||||
"path" : "/src/utils/mudata_opener.py"
|
||||
},
|
||||
{
|
||||
"type" : "file",
|
||||
"path" : "/src/workflows/utils/labels.config",
|
||||
@@ -3280,7 +3297,7 @@ meta = [
|
||||
"id" : "docker",
|
||||
"image" : "python:3.12-slim",
|
||||
"target_registry" : "images.viash-hub.com",
|
||||
"target_tag" : "v4.0.3",
|
||||
"target_tag" : "v4.1.0",
|
||||
"namespace_separator" : "/",
|
||||
"setup" : [
|
||||
{
|
||||
@@ -3294,9 +3311,10 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"anndata~=0.12.7",
|
||||
"anndata~=0.12.16",
|
||||
"awkward",
|
||||
"mudata~=0.3.2"
|
||||
"scipy~=1.17.1",
|
||||
"mudata~=0.3.8"
|
||||
],
|
||||
"script" : [
|
||||
"exec(\\"try:\\\\n import zarr; from importlib.metadata import version\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: assert int(version(\\\\\\"zarr\\\\\\").partition(\\\\\\".\\\\\\")[0]) > 2\\")"
|
||||
@@ -3316,7 +3334,7 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"viashpy==0.8.0"
|
||||
"viashpy==0.10.0"
|
||||
],
|
||||
"github" : [
|
||||
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
@@ -3335,13 +3353,13 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "/workdir/root/repo/target/nextflow/dataflow/split_modalities",
|
||||
"viash_version" : "0.9.4",
|
||||
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
|
||||
"viash_version" : "0.9.7",
|
||||
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
|
||||
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "openpipeline",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
|
||||
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
|
||||
"info" : {
|
||||
@@ -3359,14 +3377,14 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.4",
|
||||
"viash_version" : "0.9.7",
|
||||
"source" : "/workdir/root/repo/src",
|
||||
"target" : "/workdir/root/repo/target",
|
||||
"config_mods" : [
|
||||
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
|
||||
".engines += { type: \\"native\\" }",
|
||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
],
|
||||
"keywords" : [
|
||||
"single-cell",
|
||||
@@ -3395,11 +3413,14 @@ def innerWorkflowFactory(args) {
|
||||
tempscript=".viash_script.py"
|
||||
cat > "$tempscript" << VIASHMAIN
|
||||
from __future__ import annotations
|
||||
from functools import partial
|
||||
import sys
|
||||
import mudata as md
|
||||
import anndata as ad
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
|
||||
|
||||
### VIASH START
|
||||
# The following code has been auto-generated by Viash.
|
||||
par = {
|
||||
@@ -3436,6 +3457,7 @@ dep = {
|
||||
|
||||
sys.path.append(meta["resources_dir"])
|
||||
from setup_logger import setup_logger
|
||||
from mudata_opener import mudata_opener
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -3447,36 +3469,54 @@ def main() -> None:
|
||||
logger.info("Creating %s", output_dir)
|
||||
output_dir.mkdir(parents=True)
|
||||
|
||||
logger.info("Reading input file '%s'", par["input"])
|
||||
input_file = Path(par["input"].strip())
|
||||
sample = md.read_h5mu(input_file)
|
||||
|
||||
logger.info("Creating output types CSV.")
|
||||
modalities = list(sample.mod.keys())
|
||||
|
||||
logger.info("Found the following modalities:\\\\n%s", "\\\\n".join(modalities))
|
||||
names = {mod_name: f"{input_file.stem}_{mod_name}.h5mu" for mod_name in modalities}
|
||||
output_files = list(names.values())
|
||||
logger.info(
|
||||
"Will be creating the following output .h5mu files:\\\\n%s",
|
||||
"\\\\n".join(output_files),
|
||||
)
|
||||
df = pd.DataFrame({"name": modalities, "filename": output_files})
|
||||
logger.info("Writing output_types CSV file to '%s'.", par["output_types"])
|
||||
df.to_csv(par["output_types"], index=False)
|
||||
|
||||
logger.info("Splitting input file into unimodal output files.")
|
||||
for mod_name, mod in sample.mod.items():
|
||||
logger.info("Processing modality '%s'", mod_name)
|
||||
new_sample = md.MuData({mod_name: mod})
|
||||
input_file = Path(par["input"])
|
||||
logger.info("Checking which modalities exist for '%s'", par["input"])
|
||||
with mudata_opener(input_file, mode="r") as (open_mudata, input_is_zarr):
|
||||
logger.info(
|
||||
"Writing to '%s', with compression '%s'",
|
||||
names[mod_name],
|
||||
par["output_compression"],
|
||||
"Opened %s in %s format.", par["input"], "zarr" if input_is_zarr else "h5"
|
||||
)
|
||||
new_sample.write_h5mu(
|
||||
output_dir / names[mod_name], compression=par["output_compression"]
|
||||
|
||||
modalities = list(open_mudata["mod"].keys())
|
||||
logger.info("Found the following modalities:\\\\n%s", "\\\\n".join(modalities))
|
||||
|
||||
logger.info("Creating output types CSV.")
|
||||
output_extension = "zarr" if input_is_zarr else "h5mu"
|
||||
names = {
|
||||
mod_name: f"{input_file.stem}_{mod_name}.{output_extension}"
|
||||
for mod_name in modalities
|
||||
}
|
||||
output_files = list(names.values())
|
||||
logger.info(
|
||||
"Will be creating the following output .%s files:\\\\n%s",
|
||||
output_extension,
|
||||
"\\\\n".join(output_files),
|
||||
)
|
||||
df = pd.DataFrame({"name": modalities, "filename": output_files})
|
||||
logger.info("Writing output_types CSV file to '%s'.", par["output_types"])
|
||||
df.to_csv(par["output_types"], index=False)
|
||||
|
||||
logger.info("Splitting input file into unimodal output files.")
|
||||
for mod_name in modalities:
|
||||
logger.info("Processing modality '%s'", mod_name)
|
||||
elem_key = f"/mod/{mod_name}"
|
||||
elem = open_mudata[elem_key]
|
||||
logger.info("Reading %s", elem_key)
|
||||
new_ad = ad.io.read_elem(elem)
|
||||
logger.info("Creating MuData object.")
|
||||
new_sample = md.MuData({mod_name: new_ad})
|
||||
logger.info(
|
||||
"Writing to '%s', with compression '%s'",
|
||||
names[mod_name],
|
||||
par["output_compression"],
|
||||
)
|
||||
writer = (
|
||||
partial(md.MuData.write_zarr, zarr_format=3)
|
||||
if input_is_zarr
|
||||
else partial(
|
||||
md.MuData.write_h5mu, compression=par["output_compression"]
|
||||
)
|
||||
)
|
||||
writer(new_sample, output_dir / names[mod_name])
|
||||
logger.info("Done writing output file.")
|
||||
logger.info("Finished")
|
||||
|
||||
@@ -3865,7 +3905,7 @@ meta["defaults"] = [
|
||||
"container" : {
|
||||
"registry" : "images.viash-hub.com",
|
||||
"image" : "vsh/openpipeline/dataflow/split_modalities",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
},
|
||||
"label" : [
|
||||
"singlecpu",
|
||||
@@ -0,0 +1,27 @@
|
||||
import zarr
|
||||
import h5py
|
||||
from contextlib import contextmanager
|
||||
|
||||
|
||||
@contextmanager
def mudata_opener(file_loc, mode=None):
    """Open a MuData store as zarr, falling back to HDF5, and close it on exit.

    The location is first opened with ``zarr.open(..., zarr_format=3)``. If that
    raises ``zarr.errors.GroupNotFoundError`` or ``NotADirectoryError``, the
    location is opened with ``h5py.File`` instead.

    Args:
        file_loc: Location of the store/file to open.
        mode: Open mode, forwarded unchanged to ``zarr.open`` / ``h5py.File``.

    Yields:
        A tuple ``(open_mudata, input_is_zarr)``: the opened handle and a flag
        that is ``True`` when the zarr open succeeded and ``False`` when the
        h5py fallback was used.

    Raises:
        FileNotFoundError, IsADirectoryError, KeyError: Re-raised from the h5py
            fallback with a "Could not open file" note attached.
    """
    # Opening is kept separate from the ``yield`` on purpose: exceptions raised
    # by the caller inside the ``with`` body are thrown into this generator at
    # the ``yield``. If the ``yield`` lived inside the ``try`` blocks below, a
    # caller-side error of a matching type would be mistaken for an open
    # failure and trigger the fallback (and a second ``yield``).
    input_is_zarr = True
    try:
        open_mudata = zarr.open(file_loc, zarr_format=3, mode=mode)
    except (zarr.errors.GroupNotFoundError, NotADirectoryError):
        input_is_zarr = False
        try:
            open_mudata = h5py.File(file_loc, mode=mode)
        except (FileNotFoundError, IsADirectoryError, KeyError) as e:
            e.add_note(f"Could not open file {file_loc}.")
            raise

    try:
        yield open_mudata, input_is_zarr
    finally:
        # Not every handle type exposes ``close()`` (presumably the zarr
        # object does not -- TODO confirm); a missing method is ignored.
        try:
            open_mudata.close()
        except AttributeError:
            pass
|
||||
@@ -2,7 +2,7 @@ manifest {
|
||||
name = 'dataflow/split_modalities'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'v4.0.3'
|
||||
version = 'v4.1.0'
|
||||
description = 'Split the modalities from a single .h5mu multimodal sample into seperate .h5mu files. \n'
|
||||
author = 'Dries Schaumont, Robrecht Cannoodt'
|
||||
}
|
||||
@@ -14,7 +14,7 @@
|
||||
"format": "path",
|
||||
"exists": true,
|
||||
"description": "Path to a single .h5mu file.",
|
||||
"help_text": "Type: `file`, multiple: `False`, required, default: `\"sample_path\"`, direction: `input`. ",
|
||||
"help_text": "Type: `file`, multiple: `False`, required, default: `\"sample_path\"`, direction: `input`, example: `\"input.h5mu\"`. ",
|
||||
"default": "sample_path"
|
||||
},
|
||||
"output": {
|
||||
@@ -33,7 +33,7 @@
|
||||
},
|
||||
"output_compression": {
|
||||
"type": "string",
|
||||
"description": "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
|
||||
"description": "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
|
||||
"help_text": "Type: `string`, multiple: `False`, example: `\"gzip\"`, choices: ``gzip`, `lzf``. ",
|
||||
"enum": [
|
||||
"gzip",
|
||||
@@ -1,6 +1,6 @@
|
||||
name: "pca"
|
||||
namespace: "dimred"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
authors:
|
||||
- name: "Dries De Maeyer"
|
||||
roles:
|
||||
@@ -17,6 +17,22 @@ authors:
|
||||
role: "Principal Scientist"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata H5\
|
||||
\ files.\nBy default no compression is applied.\n"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
required: false
|
||||
choices:
|
||||
- "gzip"
|
||||
- "lzf"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "inputs"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
@@ -62,6 +78,47 @@ argument_groups:
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Options"
|
||||
arguments:
|
||||
- type: "integer"
|
||||
name: "--num_components"
|
||||
description: "Number of principal components to compute. Defaults to 50, or 1\
|
||||
\ - minimum dimension size of selected representation."
|
||||
info: null
|
||||
example:
|
||||
- 25
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean_true"
|
||||
name: "--chunked"
|
||||
description: "If True, perform an incremental PCA on segments of a predefined\
|
||||
\ size. Setting this flag automatically implies zero centering.\nMust be specified\
|
||||
\ together with --chunk_size.\n"
|
||||
info: null
|
||||
direction: "input"
|
||||
- type: "integer"
|
||||
name: "--chunk_size"
|
||||
description: "Number of observations to include in each chunk. Required if chunked=True\
|
||||
\ was passed.\n"
|
||||
info: null
|
||||
required: false
|
||||
min: 2
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--seed"
|
||||
description: "Used to set the initial states for the optimization. \n"
|
||||
info: null
|
||||
required: false
|
||||
min: 0
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Outputs"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
alternatives:
|
||||
@@ -106,36 +163,11 @@ argument_groups:
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--num_components"
|
||||
description: "Number of principal components to compute. Defaults to 50, or 1\
|
||||
\ - minimum dimension size of selected representation."
|
||||
info: null
|
||||
example:
|
||||
- 25
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean_true"
|
||||
name: "--overwrite"
|
||||
description: "Allow overwriting .obsm, .varm and .uns slots."
|
||||
info: null
|
||||
direction: "input"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
|
||||
By default no compression is applied.\n"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
required: false
|
||||
choices:
|
||||
- "gzip"
|
||||
- "lzf"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
@@ -147,8 +179,7 @@ resources:
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Computes PCA coordinates, loadings and variance decomposition. Uses\
|
||||
\ the implementation of scikit-learn [Pedregosa11].\n"
|
||||
description: "Computes PCA coordinates, loadings and variance decomposition.\n"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
@@ -238,9 +269,9 @@ runners:
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.12-slim"
|
||||
image: "python:3.13-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "v4.0.3"
|
||||
target_tag: "v4.1.0"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
@@ -250,9 +281,10 @@ engines:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.12.7"
|
||||
- "anndata~=0.12.16"
|
||||
- "awkward"
|
||||
- "mudata~=0.3.2"
|
||||
- "scipy~=1.17.1"
|
||||
- "mudata~=0.3.8"
|
||||
- "scanpy~=1.11.4"
|
||||
script:
|
||||
- "exec(\"try:\\n import zarr; from importlib.metadata import version\\nexcept\
|
||||
@@ -260,10 +292,16 @@ engines:
|
||||
).partition(\\\".\\\")[0]) > 2\")"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "git"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
- "viashpy==0.10.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
@@ -275,12 +313,12 @@ build_info:
|
||||
engine: "docker|native"
|
||||
output: "target/nextflow/dimred/pca"
|
||||
executable: "target/nextflow/dimred/pca/main.nf"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "7bfad4ea12f87eca59213be3ab08deff67cc4206"
|
||||
viash_version: "0.9.7"
|
||||
git_commit: "a6499fddaa8553874585c747370f919f4d7b729c"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "v4.0.3"
|
||||
version: "v4.1.0"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
@@ -301,7 +339,7 @@ package_config:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
viash_version: "0.9.7"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
@@ -310,7 +348,7 @@ package_config:
|
||||
)'"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
@@ -1,6 +1,6 @@
|
||||
// pca v4.0.3
|
||||
// pca v4.1.0
|
||||
//
|
||||
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
|
||||
// This wrapper script is auto-generated by viash 0.9.7 and is thus a derivative
|
||||
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
|
||||
// Intuitive.
|
||||
//
|
||||
@@ -1353,47 +1353,42 @@ def readCsv(file_path) {
|
||||
def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
|
||||
def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
|
||||
|
||||
def br = java.nio.file.Files.newBufferedReader(inputFile)
|
||||
java.nio.file.Files.newBufferedReader(inputFile).withCloseable { br ->
|
||||
def row = 0
|
||||
def header = null
|
||||
def line
|
||||
|
||||
def row = -1
|
||||
def header = null
|
||||
while (br.ready() && header == null) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect{field ->
|
||||
m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
}
|
||||
assert header != null: "CSV file should contain a header"
|
||||
|
||||
while (br.ready()) {
|
||||
def line = br.readLine()
|
||||
row++
|
||||
if (line == null) {
|
||||
br.close()
|
||||
break
|
||||
}
|
||||
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect{field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
while (header == null && (line = br.readLine()) != null) {
|
||||
if (!line.startsWith("#")) {
|
||||
header = splitRegex.split(line, -1).collect { field ->
|
||||
def m = removeQuote.matcher(field)
|
||||
m.find() ? m.replaceFirst('$1') : field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
|
||||
output.add(dataMap)
|
||||
row++
|
||||
}
|
||||
assert header != null : "CSV file should contain a header"
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
row++
|
||||
if (!line.startsWith("#")) {
|
||||
def predata = splitRegex.split(line, -1)
|
||||
def data = predata.collect { field ->
|
||||
if (field == "") {
|
||||
return null
|
||||
}
|
||||
def m = removeQuote.matcher(field)
|
||||
if (m.find()) {
|
||||
return m.replaceFirst('$1')
|
||||
} else {
|
||||
return field
|
||||
}
|
||||
}
|
||||
assert header.size() == data.size() : "Row $row should contain the same number as fields as the header"
|
||||
|
||||
def dataMap = [header, data].transpose().collectEntries().findAll { it.value != null }
|
||||
output.add(dataMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1709,10 +1704,25 @@ process publishFilesProc {
|
||||
]
|
||||
.transpose()
|
||||
.collectMany{infile, outfile ->
|
||||
if (infile.toString() != outfile.toString()) {
|
||||
def infileString = infile.toString()
|
||||
def outfileString = outfile.toString()
|
||||
if (infileString != outfileString) {
|
||||
/* Trailing slashes are removed from both the source and destination arguments.
|
||||
From source arguments, this is useful when a source argument may have a trailing slash
|
||||
and specify a symbolic link to a directory. Without removing the slash, cp will dereference
|
||||
the symbolic link.
|
||||
See https://www.gnu.org/software/coreutils/manual/html_node/Trailing-slashes.html#Trailing-slashes-1
|
||||
|
||||
For the destination path, adding a trailing slash is a problem when publishing directories:
|
||||
it requires the destination directory to exist. This fails because we only create the parent
|
||||
directories first.
|
||||
*/
|
||||
def regexTrailingSlashes = ~/\/+$/
|
||||
def infileNoTrailingSlash = infileString - regexTrailingSlashes
|
||||
def outfileNoTrailingSlash = outfileString - regexTrailingSlashes
|
||||
[
|
||||
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
|
||||
"cp -r '${infile.toString()}' '${outfile.toString()}'"
|
||||
"[ -d \"\$(dirname '${outfileNoTrailingSlash}')\" ] || mkdir -p \"\$(dirname '${outfileNoTrailingSlash}')\"",
|
||||
"cp -a '${infileNoTrailingSlash}' '${outfileNoTrailingSlash}'"
|
||||
]
|
||||
} else {
|
||||
// no need to copy if infile is the same as outfile
|
||||
@@ -3035,7 +3045,7 @@ meta = [
|
||||
"config": processConfig(readJsonBlob('''{
|
||||
"name" : "pca",
|
||||
"namespace" : "dimred",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"authors" : [
|
||||
{
|
||||
"name" : "Dries De Maeyer",
|
||||
@@ -3062,6 +3072,27 @@ meta = [
|
||||
"argument_groups" : [
|
||||
{
|
||||
"name" : "Arguments",
|
||||
"arguments" : [
|
||||
{
|
||||
"type" : "string",
|
||||
"name" : "--output_compression",
|
||||
"description" : "Compression format to use for the output AnnData and/or Mudata H5 files.\nBy default no compression is applied.\n",
|
||||
"example" : [
|
||||
"gzip"
|
||||
],
|
||||
"required" : false,
|
||||
"choices" : [
|
||||
"gzip",
|
||||
"lzf"
|
||||
],
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name" : "inputs",
|
||||
"arguments" : [
|
||||
{
|
||||
"type" : "file",
|
||||
@@ -3112,7 +3143,55 @@ meta = [
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name" : "Options",
|
||||
"arguments" : [
|
||||
{
|
||||
"type" : "integer",
|
||||
"name" : "--num_components",
|
||||
"description" : "Number of principal components to compute. Defaults to 50, or 1 - minimum dimension size of selected representation.",
|
||||
"example" : [
|
||||
25
|
||||
],
|
||||
"required" : false,
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "boolean_true",
|
||||
"name" : "--chunked",
|
||||
"description" : "If True, perform an incremental PCA on segments of a predefined size. Setting this flag automatically implies zero centering.\nMust be specified together with --chunk_size.\n",
|
||||
"direction" : "input"
|
||||
},
|
||||
{
|
||||
"type" : "integer",
|
||||
"name" : "--chunk_size",
|
||||
"description" : "Number of observations to include in each chunk. Required if chunked=True was passed.\n",
|
||||
"required" : false,
|
||||
"min" : 2,
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "integer",
|
||||
"name" : "--seed",
|
||||
"description" : "Used to set the initial states for the optimization. \n",
|
||||
"required" : false,
|
||||
"min" : 0,
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name" : "Outputs",
|
||||
"arguments" : [
|
||||
{
|
||||
"type" : "file",
|
||||
"name" : "--output",
|
||||
@@ -3166,39 +3245,11 @@ meta = [
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "integer",
|
||||
"name" : "--num_components",
|
||||
"description" : "Number of principal components to compute. Defaults to 50, or 1 - minimum dimension size of selected representation.",
|
||||
"example" : [
|
||||
25
|
||||
],
|
||||
"required" : false,
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
},
|
||||
{
|
||||
"type" : "boolean_true",
|
||||
"name" : "--overwrite",
|
||||
"description" : "Allow overwriting .obsm, .varm and .uns slots.",
|
||||
"direction" : "input"
|
||||
},
|
||||
{
|
||||
"type" : "string",
|
||||
"name" : "--output_compression",
|
||||
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
|
||||
"example" : [
|
||||
"gzip"
|
||||
],
|
||||
"required" : false,
|
||||
"choices" : [
|
||||
"gzip",
|
||||
"lzf"
|
||||
],
|
||||
"direction" : "input",
|
||||
"multiple" : false,
|
||||
"multiple_sep" : ";"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -3223,7 +3274,7 @@ meta = [
|
||||
"dest" : "nextflow_labels.config"
|
||||
}
|
||||
],
|
||||
"description" : "Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n",
|
||||
"description" : "Computes PCA coordinates, loadings and variance decomposition.\n",
|
||||
"test_resources" : [
|
||||
{
|
||||
"type" : "python_script",
|
||||
@@ -3331,9 +3382,9 @@ meta = [
|
||||
{
|
||||
"type" : "docker",
|
||||
"id" : "docker",
|
||||
"image" : "python:3.12-slim",
|
||||
"image" : "python:3.13-slim",
|
||||
"target_registry" : "images.viash-hub.com",
|
||||
"target_tag" : "v4.0.3",
|
||||
"target_tag" : "v4.1.0",
|
||||
"namespace_separator" : "/",
|
||||
"setup" : [
|
||||
{
|
||||
@@ -3347,9 +3398,10 @@ meta = [
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"anndata~=0.12.7",
|
||||
"anndata~=0.12.16",
|
||||
"awkward",
|
||||
"mudata~=0.3.2",
|
||||
"scipy~=1.17.1",
|
||||
"mudata~=0.3.8",
|
||||
"scanpy~=1.11.4"
|
||||
],
|
||||
"script" : [
|
||||
@@ -3359,11 +3411,21 @@ meta = [
|
||||
}
|
||||
],
|
||||
"test_setup" : [
|
||||
{
|
||||
"type" : "apt",
|
||||
"packages" : [
|
||||
"git"
|
||||
],
|
||||
"interactive" : false
|
||||
},
|
||||
{
|
||||
"type" : "python",
|
||||
"user" : false,
|
||||
"packages" : [
|
||||
"viashpy==0.8.0"
|
||||
"viashpy==0.10.0"
|
||||
],
|
||||
"github" : [
|
||||
"openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
],
|
||||
"upgrade" : true
|
||||
}
|
||||
@@ -3379,13 +3441,13 @@ meta = [
|
||||
"runner" : "nextflow",
|
||||
"engine" : "docker|native",
|
||||
"output" : "/workdir/root/repo/target/nextflow/dimred/pca",
|
||||
"viash_version" : "0.9.4",
|
||||
"git_commit" : "7bfad4ea12f87eca59213be3ab08deff67cc4206",
|
||||
"viash_version" : "0.9.7",
|
||||
"git_commit" : "a6499fddaa8553874585c747370f919f4d7b729c",
|
||||
"git_remote" : "https://github.com/openpipelines-bio/openpipeline"
|
||||
},
|
||||
"package_config" : {
|
||||
"name" : "openpipeline",
|
||||
"version" : "v4.0.3",
|
||||
"version" : "v4.1.0",
|
||||
"summary" : "Best-practice workflows for single-cell multi-omics analyses.\n",
|
||||
"description" : "OpenPipelines are extensible single cell analysis pipelines for reproducible and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\nIn terms of workflows, the following has been made available, but keep in mind that\nindividual tools and functionality can be executed as standalone components as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n * Ingestion: Read mapping and generating a count matrix.\n * Single sample processing: cell filtering and doublet detection.\n * Multisample processing: Count transformation, normalization, QC metric calulations.\n * Integration: Clustering, integration and batch correction using single and multimodal methods.\n * Downstream analysis workflows\n",
|
||||
"info" : {
|
||||
@@ -3403,14 +3465,14 @@ meta = [
|
||||
}
|
||||
]
|
||||
},
|
||||
"viash_version" : "0.9.4",
|
||||
"viash_version" : "0.9.7",
|
||||
"source" : "/workdir/root/repo/src",
|
||||
"target" : "/workdir/root/repo/target",
|
||||
"config_mods" : [
|
||||
".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'",
|
||||
".engines += { type: \\"native\\" }",
|
||||
".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'",
|
||||
".engines[.type == 'docker'].target_tag := 'v4.0.3'"
|
||||
".engines[.type == 'docker'].target_tag := 'v4.1.0'"
|
||||
],
|
||||
"keywords" : [
|
||||
"single-cell",
|
||||
@@ -3441,22 +3503,26 @@ cat > "$tempscript" << VIASHMAIN
|
||||
import scanpy as sc
|
||||
import mudata as mu
|
||||
import sys
|
||||
import pandas as pd
|
||||
from anndata import AnnData
|
||||
|
||||
## VIASH START
|
||||
# The following code has been auto-generated by Viash.
|
||||
par = {
|
||||
'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'modality': $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo "r'${VIASH_PAR_MODALITY//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'var_input': $( if [ ! -z ${VIASH_PAR_VAR_INPUT+x} ]; then echo "r'${VIASH_PAR_VAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
|
||||
'chunked': $( if [ ! -z ${VIASH_PAR_CHUNKED+x} ]; then echo "r'${VIASH_PAR_CHUNKED//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ),
|
||||
'chunk_size': $( if [ ! -z ${VIASH_PAR_CHUNK_SIZE+x} ]; then echo "int(r'${VIASH_PAR_CHUNK_SIZE//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
|
||||
'seed': $( if [ ! -z ${VIASH_PAR_SEED+x} ]; then echo "int(r'${VIASH_PAR_SEED//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
|
||||
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'obsm_output': $( if [ ! -z ${VIASH_PAR_OBSM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OBSM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'varm_output': $( if [ ! -z ${VIASH_PAR_VARM_OUTPUT+x} ]; then echo "r'${VIASH_PAR_VARM_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'uns_output': $( if [ ! -z ${VIASH_PAR_UNS_OUTPUT+x} ]; then echo "r'${VIASH_PAR_UNS_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
'num_components': $( if [ ! -z ${VIASH_PAR_NUM_COMPONENTS+x} ]; then echo "int(r'${VIASH_PAR_NUM_COMPONENTS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
|
||||
'overwrite': $( if [ ! -z ${VIASH_PAR_OVERWRITE+x} ]; then echo "r'${VIASH_PAR_OVERWRITE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ),
|
||||
'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi )
|
||||
'overwrite': $( if [ ! -z ${VIASH_PAR_OVERWRITE+x} ]; then echo "r'${VIASH_PAR_OVERWRITE//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi )
|
||||
}
|
||||
meta = {
|
||||
'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
|
||||
@@ -3496,11 +3562,24 @@ data = mu.read_h5ad(par["input"], mod=par["modality"])
|
||||
logger.info("Computing PCA components for modality '%s'", par["modality"])
|
||||
if par["layer"] and par["layer"] not in data.layers:
|
||||
raise ValueError(f"{par['layer']} was not found in modality {par['modality']}.")
|
||||
layer = data.X if not par["layer"] else data.layers[par["layer"]]
|
||||
adata_input_layer = AnnData(layer)
|
||||
adata_input_layer.var.index = data.var.index
|
||||
|
||||
use_highly_variable = False
|
||||
chunked, chunk_size = par["chunked"], par["chunk_size"]
|
||||
if chunked:
|
||||
if not chunk_size:
|
||||
raise ValueError(
|
||||
"Requested to perform an incremental PCA "
|
||||
"('chunked'), but the chunk size is not set."
|
||||
)
|
||||
if chunk_size < par["num_components"]:
|
||||
raise ValueError(
|
||||
f"The requested chunk size ({chunk_size}) must not be smaller "
|
||||
f"than the number of components ({par['num_components']})"
|
||||
)
|
||||
|
||||
layer = data.X if not par["layer"] else data.layers[par["layer"]]
|
||||
adata_input_layer = AnnData(layer, var=pd.DataFrame([], index=data.var.index))
|
||||
|
||||
mask_var = None
|
||||
if par["var_input"]:
|
||||
if par["var_input"] not in data.var.columns:
|
||||
raise ValueError(
|
||||
@@ -3508,15 +3587,18 @@ if par["var_input"]:
|
||||
"as a selection of genes to run the PCA on, "
|
||||
f"but the column is not available for modality {par['modality']}"
|
||||
)
|
||||
use_highly_variable = True
|
||||
adata_input_layer.var["highly_variable"] = data.var[par["var_input"]]
|
||||
mask_var = data.var[par["var_input"]]
|
||||
|
||||
# run pca
|
||||
output_adata = sc.tl.pca(
|
||||
sc.tl.pca(
|
||||
adata_input_layer,
|
||||
n_comps=par["num_components"],
|
||||
copy=True,
|
||||
use_highly_variable=use_highly_variable,
|
||||
copy=False, # A copy was already created
|
||||
return_info=True,
|
||||
mask_var=mask_var,
|
||||
chunked=chunked,
|
||||
chunk_size=chunk_size,
|
||||
random_state=par["seed"],
|
||||
)
|
||||
|
||||
# store output in specific objects
|
||||
@@ -3535,11 +3617,11 @@ for parameter_name, field in check_exist_dict.items():
|
||||
)
|
||||
del getattr(data, field)[par[parameter_name]]
|
||||
|
||||
data.obsm[par["obsm_output"]] = output_adata.obsm["X_pca"]
|
||||
data.varm[par["varm_output"]] = output_adata.varm["PCs"]
|
||||
data.obsm[par["obsm_output"]] = adata_input_layer.obsm["X_pca"]
|
||||
data.varm[par["varm_output"]] = adata_input_layer.varm["PCs"]
|
||||
data.uns[par["uns_output"]] = {
|
||||
"variance": output_adata.uns["pca"]["variance"],
|
||||
"variance_ratio": output_adata.uns["pca"]["variance_ratio"],
|
||||
"variance": adata_input_layer.uns["pca"]["variance"],
|
||||
"variance_ratio": adata_input_layer.uns["pca"]["variance_ratio"],
|
||||
}
|
||||
|
||||
|
||||
@@ -3933,7 +4015,7 @@ meta["defaults"] = [
|
||||
"container" : {
|
||||
"registry" : "images.viash-hub.com",
|
||||
"image" : "vsh/openpipeline/dimred/pca",
|
||||
"tag" : "v4.0.3"
|
||||
"tag" : "v4.1.0"
|
||||
},
|
||||
"label" : [
|
||||
"highcpu",
|
||||
@@ -2,8 +2,8 @@ manifest {
|
||||
name = 'dimred/pca'
|
||||
mainScript = 'main.nf'
|
||||
nextflowVersion = '!>=20.12.1-edge'
|
||||
version = 'v4.0.3'
|
||||
description = 'Computes PCA coordinates, loadings and variance decomposition. Uses the implementation of scikit-learn [Pedregosa11].\n'
|
||||
version = 'v4.1.0'
|
||||
description = 'Computes PCA coordinates, loadings and variance decomposition.\n'
|
||||
author = 'Dries De Maeyer'
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user