From 5273fbdd05ab8d10e2e6d0836d7267288c192146 Mon Sep 17 00:00:00 2001 From: CI Date: Fri, 22 Aug 2025 15:00:12 +0000 Subject: [PATCH] Build branch build/main with version build_main (798a0cb) Build pipeline: openpipelines-bio.openpipeline-spatial.build-main-7frxn Source commit: https://github.com/openpipelines-bio/openpipeline_spatial/commit/798a0cb2692eaac648662732a05bb48f951f36a0 Source message: deploy: 5fc3bcd8432928c148e8f27d6ae49214a91add67 --- CHANGELOG.md | 2 + .../config.vsh.yaml | 75 + .../from_h5mu_to_spatialexperiment/script.R | 113 + .../from_h5mu_to_spatialexperiment/test.R | 475 ++ .../from_cells2stats_to_h5mu/.config.vsh.yaml | 2 +- .../from_cells2stats_to_h5mu | 4 +- .../from_cosmx_to_h5mu/.config.vsh.yaml | 2 +- .../from_cosmx_to_h5mu/from_cosmx_to_h5mu | 4 +- .../.config.vsh.yaml | 2 +- .../from_cosmx_to_spatialexperiment | 4 +- .../.config.vsh.yaml | 255 ++ .../from_h5mu_to_spatialexperiment | 1278 ++++++ .../nextflow_labels.config | 68 + .../from_spatialdata_to_h5mu/.config.vsh.yaml | 2 +- .../from_spatialdata_to_h5mu | 4 +- .../from_xenium_to_h5mu/.config.vsh.yaml | 2 +- .../from_xenium_to_h5mu/from_xenium_to_h5mu | 4 +- .../.config.vsh.yaml | 2 +- .../from_xenium_to_spatialdata | 4 +- .../.config.vsh.yaml | 2 +- .../from_xenium_to_spatialexperiment | 4 +- .../filter/subset_cosmx/.config.vsh.yaml | 2 +- .../filter/subset_cosmx/subset_cosmx | 4 +- .../spaceranger_count/.config.vsh.yaml | 2 +- .../spaceranger_count/spaceranger_count | 4 +- .../from_cells2stats_to_h5mu/.config.vsh.yaml | 2 +- .../convert/from_cells2stats_to_h5mu/main.nf | 2 +- .../from_cosmx_to_h5mu/.config.vsh.yaml | 2 +- .../convert/from_cosmx_to_h5mu/main.nf | 2 +- .../.config.vsh.yaml | 2 +- .../from_cosmx_to_spatialexperiment/main.nf | 2 +- .../.config.vsh.yaml | 255 ++ .../from_h5mu_to_spatialexperiment/main.nf | 4036 +++++++++++++++++ .../nextflow.config | 126 + .../nextflow_labels.config | 68 + .../nextflow_schema.json | 60 + .../from_spatialdata_to_h5mu/.config.vsh.yaml 
| 2 +- .../convert/from_spatialdata_to_h5mu/main.nf | 2 +- .../from_xenium_to_h5mu/.config.vsh.yaml | 2 +- .../convert/from_xenium_to_h5mu/main.nf | 2 +- .../.config.vsh.yaml | 2 +- .../from_xenium_to_spatialdata/main.nf | 2 +- .../.config.vsh.yaml | 2 +- .../from_xenium_to_spatialexperiment/main.nf | 2 +- .../filter/subset_cosmx/.config.vsh.yaml | 2 +- target/nextflow/filter/subset_cosmx/main.nf | 2 +- .../spaceranger_count/.config.vsh.yaml | 2 +- .../mapping/spaceranger_count/main.nf | 2 +- .../spatial_process_samples/.config.vsh.yaml | 2 +- .../spatial_process_samples/main.nf | 2 +- .../workflows/qc/spatial_qc/.config.vsh.yaml | 2 +- .../nextflow/workflows/qc/spatial_qc/main.nf | 2 +- 52 files changed, 6860 insertions(+), 49 deletions(-) create mode 100644 src/convert/from_h5mu_to_spatialexperiment/config.vsh.yaml create mode 100644 src/convert/from_h5mu_to_spatialexperiment/script.R create mode 100644 src/convert/from_h5mu_to_spatialexperiment/test.R create mode 100644 target/executable/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml create mode 100755 target/executable/convert/from_h5mu_to_spatialexperiment/from_h5mu_to_spatialexperiment create mode 100644 target/executable/convert/from_h5mu_to_spatialexperiment/nextflow_labels.config create mode 100644 target/nextflow/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml create mode 100644 target/nextflow/convert/from_h5mu_to_spatialexperiment/main.nf create mode 100644 target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow.config create mode 100644 target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow_labels.config create mode 100644 target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow_schema.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 130d5c4..5bf9cb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,3 +17,5 @@ * `workflows/qc/qc`: Added a pipeline for calculating qc metrics of spatial omics samples (PR #5). 
* `workflows/multiomics/spatial_process_samples`: Added a pipeline to pre-process multiple spatial omics samples (PR #7). + +* `convert/from_h5mu_to_spatialexperiment`: Added converter component for H5MU data to SpatialExperiment objects (PR #15). diff --git a/src/convert/from_h5mu_to_spatialexperiment/config.vsh.yaml b/src/convert/from_h5mu_to_spatialexperiment/config.vsh.yaml new file mode 100644 index 0000000..3d88cae --- /dev/null +++ b/src/convert/from_h5mu_to_spatialexperiment/config.vsh.yaml @@ -0,0 +1,75 @@ +name: "from_h5mu_to_spatialexperiment" +namespace: "convert" +scope: "public" +description: | + Converts an h5mu file into a SpatialExperiment object. +authors: + - __merge__: /src/authors/dorien_roosen.yaml + roles: [ author ] +arguments: + - name: "--input" + alternatives: ["-i"] + type: file + description: Input h5mu file + direction: input + required: true + example: input.h5mu + - name: "--modality" + type: string + required: true + default: "rna" + description: Name of the modality to be converted. + - name: "--obsm_spatial_coordinates" + type: string + required: false + description: | + Key in the .obsm field that contains the spatial coordinates. + Will be mapped to spatialCoords in the SpatialExperiment object. 
+ - name: "--output" + alternatives: ["-o"] + type: file + description: Output SpatialExperiment file + direction: output + required: true + example: output.rds +resources: + - type: r_script + path: script.R +test_resources: + - type: r_script + path: test.R + - path: /resources_test/aviti/aviti_teton_tiny.h5mu + - path: /resources_test/cosmx/Lung5_Rep2_tiny.h5mu + - path: /resources_test/xenium/xenium_tiny.h5mu + +engines: + - type: docker + image: rocker/r2u:22.04 + setup: + - type: apt + packages: + - libhdf5-dev + - libgeos-dev + - type: r + cran: [ hdf5r, SpatialExperiment ] + github: scverse/anndataR@36f3caad9a7f360165c1510bbe0c62657580415a + test_setup: + - type: docker + env: + - RETICULATE_PYTHON=/usr/bin/python + - type: apt + packages: + - python3 + - python3-pip + - python3-dev + - python-is-python3 + - type: r + cran: [ reticulate, testthat ] + - type: python + __merge__: /src/base/requirements/anndata_mudata.yaml + +runners: + - type: executable + - type: nextflow + directives: + label: [lowmem, singlecpu] \ No newline at end of file diff --git a/src/convert/from_h5mu_to_spatialexperiment/script.R b/src/convert/from_h5mu_to_spatialexperiment/script.R new file mode 100644 index 0000000..d3400f2 --- /dev/null +++ b/src/convert/from_h5mu_to_spatialexperiment/script.R @@ -0,0 +1,113 @@ +library(SpatialExperiment) +library(SingleCellExperiment) +library(hdf5r) +library(Matrix) +library(hdf5r) + +## VIASH START +par <- list( + input = "resources_test/xenium/xenium_tiny.h5mu", + output = "xenium_test.rds", + modality = "rna", + obsm_spatial_coordinates = "spatial" +) +## VIASH END + + +h5mu_to_h5ad <- function(h5mu_path, modality_name) { + tmp_path <- tempfile(fileext = ".h5ad") + mod_location <- paste("mod", modality_name, sep = "/") + h5src <- hdf5r::H5File$new(h5mu_path, "r") + h5dest <- hdf5r::H5File$new(tmp_path, "w") + # Copy over the child objects and the child attributes from root + # Root cannot be copied directly because it always exists and + # 
copying does not allow overwriting. + children <- hdf5r::list.objects(h5src, + path = mod_location, + full.names = FALSE, recursive = FALSE + ) + for (child in children) { + h5dest$obj_copy_from( + h5src, paste(mod_location, child, sep = "/"), + paste0("/", child) + ) + } + # Also copy the root attributes + root_attrs <- hdf5r::h5attr_names(x = h5src) + for (attr in root_attrs) { + h5a <- h5src$attr_open(attr_name = attr) + robj <- h5a$read() + h5dest$create_attr_by_name( + attr_name = attr, + obj_name = ".", + robj = robj, + space = h5a$get_space(), + dtype = h5a$get_type() + ) + } + h5src$close() + h5dest$close() + + tmp_path +} + +read_spatial_coordinates <- function(sce, spatial_coordinates_name) { + # Check if the specified spatial coordinates exist in reducedDims + reduced_dims <- SingleCellExperiment::reducedDims(sce) + if (par$obsm_spatial_coordinates %in% names(reduced_dims)) { + spatial_coords <- reduced_dims[[par$obsm_spatial_coordinates]] + if (ncol(spatial_coords) != 2) { + stop( + "Spatial coordinates must have 2 columns, but found ", + ncol(spatial_coords), " columns" + ) + } + # Set proper column names for spatial coordinates + colnames(spatial_coords) <- c("x", "y") + } else { + warning( + "Spatial coordinates '", par$obsm_spatial_coordinates, + "' not found in reducedDims. 
Available dimensions: ", + paste(names(reduced_dims), collapse = ", ") + ) + spatial_coords <- NULL + } + spatial_coords +} + +main <- function() { + # Convert to AnnData + cat("Converting H5MU file to H5AD...\n") + h5file <- h5mu_to_h5ad(par$input, par$modality) + + # Convert to SpatialExperiment + cat("Converting to SingleCellExperiment...\n") + sce <- anndataR::read_h5ad(h5file, as = "SingleCellExperiment") + + # Extract spatial coordinates if specified + if ( + !is.null(par$obsm_spatial_coordinates) && + length(par$obsm_spatial_coordinates) > 0 + ) { + cat("Reading in spatial coordinates...\n") + spatial_coords <- read_spatial_coordinates( + sce, par$obsm_spatial_coordinates + ) + SingleCellExperiment::reducedDims(sce)[[ + par$obsm_spatial_coordinates + ]] <- NULL + } else { + spatial_coords <- NULL + } + + # Converting SingleCellExperiment to SpatialExperiment + cat("Converting to SpatialExperiment...\n") + spe <- as(sce, "SpatialExperiment") + SpatialExperiment::spatialCoords(spe) <- spatial_coords + + # Saving SpatialExperiment object + cat("Saving SpatialExperiment object to:", par$output, "\n") + saveRDS(spe, file = par$output, compress = FALSE) +} + +main() diff --git a/src/convert/from_h5mu_to_spatialexperiment/test.R b/src/convert/from_h5mu_to_spatialexperiment/test.R new file mode 100644 index 0000000..4ebeeca --- /dev/null +++ b/src/convert/from_h5mu_to_spatialexperiment/test.R @@ -0,0 +1,475 @@ +library(testthat) +library(SpatialExperiment) +library(SingleCellExperiment) +library(hdf5r) +library(Matrix) +library(reticulate) + +mu <- reticulate::import("mudata") +ad <- reticulate::import("anndata") + +## VIASH START +meta <- list( + resources_dir = "resources_test" +) +## VIASH END + +# Helper function to create mock H5MU test data +create_mock_h5mu <- function(path) { + n_obs <- 5 + n_var_mod1 <- 4 + n_var_mod2 <- 3 + + # ============== MOD1 MODALITY ============== + + mod1_x_data <- matrix(c( + 1, 2, 3, 0, + 4, 5, 6, 2, + 0, 1, 2, 3, + 2, 0, 1, 4, + 
1, 3, 0, 2 + ), nrow = n_obs, ncol = n_var_mod1, byrow = TRUE) + + + # Create obs dataframe + mod1_obs <- data.frame( + Obs1 = c("A", "B", "A", "C", "B"), + Obs2 = c(0.9, 0.8, 0.95, 0.7, 0.85), + Obs3 = c(FALSE, FALSE, TRUE, FALSE, FALSE), + row.names = paste0("cell_", 1:n_obs), + stringsAsFactors = FALSE + ) + # Create var dataframe + mod1_var <- data.frame( + Feat1 = c("A", "B", "C", "D"), + Feat2 = c(TRUE, FALSE, TRUE, FALSE), + Feat3 = c(1.6, 2.2, 1.2, 1.8), + row.names = paste0("gene_", 1:n_var_mod1), + stringsAsFactors = FALSE + ) + + # Create layers + mod1_layers <- list( + counts = mod1_x_data * 2 + ) + + # Create obsm + obsm_1 <- matrix(c( + 100.5, 200.3, + 150.2, 180.7, + 120.8, 220.1, + 180.4, 160.9, + 200.1, 190.5 + ), nrow = n_obs, ncol = 2, byrow = TRUE) + + obsm_2 <- matrix(c( + -1.2, 0.8, 0.3, + 1.1, -0.5, -0.2, + 0.3, 1.2, 0.7, + -0.8, -0.3, 1.1, + 0.9, 0.2, -0.9 + ), nrow = n_obs, ncol = 3, byrow = TRUE) + + mod1_obsm <- list( + Obsm1 = obsm_1, + Obsm2 = obsm_2 + ) + + # Create uns (unstructured metadata) + mod1_uns <- list( + experiment_info = "metadata" + ) + + # Create AnnData object for mod1 using AnnDataR + ad_mod1 <- ad$AnnData( + X = mod1_x_data, + obs = mod1_obs, + var = mod1_var, + layers = mod1_layers, + obsm = mod1_obsm, + uns = mod1_uns + ) + + # ============== MOD2 MODALITY ============== + + # Create expression matrix + mod2_x_data <- matrix(c( + 10, 20, 15, + 25, 30, 18, + 12, 22, 20, + 18, 25, 12, + 20, 28, 16 + ), nrow = n_obs, ncol = n_var_mod2, byrow = TRUE) + + # Create obs dataframe + mod2_obs <- data.frame( + Obs = c("C", "D", "C", "E", "D"), + row.names = paste0("cell_", 1:n_obs), + stringsAsFactors = FALSE + ) + + # Create var dataframe + mod2_var <- data.frame( + Feat = c("d", "e", "g"), + row.names = paste0("protein_", 1:n_var_mod2), + stringsAsFactors = FALSE + ) + + # Create AnnData object for mod2 + ad_mod2 <- ad$AnnData( + X = mod2_x_data, + obs = mod2_obs, + var = mod2_var + ) + + # ============== CREATE MUDATA 
============== + + # Create MuData object using reticulate + mdata <- mu$MuData(list( + mod1 = ad_mod1, + mod2 = ad_mod2 + )) + + # Write Mudata to path + mdata$write_h5mu(path) + path +} + +# Main test +test_simple_execution <- function() { + cat("> > Testing Simple Conversion\n") + cat("> Creating mock H5MU file\n") + + # Create mock H5MU file + test_h5mu <- tempfile(fileext = ".h5mu") + create_mock_h5mu(test_h5mu) + + # Output file + out_rds <- tempfile(fileext = ".rds") + + # Run conversion + cat("> Running conversion\n") + out <- processx::run( + meta[["executable"]], + c( + "--input", test_h5mu, + "--modality", "mod1", + "--output", out_rds, + "--obsm_spatial_coordinates", "Obsm1" + ) + ) + + cat("> Checking execution status\n") + testthat::expect_equal(out$status, 0) + testthat::expect_true(file.exists(out_rds)) + + cat("> Reading output file\n") + spe <- readRDS(file = out_rds) + testthat::expect_s4_class(spe, "SpatialExperiment") + + cat("> Opening input file for comparison\n") + mod1 <- mu$read_h5ad(test_h5mu, mod = "mod1") + + cat("> Testing dimensions\n") + dim_spe <- dim(spe) + dim_h5mu <- dim(mod1$X) + + testthat::expect_equal(dim_spe[1], dim_h5mu[2]) + testthat::expect_equal(dim_spe[2], dim_h5mu[1]) + testthat::expect_equal(nrow(spe), 4) + testthat::expect_equal(ncol(spe), 5) + + cat("> Testing colData (obs) transfer and data types\n") + col_data <- SummarizedExperiment::colData(spe) + coldata_cols <- colnames(col_data) + obs_cols <- colnames(mod1$obs) + testthat::expect_true(all(obs_cols %in% coldata_cols)) + + # Test data types in colData + testthat::expect_true(is.factor(col_data$Obs1)) + testthat::expect_true(is.numeric(col_data$Obs2)) + testthat::expect_true(is.logical(col_data$Obs3)) + + cat("> Testing rowData (var) transfer and data types\n") + row_data <- SummarizedExperiment::rowData(spe) + row_names <- colnames(row_data) + var_cols <- colnames(mod1$var) + testthat::expect_true(all(var_cols %in% row_names)) + + # Test data types in rowData + 
testthat::expect_true(is.character(row_data$Feat1)) + testthat::expect_true(is.logical(row_data$Feat2)) + testthat::expect_true(is.numeric(row_data$Feat3)) + + cat("> Testing spatialCoords\n") + spatial_coords <- SpatialExperiment::spatialCoords(spe) + testthat::expect_false(is.null(spatial_coords)) + testthat::expect_equal(ncol(spatial_coords), 2) + testthat::expect_equal(nrow(spatial_coords), ncol(spe)) + testthat::expect_identical(colnames(spatial_coords), c("x", "y")) + + # Test spatial coordinate data types and values + testthat::expect_true(is.numeric(spatial_coords[, "x"])) + testthat::expect_true(is.numeric(spatial_coords[, "y"])) + + # Compare with original spatial coordinates + original_spatial <- mod1$obsm[["Obsm1"]] + testthat::expect_equal( + as.numeric(original_spatial), + as.numeric(spatial_coords) + ) + + cat("> Testing assay data\n") + counts_matrix <- SummarizedExperiment::assays(spe)[["counts"]] + testthat::expect_true(is(counts_matrix, "Matrix") || is.matrix(counts_matrix)) + testthat::expect_true(all(counts_matrix >= 0)) + testthat::expect_equal(dim(counts_matrix), c(4, 5)) + + cat("> Testing reducedDims\n") + # PCA should not be in reducedDims since we only specified spatial + red_dims <- SingleCellExperiment::reducedDims(spe) + testthat::expect_false(is.null(red_dims)) + testthat::expect_equal(names(red_dims), c("Obsm2")) + testthat::expect_equal(dim(red_dims$Obsm2), c(5, 3)) + testthat::expect_true(is.numeric(red_dims$Obsm2)) + + # Compare with original spatial coordinates + original_dimred <- mod1$obsm[["Obsm2"]] + testthat::expect_equal( + as.numeric(red_dims$Obsm2), + as.numeric(original_dimred) + ) + + # Clean up + unlink(c(test_h5mu, out_rds)) +} + +test_xenium_execution <- function() { + cat("> > Testing Xenium Conversion\n") + xenium_h5mu <- paste0( + meta[["resources_dir"]], + "/xenium_tiny.h5mu" + ) + + # Output file + out_rds <- tempfile(fileext = ".rds") + + # Run conversion + cat("> Running conversion\n") + out <- processx::run( 
+ meta[["executable"]], + c( + "--input", xenium_h5mu, + "--modality", "rna", + "--output", out_rds, + "--obsm_spatial_coordinates", "spatial" + ) + ) + + cat("> Checking execution status\n") + testthat::expect_equal(out$status, 0) + testthat::expect_true(file.exists(out_rds)) + + cat("> Reading output file\n") + xenium_spe <- readRDS(file = out_rds) + testthat::expect_s4_class(xenium_spe, "SpatialExperiment") + + cat("> Opening input file for comparison\n") + rna_mod <- mu$read_h5ad(xenium_h5mu, mod = "rna") + + cat("> Testing dimensions\n") + dim_spe <- dim(xenium_spe) + dim_h5mu <- dim(rna_mod$X) + + testthat::expect_equal(dim_spe[1], dim_h5mu[2]) + testthat::expect_equal(dim_spe[2], dim_h5mu[1]) + + cat("> Testing colData (obs) transfer and data types\n") + col_data <- SummarizedExperiment::colData(xenium_spe) + coldata_cols <- colnames(col_data) + obs_cols <- colnames(rna_mod$obs) + testthat::expect_true(all(obs_cols %in% coldata_cols)) + + cat("> Testing rowData (var) transfer and data types\n") + row_data <- SummarizedExperiment::rowData(xenium_spe) + row_names <- colnames(row_data) + var_cols <- colnames(rna_mod$var) + testthat::expect_true(all(var_cols %in% row_names)) + + cat("> Testing spatialCoords\n") + spatial_coords <- SpatialExperiment::spatialCoords(xenium_spe) + testthat::expect_false(is.null(spatial_coords)) + testthat::expect_equal(ncol(spatial_coords), 2) + testthat::expect_equal(nrow(spatial_coords), ncol(xenium_spe)) + testthat::expect_identical(colnames(spatial_coords), c("x", "y")) + + # Test spatial coordinate data types and values + testthat::expect_true(is.numeric(spatial_coords[, "x"])) + testthat::expect_true(is.numeric(spatial_coords[, "y"])) + + # Compare with original spatial coordinates + original_spatial <- rna_mod$obsm[["spatial"]] + testthat::expect_equal( + as.numeric(original_spatial), + as.numeric(spatial_coords) + ) + + # Clean up + unlink(c(xenium_h5mu, out_rds)) +} + +test_aviti_execution <- function() { + cat("> > Testing 
Aviti Conversion\n") + aviti_h5mu <- paste0( + meta[["resources_dir"]], + "/aviti_teton_tiny.h5mu" + ) + + # Output file + out_rds <- tempfile(fileext = ".rds") + + # Run conversion + cat("> Running conversion\n") + out <- processx::run( + meta[["executable"]], + c( + "--input", aviti_h5mu, + "--modality", "rna", + "--output", out_rds, + "--obsm_spatial_coordinates", "spatial" + ) + ) + + cat("> Checking execution status\n") + testthat::expect_equal(out$status, 0) + testthat::expect_true(file.exists(out_rds)) + + cat("> Reading output file\n") + aviti_spe <- readRDS(file = out_rds) + testthat::expect_s4_class(aviti_spe, "SpatialExperiment") + + cat("> Opening input file for comparison\n") + rna_mod <- mu$read_h5ad(aviti_h5mu, mod = "rna") + + cat("> Testing dimensions\n") + dim_spe <- dim(aviti_spe) + dim_h5mu <- dim(rna_mod$X) + + testthat::expect_equal(dim_spe[1], dim_h5mu[2]) + testthat::expect_equal(dim_spe[2], dim_h5mu[1]) + + cat("> Testing colData (obs) transfer and data types\n") + col_data <- SummarizedExperiment::colData(aviti_spe) + coldata_cols <- colnames(col_data) + obs_cols <- colnames(rna_mod$obs) + testthat::expect_true(all(obs_cols %in% coldata_cols)) + + cat("> Testing rowData (var) transfer and data types\n") + row_data <- SummarizedExperiment::rowData(aviti_spe) + row_names <- colnames(row_data) + var_cols <- colnames(rna_mod$var) + testthat::expect_true(all(var_cols %in% row_names)) + + cat("> Testing spatialCoords\n") + spatial_coords <- SpatialExperiment::spatialCoords(aviti_spe) + testthat::expect_false(is.null(spatial_coords)) + testthat::expect_equal(ncol(spatial_coords), 2) + testthat::expect_equal(nrow(spatial_coords), ncol(aviti_spe)) + testthat::expect_identical(colnames(spatial_coords), c("x", "y")) + + # Test spatial coordinate data types and values + testthat::expect_true(is.numeric(spatial_coords[, "x"])) + testthat::expect_true(is.numeric(spatial_coords[, "y"])) + + # Compare with original spatial coordinates + original_spatial 
<- rna_mod$obsm[["spatial"]] + testthat::expect_equal( + as.numeric(original_spatial), + as.numeric(spatial_coords) + ) + + # Clean up + unlink(c(aviti_h5mu, out_rds)) +} + +test_cosmx_execution <- function() { + cat("> > Testing CosMx Conversion\n") + cosmx_h5mu <- paste0( + meta[["resources_dir"]], + "/Lung5_Rep2_tiny.h5mu" + ) + + # Output file + out_rds <- tempfile(fileext = ".rds") + + # Run conversion + cat("> Running conversion\n") + out <- processx::run( + meta[["executable"]], + c( + "--input", cosmx_h5mu, + "--modality", "rna", + "--output", out_rds, + "--obsm_spatial_coordinates", "spatial" + ) + ) + + cat("> Checking execution status\n") + testthat::expect_equal(out$status, 0) + testthat::expect_true(file.exists(out_rds)) + + cat("> Reading output file\n") + cosmx_spe <- readRDS(file = out_rds) + testthat::expect_s4_class(cosmx_spe, "SpatialExperiment") + + cat("> Opening input file for comparison\n") + rna_mod <- mu$read_h5ad(cosmx_h5mu, mod = "rna") + + cat("> Testing dimensions\n") + dim_spe <- dim(cosmx_spe) + dim_h5mu <- dim(rna_mod$X) + + testthat::expect_equal(dim_spe[1], dim_h5mu[2]) + testthat::expect_equal(dim_spe[2], dim_h5mu[1]) + + cat("> Testing colData (obs) transfer and data types\n") + col_data <- SummarizedExperiment::colData(cosmx_spe) + coldata_cols <- colnames(col_data) + obs_cols <- colnames(rna_mod$obs) + testthat::expect_true(all(obs_cols %in% coldata_cols)) + + cat("> Testing rowData (var) transfer and data types\n") + row_data <- SummarizedExperiment::rowData(cosmx_spe) + row_names <- colnames(row_data) + var_cols <- colnames(rna_mod$var) + testthat::expect_true(all(var_cols %in% row_names)) + + cat("> Testing spatialCoords\n") + spatial_coords <- SpatialExperiment::spatialCoords(cosmx_spe) + testthat::expect_false(is.null(spatial_coords)) + testthat::expect_equal(ncol(spatial_coords), 2) + testthat::expect_equal(nrow(spatial_coords), ncol(cosmx_spe)) + testthat::expect_identical(colnames(spatial_coords), c("x", "y")) + + # 
Test spatial coordinate data types and values + testthat::expect_true(is.numeric(spatial_coords[, "x"])) + testthat::expect_true(is.numeric(spatial_coords[, "y"])) + + # Compare with original spatial coordinates + original_spatial <- rna_mod$obsm[["spatial"]] + testthat::expect_equal( + as.numeric(original_spatial), + as.numeric(spatial_coords) + ) + + # Clean up + unlink(c(cosmx_h5mu, out_rds)) +} + +cat("Starting tests...") +test_simple_execution() +test_xenium_execution() +test_aviti_execution() +test_cosmx_execution() + +cat("All tests completed!\n") diff --git a/target/executable/convert/from_cells2stats_to_h5mu/.config.vsh.yaml b/target/executable/convert/from_cells2stats_to_h5mu/.config.vsh.yaml index 111f86c..dc5ac3e 100644 --- a/target/executable/convert/from_cells2stats_to_h5mu/.config.vsh.yaml +++ b/target/executable/convert/from_cells2stats_to_h5mu/.config.vsh.yaml @@ -290,7 +290,7 @@ build_info: output: "target/executable/convert/from_cells2stats_to_h5mu" executable: "target/executable/convert/from_cells2stats_to_h5mu/from_cells2stats_to_h5mu" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_cells2stats_to_h5mu/from_cells2stats_to_h5mu b/target/executable/convert/from_cells2stats_to_h5mu/from_cells2stats_to_h5mu index df0115f..3afc809 100755 --- a/target/executable/convert/from_cells2stats_to_h5mu/from_cells2stats_to_h5mu +++ b/target/executable/convert/from_cells2stats_to_h5mu/from_cells2stats_to_h5mu @@ -458,9 +458,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component convert from_cells2stats_to_h5mu" -LABEL org.opencontainers.image.created="2025-08-22T08:22:46Z" +LABEL 
org.opencontainers.image.created="2025-08-22T14:30:50Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="6f308e6a3cca52f1283fbba734a3cdc858e18e1b" +LABEL org.opencontainers.image.revision="798a0cb2692eaac648662732a05bb48f951f36a0" LABEL org.opencontainers.image.version="build_main" VIASHDOCKER diff --git a/target/executable/convert/from_cosmx_to_h5mu/.config.vsh.yaml b/target/executable/convert/from_cosmx_to_h5mu/.config.vsh.yaml index 98cfee3..7130c77 100644 --- a/target/executable/convert/from_cosmx_to_h5mu/.config.vsh.yaml +++ b/target/executable/convert/from_cosmx_to_h5mu/.config.vsh.yaml @@ -225,7 +225,7 @@ build_info: output: "target/executable/convert/from_cosmx_to_h5mu" executable: "target/executable/convert/from_cosmx_to_h5mu/from_cosmx_to_h5mu" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_cosmx_to_h5mu/from_cosmx_to_h5mu b/target/executable/convert/from_cosmx_to_h5mu/from_cosmx_to_h5mu index 6aef839..36d2578 100755 --- a/target/executable/convert/from_cosmx_to_h5mu/from_cosmx_to_h5mu +++ b/target/executable/convert/from_cosmx_to_h5mu/from_cosmx_to_h5mu @@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz" LABEL org.opencontainers.image.description="Companion container for running component convert from_cosmx_to_h5mu" -LABEL org.opencontainers.image.created="2025-08-22T08:22:48Z" +LABEL org.opencontainers.image.created="2025-08-22T14:30:52Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="6f308e6a3cca52f1283fbba734a3cdc858e18e1b" +LABEL 
org.opencontainers.image.revision="798a0cb2692eaac648662732a05bb48f951f36a0" LABEL org.opencontainers.image.version="build_main" VIASHDOCKER diff --git a/target/executable/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml b/target/executable/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml index 3bd4365..964d2b5 100644 --- a/target/executable/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml +++ b/target/executable/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml @@ -232,7 +232,7 @@ build_info: output: "target/executable/convert/from_cosmx_to_spatialexperiment" executable: "target/executable/convert/from_cosmx_to_spatialexperiment/from_cosmx_to_spatialexperiment" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_cosmx_to_spatialexperiment/from_cosmx_to_spatialexperiment b/target/executable/convert/from_cosmx_to_spatialexperiment/from_cosmx_to_spatialexperiment index 5c53f8e..a976908 100755 --- a/target/executable/convert/from_cosmx_to_spatialexperiment/from_cosmx_to_spatialexperiment +++ b/target/executable/convert/from_cosmx_to_spatialexperiment/from_cosmx_to_spatialexperiment @@ -457,9 +457,9 @@ RUN Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component convert from_cosmx_to_spatialexperiment" -LABEL org.opencontainers.image.created="2025-08-22T08:22:47Z" +LABEL org.opencontainers.image.created="2025-08-22T14:30:51Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="6f308e6a3cca52f1283fbba734a3cdc858e18e1b" +LABEL 
org.opencontainers.image.revision="798a0cb2692eaac648662732a05bb48f951f36a0" LABEL org.opencontainers.image.version="build_main" VIASHDOCKER diff --git a/target/executable/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml b/target/executable/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml new file mode 100644 index 0000000..b0a1676 --- /dev/null +++ b/target/executable/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml @@ -0,0 +1,255 @@ +name: "from_h5mu_to_spatialexperiment" +namespace: "convert" +version: "build_main" +authors: +- name: "Dorien Roosen" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +argument_groups: +- name: "Arguments" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--modality" + description: "Name of the modality to be converted." + info: null + default: + - "rna" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--obsm_spatial_coordinates" + description: "Key in the .obsm field that contains the spatial coordinates. 
\n\ + Will be mapped to spatialCoords in the SpatialExperiment object.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output SpatialExperiment file" + info: null + example: + - "output.rds" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "r_script" + path: "script.R" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Converts an h5mu file into a SpatialExperiment object.\n" +test_resources: +- type: "r_script" + path: "test.R" + is_executable: true +- type: "file" + path: "aviti_teton_tiny.h5mu" +- type: "file" + path: "Lung5_Rep2_tiny.h5mu" +- type: "file" + path: "xenium_tiny.h5mu" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +repositories: +- type: "github" + name: "openpipeline" + repo: "openpipelines-bio/openpipeline" + tag: "2.1.2" +links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 
20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "rocker/r2u:22.04" + target_registry: "images.viash-hub.com" + target_tag: "build_main" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "libhdf5-dev" + - "libgeos-dev" + interactive: false + - type: "r" + cran: + - "hdf5r" + - "SpatialExperiment" + github: + - "scverse/anndataR@36f3caad9a7f360165c1510bbe0c62657580415a" + bioc_force_install: false + warnings_as_errors: true + test_setup: + - type: "docker" + env: + - "RETICULATE_PYTHON=/usr/bin/python" + - type: "apt" + packages: + - "python3" + - "python3-pip" + - "python3-dev" + - "python-is-python3" + interactive: false + - type: "r" + cran: + - "reticulate" + - "testthat" + bioc_force_install: false + 
warnings_as_errors: true + - type: "python" + user: false + packages: + - "anndata~=0.11.1" + - "mudata~=0.3.1" + script: + - "exec(\"try:\\n import awkward\\nexcept ModuleNotFoundError:\\n exit(0)\\\ + nelse: exit(1)\")" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/convert/from_h5mu_to_spatialexperiment/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/convert/from_h5mu_to_spatialexperiment" + executable: "target/executable/convert/from_h5mu_to_spatialexperiment/from_h5mu_to_spatialexperiment" + viash_version: "0.9.4" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" + git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" +package_config: + name: "openpipeline_spatial" + version: "build_main" + info: + test_resources: + - type: "s3" + path: "s3://openpipelines-bio/openpipeline_spatial/resources_test" + dest: "resources_test" + repositories: + - type: "github" + name: "openpipeline" + repo: "openpipelines-bio/openpipeline" + tag: "2.1.2" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'build_main'" + organization: "vsh" + links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" diff --git a/target/executable/convert/from_h5mu_to_spatialexperiment/from_h5mu_to_spatialexperiment b/target/executable/convert/from_h5mu_to_spatialexperiment/from_h5mu_to_spatialexperiment new file mode 100755 index 0000000..80717ef --- /dev/null +++ 
b/target/executable/convert/from_h5mu_to_spatialexperiment/from_h5mu_to_spatialexperiment @@ -0,0 +1,1278 @@ +#!/usr/bin/env bash + +# from_h5mu_to_spatialexperiment build_main +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. +# +# Component authors: +# * Dorien Roosen (author) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ 
-h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. 
+# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. +function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. 
+function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="from_h5mu_to_spatialexperiment" +VIASH_META_FUNCTIONALITY_NAME="from_h5mu_to_spatialexperiment" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? 
# returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." + fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? 
+ fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe 
]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." 
+ else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? 
: whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM rocker/r2u:22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y libhdf5-dev libgeos-dev && \ + rm -rf /var/lib/apt/lists/* + +RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' && \ + Rscript -e 'options(warn = 2); 
remotes::install_cran(c("hdf5r", "SpatialExperiment"), repos = "https://cran.rstudio.com")' && \ + Rscript -e 'options(warn = 2); remotes::install_github(c("scverse/anndataR@36f3caad9a7f360165c1510bbe0c62657580415a"), repos = "https://cran.rstudio.com")' + +LABEL org.opencontainers.image.authors="Dorien Roosen" +LABEL org.opencontainers.image.description="Companion container for running component convert from_h5mu_to_spatialexperiment" +LABEL org.opencontainers.image.created="2025-08-22T14:30:51Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" +LABEL org.opencontainers.image.revision="798a0cb2692eaac648662732a05bb48f951f36a0" +LABEL org.opencontainers.image.version="build_main" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables 
+VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "from_h5mu_to_spatialexperiment build_main" + echo "" + echo "Converts an h5mu file into a SpatialExperiment object." + echo "" + echo "Arguments:" + echo " -i, --input" + echo " type: file, required parameter, file must exist" + echo " example: input.h5mu" + echo " Input h5mu file" + echo "" + echo " --modality" + echo " type: string, required parameter" + echo " default: rna" + echo " Name of the modality to be converted." + echo "" + echo " --obsm_spatial_coordinates" + echo " type: string" + echo " Key in the .obsm field that contains the spatial coordinates." + echo " Will be mapped to spatialCoords in the SpatialExperiment object." + echo "" + echo " -o, --output" + echo " type: file, required parameter, output, file must exist" + echo " example: output.rds" + echo " Output SpatialExperiment file" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." 
+ echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." + echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "from_h5mu_to_spatialexperiment build_main" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -i) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'-i\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -i. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --modality) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --modality. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --modality=*) + [ -n "$VIASH_PAR_MODALITY" ] && ViashError Bad arguments for option \'--modality=*\': \'$VIASH_PAR_MODALITY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODALITY=$(ViashRemoveFlags "$1") + shift 1 + ;; + --obsm_spatial_coordinates) + [ -n "$VIASH_PAR_OBSM_SPATIAL_COORDINATES" ] && ViashError Bad arguments for option \'--obsm_spatial_coordinates\': \'$VIASH_PAR_OBSM_SPATIAL_COORDINATES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_SPATIAL_COORDINATES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --obsm_spatial_coordinates. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --obsm_spatial_coordinates=*) + [ -n "$VIASH_PAR_OBSM_SPATIAL_COORDINATES" ] && ViashError Bad arguments for option \'--obsm_spatial_coordinates=*\': \'$VIASH_PAR_OBSM_SPATIAL_COORDINATES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OBSM_SPATIAL_COORDINATES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + -o) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'-o\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to -o. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
+ exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/openpipeline_spatial/convert/from_h5mu_to_spatialexperiment:build_main' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 
1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_MODALITY+x} ]; then + ViashError '--modality' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-from_h5mu_to_spatialexperiment-XXXXXX").R +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +library(SpatialExperiment) +library(SingleCellExperiment) +library(hdf5r) +library(Matrix) +library(hdf5r) + +## VIASH START +# The following code has been auto-generated by Viash. +# treat warnings as errors +.viash_orig_warn <- options(warn = 2) + +par <- list( + "input" = $( if [ ! 
-z ${VIASH_PAR_INPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_INPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "modality" = $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_MODALITY" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "obsm_spatial_coordinates" = $( if [ ! -z ${VIASH_PAR_OBSM_SPATIAL_COORDINATES+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OBSM_SPATIAL_COORDINATES" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "output" = $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ) +) +meta <- list( + "name" = $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\]#\\\\&#g"; echo "'"; else echo NULL; fi ), + "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_b" = $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gb" = $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pb" = $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kib" = $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KIB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mib" = $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MIB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gib" = $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GIB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tib" = $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TIB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pib" = $( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PIB" | sed "s#['\\]#\\\\&#g"; echo "')"; else echo NULL; fi ) +) +dep <- list( + +) + + +# restore original warn setting +options(.viash_orig_warn) +rm(.viash_orig_warn) + +## VIASH END + + +h5mu_to_h5ad <- function(h5mu_path, modality_name) { + tmp_path <- tempfile(fileext = ".h5ad") + mod_location <- paste("mod", modality_name, sep = "/") + h5src <- hdf5r::H5File\$new(h5mu_path, "r") + h5dest <- hdf5r::H5File\$new(tmp_path, "w") + # Copy over the child objects and the child attributes from root + # Root cannot be copied directly because it always exists and + # copying does not allow overwriting. + children <- hdf5r::list.objects(h5src, + path = mod_location, + full.names = FALSE, recursive = FALSE + ) + for (child in children) { + h5dest\$obj_copy_from( + h5src, paste(mod_location, child, sep = "/"), + paste0("/", child) + ) + } + # Also copy the root attributes + root_attrs <- hdf5r::h5attr_names(x = h5src) + for (attr in root_attrs) { + h5a <- h5src\$attr_open(attr_name = attr) + robj <- h5a\$read() + h5dest\$create_attr_by_name( + attr_name = attr, + obj_name = ".", + robj = robj, + space = h5a\$get_space(), + dtype = h5a\$get_type() + ) + } + h5src\$close() + h5dest\$close() + + tmp_path +} + +read_spatial_coordinates <- function(sce, spatial_coordinates_name) { + # Check if the specified spatial coordinates exist in reducedDims + reduced_dims <- SingleCellExperiment::reducedDims(sce) + if (par\$obsm_spatial_coordinates %in% names(reduced_dims)) { + spatial_coords <- reduced_dims[[par\$obsm_spatial_coordinates]] + if (ncol(spatial_coords) != 2) { + stop( + "Spatial coordinates must have 2 columns, but found ", + ncol(spatial_coords), " columns" + ) + } + # Set proper column names for spatial coordinates + colnames(spatial_coords) <- c("x", "y") + } else { + warning( + "Spatial coordinates '", par\$obsm_spatial_coordinates, + "' not found in reducedDims. 
Available dimensions: ", + paste(names(reduced_dims), collapse = ", ") + ) + spatial_coords <- NULL + } + spatial_coords +} + +main <- function() { + # Convert to AnnData + cat("Converting H5MU file to H5AD...\\n") + h5file <- h5mu_to_h5ad(par\$input, par\$modality) + + # Convert to SpatialExperiment + cat("Converting to SingleCellExperiment...\\n") + sce <- anndataR::read_h5ad(h5file, as = "SingleCellExperiment") + + # Extract spatial coordinates if specified + if ( + !is.null(par\$obsm_spatial_coordinates) && + length(par\$obsm_spatial_coordinates) > 0 + ) { + cat("Reading in spatial coordinates...\\n") + spatial_coords <- read_spatial_coordinates( + sce, par\$obsm_spatial_coordinates + ) + SingleCellExperiment::reducedDims(sce)[[ + par\$obsm_spatial_coordinates + ]] <- NULL + } else { + spatial_coords <- NULL + } + + # Converting SingleCellExperiment to SpatialExperiment + cat("Converting to SpatialExperiment...\\n") + spe <- as(sce, "SpatialExperiment") + SpatialExperiment::spatialCoords(spe) <- spatial_coords + + # Saving SpatialExperiment object + cat("Saving SpatialExperiment object to:", par\$output, "\\n") + saveRDS(spe, file = par\$output, compress = FALSE) +} + +main() +VIASHMAIN +Rscript "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/convert/from_h5mu_to_spatialexperiment/nextflow_labels.config b/target/executable/convert/from_h5mu_to_spatialexperiment/nextflow_labels.config new file mode 100644 index 0000000..541aaad --- /dev/null +++ b/target/executable/convert/from_h5mu_to_spatialexperiment/nextflow_labels.config @@ -0,0 +1,68 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // CPU resources + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 10 } + withLabel: highcpu { cpus = 20 } + + // Memory resources + withLabel: lowmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } } + + // Disk space + // Nextflow apparently can't handle empty directives, i.e. + // withLabel: lowdisk {} + // so for that reason we have to add a dummy directive + withLabel: lowdisk { + dummyDirective = "dummyValue" + } + withLabel: middisk { + dummyDirective = "dummyValue" + } + withLabel: highdisk { + dummyDirective = "dummyValue" + } + withLabel: veryhighdisk { + dummyDirective = "dummyValue" + } + // NOTE: The above labels intentionally do not have an effect by default. 
+ // The user should set the disk space requirements by adding the following + // to the compute environment: + // + // withLabel: lowdisk { disk = { 20.GB * task.attempt } } + // withLabel: middisk { disk = { 100.GB * task.attempt } } + // withLabel: highdisk { disk = { 200.GB * task.attempt } } + // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} diff --git a/target/executable/convert/from_spatialdata_to_h5mu/.config.vsh.yaml b/target/executable/convert/from_spatialdata_to_h5mu/.config.vsh.yaml index ee5d6d0..17ae3bb 100644 --- a/target/executable/convert/from_spatialdata_to_h5mu/.config.vsh.yaml +++ b/target/executable/convert/from_spatialdata_to_h5mu/.config.vsh.yaml @@ -220,7 +220,7 @@ build_info: output: "target/executable/convert/from_spatialdata_to_h5mu" executable: "target/executable/convert/from_spatialdata_to_h5mu/from_spatialdata_to_h5mu" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_spatialdata_to_h5mu/from_spatialdata_to_h5mu b/target/executable/convert/from_spatialdata_to_h5mu/from_spatialdata_to_h5mu index 1312c70..46162a9 100755 --- 
a/target/executable/convert/from_spatialdata_to_h5mu/from_spatialdata_to_h5mu +++ b/target/executable/convert/from_spatialdata_to_h5mu/from_spatialdata_to_h5mu @@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz" LABEL org.opencontainers.image.description="Companion container for running component convert from_spatialdata_to_h5mu" -LABEL org.opencontainers.image.created="2025-08-22T08:22:47Z" +LABEL org.opencontainers.image.created="2025-08-22T14:30:51Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="6f308e6a3cca52f1283fbba734a3cdc858e18e1b" +LABEL org.opencontainers.image.revision="798a0cb2692eaac648662732a05bb48f951f36a0" LABEL org.opencontainers.image.version="build_main" VIASHDOCKER diff --git a/target/executable/convert/from_xenium_to_h5mu/.config.vsh.yaml b/target/executable/convert/from_xenium_to_h5mu/.config.vsh.yaml index f806f1c..9e09f2c 100644 --- a/target/executable/convert/from_xenium_to_h5mu/.config.vsh.yaml +++ b/target/executable/convert/from_xenium_to_h5mu/.config.vsh.yaml @@ -233,7 +233,7 @@ build_info: output: "target/executable/convert/from_xenium_to_h5mu" executable: "target/executable/convert/from_xenium_to_h5mu/from_xenium_to_h5mu" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_xenium_to_h5mu/from_xenium_to_h5mu b/target/executable/convert/from_xenium_to_h5mu/from_xenium_to_h5mu index fc84cca..a54982e 100755 --- a/target/executable/convert/from_xenium_to_h5mu/from_xenium_to_h5mu +++ b/target/executable/convert/from_xenium_to_h5mu/from_xenium_to_h5mu @@ -458,9 +458,9 @@ RUN pip install --upgrade pip && \ LABEL 
org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component convert from_xenium_to_h5mu" -LABEL org.opencontainers.image.created="2025-08-22T08:22:47Z" +LABEL org.opencontainers.image.created="2025-08-22T14:30:51Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="6f308e6a3cca52f1283fbba734a3cdc858e18e1b" +LABEL org.opencontainers.image.revision="798a0cb2692eaac648662732a05bb48f951f36a0" LABEL org.opencontainers.image.version="build_main" VIASHDOCKER diff --git a/target/executable/convert/from_xenium_to_spatialdata/.config.vsh.yaml b/target/executable/convert/from_xenium_to_spatialdata/.config.vsh.yaml index 447b8f0..394704f 100644 --- a/target/executable/convert/from_xenium_to_spatialdata/.config.vsh.yaml +++ b/target/executable/convert/from_xenium_to_spatialdata/.config.vsh.yaml @@ -314,7 +314,7 @@ build_info: output: "target/executable/convert/from_xenium_to_spatialdata" executable: "target/executable/convert/from_xenium_to_spatialdata/from_xenium_to_spatialdata" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_xenium_to_spatialdata/from_xenium_to_spatialdata b/target/executable/convert/from_xenium_to_spatialdata/from_xenium_to_spatialdata index 5e1667a..8d6505f 100755 --- a/target/executable/convert/from_xenium_to_spatialdata/from_xenium_to_spatialdata +++ b/target/executable/convert/from_xenium_to_spatialdata/from_xenium_to_spatialdata @@ -458,9 +458,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz" LABEL org.opencontainers.image.description="Companion container for running component convert 
from_xenium_to_spatialdata" -LABEL org.opencontainers.image.created="2025-08-22T08:22:47Z" +LABEL org.opencontainers.image.created="2025-08-22T14:30:51Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="6f308e6a3cca52f1283fbba734a3cdc858e18e1b" +LABEL org.opencontainers.image.revision="798a0cb2692eaac648662732a05bb48f951f36a0" LABEL org.opencontainers.image.version="build_main" VIASHDOCKER diff --git a/target/executable/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml b/target/executable/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml index 483d6ef..18066e0 100644 --- a/target/executable/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml +++ b/target/executable/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml @@ -222,7 +222,7 @@ build_info: output: "target/executable/convert/from_xenium_to_spatialexperiment" executable: "target/executable/convert/from_xenium_to_spatialexperiment/from_xenium_to_spatialexperiment" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_xenium_to_spatialexperiment/from_xenium_to_spatialexperiment b/target/executable/convert/from_xenium_to_spatialexperiment/from_xenium_to_spatialexperiment index 4d639e1..d1b47bc 100755 --- a/target/executable/convert/from_xenium_to_spatialexperiment/from_xenium_to_spatialexperiment +++ b/target/executable/convert/from_xenium_to_spatialexperiment/from_xenium_to_spatialexperiment @@ -457,9 +457,9 @@ RUN Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component convert 
from_xenium_to_spatialexperiment" -LABEL org.opencontainers.image.created="2025-08-22T08:22:47Z" +LABEL org.opencontainers.image.created="2025-08-22T14:30:51Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="6f308e6a3cca52f1283fbba734a3cdc858e18e1b" +LABEL org.opencontainers.image.revision="798a0cb2692eaac648662732a05bb48f951f36a0" LABEL org.opencontainers.image.version="build_main" VIASHDOCKER diff --git a/target/executable/filter/subset_cosmx/.config.vsh.yaml b/target/executable/filter/subset_cosmx/.config.vsh.yaml index f5badef..8351266 100644 --- a/target/executable/filter/subset_cosmx/.config.vsh.yaml +++ b/target/executable/filter/subset_cosmx/.config.vsh.yaml @@ -227,7 +227,7 @@ build_info: output: "target/executable/filter/subset_cosmx" executable: "target/executable/filter/subset_cosmx/subset_cosmx" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/filter/subset_cosmx/subset_cosmx b/target/executable/filter/subset_cosmx/subset_cosmx index db0f380..79f5ab4 100755 --- a/target/executable/filter/subset_cosmx/subset_cosmx +++ b/target/executable/filter/subset_cosmx/subset_cosmx @@ -458,9 +458,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz" LABEL org.opencontainers.image.description="Companion container for running component filter subset_cosmx" -LABEL org.opencontainers.image.created="2025-08-22T08:22:46Z" +LABEL org.opencontainers.image.created="2025-08-22T14:30:50Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="6f308e6a3cca52f1283fbba734a3cdc858e18e1b" +LABEL 
org.opencontainers.image.revision="798a0cb2692eaac648662732a05bb48f951f36a0" LABEL org.opencontainers.image.version="build_main" VIASHDOCKER diff --git a/target/executable/mapping/spaceranger_count/.config.vsh.yaml b/target/executable/mapping/spaceranger_count/.config.vsh.yaml index 3613d33..244af0d 100644 --- a/target/executable/mapping/spaceranger_count/.config.vsh.yaml +++ b/target/executable/mapping/spaceranger_count/.config.vsh.yaml @@ -426,7 +426,7 @@ build_info: output: "target/executable/mapping/spaceranger_count" executable: "target/executable/mapping/spaceranger_count/spaceranger_count" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/mapping/spaceranger_count/spaceranger_count b/target/executable/mapping/spaceranger_count/spaceranger_count index e640da3..86f5452 100755 --- a/target/executable/mapping/spaceranger_count/spaceranger_count +++ b/target/executable/mapping/spaceranger_count/spaceranger_count @@ -453,9 +453,9 @@ apt upgrade -y && apt install -y procps && rm -rf /var/lib/apt/lists/* LABEL org.opencontainers.image.authors="Jakub Majercik" LABEL org.opencontainers.image.description="Companion container for running component mapping spaceranger_count" -LABEL org.opencontainers.image.created="2025-08-22T08:22:48Z" +LABEL org.opencontainers.image.created="2025-08-22T14:30:52Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="6f308e6a3cca52f1283fbba734a3cdc858e18e1b" +LABEL org.opencontainers.image.revision="798a0cb2692eaac648662732a05bb48f951f36a0" LABEL org.opencontainers.image.version="build_main" VIASHDOCKER diff --git a/target/nextflow/convert/from_cells2stats_to_h5mu/.config.vsh.yaml 
b/target/nextflow/convert/from_cells2stats_to_h5mu/.config.vsh.yaml index e43b9a5..2e12158 100644 --- a/target/nextflow/convert/from_cells2stats_to_h5mu/.config.vsh.yaml +++ b/target/nextflow/convert/from_cells2stats_to_h5mu/.config.vsh.yaml @@ -290,7 +290,7 @@ build_info: output: "target/nextflow/convert/from_cells2stats_to_h5mu" executable: "target/nextflow/convert/from_cells2stats_to_h5mu/main.nf" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_cells2stats_to_h5mu/main.nf b/target/nextflow/convert/from_cells2stats_to_h5mu/main.nf index a6523e3..17590c3 100644 --- a/target/nextflow/convert/from_cells2stats_to_h5mu/main.nf +++ b/target/nextflow/convert/from_cells2stats_to_h5mu/main.nf @@ -3382,7 +3382,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_cells2stats_to_h5mu", "viash_version" : "0.9.4", - "git_commit" : "6f308e6a3cca52f1283fbba734a3cdc858e18e1b", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_cosmx_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_cosmx_to_h5mu/.config.vsh.yaml index aa45696..ce16c7b 100644 --- a/target/nextflow/convert/from_cosmx_to_h5mu/.config.vsh.yaml +++ b/target/nextflow/convert/from_cosmx_to_h5mu/.config.vsh.yaml @@ -225,7 +225,7 @@ build_info: output: "target/nextflow/convert/from_cosmx_to_h5mu" executable: "target/nextflow/convert/from_cosmx_to_h5mu/main.nf" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" 
package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_cosmx_to_h5mu/main.nf b/target/nextflow/convert/from_cosmx_to_h5mu/main.nf index 8b19a8f..9d4de5b 100644 --- a/target/nextflow/convert/from_cosmx_to_h5mu/main.nf +++ b/target/nextflow/convert/from_cosmx_to_h5mu/main.nf @@ -3331,7 +3331,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_cosmx_to_h5mu", "viash_version" : "0.9.4", - "git_commit" : "6f308e6a3cca52f1283fbba734a3cdc858e18e1b", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml b/target/nextflow/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml index 1be0996..99d37d9 100644 --- a/target/nextflow/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml +++ b/target/nextflow/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml @@ -232,7 +232,7 @@ build_info: output: "target/nextflow/convert/from_cosmx_to_spatialexperiment" executable: "target/nextflow/convert/from_cosmx_to_spatialexperiment/main.nf" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_cosmx_to_spatialexperiment/main.nf b/target/nextflow/convert/from_cosmx_to_spatialexperiment/main.nf index b4b8968..0c05d2d 100644 --- a/target/nextflow/convert/from_cosmx_to_spatialexperiment/main.nf +++ b/target/nextflow/convert/from_cosmx_to_spatialexperiment/main.nf @@ -3322,7 +3322,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_cosmx_to_spatialexperiment", "viash_version" : "0.9.4", - "git_commit" : 
"6f308e6a3cca52f1283fbba734a3cdc858e18e1b", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml b/target/nextflow/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml new file mode 100644 index 0000000..5932dc1 --- /dev/null +++ b/target/nextflow/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml @@ -0,0 +1,255 @@ +name: "from_h5mu_to_spatialexperiment" +namespace: "convert" +version: "build_main" +authors: +- name: "Dorien Roosen" + roles: + - "author" + info: + role: "Core Team Member" + links: + email: "dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +argument_groups: +- name: "Arguments" + arguments: + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Input h5mu file" + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--modality" + description: "Name of the modality to be converted." + info: null + default: + - "rna" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--obsm_spatial_coordinates" + description: "Key in the .obsm field that contains the spatial coordinates. 
\n\ + Will be mapped to spatialCoords in the SpatialExperiment object.\n" + info: null + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output" + alternatives: + - "-o" + description: "Output SpatialExperiment file" + info: null + example: + - "output.rds" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "r_script" + path: "script.R" + is_executable: true +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Converts an h5mu file into a SpatialExperiment object.\n" +test_resources: +- type: "r_script" + path: "test.R" + is_executable: true +- type: "file" + path: "aviti_teton_tiny.h5mu" +- type: "file" + path: "Lung5_Rep2_tiny.h5mu" +- type: "file" + path: "xenium_tiny.h5mu" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +repositories: +- type: "github" + name: "openpipeline" + repo: "openpipelines-bio/openpipeline" + tag: "2.1.2" +links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowmem" + - "singlecpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 
20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "rocker/r2u:22.04" + target_registry: "images.viash-hub.com" + target_tag: "build_main" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "libhdf5-dev" + - "libgeos-dev" + interactive: false + - type: "r" + cran: + - "hdf5r" + - "SpatialExperiment" + github: + - "scverse/anndataR@36f3caad9a7f360165c1510bbe0c62657580415a" + bioc_force_install: false + warnings_as_errors: true + test_setup: + - type: "docker" + env: + - "RETICULATE_PYTHON=/usr/bin/python" + - type: "apt" + packages: + - "python3" + - "python3-pip" + - "python3-dev" + - "python-is-python3" + interactive: false + - type: "r" + cran: + - "reticulate" + - "testthat" + bioc_force_install: false + 
warnings_as_errors: true + - type: "python" + user: false + packages: + - "anndata~=0.11.1" + - "mudata~=0.3.1" + script: + - "exec(\"try:\\n import awkward\\nexcept ModuleNotFoundError:\\n exit(0)\\\ + nelse: exit(1)\")" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/convert/from_h5mu_to_spatialexperiment/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/convert/from_h5mu_to_spatialexperiment" + executable: "target/nextflow/convert/from_h5mu_to_spatialexperiment/main.nf" + viash_version: "0.9.4" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" + git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" +package_config: + name: "openpipeline_spatial" + version: "build_main" + info: + test_resources: + - type: "s3" + path: "s3://openpipelines-bio/openpipeline_spatial/resources_test" + dest: "resources_test" + repositories: + - type: "github" + name: "openpipeline" + repo: "openpipelines-bio/openpipeline" + tag: "2.1.2" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'build_main'" + organization: "vsh" + links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" diff --git a/target/nextflow/convert/from_h5mu_to_spatialexperiment/main.nf b/target/nextflow/convert/from_h5mu_to_spatialexperiment/main.nf new file mode 100644 index 0000000..dc2af37 --- /dev/null +++ b/target/nextflow/convert/from_h5mu_to_spatialexperiment/main.nf @@ -0,0 +1,4036 @@ +// from_h5mu_to_spatialexperiment build_main +// +// This wrapper script is 
auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dorien Roosen (author) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. 
+ * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? 
null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? 
null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + 
} + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets, each an [id, parameter map] pair. + */ +private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. 
+ * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." 
+ } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param parValues A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their separator. 
+ */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the param_list argument which allows a user to initialise + * a Vdsl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. 
A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel. Possible formats of param_list are: a csv file, + * json file, a yaml file or a yaml blob. Each parameter set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? 
+ params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. 
Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel. Possible formats of param_list are: a csv file, + * json file, a yaml file or a yaml blob. Each parameter set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that contains + * the ID of the event as first element and a parameter map as second element. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? true : args.stopOnError + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method). 
+ * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameters to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containing the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data. 
/**
 * Run a list of components on a stream of data.
 *
 * Deprecated: a warning is logged on every call pointing to runEach.
 * Unlike runEach, the closures passed here receive the component's config
 * (`comp_.config`) as their last argument, not the component itself.
 *
 * @param components: list of Viash VDSL3 modules to run (a single module is wrapped in a list)
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component config.
 *   If a map, each entry is `newKey: keyInState`. If a list, the keys are copied as is.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component config.
 *   If a map or a list, the selected output values are added on top of the old state.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component config.
 * @param id: id to use for the output data
 *   If a String, it replaces the id of every event.
 *   If a closure, it will be called with the id, the data and the component config.
 * @param auto: auto options to pass to the components
 *   (simplifyInput and simplifyOutput are always forced to false)
 *
 * @return: a workflow that runs the components
 **/
def runComponents(Map args) {
  log.warn("runComponents is deprecated, use runEach instead")
  assert args.components: "runComponents should be passed a list of components to run"

  // accept a single component as well as a list of components
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runComponents"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def id_ = args.id

  workflow runComponentsWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      def comp_config = comp_.config

      // optionally drop events the filter closure rejects
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_config)
        }
        : input_ch
      // optionally rewrite the event id (fixed String or computed by a closure)
      def id_ch = id_
        ? filter_ch | map{tup ->
          // def new_id = id_(tup[0], tup[1], comp_config)
          def new_id = tup[0]
          if (id_ instanceof String) {
            new_id = id_
          } else if (id_ instanceof Closure) {
            new_id = id_(new_id, tup[1], comp_config)
          }
          [new_id] + tup.drop(1)
        }
        : filter_ch
      // derive the component input from the state; the input is inserted at
      // index 1, which moves the original state to index 2
      def data_ch = id_ch | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_config)
          }
          tup.take(1) + [new_data] + tup.drop(1)
        }
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      // fold the component output back into the old state, which is read from
      // index 2 (presumably passed through by the component run — confirm)
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          def new_state = null
          if (toState_ instanceof Map) {
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_config)
          }
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch

      post_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runComponentsWf
}
/**
 * Run a list of components on a stream of data.
 *
 * @param components: list of Viash VDSL3 modules to run (a single module is wrapped in a list)
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component itself.
 *   If a map, each entry is `newKey: keyInState`. If a list, the keys are copied as is.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component itself.
 *   If a map or a list, the selected output values are added on top of the old state.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component itself.
 * @param runIf: optional closure called with the id, the data and the component itself.
 *   Events for which it returns a false value skip the component and are
 *   appended to the component's output channel unchanged.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component itself.
 *   The resulting id must be a String.
 * @param auto: auto options to pass to the components
 *   (simplifyInput and simplifyOutput are always forced to false)
 *
 * @return: a workflow that runs the components
 **/
def runEach(Map args) {
  assert args.components: "runEach should be passed a list of components to run"

  // accept a single component as well as a list of components
  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runEach"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def runIf_ = args.runIf
  def id_ = args.id

  assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf."

  workflow runEachWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      // optionally drop events the filter closure rejects
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_)
        }
        : input_ch
      // optionally rewrite the event id (fixed value or computed by a closure)
      def id_ch = id_
        ? filter_ch | map{tup ->
          def new_id = id_
          if (new_id instanceof Closure) {
            new_id = new_id(tup[0], tup[1], comp_)
          }
          assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}"
          [new_id] + tup.drop(1)
        }
        : filter_ch
      // split the stream into events that run the component and events that bypass it
      def chPassthrough = null
      def chRun = null
      if (runIf_) {
        def idRunIfBranch = id_ch.branch{ tup ->
          run: runIf_(tup[0], tup[1], comp_)
          passthrough: true
        }
        chPassthrough = idRunIfBranch.passthrough
        chRun = idRunIfBranch.run
      } else {
        chRun = id_ch
        chPassthrough = Channel.empty()
      }
      // derive the component input from the state; the input is inserted at
      // index 1, which moves the original state to index 2
      def data_ch = chRun | map{tup ->
          def new_data = tup[1]
          if (fromState_ instanceof Map) {
            new_data = fromState_.collectEntries{ key0, key1 ->
              [key0, new_data[key1]]
            }
          } else if (fromState_ instanceof List) {
            new_data = fromState_.collectEntries{ key ->
              [key, new_data[key]]
            }
          } else if (fromState_ instanceof Closure) {
            new_data = fromState_(tup[0], new_data, comp_)
          }
          tup.take(1) + [new_data] + tup.drop(1)
        }
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      // fold the component output back into the old state, which is read from
      // index 2 (presumably passed through by the component run — confirm)
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          def new_state = null
          if (toState_ instanceof Map) {
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_)
          }
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch

      // events that skipped the component are appended after the processed ones
      def return_ch = post_ch
        | concat(chPassthrough)

      return_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runEachWf
}
/**
 * Fill in the implicit defaults of a single Viash argument definition.
 *
 * The map is modified in place and also returned. The following is set when absent:
 *  - multiple (false), required (false), direction ("input"), multiple_sep (";")
 *  - plainName: the argument name without its leading dashes
 *  - for file arguments: must_exist (true) and create_parent (true)
 *  - for output file arguments without a default: a filename template
 *    `$id.$key.<plainName>[_*][<ext>]`, where the extension is taken from the
 *    example (first element when the example is a list) and `_*` is added for
 *    `multiple` arguments, whose default is wrapped in a list
 *
 * For arguments that are not `multiple`, list-valued default/example values
 * are reduced to their first element. boolean_true / boolean_false arguments
 * always get default false / true respectively.
 *
 * @param arg A map describing one argument; must contain at least `name`.
 * @return The same map, with the defaults applied.
 */
def _processArgument(arg) {
  arg.multiple = arg.multiple != null ? arg.multiple : false
  arg.required = arg.required != null ? arg.required : false
  arg.direction = arg.direction != null ? arg.direction : "input"
  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";"
  arg.plainName = arg.name.replaceAll("^-*", "")

  if (arg.type == "file") {
    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
  }

  // add default values to output files which haven't already got a default
  if (arg.type == "file" && arg.direction == "output" && arg.default == null) {
    def mult = arg.multiple ? "_*" : ""

    // The default is known to be null here, so only the example can provide
    // an extension. An empty list or a missing example yields no extension.
    def extSource = arg.example
    if (extSource instanceof List) {
      extSource = extSource.isEmpty() ? null : extSource[0]
    }
    def ext = ""
    if (extSource != null) {
      // toString() so that non-String examples do not break the regex lookup
      ext = extSource.toString().find("\\.[^\\.]+\$") ?: ""
    }

    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
    if (arg.multiple) {
      arg.default = [arg.default]
    }
  }

  if (!arg.multiple) {
    if (arg.default != null && arg.default instanceof List) {
      arg.default = arg.default[0]
    }
    if (arg.example != null && arg.example instanceof List) {
      arg.example = arg.example[0]
    }
  }

  if (arg.type == "boolean_true") {
    arg.default = false
  }
  if (arg.type == "boolean_false") {
    arg.default = true
  }

  arg
}
/**
 * Render the help text of a single argument.
 *
 * The result starts with the argument name, followed by one line per
 * non-empty property (type plus flags, default, example, choices, min, max)
 * and finally the description wrapped by `_paragraphWrap`.
 *
 * Newlines inside property values are shown as the two characters `\n`, and
 * choices containing a comma, a newline or a double quote are double-quoted
 * with embedded quotes escaped as `\"`. Both replacements are literal:
 * `String.replaceAll` treats a backslash in the replacement as an escape
 * character, which would silently drop it.
 *
 * @param param A processed argument (see `_processArgument`).
 * @return The help text for this argument, as a String.
 */
def _generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  // lists are joined with the argument's separator, anything else is stringified
  def render = { value ->
    if (value == null) {
      return null
    }
    value instanceof List ?
      value.join(param.multiple_sep != null ? param.multiple_sep : ", ") :
      value.toString()
  }
  def dflt = render(param.default)
  def example = render(param.example)
  def min = param.min?.toString()
  def max = param.max?.toString()

  def escapeChoice = { choice ->
    def escaped = choice.replace("\n", "\\n").replace("\"", "\\\"")
    escaped.contains(",") || escaped != choice ? "\"" + escaped + "\"" : escaped
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}
// based on Format._paragraphWrap
/**
 * Greedily wrap text to a maximum line length.
 *
 * The text is split into paragraphs on newlines and every paragraph into
 * words on single whitespace characters. Words are appended to the current
 * line, separated by one space, for as long as the line stays within
 * `maxLength`; a word that does not fit starts a new line. A single word
 * longer than `maxLength` is kept on a line of its own.
 *
 * @param str The text to wrap.
 * @param maxLength The maximum number of characters per line.
 * @return The wrapped lines, as a list of Strings.
 */
def _paragraphWrap(str, maxLength) {
  def wrapped = []
  for (paragraph in str.split("\n")) {
    def pending = paragraph.split("\\s").toList()

    // the first word always opens the line
    def current = pending.pop()
    while (pending) {
      def next = pending.pop()
      def fits = current.length() + next.length() + 1 <= maxLength
      if (fits) {
        current = current + " " + next
      } else {
        wrapped.add(current)
        current = next
      }
    }
    // flush the last line of the paragraph
    wrapped.add(current)
  }
  return wrapped
}
"arguments") { + grp = grp + [ + arguments: grp.arguments + config.arguments + ] + } + grp + } + } else { + argGroups = argGroups + [ + name: "Arguments", + arguments: config.arguments + ] + } + config.allArgumentGroups = argGroups + + config +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf' + +def readConfig(file) { + def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml")) + processConfig(config) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf' +/** + * Resolve a path relative to the current file. + * + * @param str The path to resolve, as a String. + * @param parentPath The path to resolve relative to, as a Path. + * + * @return The path that may have been resovled, as a Path. + */ +def _resolveSiblingIfNotAbsolute(str, parentPath) { + if (str !instanceof String) { + return str + } + if (!_stringIsAbsolutePath(str)) { + return parentPath.resolveSibling(str) + } else { + return file(str, hidden: true) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf' +/** + * Check whether a path as a string is absolute. + * + * In the past, we tried using `file(., relative: true).isAbsolute()`, + * but the 'relative' option was added in 22.10.0. + * + * @param path The path to check, as a String. + * + * @return Whether the path is absolute, as a boolean. 
/**
 * Check whether a path as a string is absolute.
 *
 * A string counts as absolute when it starts with a slash, optionally
 * preceded by a protocol prefix (a letter followed by letters or digits and
 * a colon), and has at least one more character after that slash.
 *
 * @param path The path to check, as a String (anything else fails the assertion).
 *
 * @return Whether the path is absolute, as a boolean.
 */
def _stringIsAbsolutePath(path) {
  assert path instanceof String

  def absolutePattern = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/
  def isAbsolute = absolutePattern.matcher(path).matches()
  isAbsolute
}
/**
 * Read a csv file into a list of maps, one map per data row.
 *
 * - Lines starting with '#' are skipped.
 * - The first remaining line is the header; its fields are the map keys.
 * - Fields are split on commas that are not inside double quotes, and one
 *   pair of surrounding double quotes is removed from a field.
 * - Empty data fields are left out of the row's map.
 *
 * The reader is always closed, also when an assertion fails halfway.
 *
 * @param file_path The csv file, as a Path or as anything accepted by `file()`.
 * @return A list of maps from column name to value.
 */
def readCsv(file_path) {
  def output = []
  def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path

  // todo: allow escaped quotes in string
  // todo: allow single quotes?
  def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
  def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')

  // strip one pair of double quotes from a field, if present
  def unquote = { field ->
    def m = removeQuote.matcher(field)
    m.find() ? m.replaceFirst('$1') : field
  }

  def br = java.nio.file.Files.newBufferedReader(inputFile)
  try {
    def row = -1
    def header = null
    def line = null

    // the header is the first line that is not a comment
    while (header == null && (line = br.readLine()) != null) {
      row++
      if (!line.startsWith("#")) {
        header = splitRegex.split(line, -1).collect{field -> unquote(field)}
      }
    }
    assert header != null: "CSV file should contain a header"

    while ((line = br.readLine()) != null) {
      row++
      if (line.startsWith("#")) {
        continue
      }

      def data = splitRegex.split(line, -1).collect{field ->
        field == "" ? null : unquote(field)
      }
      assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"

      def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
      output.add(dataMap)
    }
  } finally {
    br.close()
  }

  output
}
// Custom constructor to modify how certain objects are parsed from YAML
/**
 * SnakeYAML constructor that turns scalars tagged `!file` into Path objects.
 *
 * When a root is given, the scalar is resolved against it; otherwise the
 * scalar is converted to a Path as is.
 */
class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor {
  // directory against which `!file` values are resolved; may be null
  Path root

  class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct {
    public Object construct(org.yaml.snakeyaml.nodes.Node node) {
      String filename = (String) constructScalar(node)
      return root != null ? root.resolve(filename) : java.nio.file.Paths.get(filename)
    }
  }

  CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) {
    super(options)
    this.root = root
    // Handling !file tag and parse it back to a File type
    def fileTag = new org.yaml.snakeyaml.nodes.Tag("!file")
    this.yamlConstructors.put(fileTag, new ConstructPath())
  }
}
// Custom representer to modify how certain objects are represented in YAML
/**
 * SnakeYAML representer that writes File and Path objects as scalars tagged
 * `!file`.
 *
 * When a relativizer is given, the path written is the result of
 * `relativizer.relativize(path)`; otherwise the path is written as is.
 */
class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer {
  // path used to relativize the represented paths; may be null
  Path relativizer

  class RepresentPath implements org.yaml.snakeyaml.representer.Represent {
    public String getFileName(Object obj) {
      // normalise File to Path, reject everything that is neither
      def candidate = obj instanceof File ? ((File) obj).toPath() : obj
      if (!(candidate instanceof Path)) {
        throw new IllegalArgumentException("Object: " + candidate + " is not a Path or File")
      }
      def path = (Path) candidate

      return relativizer != null ? relativizer.relativize(path).toString() : path.toString()
    }

    public org.yaml.snakeyaml.nodes.Node representData(Object data) {
      def fileTag = new org.yaml.snakeyaml.nodes.Tag("!file")
      return representScalar(fileTag, getFileName(data))
    }
  }

  CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) {
    super(options)
    this.relativizer = relativizer
    // register the same representation for every path-like class
    this.representers.put(sun.nio.fs.UnixPath, new RepresentPath())
    this.representers.put(Path, new RepresentPath())
    this.representers.put(File, new RepresentPath())
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf'
/**
 * Write data to a file as JSON.
 *
 * Only null is rejected: empty maps, empty lists, 0 and false are valid
 * data, so the checks compare against null instead of relying on Groovy
 * truth.
 *
 * @param data The object to serialise with `toJsonBlob`; must not be null.
 * @param file The file to write to; must not be null.
 */
void writeJson(data, file) {
  assert data != null: "writeJson: data should not be null"
  assert file != null: "writeJson: file should not be null"
  file.write(toJsonBlob(data))
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf'
/**
 * Write data to a file as YAML.
 *
 * Only null is rejected: empty maps, empty lists, 0 and false are valid
 * data, so the checks compare against null instead of relying on Groovy
 * truth.
 *
 * @param data The object to serialise with `toYamlBlob`; must not be null.
 * @param file The file to write to; must not be null.
 */
void writeYaml(data, file) {
  assert data != null: "writeYaml: data should not be null"
  assert file != null: "writeYaml: file should not be null"
  file.write(toYamlBlob(data))
}
required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + 
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf'
/**
 * Create a workflow that merges all events of a channel into a single event.
 *
 * All events are gathered with `toSortedList`; when at least one event was
 * received, the closure is called once with the list of ids and the list of
 * states, and its return value becomes the only event of the output channel.
 *
 * @param apply_ Closure called as `apply_(ids, states)`.
 * @return A workflow that joins the states of its input channel.
 */
def joinStates(Closure apply_) {
  workflow joinStatesWf {
    take: input_ch
    main:
    output_ch = input_ch
      | toSortedList
      | filter{ collected -> collected.size() > 0 }
      | map{ collected ->
        def ids = collected.collect{ tup -> tup[0] }
        def states = collected.collect{ tup -> tup[1] }
        apply_(ids, states)
      }

    emit: output_ch
  }
  return joinStatesWf
}
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
/**
 * Recursively gather every file-like leaf found in a (possibly nested) object.
 *
 * @param obj A File, a Path, a List, a Map, or any other value.
 * @return A flat List with each File/Path that was encountered, in traversal
 *   order. Values that are neither file-like nor a List/Map contribute nothing.
 */
def collectFiles(obj) {
  // leaf case: a file-like object is returned wrapped in a list
  if (obj instanceof java.io.File || obj instanceof Path) {
    return [obj]
  }

  // container case: walk the elements of a List or the values of a Map
  def children = null
  if (obj instanceof List && obj !instanceof String) {
    children = obj
  } else if (obj instanceof Map) {
    children = obj.values()
  }

  // anything else (String, number, null, ...) holds no files
  if (children == null) {
    return []
  }

  def found = []
  for (child in children) {
    found.addAll(collectFiles(child))
  }
  return found
}
/**
 * Build a sub-workflow that writes the state of every channel event to a yaml file.
 *
 * @param args Named arguments:
 *   - key: required; substituted for `$key` / `${key}` in the yaml filename template.
 *   - output_state (or, as a fallback, outputState): filename template for the
 *     yaml file. Defaults to '$id.$key.state.yaml'.
 * @return The `publishStatesWf` workflow. It takes a channel of tuples whose first
 *   element is the id and whose second element is the state, pipes
 *   [id, yamlBlob, yamlFilename] into `publishStatesProc`, and emits the input
 *   channel itself (not the output of the process).
 */
def publishStates(Map args) {
  def key_ = args.get("key")
  // 'output_state' takes precedence over 'outputState'; the literal template is the last resort
  def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml'))

  assert key_ != null : "publishStates: key must be specified"

  workflow publishStatesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1]

          // the input files and the target output filenames
          // NOTE(review): this value is not referenced again inside this closure
          def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()

          // instantiate the filename template; both the $name and ${name} forms are replaced
          def yamlFilename = yamlTemplate_
            .replaceAll('\\$id', id_)
            .replaceAll('\\$\\{id\\}', id_)
            .replaceAll('\\$key', key_)
            .replaceAll('\\$\\{key\\}', key_)

          // TODO: do the pathnames in state_ match up with the outputFilenames_?

          // convert state to yaml blob
          def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))

          [id_, yamlBlob_, yamlFilename]
        }
        | publishStatesProc
    emit: input_ch
  }
  return publishStatesWf
}
/**
 * Build a sub-workflow that writes a state yaml containing only the output
 * arguments declared in a component config.
 *
 * This assumes that the state contains no other values other than those
 * specified in the config.
 *
 * @param args Named arguments:
 *   - config: required; its `allArguments` list is filtered on
 *     `direction == "output"` to decide which state entries are stored.
 *   - key: substituted for `$key` / `${key}` in filename templates.
 *     Defaults to `config.name`.
 * @return The `publishStatesSimpleWf` workflow. It takes a channel of tuples
 *   [id, state, origState, ...], pipes [id, yamlBlob, yamlFilename] into
 *   `publishStatesProc`, and emits the input channel itself.
 */
def publishStatesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishStatesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishStatesByConfig: key must be specified"

  workflow publishStatesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // TODO: allow overriding the state.yaml template
          // TODO TODO: if auto.publish == "state", add output_state as an argument
          def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml'
          def yamlFilename = yamlTemplate
            .replaceAll('\\$id', id_)
            .replaceAll('\\$\\{id\\}', id_)
            .replaceAll('\\$key', key_)
            .replaceAll('\\$\\{key\\}', key_)
          // null when the yaml filename has no directory component
          def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()

          // the processed state is a list of [key, value] tuples, where
          //   - key is a String
          //   - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
          //   - (key, value) are the tuples that will be saved to the state.yaml file
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's an
                // optional argument for which the component did
                // not generate any output
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it should be stored
                // in the state as-is, but is not something that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[key: plainName_, value: value]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', id_)
                  .replaceAll('\\$\\{id\\}', id_)
                  .replaceAll('\\$key', key_)
                  .replaceAll('\\$\\{key\\}', key_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def value_ = java.nio.file.Paths.get(filename_ix)
                    // if id contains a slash
                    if (yamlDir != null) {
                      value_ = yamlDir.relativize(value_)
                    }
                    return value_
                  }
                  return [["key": plainName_, "value": outputPerFile]]
                } else {
                  // only the published filename is stored in the yaml; the
                  // source path of the file plays no role here
                  def value_ = java.nio.file.Paths.get(filename)
                  // if id contains a slash
                  if (yamlDir != null) {
                    value_ = yamlDir.relativize(value_)
                  }
                  return [["key": plainName_, value: value_]]
                }
              }

          def updatedState_ = processedState.collectEntries{[it.key, it.value]}

          // convert state to yaml blob
          def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)

          [id_, yamlBlob_, yamlFilename]
        }
        | publishStatesProc
    emit: input_ch
  }
  return publishStatesSimpleWf
}
Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript 
/**
 * Validate the keys of a map against an allow-list and a list of mandatory keys.
 *
 * @param map The object to check; must be a Map.
 * @param expectedKeys Every key that is allowed to occur in `map`.
 * @param requiredKeys Keys that must occur in `map`.
 * @param mapName Name used in the error messages; may be null or empty.
 * @throws AssertionError if `map` is not a Map, if it contains a key that is
 *   not listed in `expectedKeys`, or if it lacks one of `requiredKeys`.
 */
def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
  map.forEach { key, val ->
    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
  }
  requiredKeys.forEach { requiredKey ->
    // the message must interpolate the closure parameter; no variable named
    // 'key' exists in this scope
    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
  }
}
/cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? 
":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE 
maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
+ } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? 
params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." 
+ } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + def requiredInputNames = meta.config.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new 
Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in 
/**
 * Build the Nextflow workflow that wraps this component.
 *
 * The generated workflow takes a channel of tuples [id, data, ...passthrough]
 * and, in order:
 *   1. validates and normalises every tuple (map/mapId/mapData/mapPassthrough,
 *      simplifyInput, renameKeys);
 *   2. splits off events for which 'runIf' is false and applies 'filter';
 *   3. derives the component arguments via 'fromState' and merges them with
 *      config defaults, 'params' overrides and 'workflowArgs.args';
 *   4. runs innerWorkflowFactory(workflowArgs) and tags the events of every
 *      emitted channel with the index of that channel;
 *   5. groups the events per id, merges their outputs, joins them with the
 *      previous state and applies 'toState';
 *   6. optionally publishes files and state when auto.publish == "state";
 *   7. emits [id, new_state, ...] concatenated with the 'runIf' passthrough.
 *
 * depends on: innerWorkflowFactory
 *
 * @param args          workflow arguments supplied by the caller
 * @param defaultWfArgs default workflow arguments
 * @param meta          module metadata; 'meta.config' is read here
 * @return the workflow, renamed to the processed 'key', with a 'run' factory
 *         closure and the 'config' attached through its metaClass
 */
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
  def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
  def key_ = workflowArgs["key"]
  def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}

  workflow workflowInstance {
    take: input_

    main:
    def chModified = input_
      | checkUniqueIds([:])
      | _debug(workflowArgs, "input")
      | map { tuple ->
        tuple = deepClone(tuple)

        if (workflowArgs.map) {
          tuple = workflowArgs.map(tuple)
        }
        if (workflowArgs.mapId) {
          tuple[0] = workflowArgs.mapId(tuple[0])
        }
        if (workflowArgs.mapData) {
          tuple[1] = workflowArgs.mapData(tuple[1])
        }
        if (workflowArgs.mapPassthrough) {
          tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2))
        }

        // check tuple
        assert tuple instanceof List :
          "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
        assert tuple.size() >= 2 :
          "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: tuple.size() == ${tuple.size()}"

        // check id field
        if (tuple[0] instanceof GString) {
          tuple[0] = tuple[0].toString()
        }
        assert tuple[0] instanceof CharSequence :
          "Error in module '${key_}': first element of tuple in channel should be a String\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: ${tuple[0]}"

        // match file to input file
        if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) {
          def inputFiles = meta.config.allArguments
            .findAll { it.type == "file" && it.direction == "input" }

          assert inputFiles.size() == 1 :
              "Error in module '${key_}' id '${tuple[0]}'.\n" +
              " Anonymous file inputs are only allowed when the process has exactly one file input.\n" +
              " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}"

          tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries()
        }

        // check data field
        assert tuple[1] instanceof Map :
          "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

        // rename keys of data field in tuple
        if (workflowArgs.renameKeys) {
          assert workflowArgs.renameKeys instanceof Map :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}"
          assert tuple[1] instanceof Map :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

          // TODO: allow renameKeys to be a function?
          workflowArgs.renameKeys.each { newKey, oldKey ->
            assert newKey instanceof CharSequence :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}"
            assert oldKey instanceof CharSequence :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}"
            // the module key is held in 'key_'; no variable named 'key' exists
            // in this scope, so it must not be used in the message below
            assert tuple[1].containsKey(oldKey) :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'"
            tuple[1].put(newKey, tuple[1][oldKey])
          }
          tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey })
        }
        tuple
      }


    // split the events into those that run the component and those that
    // bypass it; bypassed events are concatenated to the output at the end
    def chRun = null
    def chPassthrough = null
    if (workflowArgs.runIf) {
      def runIfBranch = chModified.branch{ tup ->
        run: workflowArgs.runIf(tup[0], tup[1])
        passthrough: true
      }
      chRun = runIfBranch.run
      chPassthrough = runIfBranch.passthrough
    } else {
      chRun = chModified
      chPassthrough = Channel.empty()
    }

    def chRunFiltered = workflowArgs.filter ?
      chRun | filter{workflowArgs.filter(it)} :
      chRun

    def chArgs = workflowArgs.fromState ?
      chRunFiltered | map{
        def new_data = workflowArgs.fromState(it.take(2))
        [it[0], new_data]
      } :
      chRunFiltered | map {tup -> tup.take(2)}

    // fill in defaults
    def chArgsWithDefaults = chArgs
      | map { tuple ->
        def id_ = tuple[0]
        def data_ = tuple[1]

        // TODO: could move fromState to here

        // fetch default params from functionality
        def defaultArgs = meta.config.allArguments
          .findAll { it.containsKey("default") }
          .collectEntries { [ it.plainName, it.default ] }

        // fetch overrides in params
        def paramArgs = meta.config.allArguments
          .findAll { par ->
            def argKey = key_ + "__" + par.plainName
            params.containsKey(argKey)
          }
          .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] }

        // fetch overrides in data
        def dataArgs = meta.config.allArguments
          .findAll { data_.containsKey(it.plainName) }
          .collectEntries { [ it.plainName, data_[it.plainName] ] }

        // combine params; later maps take precedence over earlier ones
        def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs

        // remove arguments with explicit null values
        combinedArgs
          .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"}

        combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_)

        [id_, combinedArgs] + tuple.drop(2)
      }

    // TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
    def chInitialOutputMulti = chArgsWithDefaults
      | _debug(workflowArgs, "processed")
      // run workflow
      | innerWorkflowFactory(workflowArgs)
    def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
    assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
    // Add a channel ID to the events, which designates the channel the event was emitted from as a running number
    // This number is used to sort the events later when the events are gathered from across the channels.
    def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
      def newChannel = channel
        | map {tuple ->
          assert tuple instanceof List :
            "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
            " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
            " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"

          def newEvent = [channelIndex] + tuple
          return newEvent
        }
      return newChannel
    }
    // Put the events into 1 channel, cover case where there is only one channel is emitted
    def chInitialOutput = chInitialOutputList.size() > 1 ? \
      chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
      chInitialOutputListWithIndexedEvents[0]
    def chInitialOutputProcessed = chInitialOutput
      | map { tuple ->
        def channelId = tuple[0]
        def id_ = tuple[1]
        def output_ = tuple[2]

        // see if output map contains metadata
        def meta_ =
          output_ instanceof Map && output_.containsKey("_meta") ?
          output_["_meta"] :
          [:]
        def join_id = meta_.join_id ?: id_

        // remove metadata
        output_ = output_.findAll{k, v -> k != "_meta"}

        // check value types
        output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)

        [join_id, channelId, id_, output_]
      }
      // | view{"chInitialOutput: ${it.take(3)}"}

    // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
    def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
      // input tuple format: [join_id, channel_id, id, output, prev_state, ...]
      // output tuple format: [join_id, channel_id, id, new_state, ...]
      | map{ tup ->
        def new_state = workflowArgs.toState(tup.drop(2).take(3))
        tup.take(3) + [new_state] + tup.drop(5)
      }
    if (workflowArgs.auto.publish == "state") {
      def chPublishFiles = chPublishWithPreviousState
        // input tuple format: [join_id, channel_id, id, new_state, ...]
        // output tuple format: [join_id, channel_id, id, new_state]
        | map{ tup ->
          tup.take(4)
        }

      safeJoin(chPublishFiles, chArgsWithDefaults, key_)
        // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
        // output tuple format: [id, new_state, orig_state]
        | map { tup ->
          tup.drop(2).take(3)
        }
        | publishFilesByConfig(key: key_, config: meta.config)
    }
    // Join the state from the events that were emitted from different channels
    def chJoined = chInitialOutputProcessed
      | map {tuple ->
        def join_id = tuple[0]
        def channel_id = tuple[1]
        def id = tuple[2]
        def other = tuple.drop(3)
        // Below, groupTuple is used to join the events. To make sure resuming a workflow
        // keeps working, the output state must be deterministic. This means the state needs to be
        // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash',
        // but hashing the state when it is large can be problematic in terms of performance.
        // Therefore, a custom comparator function is provided. We add the channel ID to the
        // states so that we can use the channel ID to sort the items.
        def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
        // A comparator that is provided to groupTuple's 'sort' argument is applied
        // to all elements of the event tuple (that is not the 'id'). The comparator
        // closure that is used below expects the input to be List. So the join_id and
        // channel_id must also be wrapped in a list.
        [[join_id], [channel_id], id] + stateWithChannelID
      }
      | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
      | map {join_ids, _, id, statesWithChannelID ->
        // Remove the channel IDs from the states
        def states = statesWithChannelID.collect{it[1]}
        def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
        assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
        def newJoinIdUnique = newJoinId[0]

        // Merge the states from the different channels
        def newState = states.inject([:]){ old_state, state_to_add ->
          return old_state + state_to_add.collectEntries{k, v ->
            if (!multipleArgs.contains(k)) {
              // if the key is not a multiple argument, we expect only one value
              if (old_state.containsKey(k)) {
                assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
              }
              [k, v]
            } else {
              // if the key is a multiple argument, append the different values into one list
              def prevValue = old_state.getOrDefault(k, [])
              def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
              [k, prevValueAsList + v]
            }
          }
        }

        _checkAllRequiredOuputsPresent(newState, meta.config, id, key_)

        // simplify output if need be
        if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
          newState = newState.values()[0]
        }

        return [newJoinIdUnique, id, newState]
      }

    // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
    def chNewState = safeJoin(chJoined, chRunFiltered, key_)
      // input tuple format: [join_id, id, output, prev_state, ...]
      // output tuple format: [join_id, id, new_state, ...]
      | map{ tup ->
        def new_state = workflowArgs.toState(tup.drop(1).take(3))
        tup.take(2) + [new_state] + tup.drop(4)
      }

    if (workflowArgs.auto.publish == "state") {
      def chPublishStates = chNewState
        // input tuple format: [join_id, id, new_state, ...]
        // output tuple format: [join_id, id, new_state]
        | map{ tup ->
          tup.take(3)
        }

      safeJoin(chPublishStates, chArgsWithDefaults, key_)
        // input tuple format: [join_id, id, new_state, orig_state, ...]
        // output tuple format: [id, new_state, orig_state]
        | map { tup ->
          tup.drop(1).take(3)
        }
        | publishStatesByConfig(key: key_, config: meta.config)
    }
    // assigned without 'def' on purpose: it is referenced by the emit section
    chReturn = chNewState
      | map { tup ->
        // input tuple format: [join_id, id, new_state, ...]
        // output tuple format: [id, new_state, ...]
        tup.drop(1)
      }
      | _debug(workflowArgs, "output")
      | concat(chPassthrough)

    emit: chReturn
  }

  def wf = workflowInstance.cloneWithName(key_)

  // add factory function
  wf.metaClass.run = { runArgs ->
    workflowFactory(runArgs, workflowArgs, meta)
  }
  // add config to module for later introspection
  wf.metaClass.config = meta.config

  return wf
}
"Name of the modality to be converted.", + "default" : [ + "rna" + ], + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--obsm_spatial_coordinates", + "description" : "Key in the .obsm field that contains the spatial coordinates. \nWill be mapped to spatialCoords in the SpatialExperiment object.\n", + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output", + "alternatives" : [ + "-o" + ], + "description" : "Output SpatialExperiment file", + "example" : [ + "output.rds" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "r_script", + "path" : "script.R", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Converts an h5mu file into a SpatialExperiment object.\n", + "test_resources" : [ + { + "type" : "r_script", + "path" : "test.R", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/resources_test/aviti/aviti_teton_tiny.h5mu" + }, + { + "type" : "file", + "path" : "/resources_test/cosmx/Lung5_Rep2_tiny.h5mu" + }, + { + "type" : "file", + "path" : "/resources_test/xenium/xenium_tiny.h5mu" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "repositories" : [ + { + "type" : "github", + "name" : "openpipeline", + "repo" : "openpipelines-bio/openpipeline", + "tag" : "2.1.2" + } + ], + "links" : { + "repository" : "https://github.com/openpipelines-bio/openpipeline_spatial", + "docker_registry" : "ghcr.io" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + 
"directives" : { + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + 
"cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "rocker/r2u:22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "build_main", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "libhdf5-dev", + "libgeos-dev" + ], + "interactive" : false + }, + { + "type" : "r", + "cran" : [ + "hdf5r", + "SpatialExperiment" + ], + "github" : [ + "scverse/anndataR@36f3caad9a7f360165c1510bbe0c62657580415a" + ], + "bioc_force_install" : false, + "warnings_as_errors" : true + } + ], + "test_setup" : [ + { + "type" : "docker", + "env" : [ + "RETICULATE_PYTHON=/usr/bin/python" + ] + }, + { + "type" : "apt", + "packages" : [ + "python3", + "python3-pip", + "python3-dev", + "python-is-python3" + ], + "interactive" : false + }, + { + "type" : "r", + "cran" : [ + "reticulate", + "testthat" + ], + "bioc_force_install" : false, + "warnings_as_errors" : true + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "anndata~=0.11.1", + "mudata~=0.3.1" + ], + "script" : [ + "exec(\\"try:\\\\n import awkward\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: exit(1)\\")" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/convert/from_h5mu_to_spatialexperiment/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "/workdir/root/repo/target/nextflow/convert/from_h5mu_to_spatialexperiment", + "viash_version" : "0.9.4", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" + }, + "package_config" : { + "name" : "openpipeline_spatial", + "version" : "build_main", + "info" : { + "test_resources" : [ + 
{ + "type" : "s3", + "path" : "s3://openpipelines-bio/openpipeline_spatial/resources_test", + "dest" : "resources_test" + } + ] + }, + "repositories" : [ + { + "type" : "github", + "name" : "openpipeline", + "repo" : "openpipelines-bio/openpipeline", + "tag" : "2.1.2" + } + ], + "viash_version" : "0.9.4", + "source" : "/workdir/root/repo/src", + "target" : "/workdir/root/repo/target", + "config_mods" : [ + ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'build_main'" + ], + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/openpipelines-bio/openpipeline_spatial", + "docker_registry" : "ghcr.io" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.R" +cat > "$tempscript" << VIASHMAIN +library(SpatialExperiment) +library(SingleCellExperiment) +library(hdf5r) +library(Matrix) +library(hdf5r) + +## VIASH START +# The following code has been auto-generated by Viash. +# treat warnings as errors +.viash_orig_warn <- options(warn = 2) + +par <- list( + "input" = $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_INPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "modality" = $( if [ ! -z ${VIASH_PAR_MODALITY+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_MODALITY" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "obsm_spatial_coordinates" = $( if [ ! -z ${VIASH_PAR_OBSM_SPATIAL_COORDINATES+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OBSM_SPATIAL_COORDINATES" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "output" = $( if [ ! 
-z ${VIASH_PAR_OUTPUT+x} ]; then echo -n "'"; echo -n "$VIASH_PAR_OUTPUT" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ) +) +meta <- list( + "name" = $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "functionality_name" = $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo -n "'"; echo -n "$VIASH_META_FUNCTIONALITY_NAME" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "resources_dir" = $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_RESOURCES_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "executable" = $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo -n "'"; echo -n "$VIASH_META_EXECUTABLE" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "config" = $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo -n "'"; echo -n "$VIASH_META_CONFIG" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "temp_dir" = $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo -n "'"; echo -n "$VIASH_META_TEMP_DIR" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "'"; else echo NULL; fi ), + "cpus" = $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo -n "as.integer('"; echo -n "$VIASH_META_CPUS" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_b" = $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_B" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kb" = $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mb" = $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gb" = $( if [ ! 
-z ${VIASH_META_MEMORY_GB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tb" = $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pb" = $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_kib" = $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_KIB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_mib" = $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_MIB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_gib" = $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_GIB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_tib" = $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_TIB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ), + "memory_pib" = $( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo -n "bit64::as.integer64('"; echo -n "$VIASH_META_MEMORY_PIB" | sed "s#['\\\\]#\\\\\\\\&#g"; echo "')"; else echo NULL; fi ) +) +dep <- list( + +) + + +# restore original warn setting +options(.viash_orig_warn) +rm(.viash_orig_warn) + +## VIASH END + + +h5mu_to_h5ad <- function(h5mu_path, modality_name) { + tmp_path <- tempfile(fileext = ".h5ad") + mod_location <- paste("mod", modality_name, sep = "/") + h5src <- hdf5r::H5File\\$new(h5mu_path, "r") + h5dest <- hdf5r::H5File\\$new(tmp_path, "w") + # Copy over the child objects and the child attributes from root + # Root cannot be copied directly because it always exists and + # copying does not allow overwriting. + children <- hdf5r::list.objects(h5src, + path = mod_location, + full.names = FALSE, recursive = FALSE + ) + for (child in children) { + h5dest\\$obj_copy_from( + h5src, paste(mod_location, child, sep = "/"), + paste0("/", child) + ) + } + # Also copy the root attributes + root_attrs <- hdf5r::h5attr_names(x = h5src) + for (attr in root_attrs) { + h5a <- h5src\\$attr_open(attr_name = attr) + robj <- h5a\\$read() + h5dest\\$create_attr_by_name( + attr_name = attr, + obj_name = ".", + robj = robj, + space = h5a\\$get_space(), + dtype = h5a\\$get_type() + ) + } + h5src\\$close() + h5dest\\$close() + + tmp_path +} + +read_spatial_coordinates <- function(sce, spatial_coordinates_name) { + # Check if the specified spatial coordinates exist in reducedDims + reduced_dims <- SingleCellExperiment::reducedDims(sce) + if (par\\$obsm_spatial_coordinates %in% names(reduced_dims)) { + spatial_coords <- reduced_dims[[par\\$obsm_spatial_coordinates]] + if (ncol(spatial_coords) != 2) { + stop( + "Spatial coordinates must have 2 columns, but found ", + ncol(spatial_coords), " columns" + ) + } + # Set proper column names for spatial coordinates + colnames(spatial_coords) <- c("x", "y") + } else { + warning( + "Spatial coordinates '", par\\$obsm_spatial_coordinates, + "' not found in 
/**
 * Generate a workflow for VDSL3 modules.
 *
 * This function is called by the workflowFactory() function.
 *
 * Input channel: [id, input_map]
 * Output channel: [id, output_map]
 *
 * Internally, this workflow will convert the input channel
 * to a format which the Nextflow module will be able to handle:
 * [id, ...one list of paths per input file argument..., other_args, resources_dir].
 * After the process has run, the file outputs are collected back into a map
 * keyed by argument name; outputs that resolve to null are dropped.
 *
 * @param args      processed workflow arguments; 'key' is read here and the
 *                  whole map is forwarded to _vdsl3ProcessFactory
 * @param meta      module metadata; 'config.allArguments' and 'resources_dir' are read
 * @param rawScript script text forwarded to _vdsl3ProcessFactory
 * @return the workflow wrapping the generated process
 */
def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) {
  def key = args["key"]
  def processObj = null

  workflow processWf {
    take: input_
    main:

    // create the process lazily, the first time the workflow body is evaluated
    if (processObj == null) {
      processObj = _vdsl3ProcessFactory(args, meta, rawScript)
    }

    output_ = input_
      | map { tuple ->
        def id = tuple[0]
        def data_ = tuple[1]

        if (workflow.stubRun) {
          // add id if missing
          data_ = [id: 'stub'] + data_
        }

        // process input files separately
        def inputPaths = meta.config.allArguments
          .findAll { it.type == "file" && it.direction == "input" }
          .collect { par ->
            def val = data_.containsKey(par.plainName) ? data_[par.plainName] : []
            def inputFiles = []
            if (val == null) {
              inputFiles = []
            } else if (val instanceof List) {
              inputFiles = val
            } else if (val instanceof Path) {
              inputFiles = [ val ]
            } else {
              inputFiles = []
            }
            if (!workflow.stubRun) {
              // throw error when an input file doesn't exist
              inputFiles.each{ file ->
                assert file.exists() :
                  "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" +
                  " Required input file does not exist.\n" +
                  " Path: '$file'.\n" +
                  " Expected input file to exist"
              }
            }
            inputFiles
          }

        // remove input files
        def argsExclInputFiles = meta.config.allArguments
          .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) }
          .collectEntries { par ->
            def parName = par.plainName
            def val = data_[parName]
            if (par.multiple && val instanceof Collection) {
              val = val.join(par.multiple_sep)
            }
            if (par.direction == "output" && par.type == "file") {
              // substitute the $id / ${id} / $key / ${key} placeholders
              val = val
                .replaceAll('\\$id', id)
                .replaceAll('\\$\\{id\\}', id)
                .replaceAll('\\$key', key)
                .replaceAll('\\$\\{key\\}', key)
            }
            [parName, val]
          }

        [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ]
      }
      | processObj
      | map { output ->
        // output[0] is the id; output[index + 1] belongs to the index-th
        // file output argument
        def outputFiles = meta.config.allArguments
          .findAll { it.type == "file" && it.direction == "output" }
          .indexed()
          .collectEntries{ index, par ->
            def out = output[index + 1]
            // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678)
            // NOTE: the parentheses are required. '!out instanceof List' is
            // parsed as '(!out) instanceof List', which is always false.
            if (!(out instanceof List) || out.size() <= 1) {
              if (par.multiple) {
                out = []
              } else {
                assert !par.required :
                    "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" +
                    " Required output file is missing"
                out = null
              }
            } else if (out.size() == 2 && !par.multiple) {
              out = out[1]
            } else {
              out = out.drop(1)
            }
            [ par.plainName, out ]
          }

        // drop null outputs
        outputFiles.removeAll{it.value == null}

        [ output[0], outputFiles ]
      }
    emit: output_
  }

  return processWf
}
+ } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? 
+ if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! + def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? 
args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? '/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! 
-z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new 
nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/openpipeline_spatial/convert/from_h5mu_to_spatialexperiment", + "tag" : "build_main" + }, + "label" : [ + "lowmem", + "singlecpu" + ], + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. 
+ // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: output.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // work on copies (via deepClone) because newConfig and newParams are modified below + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow.config b/target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow.config new file mode 100644 index 0000000..082cbee --- /dev/null +++ b/target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'convert/from_h5mu_to_spatialexperiment' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'build_main' + description = 'Converts an h5mu file into a SpatialExperiment object.\n' + author = 'Dorien Roosen' +} + +process.container 
= 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory 
= 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow_labels.config b/target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow_labels.config new file mode 100644 index 0000000..541aaad --- /dev/null +++ b/target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow_labels.config @@ -0,0 +1,68 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * 
task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // CPU resources + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 10 } + withLabel: highcpu { cpus = 20 } + + // Memory resources + withLabel: lowmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } } + + // Disk space + // Nextflow apparently can't handle empty directives, i.e. + // withLabel: lowdisk {} + // so for that reason we have to add a dummy directive + withLabel: lowdisk { + dummyDirective = "dummyValue" + } + withLabel: middisk { + dummyDirective = "dummyValue" + } + withLabel: highdisk { + dummyDirective = "dummyValue" + } + withLabel: veryhighdisk { + dummyDirective = "dummyValue" + } + // NOTE: The above labels intentionally do not have an effect by default. 
+ // The user should set the disk space requirements by adding the following + // to the compute environment: + // + // withLabel: lowdisk { disk = { 20.GB * task.attempt } } + // withLabel: middisk { disk = { 100.GB * task.attempt } } + // withLabel: highdisk { disk = { 200.GB * task.attempt } } + // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } } +} + +def get_memory(to_compare) { // returns the requested memory 'to_compare', limited to process.maxMemory when that is set + if (!process.containsKey("maxMemory") || !process.maxMemory) { // no cap configured: pass the request through + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { // attempt number equals maxRetries: use the cap itself + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) { // request exceeds the cap + return process.maxMemory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { // any failure above is reported and aborts the run with exit status 1 + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} diff --git a/target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow_schema.json b/target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow_schema.json new file mode 100644 index 0000000..f81b336 --- /dev/null +++ b/target/nextflow/convert/from_h5mu_to_spatialexperiment/nextflow_schema.json @@ -0,0 +1,60 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "from_h5mu_to_spatialexperiment", + "description": "Converts an h5mu file into a SpatialExperiment object.\n", + "type": "object", + "$defs": { + "arguments": { + "title": "Arguments", + "type": "object", + "description": "No description", + "properties": { + "input": { + "type": "string", + "format": "path", + "exists": true, + "description": "Input h5mu file", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. 
" + }, + "modality": { + "type": "string", + "description": "Name of the modality to be converted.", + "help_text": "Type: `string`, multiple: `False`, required, default: `\"rna\"`. ", + "default": "rna" + }, + "obsm_spatial_coordinates": { + "type": "string", + "description": "Key in the .obsm field that contains the spatial coordinates", + "help_text": "Type: `string`, multiple: `False`. " + }, + "output": { + "type": "string", + "format": "path", + "description": "Output SpatialExperiment file", + "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output.rds\"`, direction: `output`, example: `\"output.rds\"`. ", + "default": "$id.$key.output.rds" + } + } + }, + "nextflow input-output arguments": { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + "publish_dir": { + "type": "string", + "description": "Path to an output directory.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. 
" + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/arguments" + }, + { + "$ref": "#/$defs/nextflow input-output arguments" + } + ] +} diff --git a/target/nextflow/convert/from_spatialdata_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_spatialdata_to_h5mu/.config.vsh.yaml index 750670c..3f16313 100644 --- a/target/nextflow/convert/from_spatialdata_to_h5mu/.config.vsh.yaml +++ b/target/nextflow/convert/from_spatialdata_to_h5mu/.config.vsh.yaml @@ -220,7 +220,7 @@ build_info: output: "target/nextflow/convert/from_spatialdata_to_h5mu" executable: "target/nextflow/convert/from_spatialdata_to_h5mu/main.nf" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_spatialdata_to_h5mu/main.nf b/target/nextflow/convert/from_spatialdata_to_h5mu/main.nf index a4eb513..bce0852 100644 --- a/target/nextflow/convert/from_spatialdata_to_h5mu/main.nf +++ b/target/nextflow/convert/from_spatialdata_to_h5mu/main.nf @@ -3330,7 +3330,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_spatialdata_to_h5mu", "viash_version" : "0.9.4", - "git_commit" : "6f308e6a3cca52f1283fbba734a3cdc858e18e1b", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_xenium_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_xenium_to_h5mu/.config.vsh.yaml index df798e9..d8ceebc 100644 --- a/target/nextflow/convert/from_xenium_to_h5mu/.config.vsh.yaml +++ b/target/nextflow/convert/from_xenium_to_h5mu/.config.vsh.yaml @@ -233,7 +233,7 @@ build_info: output: "target/nextflow/convert/from_xenium_to_h5mu" executable: 
"target/nextflow/convert/from_xenium_to_h5mu/main.nf" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_xenium_to_h5mu/main.nf b/target/nextflow/convert/from_xenium_to_h5mu/main.nf index db9ac88..2b1d6cb 100644 --- a/target/nextflow/convert/from_xenium_to_h5mu/main.nf +++ b/target/nextflow/convert/from_xenium_to_h5mu/main.nf @@ -3331,7 +3331,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_xenium_to_h5mu", "viash_version" : "0.9.4", - "git_commit" : "6f308e6a3cca52f1283fbba734a3cdc858e18e1b", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_xenium_to_spatialdata/.config.vsh.yaml b/target/nextflow/convert/from_xenium_to_spatialdata/.config.vsh.yaml index 7f774d0..b35f8bc 100644 --- a/target/nextflow/convert/from_xenium_to_spatialdata/.config.vsh.yaml +++ b/target/nextflow/convert/from_xenium_to_spatialdata/.config.vsh.yaml @@ -314,7 +314,7 @@ build_info: output: "target/nextflow/convert/from_xenium_to_spatialdata" executable: "target/nextflow/convert/from_xenium_to_spatialdata/main.nf" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_xenium_to_spatialdata/main.nf b/target/nextflow/convert/from_xenium_to_spatialdata/main.nf index fcb33d1..bcf2ef8 100644 --- a/target/nextflow/convert/from_xenium_to_spatialdata/main.nf +++ b/target/nextflow/convert/from_xenium_to_spatialdata/main.nf 
@@ -3425,7 +3425,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_xenium_to_spatialdata", "viash_version" : "0.9.4", - "git_commit" : "6f308e6a3cca52f1283fbba734a3cdc858e18e1b", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml b/target/nextflow/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml index 7e5a537..bceb62b 100644 --- a/target/nextflow/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml +++ b/target/nextflow/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml @@ -222,7 +222,7 @@ build_info: output: "target/nextflow/convert/from_xenium_to_spatialexperiment" executable: "target/nextflow/convert/from_xenium_to_spatialexperiment/main.nf" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_xenium_to_spatialexperiment/main.nf b/target/nextflow/convert/from_xenium_to_spatialexperiment/main.nf index 3d4f993..3080df9 100644 --- a/target/nextflow/convert/from_xenium_to_spatialexperiment/main.nf +++ b/target/nextflow/convert/from_xenium_to_spatialexperiment/main.nf @@ -3311,7 +3311,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_xenium_to_spatialexperiment", "viash_version" : "0.9.4", - "git_commit" : "6f308e6a3cca52f1283fbba734a3cdc858e18e1b", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/filter/subset_cosmx/.config.vsh.yaml 
b/target/nextflow/filter/subset_cosmx/.config.vsh.yaml index bc0d541..b95342c 100644 --- a/target/nextflow/filter/subset_cosmx/.config.vsh.yaml +++ b/target/nextflow/filter/subset_cosmx/.config.vsh.yaml @@ -227,7 +227,7 @@ build_info: output: "target/nextflow/filter/subset_cosmx" executable: "target/nextflow/filter/subset_cosmx/main.nf" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/filter/subset_cosmx/main.nf b/target/nextflow/filter/subset_cosmx/main.nf index 42d40d9..9a4c394 100644 --- a/target/nextflow/filter/subset_cosmx/main.nf +++ b/target/nextflow/filter/subset_cosmx/main.nf @@ -3333,7 +3333,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/filter/subset_cosmx", "viash_version" : "0.9.4", - "git_commit" : "6f308e6a3cca52f1283fbba734a3cdc858e18e1b", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/mapping/spaceranger_count/.config.vsh.yaml b/target/nextflow/mapping/spaceranger_count/.config.vsh.yaml index 2a8d69a..24f143f 100644 --- a/target/nextflow/mapping/spaceranger_count/.config.vsh.yaml +++ b/target/nextflow/mapping/spaceranger_count/.config.vsh.yaml @@ -426,7 +426,7 @@ build_info: output: "target/nextflow/mapping/spaceranger_count" executable: "target/nextflow/mapping/spaceranger_count/main.nf" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/mapping/spaceranger_count/main.nf 
b/target/nextflow/mapping/spaceranger_count/main.nf index 614a33a..0b25421 100644 --- a/target/nextflow/mapping/spaceranger_count/main.nf +++ b/target/nextflow/mapping/spaceranger_count/main.nf @@ -3548,7 +3548,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/mapping/spaceranger_count", "viash_version" : "0.9.4", - "git_commit" : "6f308e6a3cca52f1283fbba734a3cdc858e18e1b", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/workflows/multiomics/spatial_process_samples/.config.vsh.yaml b/target/nextflow/workflows/multiomics/spatial_process_samples/.config.vsh.yaml index 4e5668a..b083d96 100644 --- a/target/nextflow/workflows/multiomics/spatial_process_samples/.config.vsh.yaml +++ b/target/nextflow/workflows/multiomics/spatial_process_samples/.config.vsh.yaml @@ -644,7 +644,7 @@ build_info: output: "target/nextflow/workflows/multiomics/spatial_process_samples" executable: "target/nextflow/workflows/multiomics/spatial_process_samples/main.nf" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" dependencies: - "target/dependencies/github/openpipelines-bio/openpipeline/disable-scrublet_build/nextflow/workflows/multiomics/process_samples" diff --git a/target/nextflow/workflows/multiomics/spatial_process_samples/main.nf b/target/nextflow/workflows/multiomics/spatial_process_samples/main.nf index 5046780..0931eae 100644 --- a/target/nextflow/workflows/multiomics/spatial_process_samples/main.nf +++ b/target/nextflow/workflows/multiomics/spatial_process_samples/main.nf @@ -3813,7 +3813,7 @@ meta = [ "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/multiomics/spatial_process_samples", "viash_version" : "0.9.4", - 
"git_commit" : "6f308e6a3cca52f1283fbba734a3cdc858e18e1b", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/workflows/qc/spatial_qc/.config.vsh.yaml b/target/nextflow/workflows/qc/spatial_qc/.config.vsh.yaml index cf7a0f9..7347f93 100644 --- a/target/nextflow/workflows/qc/spatial_qc/.config.vsh.yaml +++ b/target/nextflow/workflows/qc/spatial_qc/.config.vsh.yaml @@ -387,7 +387,7 @@ build_info: output: "target/nextflow/workflows/qc/spatial_qc" executable: "target/nextflow/workflows/qc/spatial_qc/main.nf" viash_version: "0.9.4" - git_commit: "6f308e6a3cca52f1283fbba734a3cdc858e18e1b" + git_commit: "798a0cb2692eaac648662732a05bb48f951f36a0" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" dependencies: - "target/dependencies/github/openpipelines-bio/openpipeline/2.1.2/nextflow/workflows/qc/qc" diff --git a/target/nextflow/workflows/qc/spatial_qc/main.nf b/target/nextflow/workflows/qc/spatial_qc/main.nf index 87fefb3..7cf4659 100644 --- a/target/nextflow/workflows/qc/spatial_qc/main.nf +++ b/target/nextflow/workflows/qc/spatial_qc/main.nf @@ -3505,7 +3505,7 @@ meta = [ "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/qc/spatial_qc", "viash_version" : "0.9.4", - "git_commit" : "6f308e6a3cca52f1283fbba734a3cdc858e18e1b", + "git_commit" : "798a0cb2692eaac648662732a05bb48f951f36a0", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : {