From db66152ef1cdb1e669e20017df2c967b26e9f714 Mon Sep 17 00:00:00 2001 From: CI Date: Sat, 13 Dec 2025 14:07:35 +0000 Subject: [PATCH] Build branch openpipeline_spatial/niche-compass with version niche-compass to openpipeline_spatial on branch niche-compass (9151204) Build pipeline: openpipelines-bio.openpipeline-spatial.niche-compass-29shs Source commit: https://github.com/openpipelines-bio/openpipeline_spatial/commit/9151204629228da14d7c82f49f24c607efb9251e Source message: poc --- CHANGELOG.md | 2 + resources_test_scripts/niche_analysis.sh | 51 + .../requirements/testworkflows_setup.yaml | 10 + .../from_cosmx_to_spatialexperiment/test.R | 5 +- src/dataflow/obsp_block_concatenation/test.py | 1682 +++---- .../config.vsh.yaml | 4 +- .../gene_program_mask/config.vsh.yaml | 186 + src/nichecompass/gene_program_mask/script.py | 207 + src/nichecompass/gene_program_mask/test.py | 132 + src/nichecompass/nichecompass/config.vsh.yaml | 29 +- src/nichecompass/nichecompass/script.py | 45 +- src/nichecompass/nichecompass/test.py | 26 +- .../niche/nichecompass_leiden/config.vsh.yaml | 371 ++ .../nichecompass_leiden/integration_test.sh | 16 + .../niche/nichecompass_leiden/main.nf | 162 + .../niche/nichecompass_leiden/nextflow.config | 10 + .../niche/nichecompass_leiden/test.nf | 70 + .../niche/nichecompass_leiden/config.vsh.yaml | 25 + .../niche/nichecompass_leiden/script.py | 64 + .../filter/subset_cosmx/.config.vsh.yaml | 2 +- .../filter/subset_cosmx/subset_cosmx | 4 +- .../filter/subset_cosmx/.config.vsh.yaml | 2 +- .../nextflow/filter/subset_cosmx/main.nf | 2 +- .../nichecompass_leiden_test/.config.vsh.yaml | 185 + .../nextflow_labels.config | 0 .../nichecompass_leiden_test | 1134 +++++ .../nichecompass_leiden_test}/setup_logger.py | 0 .../nichecompass_leiden_test/.config.vsh.yaml | 185 + .../niche/nichecompass_leiden_test/main.nf | 3877 +++++++++++++++ .../nichecompass_leiden_test/nextflow.config | 126 + .../nextflow_labels.config | 0 
.../nichecompass_leiden_test}/setup_logger.py | 0 .../from_cells2stats_to_h5mu/.config.vsh.yaml | 2 +- .../from_cells2stats_to_h5mu | 4 +- .../from_cosmx_to_h5mu/.config.vsh.yaml | 2 +- .../from_cosmx_to_h5mu/from_cosmx_to_h5mu | 4 +- .../.config.vsh.yaml | 2 +- .../from_cosmx_to_spatialexperiment | 4 +- .../.config.vsh.yaml | 2 +- .../from_h5mu_to_spatialexperiment | 4 +- .../from_spatialdata_to_h5mu/.config.vsh.yaml | 2 +- .../from_spatialdata_to_h5mu | 4 +- .../from_xenium_to_h5mu/.config.vsh.yaml | 2 +- .../from_xenium_to_h5mu/from_xenium_to_h5mu | 4 +- .../.config.vsh.yaml | 2 +- .../from_xenium_to_spatialdata | 4 +- .../.config.vsh.yaml | 2 +- .../from_xenium_to_spatialexperiment | 4 +- .../obsp_block_concatenation/.config.vsh.yaml | 2 +- .../obsp_block_concatenation | 4 +- .../spaceranger_count/.config.vsh.yaml | 2 +- .../spaceranger_count/spaceranger_count | 4 +- .../.config.vsh.yaml | 10 +- .../nextflow_labels.config | 68 + .../setup_logger.py | 12 + .../spatial_neighborhood_graph} | 20 +- .../gene_program_mask/.config.vsh.yaml | 469 ++ .../gene_program_mask/gene_program_mask | 1930 ++++++++ .../gene_program_mask/nextflow_labels.config | 68 + .../gene_program_mask/setup_logger.py | 12 + .../nichecompass/.config.vsh.yaml | 59 +- .../nichecompass/nichecompass/nichecompass | 196 +- .../from_cells2stats_to_h5mu/.config.vsh.yaml | 2 +- .../convert/from_cells2stats_to_h5mu/main.nf | 2 +- .../from_cosmx_to_h5mu/.config.vsh.yaml | 2 +- .../convert/from_cosmx_to_h5mu/main.nf | 2 +- .../.config.vsh.yaml | 2 +- .../from_cosmx_to_spatialexperiment/main.nf | 2 +- .../.config.vsh.yaml | 2 +- .../from_h5mu_to_spatialexperiment/main.nf | 2 +- .../from_spatialdata_to_h5mu/.config.vsh.yaml | 2 +- .../convert/from_spatialdata_to_h5mu/main.nf | 2 +- .../from_xenium_to_h5mu/.config.vsh.yaml | 2 +- .../convert/from_xenium_to_h5mu/main.nf | 2 +- .../.config.vsh.yaml | 2 +- .../from_xenium_to_spatialdata/main.nf | 2 +- .../.config.vsh.yaml | 2 +- 
.../from_xenium_to_spatialexperiment/main.nf | 2 +- .../obsp_block_concatenation/.config.vsh.yaml | 2 +- .../dataflow/obsp_block_concatenation/main.nf | 2 +- .../spaceranger_count/.config.vsh.yaml | 2 +- .../mapping/spaceranger_count/main.nf | 2 +- .../.config.vsh.yaml | 10 +- .../spatial_neighborhood_graph}/main.nf | 12 +- .../nextflow.config | 2 +- .../nextflow_labels.config | 68 + .../nextflow_schema.json | 2 +- .../setup_logger.py | 12 + .../gene_program_mask/.config.vsh.yaml | 469 ++ .../nichecompass/gene_program_mask/main.nf | 4343 +++++++++++++++++ .../gene_program_mask/nextflow.config | 126 + .../gene_program_mask/nextflow_labels.config | 68 + .../gene_program_mask/nextflow_schema.json | 224 + .../gene_program_mask/setup_logger.py | 12 + .../nichecompass/.config.vsh.yaml | 59 +- .../nichecompass/nichecompass/main.nf | 115 +- .../spatial_process_samples/.config.vsh.yaml | 2 +- .../spatial_process_samples/main.nf | 2 +- .../nichecompass_leiden/.config.vsh.yaml | 816 ++++ .../niche/nichecompass_leiden/main.nf | 4282 ++++++++++++++++ .../niche/nichecompass_leiden/nextflow.config | 126 + .../nextflow_labels.config | 68 + .../nichecompass_leiden/nextflow_schema.json | 453 ++ .../workflows/qc/spatial_qc/.config.vsh.yaml | 2 +- .../nextflow/workflows/qc/spatial_qc/main.nf | 2 +- 105 files changed, 21553 insertions(+), 1280 deletions(-) create mode 100644 resources_test_scripts/niche_analysis.sh create mode 100644 src/base/requirements/testworkflows_setup.yaml create mode 100644 src/nichecompass/gene_program_mask/config.vsh.yaml create mode 100644 src/nichecompass/gene_program_mask/script.py create mode 100644 src/nichecompass/gene_program_mask/test.py create mode 100644 src/workflows/niche/nichecompass_leiden/config.vsh.yaml create mode 100755 src/workflows/niche/nichecompass_leiden/integration_test.sh create mode 100644 src/workflows/niche/nichecompass_leiden/main.nf create mode 100644 src/workflows/niche/nichecompass_leiden/nextflow.config create mode 100644 
src/workflows/niche/nichecompass_leiden/test.nf create mode 100644 src/workflows/test_workflows/niche/nichecompass_leiden/config.vsh.yaml create mode 100644 src/workflows/test_workflows/niche/nichecompass_leiden/script.py create mode 100644 target/_test/executable/test_workflows/niche/nichecompass_leiden_test/.config.vsh.yaml rename target/{executable/spatial_neighborhood_graph/neighbors => _test/executable/test_workflows/niche/nichecompass_leiden_test}/nextflow_labels.config (100%) create mode 100755 target/_test/executable/test_workflows/niche/nichecompass_leiden_test/nichecompass_leiden_test rename target/{executable/spatial_neighborhood_graph/neighbors => _test/executable/test_workflows/niche/nichecompass_leiden_test}/setup_logger.py (100%) create mode 100644 target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/.config.vsh.yaml create mode 100644 target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/main.nf create mode 100644 target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/nextflow.config rename target/{nextflow/spatial_neighborhood_graph/neighbors => _test/nextflow/test_workflows/niche/nichecompass_leiden_test}/nextflow_labels.config (100%) rename target/{nextflow/spatial_neighborhood_graph/neighbors => _test/nextflow/test_workflows/niche/nichecompass_leiden_test}/setup_logger.py (100%) rename target/executable/{spatial_neighborhood_graph/neighbors => neighbors/spatial_neighborhood_graph}/.config.vsh.yaml (96%) create mode 100644 target/executable/neighbors/spatial_neighborhood_graph/nextflow_labels.config create mode 100644 target/executable/neighbors/spatial_neighborhood_graph/setup_logger.py rename target/executable/{spatial_neighborhood_graph/neighbors/neighbors => neighbors/spatial_neighborhood_graph/spatial_neighborhood_graph} (98%) create mode 100644 target/executable/nichecompass/gene_program_mask/.config.vsh.yaml create mode 100755 target/executable/nichecompass/gene_program_mask/gene_program_mask 
create mode 100644 target/executable/nichecompass/gene_program_mask/nextflow_labels.config create mode 100644 target/executable/nichecompass/gene_program_mask/setup_logger.py rename target/nextflow/{spatial_neighborhood_graph/neighbors => neighbors/spatial_neighborhood_graph}/.config.vsh.yaml (96%) rename target/nextflow/{spatial_neighborhood_graph/neighbors => neighbors/spatial_neighborhood_graph}/main.nf (99%) rename target/nextflow/{spatial_neighborhood_graph/neighbors => neighbors/spatial_neighborhood_graph}/nextflow.config (98%) create mode 100644 target/nextflow/neighbors/spatial_neighborhood_graph/nextflow_labels.config rename target/nextflow/{spatial_neighborhood_graph/neighbors => neighbors/spatial_neighborhood_graph}/nextflow_schema.json (98%) create mode 100644 target/nextflow/neighbors/spatial_neighborhood_graph/setup_logger.py create mode 100644 target/nextflow/nichecompass/gene_program_mask/.config.vsh.yaml create mode 100644 target/nextflow/nichecompass/gene_program_mask/main.nf create mode 100644 target/nextflow/nichecompass/gene_program_mask/nextflow.config create mode 100644 target/nextflow/nichecompass/gene_program_mask/nextflow_labels.config create mode 100644 target/nextflow/nichecompass/gene_program_mask/nextflow_schema.json create mode 100644 target/nextflow/nichecompass/gene_program_mask/setup_logger.py create mode 100644 target/nextflow/workflows/niche/nichecompass_leiden/.config.vsh.yaml create mode 100644 target/nextflow/workflows/niche/nichecompass_leiden/main.nf create mode 100644 target/nextflow/workflows/niche/nichecompass_leiden/nextflow.config create mode 100644 target/nextflow/workflows/niche/nichecompass_leiden/nextflow_labels.config create mode 100644 target/nextflow/workflows/niche/nichecompass_leiden/nextflow_schema.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 68d1a3c..71224bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ * `convert/from_cosmx_to_h5mu`: Updated component to handle CosMx output bundles 
generated with AtoMx SIP versions < v1.3.2 (PR #25). +* `nichecompass/gene_program_mask`: Added a component to create a prior knowledge gene program mask for NicheCompass analysis (PR #27). + # openpipeline_spatial 0.1.0 ## NEW FUNCTIONALITY diff --git a/resources_test_scripts/niche_analysis.sh b/resources_test_scripts/niche_analysis.sh new file mode 100644 index 0000000..01f5a7a --- /dev/null +++ b/resources_test_scripts/niche_analysis.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +set -eo pipefail + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +DIR="resources_test/niche" +ID="nichecompass" + +# create tempdir +MY_TEMP="${VIASH_TEMP:-/tmp}" +TMPDIR=$(mktemp -d "$MY_TEMP/$ID-XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT + +if [ ! -d "$DIR" ]; then + mkdir -p "$DIR" + + orthologue_url="https://raw.githubusercontent.com/Lotfollahi-lab/nichecompass/refs/tags/0.3.2/data/gene_annotations/human_mouse_gene_orthologs.csv" + orthologue_file="human_mouse_gene_orthologs.csv" + wget "$orthologue_url" -O "$DIR/$orthologue_file" + + enzymes_url="https://raw.githubusercontent.com/Lotfollahi-lab/nichecompass/refs/tags/0.3.2/data/gene_programs/metabolite_enzyme_sensor_gps/mouse_metabolite_enzymes.tsv" + enzymes_file="mouse_metabolite_enzymes.tsv" + wget "$enzymes_url" -O "$DIR/$enzymes_file" + + sensors_url="https://raw.githubusercontent.com/Lotfollahi-lab/nichecompass/refs/tags/0.3.2/data/gene_programs/metabolite_enzyme_sensor_gps/mouse_metabolite_sensors.tsv" + sensors_file="mouse_metabolite_sensors.tsv" + wget "$sensors_url" -O "$DIR/$sensors_file" +fi + +gp_mask="prior_knowledge_gp_mask.json" +viash run src/nichecompass/gene_program_mask/config.vsh.yaml -- \ + --input_gene_orthologs_mapping_file "$DIR/$orthologue_file" \ + --input_metabolite_enzymes "$DIR/$enzymes_file" \ + --input_metabolite_sensors
"$DIR/$sensors_file" \ + --output "${DIR}/${gp_mask}" + +# Sync to S3 +aws s3 sync \ + --profile di \ + "$DIR" \ + s3://openpipelines-bio/openpipeline_spatial/resources_test/niche \ + --delete \ + --dryrun diff --git a/src/base/requirements/testworkflows_setup.yaml b/src/base/requirements/testworkflows_setup.yaml new file mode 100644 index 0000000..5e2520e --- /dev/null +++ b/src/base/requirements/testworkflows_setup.yaml @@ -0,0 +1,10 @@ +setup: + - type: apt + packages: + - procps + - git + - type: python + __merge__: + - /src/base/requirements/anndata_mudata.yaml + - /src/base/requirements/openpipeline_testutils.yaml + - /src/base/requirements/viashpy.yaml diff --git a/src/convert/from_cosmx_to_spatialexperiment/test.R b/src/convert/from_cosmx_to_spatialexperiment/test.R index 771035b..3424487 100644 --- a/src/convert/from_cosmx_to_spatialexperiment/test.R +++ b/src/convert/from_cosmx_to_spatialexperiment/test.R @@ -75,9 +75,8 @@ spe <- paste0(meta[["resources_dir"]], "/Lung5_Rep2_tiny") out_rds <- "output.rds" create_folder_archive <- function( - folder_path, - archive = "Lung5_Rep2_tiny.zip" -) { + folder_path, + archive = "Lung5_Rep2_tiny.zip") { old_wd <- getwd() on.exit(setwd(old_wd)) setwd(meta$resources_dir) diff --git a/src/dataflow/obsp_block_concatenation/test.py b/src/dataflow/obsp_block_concatenation/test.py index 1729f61..14000b5 100644 --- a/src/dataflow/obsp_block_concatenation/test.py +++ b/src/dataflow/obsp_block_concatenation/test.py @@ -1,11 +1,15 @@ import mudata as md import anndata as ad +import subprocess +from pathlib import Path import pandas as pd import numpy as np import pytest +import re import sys -from operator import attrgetter import scipy.sparse as sp +from openpipeline_testutils.utils import remove_annotation_column +from operator import attrgetter ## VIASH START meta = { @@ -324,844 +328,844 @@ def change_column_contents(): return wrapper -# def test_concatenate_samples_with_same_observation_ids_raises( -# run_component, -# 
wrap_anndata_to_mudata, -# write_mudata_to_file, -# sample_1_modality_1, -# sample_2_modality_1, -# random_h5mu_path, -# ): -# """ -# Test how concat handles overlapping observation IDs. -# This should raise. -# """ -# # introduce an overlapping observation -# input_1_mudata = wrap_anndata_to_mudata(sample_1_modality_1) -# old_obs_names = sample_2_modality_1.obs_names -# new_obs_names = old_obs_names.where( -# old_obs_names.isin([old_obs_names[0]]), sample_1_modality_1.obs.index[0] -# ) -# sample_2_modality_1.obs_names = new_obs_names -# input_2_mudata = wrap_anndata_to_mudata(sample_2_modality_1) - -# with pytest.raises(subprocess.CalledProcessError) as err: -# run_component( -# [ -# "--input_id", -# "foo;bar", -# "--input", -# write_mudata_to_file(input_1_mudata), -# "--input", -# write_mudata_to_file(input_2_mudata), -# "--output", -# random_h5mu_path(), -# "--other_axis_mode", -# "move", -# "--output_compression", -# "gzip", -# ] -# ) -# assert ( -# "ValueError: Observations are not unique across samples." -# in err.value.stdout.decode("utf-8") -# ) - - -# def test_concat_different_var_columns_per_sample( -# run_component, sample_1_h5mu, sample_2_h5mu, random_h5mu_path, write_mudata_to_file -# ): -# """ -# Test what happens when concatenating samples with differing auxiliary -# (like in .var) columns (present in 1 sample, absent in other). -# When concatenating the samples, all columns should be present in the -# resulting object, filling the values from samples with the missing -# column with NA. 
- -# Looking at Shared_feat here: - -# mod1 mod2 -# sample 1 present present -# sample 2 x x -# """ -# output_path = random_h5mu_path() -# # Before removing the 'Shared_feat' column from one of the samples, -# # check if they are present in both -# assert "Shared_feat" in sample_1_h5mu.var_keys() -# assert "Shared_feat" in sample_2_h5mu.var_keys() - -# sample_2_h5mu = remove_annotation_column(sample_2_h5mu, ["Shared_feat"], axis="var") -# assert "Shared_feat" in sample_1_h5mu.var_keys() -# assert "Shared_feat" not in sample_2_h5mu.var_keys() - -# # 'Shared_feat' column is not missing from sample2, which is what this test is about -# input_sample1_path = write_mudata_to_file(sample_1_h5mu) -# input_sample2_path = write_mudata_to_file(sample_2_h5mu) - -# run_component( -# [ -# "--input_id", -# "sample1;sample2", -# "--input", -# input_sample1_path, -# "--input", -# input_sample2_path, -# "--output", -# output_path, -# "--other_axis_mode", -# "move", -# ] -# ) - -# assert Path(output_path).is_file() -# concatenated_data = md.read(output_path) - -# data_sample1 = md.read(input_sample1_path) -# data_sample2 = md.read(input_sample2_path) - -# assert ( -# concatenated_data.n_vars -# == data_sample1.var.index.union(data_sample2.var.index).size -# ) - -# for mod_name in ("mod1", "mod2"): -# # Check if all features are present -# concatenated_mod = concatenated_data.mod[mod_name] -# sample1_original_mod = data_sample1.mod[mod_name] -# sample2_original_mod = data_sample2.mod[mod_name] - -# original_var_keys = set( -# sample1_original_mod.var_keys() -# + sample2_original_mod.var_keys() -# + list(sample1_original_mod.varm.keys()) -# + list(sample2_original_mod.varm.keys()) -# ) - -# assert original_var_keys == set(concatenated_mod.varm.keys()) | set( -# concatenated_mod.var.columns.tolist() -# ) - -# # Values from sample2 (which are also not in sample1) should have NA -# non_shared_features = data_sample2.var_names.difference(data_sample1.var_names) -# assert 
concatenated_data.var["Shared_feat"].loc[non_shared_features].isna().all() - -# # Values from sample1 should not have NA, and should be equal to the original values -# var_values = concatenated_data.var["Shared_feat"].loc[data_sample1.var_names] -# data_sample1.var["Shared_feat"].equals(var_values) - - -# def test_concat_different_columns_per_modality( -# run_component, sample_1_h5mu, sample_2_h5mu, write_mudata_to_file, random_h5mu_path -# ): -# """ -# Test what happens when concatenating samples that have auxiliary columns -# that is missing in one modality compared to the other, but the the column -# is missing from the same modalities in both samples. - -# Looking at Shared_feat here: - -# mod1 mod2 -# sample 1 x present -# sample 2 x present -# """ -# sample_2_h5mu = remove_annotation_column( -# sample_2_h5mu, ["Shared_feat"], axis="var", modality_name="mod1" -# ) -# sample_1_h5mu = remove_annotation_column( -# sample_1_h5mu, ["Shared_feat"], axis="var", modality_name="mod1" -# ) - -# input_sample1_path = write_mudata_to_file(sample_1_h5mu) -# input_sample2_path = write_mudata_to_file(sample_2_h5mu) - -# output_path = random_h5mu_path() -# run_component( -# [ -# "--input_id", -# "sample1;sample2", -# "--input", -# input_sample1_path, -# "--input", -# input_sample2_path, -# "--output", -# output_path, -# "--other_axis_mode", -# "move", -# ] -# ) - -# assert Path(output_path).is_file() is True -# concatenated_data = md.read(output_path) - -# data_sample1 = md.read(str(input_sample1_path)) -# data_sample2 = md.read(str(input_sample2_path)) - -# # Check if all features are present -# assert ( -# concatenated_data.n_vars -# == data_sample1.var.index.union(data_sample2.var.index).size -# ) - -# for mod_name in ("mod1", "mod2"): -# concatenated_mod = concatenated_data.mod[mod_name] -# data_sample1_mod = data_sample1.mod[mod_name] -# data_sample2_mod = data_sample2.mod[mod_name] -# original_var_keys = set( -# data_sample1_mod.var_keys() -# + 
data_sample2_mod.var_keys() -# + list(data_sample2_mod.varm.keys()) -# + list(data_sample1_mod.varm.keys()) -# ) - -# assert original_var_keys == set(concatenated_mod.varm.keys()) | set( -# concatenated_mod.var.columns.tolist() -# ) - -# # Check if the shared column stays removed from modality -# assert "Shared_feat" not in concatenated_data.mod["mod1"].var.columns - -# # Values from modality 1 have NA -# mod_1_features = data_sample1["mod1"].var_names.union( -# data_sample2["mod1"].var_names -# ) -# assert concatenated_data.var.loc[mod_1_features, "mod2:Shared_feat"].isna().all() - -# # Values from modalitu should not have NA, and should be equal to the original values -# mod2_data = pd.concat( -# [ -# data_sample2["mod2"].var["Shared_feat"], -# data_sample1["mod2"].var["Shared_feat"], -# ] -# ) -# mod2_features = mod2_data.index -# assert ( -# concatenated_data.var.loc[mod2_features, "mod2:Shared_feat"] -# .astype(str) -# .equals(mod2_data) -# ) - - -# def test_concat_different_columns_per_modality_and_per_sample( -# run_component, sample_1_h5mu, sample_2_h5mu, write_mudata_to_file, random_h5mu_path -# ): -# """ -# Test what happens when concatenating samples that have auxiliary columns -# that differ between the modalities and also between samples - - -# Looking at 'Feat4' from sample 2 here: -# mod1 mod2 -# sample 1 x x -# sample 2 x present -# """ - -# input_sample1_path = write_mudata_to_file(sample_1_h5mu) -# input_sample2_path = write_mudata_to_file(sample_2_h5mu) -# output_path = random_h5mu_path() - -# run_component( -# [ -# "--input_id", -# "mouse;human", -# "--input", -# input_sample1_path, -# "--input", -# input_sample2_path, -# "--output", -# output_path, -# "--other_axis_mode", -# "move", -# ] -# ) - -# assert Path(output_path).is_file() -# concatenated_data = md.read(output_path) - -# data_sample1 = md.read(input_sample1_path) -# data_sample2 = md.read(input_sample2_path) - -# # Check if all features are present -# assert ( -# 
concatenated_data.n_vars -# == data_sample1.var_names.union(data_sample2.var_names).size -# ) - -# # Check if all features are present -# for mod_name in ("mod1", "mod2"): -# concatenated_mod = concatenated_data.mod[mod_name] -# data_sample1_mod = data_sample1.mod[mod_name] -# data_sample2_mod = data_sample2.mod[mod_name] -# original_var_keys = set( -# data_sample1_mod.var_keys() -# + data_sample2_mod.var_keys() -# + list(data_sample2_mod.varm.keys()) -# + list(data_sample1_mod.varm.keys()) -# ) - -# assert original_var_keys == set( -# column_name.removeprefix("conflict_") -# for column_name in concatenated_mod.varm.keys() -# ) | set(concatenated_mod.var.columns.tolist()) - -# assert "Shared_feat" in concatenated_data.mod["mod2"].var.columns - -# # Values from modality 1 have NA -# mod_1_features = data_sample1["mod1"].var_names.union( -# data_sample2["mod1"].var_names -# ) -# assert concatenated_data.var.loc[mod_1_features, "mod2:Feat4"].isna().all() - -# # Values from modality 2 should not have NA if they originate from sample2 -# # These values should be equal to the original values -# mod2_data = data_sample2["mod2"].var["Feat4"].rename("mod2:Feat4") -# mod2_features = mod2_data.index -# assert ( -# concatenated_data.var.loc[mod2_features, "mod2:Feat4"] -# .astype(str) -# .equals(mod2_data) -# ) - -# # Values from modality2 should have NA if they originate from sample1 (and only from sample1) -# non_shared_features = data_sample1.var_names.difference(data_sample2.var_names) -# assert concatenated_data.var.loc[non_shared_features, "mod2:Feat4"].isna().all() - - -# @pytest.mark.parametrize( -# "test_value,test_value_dtype,expected", -# [ -# ("bar", "str", "bar"), -# (True, pd.BooleanDtype(), True), -# (1, pd.Int16Dtype(), 1), -# (0.1, float, 0.1), -# (0.1, np.float64, 0.1), -# (np.nan, np.float64, pd.NA), -# ], -# ) -# def test_concat_remove_na( -# run_component, -# sample_1_h5mu, -# sample_2_h5mu, -# write_mudata_to_file, -# random_h5mu_path, -# test_value, -# 
test_value_dtype, -# expected, -# change_column_contents, -# ): -# """ -# Test concatenation of samples where the column from one sample contains NA values -# NA values should be removed from the concatenated result - -# mod1 mod2 -# sample 1 NA NA -# sample 2 test_value NA -# """ -# change_column_contents( -# sample_1_h5mu, "var", "Shared_feat", {"mod1": np.nan, "mod2": np.nan} -# ) -# change_column_contents( -# sample_2_h5mu, "var", "Shared_feat", {"mod1": test_value, "mod2": np.nan} -# ) -# sample_2_h5mu.var["Shared_feat"] = sample_2_h5mu.var["Shared_feat"].astype( -# test_value_dtype -# ) -# output_path = random_h5mu_path() - -# run_component( -# [ -# "--input_id", -# "sample1;sample2", -# "--input", -# write_mudata_to_file(sample_1_h5mu), -# "--input", -# write_mudata_to_file(sample_2_h5mu), -# "--output", -# output_path, -# "--other_axis_mode", -# "move", -# ] -# ) - -# assert Path(output_path).is_file() -# concatenated_data = md.read(output_path) - -# # Values from modality 2 have NA -# mod_2_features = sample_1_h5mu["mod2"].var_names.union( -# sample_2_h5mu["mod2"].var_names -# ) -# assert concatenated_data.var.loc[mod_2_features, "Shared_feat"].isna().all() - -# # Values from modality 1 should not have NA if they originate from sample 1 -# # These values should be equal to the original values -# assert sample_1_h5mu["mod1"].var["Shared_feat"].isna().all() - -# # Values from modality 1 should hold a value if they originate from sample 2 -# mod1_features = sample_2_h5mu["mod1"].var_names.difference(sample_1_h5mu.var_names) -# if not pd.isna(expected): -# assert ( -# concatenated_data.var.loc[mod1_features, "Shared_feat"] == expected -# ).all() -# else: -# assert concatenated_data.var.loc[mod1_features, "Shared_feat"].isna().all() - -# # The 'Shared_feat' column for mod1 contains an overlapping feature. -# # For sample 1, it is NA, for sample 2 is is filled with test value. 
-# # The concat component should choose the test-value over NA -# shared_features = sample_2_h5mu.var_names.intersection(sample_1_h5mu.var_names) -# if not pd.isna(expected): -# assert ( -# concatenated_data.var.loc[shared_features, "Shared_feat"] == expected -# ).all() -# else: -# assert concatenated_data.var.loc[shared_features, "Shared_feat"].isna().all() - - -# def test_concat_invalid_h5_error_includes_path( -# run_component, tmp_path, sample_1_h5mu, write_mudata_to_file -# ): -# empty_file = tmp_path / "empty.h5mu" -# empty_file.touch() -# with pytest.raises(subprocess.CalledProcessError) as err: -# run_component( -# [ -# "--input_id", -# "mouse;empty", -# "--input", -# write_mudata_to_file(sample_1_h5mu), -# "--input", -# empty_file, -# "--output", -# "concat.h5mu", -# "--other_axis_mode", -# "move", -# ] -# ) -# assert re.search( -# rf"OSError: Failed to load .*{str(empty_file)}\. Is it a valid h5 file?", -# err.value.stdout.decode("utf-8"), -# ) - - -# @pytest.mark.parametrize( -# "test_value_1,value_1_dtype,test_value_2,value_2_dtype,expected", -# [ -# (1, float, "1", str, pd.CategoricalDtype(categories=["1.0", "1"])), -# (1, np.float64, "1", str, pd.CategoricalDtype(categories=["1.0", "1"])), -# (1, pd.Int16Dtype(), 2.0, pd.Int16Dtype(), pd.Int64Dtype()), -# (True, bool, False, bool, pd.BooleanDtype()), -# (True, pd.BooleanDtype(), False, bool, pd.BooleanDtype()), -# ("foo", str, "bar", str, pd.CategoricalDtype(categories=["bar", "foo"])), -# ], -# ) -# def test_concat_dtypes_per_modality( -# run_component, -# write_mudata_to_file, -# change_column_contents, -# sample_1_h5mu, -# sample_2_h5mu, -# test_value_1, -# value_1_dtype, -# test_value_2, -# value_2_dtype, -# expected, -# random_h5mu_path, -# ): -# """ -# Test joining column with different dtypes to make sure that they are writable. -# The default path is to convert all non-na values to strings and wrap the column into a categorical dtype. -# Here, we test on the level of a single modality only. 
Because the mod1 modality for both sample 1 and -# sample 2 contain a column 'test_col' and there is an overlapping feature name (overlapping_var_mod1), -# there is a conflict for this var column in mod 1 for this column. Upon concatenation, the column is moved -# to .varm, but for mod1 only. The column is concatenated for mod2 as planned. Here we check if the results -# for the test column in mod2 is still writable. -# """ -# change_column_contents( -# sample_1_h5mu, "var", "test_col", {"mod1": test_value_1, "mod2": test_value_1} -# ) -# sample_1_h5mu.var["test_col"] = sample_1_h5mu.var["test_col"].astype(value_1_dtype) -# change_column_contents( -# sample_2_h5mu, "var", "test_col", {"mod1": test_value_2, "mod2": test_value_2} -# ) -# sample_2_h5mu.var["test_col"] = sample_2_h5mu.var["test_col"].astype(value_2_dtype) - -# output_file = random_h5mu_path() -# run_component( -# [ -# "--input_id", -# "sample1;sample2", -# "--input", -# write_mudata_to_file(sample_1_h5mu), -# "--input", -# write_mudata_to_file(sample_2_h5mu), -# "--output", -# output_file, -# "--other_axis_mode", -# "move", -# ] -# ) -# concatenated_data = md.read(output_file) -# assert concatenated_data["mod2"].var["test_col"].dtype == expected - - -# @pytest.mark.parametrize( -# "test_value,value_dtype,expected", -# [ -# (1, float, pd.Int64Dtype()), -# (1, np.float64, pd.Int64Dtype()), -# (1, pd.Int16Dtype(), pd.Int16Dtype()), -# (True, bool, pd.BooleanDtype()), -# (True, pd.BooleanDtype(), pd.BooleanDtype()), -# ("foo", str, pd.CategoricalDtype(categories=["foo"])), -# ], -# ) -# def test_concat_dtypes_per_modality_multidim( -# run_component, -# write_mudata_to_file, -# sample_1_h5mu, -# sample_2_h5mu, -# test_value, -# value_dtype, -# expected, -# random_h5mu_path, -# ): -# """ -# Test if the result of concatenation is still writable when the input already contain -# data in .varm and this data is kept. 
Because we are joining observations, the dtype of this -# data may change and the result might not be writable anymore -# """ - -# sample_1_h5mu["mod1"].varm["test_df"] = pd.DataFrame( -# index=sample_1_h5mu["mod1"].var_names -# ) -# sample_1_h5mu["mod1"].varm["test_df"]["test_col"] = test_value -# sample_1_h5mu["mod1"].varm["test_df"]["test_col"] = ( -# sample_1_h5mu["mod1"].varm["test_df"]["test_col"].astype(value_dtype) -# ) - -# output_file = random_h5mu_path() -# run_component( -# [ -# "--input_id", -# "sample1;sample2", -# "--input", -# write_mudata_to_file(sample_1_h5mu), -# "--input", -# write_mudata_to_file(sample_2_h5mu), -# "--output", -# output_file, -# "--other_axis_mode", -# "move", -# ] -# ) -# concatenated_data = md.read(output_file) -# assert concatenated_data["mod1"].varm["test_df"]["test_col"].dtype == expected - - -# @pytest.mark.parametrize( -# "test_value_1,test_value_2,expected", -# [(1, "1", pd.CategoricalDtype(categories=["1.0", "1"]))], -# ) -# def test_concat_dtypes_global( -# run_component, -# write_mudata_to_file, -# change_column_contents, -# sample_1_h5mu, -# sample_2_h5mu, -# test_value_1, -# test_value_2, -# expected, -# random_h5mu_path, -# ): -# """ -# Test joining column with different dtypes to make sure that they are writable. -# The default path is to convert all non-na values to strings and wrap the column into a categorical dtype. -# Here, we test on the level of a column that is added to a global annotation matrix. -# """ -# change_column_contents( -# sample_1_h5mu, "var", "test_col", {"mod1": test_value_1, "mod2": test_value_1} -# ) -# change_column_contents( -# sample_2_h5mu, "var", "test_col", {"mod1": test_value_2, "mod2": test_value_2} -# ) -# sample1_mod1_names = sample_2_h5mu["mod1"].var_names -# # Here, we avoid a conflict between sample 1 and sample 2 by making sure there is no overlap in features -# # between sample 1 and sample 2 (no shared var_names). 
If this change would not be done, a different -# # value for sample 1 and sample 2 would be found by the concat component for the var feature -# # 'overlapping_var_mod1' for modality 'mod1'. The concat component would move the column for mod1 to -# # .varm because of this conflict, and in the global .var column of the concatenated object, only -# # a 'mod2:test_col' column would be present. But here, we want to test the column that is populated by -# # both 'mod1' and 'mod2' -# assert "overlapping_var_mod1" in sample1_mod1_names -# new_names = sample1_mod1_names.where( -# ~sample1_mod1_names.isin(["overlapping_var_mod1"]), "non_overlapping" -# ) -# sample_2_h5mu["mod1"].var_names = new_names -# sample_2_h5mu.update() -# output_file = random_h5mu_path() -# run_component( -# [ -# "--input_id", -# "sample1;sample2", -# "--input", -# write_mudata_to_file(sample_1_h5mu), -# "--input", -# write_mudata_to_file(sample_2_h5mu), -# "--output", -# output_file, -# "--other_axis_mode", -# "move", -# ] -# ) -# concatenated_data = md.read(output_file) -# assert concatenated_data.var["test_col"].dtype == expected - - -# def test_non_overlapping_modalities( -# run_component, sample_2_h5mu, sample_3_h5mu, random_h5mu_path, write_mudata_to_file -# ): -# """ -# Test that the component does not fail when the modalities are not shared between samples. 
-# """ -# output_path = random_h5mu_path() -# input_file_2 = write_mudata_to_file(sample_2_h5mu) -# input_file_3 = write_mudata_to_file(sample_3_h5mu) - -# run_component( -# [ -# "--input_id", -# "sample2;sample3", -# "--input", -# input_file_2, -# "--input", -# input_file_3, -# "--output", -# output_path, -# "--other_axis_mode", -# "move", -# ] -# ) -# output_data = md.read(output_path) -# assert set(output_data.mod.keys()) == {"mod1", "mod2", "mod3"} - - -# def test_resolve_annotation_conflict_missing_column( -# run_component, -# sample_1_h5mu, -# sample_2_h5mu, -# sample_3_h5mu, -# write_mudata_to_file, -# random_h5mu_path, -# ): -# """ -# Test using mode 'move' and resolving a conflict in metadata between the samples, -# but the metadata column is missing in one of the samples. -# """ -# output_path = random_h5mu_path() -# input_file_1 = write_mudata_to_file(sample_1_h5mu) -# input_file_2 = write_mudata_to_file(sample_2_h5mu) -# input_file_3 = write_mudata_to_file(sample_3_h5mu) - -# run_component( -# [ -# "--input_id", -# "sample1;sample2;sample3", -# "--input", -# input_file_1, -# "--input", -# input_file_2, -# "--input", -# input_file_3, -# "--output", -# output_path, -# "--other_axis_mode", -# "move", -# ] -# ) - -# concatenated_data = md.read(output_path) -# # 'Shared_feat' is defined for mod1 in sample 1 and 2 and there is a conflict -# assert "conflict_Shared_feat" in concatenated_data["mod1"].varm -# # 'Shared_feat' is defined for mod2 in sample 1 and 2 and there is no conflict -# assert "Shared_feat" in concatenated_data["mod2"].var.columns -# # 'Shared_feat' is not defined in any of the samples samples for modality 3 -# assert "Shared_feat" not in concatenated_data["mod3"].var.columns -# assert "Shared_feat" not in concatenated_data["mod3"].varm - - -# def test_mode_move( -# run_component, sample_1_h5mu, sample_2_h5mu, random_h5mu_path, write_mudata_to_file -# ): -# """ -# Test that in case of a conflict, the conflicting columns are move to the 
multidimensional annotation slot -# (.varm and .obsm). The key of the datafame in the slot should start with 'conflict_' followed by the name -# of the column and the columns of the dataframe should contain the sample names. -# """ -# output_path = random_h5mu_path() -# run_component( -# [ -# "--input_id", -# "sample1;sample2", -# "--input", -# write_mudata_to_file(sample_1_h5mu), -# "--input", -# write_mudata_to_file(sample_2_h5mu), -# "--output", -# output_path, -# "--other_axis_mode", -# "move", -# ] -# ) -# assert output_path.is_file() -# concatenated_data = md.read(output_path) - -# # Check if observations from all of the samples are present -# assert concatenated_data.n_obs == sample_1_h5mu.n_obs + sample_2_h5mu.n_obs - -# # Check if all modalities are present -# sample1_mods, sample2_mods = ( -# set(sample_1_h5mu.mod.keys()), -# set(sample_2_h5mu.mod.keys()), -# ) -# concatentated_mods = set(concatenated_data.mod.keys()) -# assert (sample1_mods | sample2_mods) == concatentated_mods - -# varm_check = { -# "mod1": ({"conflict_Shared_feat": ("sample1", "sample2")}), -# "mod2": {}, -# } - -# # Check if all features are present -# for mod_name in ("mod1", "mod2"): -# concatenated_mod = concatenated_data.mod[mod_name] -# sample_1_mod = sample_1_h5mu.mod[mod_name] -# sample_2_mod = sample_2_h5mu.mod[mod_name] -# original_varm_keys = set( -# list(sample_1_mod.varm.keys()) + list(sample_2_mod.varm.keys()) -# ) -# original_var_keys = ( -# set(sample_1_mod.var_keys() + sample_2_mod.var_keys()) | original_varm_keys -# ) - -# assert original_var_keys == set( -# column_name.removeprefix("conflict_") -# for column_name in concatenated_mod.varm.keys() -# ) | set(concatenated_mod.var.columns.tolist()) - -# varm_expected = varm_check[mod_name] -# assert set(concatenated_mod.varm.keys()) == set( -# varm_expected.keys() | original_varm_keys -# ) -# for varm_key, expected_columns in varm_expected.items(): -# assert tuple(concatenated_mod.varm[varm_key].columns) == 
expected_columns -# if not varm_expected: -# assert set(concatenated_mod.varm.keys()) == original_varm_keys -# assert concatenated_mod.obsm == {} - - -# # Execute this test multiple times, anndata.concat sometimes returns the observations in a different order -# @pytest.mark.parametrize("_", range(10)) -# def test_concat_var_obs_names_order( -# run_component, -# sample_1_h5mu, -# sample_2_h5mu, -# write_mudata_to_file, -# random_h5mu_path, -# _, -# ): -# """ -# Test that the var_names and obs_names are still linked to the correct count data. -# """ -# output_path = random_h5mu_path() -# sample_1_h5mu["mod1"].obs["sample_id"] = "sample1" -# sample_1_h5mu["mod2"].obs["sample_id"] = "sample1" -# sample_2_h5mu["mod1"].obs["sample_id"] = "sample2" -# sample_2_h5mu["mod2"].obs["sample_id"] = "sample2" -# run_component( -# [ -# "--input_id", -# "sample1;sample2", -# "--input", -# write_mudata_to_file(sample_1_h5mu), -# "--input", -# write_mudata_to_file(sample_2_h5mu), -# "--output", -# output_path, -# "--other_axis_mode", -# "move", -# ] -# ) -# assert output_path.is_file() -# for sample_name, sample_h5mu in { -# "sample1": sample_1_h5mu, -# "sample2": sample_2_h5mu, -# }.items(): -# for mod_name in ["mod1", "mod2"]: -# data_sample = sample_h5mu[mod_name].copy() -# processed_data_ad = md.read_h5ad(output_path, mod=mod_name) -# processed_data_ad = processed_data_ad[ -# processed_data_ad.obs["sample_id"] == sample_name -# ] -# processed_data_ad = processed_data_ad[:, data_sample.var_names] -# processed_data = pd.DataFrame( -# processed_data_ad.X, -# index=processed_data_ad.obs_names, -# columns=processed_data_ad.var_names, -# ) -# data_sample = pd.DataFrame( -# data_sample.X, -# index=data_sample.obs_names, -# columns=data_sample.var_names, -# ).reindex_like(processed_data) -# pd.testing.assert_frame_equal( -# processed_data, data_sample, check_dtype=False -# ) - - -# def test_keep_uns( -# run_component, sample_1_h5mu, sample_2_h5mu, write_mudata_to_file, random_h5mu_path 
-# ): -# sample_1_h5mu.uns["global_uns_sample1"] = "dolor" -# sample_1_h5mu.uns["overlapping_global"] = "sed" -# sample_2_h5mu.uns["global_uns_sample2"] = "amet" -# sample_2_h5mu.uns["overlapping_global"] = "elit" -# output_path = random_h5mu_path() -# run_component( -# [ -# "--input_id", -# "sample1;sample2", -# "--input", -# write_mudata_to_file(sample_1_h5mu), -# "--input", -# write_mudata_to_file(sample_2_h5mu), -# "--output", -# output_path, -# "--other_axis_mode", -# "move", -# "--uns_merge_mode", -# "make_unique", -# ] -# ) -# assert output_path.is_file() -# concatenated_data = md.read(output_path) -# mod1 = concatenated_data.mod["mod1"] -# mod2 = concatenated_data.mod["mod2"] -# assert set(concatenated_data.uns.keys()) == set( -# [ -# "global_uns_sample1", -# "global_uns_sample2", -# "sample1_overlapping_global", -# "sample2_overlapping_global", -# ] -# ) -# assert set(mod1.uns.keys()) == set( -# [ -# "sample1_overlapping_uns_key", -# "uns_unique_to_sample1", -# "sample2_overlapping_uns_key", -# "uns_unique_to_sample2", -# ] -# ) -# assert set(mod2.uns.keys()) == set() +def test_concatenate_samples_with_same_observation_ids_raises( + run_component, + wrap_anndata_to_mudata, + write_mudata_to_file, + sample_1_modality_1, + sample_2_modality_1, + random_h5mu_path, +): + """ + Test how concat handles overlapping observation IDs. + This should raise. 
+ """ + # introduce an overlapping observation + input_1_mudata = wrap_anndata_to_mudata(sample_1_modality_1) + old_obs_names = sample_2_modality_1.obs_names + new_obs_names = old_obs_names.where( + old_obs_names.isin([old_obs_names[0]]), sample_1_modality_1.obs.index[0] + ) + sample_2_modality_1.obs_names = new_obs_names + input_2_mudata = wrap_anndata_to_mudata(sample_2_modality_1) + + with pytest.raises(subprocess.CalledProcessError) as err: + run_component( + [ + "--input_id", + "foo;bar", + "--input", + write_mudata_to_file(input_1_mudata), + "--input", + write_mudata_to_file(input_2_mudata), + "--output", + random_h5mu_path(), + "--other_axis_mode", + "move", + "--output_compression", + "gzip", + ] + ) + assert ( + "ValueError: Observations are not unique across samples." + in err.value.stdout.decode("utf-8") + ) + + +def test_concat_different_var_columns_per_sample( + run_component, sample_1_h5mu, sample_2_h5mu, random_h5mu_path, write_mudata_to_file +): + """ + Test what happens when concatenating samples with differing auxiliary + (like in .var) columns (present in 1 sample, absent in other). + When concatenating the samples, all columns should be present in the + resulting object, filling the values from samples with the missing + column with NA. 
+ + Looking at Shared_feat here: + + mod1 mod2 + sample 1 present present + sample 2 x x + """ + output_path = random_h5mu_path() + # Before removing the 'Shared_feat' column from one of the samples, + # check if they are present in both + assert "Shared_feat" in sample_1_h5mu.var_keys() + assert "Shared_feat" in sample_2_h5mu.var_keys() + + sample_2_h5mu = remove_annotation_column(sample_2_h5mu, ["Shared_feat"], axis="var") + assert "Shared_feat" in sample_1_h5mu.var_keys() + assert "Shared_feat" not in sample_2_h5mu.var_keys() + + # 'Shared_feat' column is not missing from sample2, which is what this test is about + input_sample1_path = write_mudata_to_file(sample_1_h5mu) + input_sample2_path = write_mudata_to_file(sample_2_h5mu) + + run_component( + [ + "--input_id", + "sample1;sample2", + "--input", + input_sample1_path, + "--input", + input_sample2_path, + "--output", + output_path, + "--other_axis_mode", + "move", + ] + ) + + assert Path(output_path).is_file() + concatenated_data = md.read(output_path) + + data_sample1 = md.read(input_sample1_path) + data_sample2 = md.read(input_sample2_path) + + assert ( + concatenated_data.n_vars + == data_sample1.var.index.union(data_sample2.var.index).size + ) + + for mod_name in ("mod1", "mod2"): + # Check if all features are present + concatenated_mod = concatenated_data.mod[mod_name] + sample1_original_mod = data_sample1.mod[mod_name] + sample2_original_mod = data_sample2.mod[mod_name] + + original_var_keys = set( + sample1_original_mod.var_keys() + + sample2_original_mod.var_keys() + + list(sample1_original_mod.varm.keys()) + + list(sample2_original_mod.varm.keys()) + ) + + assert original_var_keys == set(concatenated_mod.varm.keys()) | set( + concatenated_mod.var.columns.tolist() + ) + + # Values from sample2 (which are also not in sample1) should have NA + non_shared_features = data_sample2.var_names.difference(data_sample1.var_names) + assert 
concatenated_data.var["Shared_feat"].loc[non_shared_features].isna().all() + + # Values from sample1 should not have NA, and should be equal to the original values + var_values = concatenated_data.var["Shared_feat"].loc[data_sample1.var_names] + data_sample1.var["Shared_feat"].equals(var_values) + + +def test_concat_different_columns_per_modality( + run_component, sample_1_h5mu, sample_2_h5mu, write_mudata_to_file, random_h5mu_path +): + """ + Test what happens when concatenating samples that have auxiliary columns + that is missing in one modality compared to the other, but the the column + is missing from the same modalities in both samples. + + Looking at Shared_feat here: + + mod1 mod2 + sample 1 x present + sample 2 x present + """ + sample_2_h5mu = remove_annotation_column( + sample_2_h5mu, ["Shared_feat"], axis="var", modality_name="mod1" + ) + sample_1_h5mu = remove_annotation_column( + sample_1_h5mu, ["Shared_feat"], axis="var", modality_name="mod1" + ) + + input_sample1_path = write_mudata_to_file(sample_1_h5mu) + input_sample2_path = write_mudata_to_file(sample_2_h5mu) + + output_path = random_h5mu_path() + run_component( + [ + "--input_id", + "sample1;sample2", + "--input", + input_sample1_path, + "--input", + input_sample2_path, + "--output", + output_path, + "--other_axis_mode", + "move", + ] + ) + + assert Path(output_path).is_file() is True + concatenated_data = md.read(output_path) + + data_sample1 = md.read(str(input_sample1_path)) + data_sample2 = md.read(str(input_sample2_path)) + + # Check if all features are present + assert ( + concatenated_data.n_vars + == data_sample1.var.index.union(data_sample2.var.index).size + ) + + for mod_name in ("mod1", "mod2"): + concatenated_mod = concatenated_data.mod[mod_name] + data_sample1_mod = data_sample1.mod[mod_name] + data_sample2_mod = data_sample2.mod[mod_name] + original_var_keys = set( + data_sample1_mod.var_keys() + + data_sample2_mod.var_keys() + + list(data_sample2_mod.varm.keys()) + + 
list(data_sample1_mod.varm.keys()) + ) + + assert original_var_keys == set(concatenated_mod.varm.keys()) | set( + concatenated_mod.var.columns.tolist() + ) + + # Check if the shared column stays removed from modality + assert "Shared_feat" not in concatenated_data.mod["mod1"].var.columns + + # Values from modality 1 have NA + mod_1_features = data_sample1["mod1"].var_names.union( + data_sample2["mod1"].var_names + ) + assert concatenated_data.var.loc[mod_1_features, "mod2:Shared_feat"].isna().all() + + # Values from modalitu should not have NA, and should be equal to the original values + mod2_data = pd.concat( + [ + data_sample2["mod2"].var["Shared_feat"], + data_sample1["mod2"].var["Shared_feat"], + ] + ) + mod2_features = mod2_data.index + assert ( + concatenated_data.var.loc[mod2_features, "mod2:Shared_feat"] + .astype(str) + .equals(mod2_data) + ) + + +def test_concat_different_columns_per_modality_and_per_sample( + run_component, sample_1_h5mu, sample_2_h5mu, write_mudata_to_file, random_h5mu_path +): + """ + Test what happens when concatenating samples that have auxiliary columns + that differ between the modalities and also between samples + + + Looking at 'Feat4' from sample 2 here: + mod1 mod2 + sample 1 x x + sample 2 x present + """ + + input_sample1_path = write_mudata_to_file(sample_1_h5mu) + input_sample2_path = write_mudata_to_file(sample_2_h5mu) + output_path = random_h5mu_path() + + run_component( + [ + "--input_id", + "mouse;human", + "--input", + input_sample1_path, + "--input", + input_sample2_path, + "--output", + output_path, + "--other_axis_mode", + "move", + ] + ) + + assert Path(output_path).is_file() + concatenated_data = md.read(output_path) + + data_sample1 = md.read(input_sample1_path) + data_sample2 = md.read(input_sample2_path) + + # Check if all features are present + assert ( + concatenated_data.n_vars + == data_sample1.var_names.union(data_sample2.var_names).size + ) + + # Check if all features are present + for mod_name in 
("mod1", "mod2"): + concatenated_mod = concatenated_data.mod[mod_name] + data_sample1_mod = data_sample1.mod[mod_name] + data_sample2_mod = data_sample2.mod[mod_name] + original_var_keys = set( + data_sample1_mod.var_keys() + + data_sample2_mod.var_keys() + + list(data_sample2_mod.varm.keys()) + + list(data_sample1_mod.varm.keys()) + ) + + assert original_var_keys == set( + column_name.removeprefix("conflict_") + for column_name in concatenated_mod.varm.keys() + ) | set(concatenated_mod.var.columns.tolist()) + + assert "Shared_feat" in concatenated_data.mod["mod2"].var.columns + + # Values from modality 1 have NA + mod_1_features = data_sample1["mod1"].var_names.union( + data_sample2["mod1"].var_names + ) + assert concatenated_data.var.loc[mod_1_features, "mod2:Feat4"].isna().all() + + # Values from modality 2 should not have NA if they originate from sample2 + # These values should be equal to the original values + mod2_data = data_sample2["mod2"].var["Feat4"].rename("mod2:Feat4") + mod2_features = mod2_data.index + assert ( + concatenated_data.var.loc[mod2_features, "mod2:Feat4"] + .astype(str) + .equals(mod2_data) + ) + + # Values from modality2 should have NA if they originate from sample1 (and only from sample1) + non_shared_features = data_sample1.var_names.difference(data_sample2.var_names) + assert concatenated_data.var.loc[non_shared_features, "mod2:Feat4"].isna().all() + + +@pytest.mark.parametrize( + "test_value,test_value_dtype,expected", + [ + ("bar", "str", "bar"), + (True, pd.BooleanDtype(), True), + (1, pd.Int16Dtype(), 1), + (0.1, float, 0.1), + (0.1, np.float64, 0.1), + (np.nan, np.float64, pd.NA), + ], +) +def test_concat_remove_na( + run_component, + sample_1_h5mu, + sample_2_h5mu, + write_mudata_to_file, + random_h5mu_path, + test_value, + test_value_dtype, + expected, + change_column_contents, +): + """ + Test concatenation of samples where the column from one sample contains NA values + NA values should be removed from the concatenated result 
+ + mod1 mod2 + sample 1 NA NA + sample 2 test_value NA + """ + change_column_contents( + sample_1_h5mu, "var", "Shared_feat", {"mod1": np.nan, "mod2": np.nan} + ) + change_column_contents( + sample_2_h5mu, "var", "Shared_feat", {"mod1": test_value, "mod2": np.nan} + ) + sample_2_h5mu.var["Shared_feat"] = sample_2_h5mu.var["Shared_feat"].astype( + test_value_dtype + ) + output_path = random_h5mu_path() + + run_component( + [ + "--input_id", + "sample1;sample2", + "--input", + write_mudata_to_file(sample_1_h5mu), + "--input", + write_mudata_to_file(sample_2_h5mu), + "--output", + output_path, + "--other_axis_mode", + "move", + ] + ) + + assert Path(output_path).is_file() + concatenated_data = md.read(output_path) + + # Values from modality 2 have NA + mod_2_features = sample_1_h5mu["mod2"].var_names.union( + sample_2_h5mu["mod2"].var_names + ) + assert concatenated_data.var.loc[mod_2_features, "Shared_feat"].isna().all() + + # Values from modality 1 should not have NA if they originate from sample 1 + # These values should be equal to the original values + assert sample_1_h5mu["mod1"].var["Shared_feat"].isna().all() + + # Values from modality 1 should hold a value if they originate from sample 2 + mod1_features = sample_2_h5mu["mod1"].var_names.difference(sample_1_h5mu.var_names) + if not pd.isna(expected): + assert ( + concatenated_data.var.loc[mod1_features, "Shared_feat"] == expected + ).all() + else: + assert concatenated_data.var.loc[mod1_features, "Shared_feat"].isna().all() + + # The 'Shared_feat' column for mod1 contains an overlapping feature. + # For sample 1, it is NA, for sample 2 is is filled with test value. 
+ # The concat component should choose the test-value over NA + shared_features = sample_2_h5mu.var_names.intersection(sample_1_h5mu.var_names) + if not pd.isna(expected): + assert ( + concatenated_data.var.loc[shared_features, "Shared_feat"] == expected + ).all() + else: + assert concatenated_data.var.loc[shared_features, "Shared_feat"].isna().all() + + +def test_concat_invalid_h5_error_includes_path( + run_component, tmp_path, sample_1_h5mu, write_mudata_to_file +): + empty_file = tmp_path / "empty.h5mu" + empty_file.touch() + with pytest.raises(subprocess.CalledProcessError) as err: + run_component( + [ + "--input_id", + "mouse;empty", + "--input", + write_mudata_to_file(sample_1_h5mu), + "--input", + empty_file, + "--output", + "concat.h5mu", + "--other_axis_mode", + "move", + ] + ) + assert re.search( + rf"OSError: Failed to load .*{str(empty_file)}\. Is it a valid h5 file?", + err.value.stdout.decode("utf-8"), + ) + + +@pytest.mark.parametrize( + "test_value_1,value_1_dtype,test_value_2,value_2_dtype,expected", + [ + (1, float, "1", str, pd.CategoricalDtype(categories=["1.0", "1"])), + (1, np.float64, "1", str, pd.CategoricalDtype(categories=["1.0", "1"])), + (1, pd.Int16Dtype(), 2.0, pd.Int16Dtype(), pd.Int64Dtype()), + (True, bool, False, bool, pd.BooleanDtype()), + (True, pd.BooleanDtype(), False, bool, pd.BooleanDtype()), + ("foo", str, "bar", str, pd.CategoricalDtype(categories=["bar", "foo"])), + ], +) +def test_concat_dtypes_per_modality( + run_component, + write_mudata_to_file, + change_column_contents, + sample_1_h5mu, + sample_2_h5mu, + test_value_1, + value_1_dtype, + test_value_2, + value_2_dtype, + expected, + random_h5mu_path, +): + """ + Test joining column with different dtypes to make sure that they are writable. + The default path is to convert all non-na values to strings and wrap the column into a categorical dtype. + Here, we test on the level of a single modality only. 
Because the mod1 modality for both sample 1 and + sample 2 contain a column 'test_col' and there is an overlapping feature name (overlapping_var_mod1), + there is a conflict for this var column in mod 1 for this column. Upon concatenation, the column is moved + to .varm, but for mod1 only. The column is concatenated for mod2 as planned. Here we check if the results + for the test column in mod2 is still writable. + """ + change_column_contents( + sample_1_h5mu, "var", "test_col", {"mod1": test_value_1, "mod2": test_value_1} + ) + sample_1_h5mu.var["test_col"] = sample_1_h5mu.var["test_col"].astype(value_1_dtype) + change_column_contents( + sample_2_h5mu, "var", "test_col", {"mod1": test_value_2, "mod2": test_value_2} + ) + sample_2_h5mu.var["test_col"] = sample_2_h5mu.var["test_col"].astype(value_2_dtype) + + output_file = random_h5mu_path() + run_component( + [ + "--input_id", + "sample1;sample2", + "--input", + write_mudata_to_file(sample_1_h5mu), + "--input", + write_mudata_to_file(sample_2_h5mu), + "--output", + output_file, + "--other_axis_mode", + "move", + ] + ) + concatenated_data = md.read(output_file) + assert concatenated_data["mod2"].var["test_col"].dtype == expected + + +@pytest.mark.parametrize( + "test_value,value_dtype,expected", + [ + (1, float, pd.Int64Dtype()), + (1, np.float64, pd.Int64Dtype()), + (1, pd.Int16Dtype(), pd.Int16Dtype()), + (True, bool, pd.BooleanDtype()), + (True, pd.BooleanDtype(), pd.BooleanDtype()), + ("foo", str, pd.CategoricalDtype(categories=["foo"])), + ], +) +def test_concat_dtypes_per_modality_multidim( + run_component, + write_mudata_to_file, + sample_1_h5mu, + sample_2_h5mu, + test_value, + value_dtype, + expected, + random_h5mu_path, +): + """ + Test if the result of concatenation is still writable when the input already contain + data in .varm and this data is kept. 
Because we are joining observations, the dtype of this + data may change and the result might not be writable anymore + """ + + sample_1_h5mu["mod1"].varm["test_df"] = pd.DataFrame( + index=sample_1_h5mu["mod1"].var_names + ) + sample_1_h5mu["mod1"].varm["test_df"]["test_col"] = test_value + sample_1_h5mu["mod1"].varm["test_df"]["test_col"] = ( + sample_1_h5mu["mod1"].varm["test_df"]["test_col"].astype(value_dtype) + ) + + output_file = random_h5mu_path() + run_component( + [ + "--input_id", + "sample1;sample2", + "--input", + write_mudata_to_file(sample_1_h5mu), + "--input", + write_mudata_to_file(sample_2_h5mu), + "--output", + output_file, + "--other_axis_mode", + "move", + ] + ) + concatenated_data = md.read(output_file) + assert concatenated_data["mod1"].varm["test_df"]["test_col"].dtype == expected + + +@pytest.mark.parametrize( + "test_value_1,test_value_2,expected", + [(1, "1", pd.CategoricalDtype(categories=["1.0", "1"]))], +) +def test_concat_dtypes_global( + run_component, + write_mudata_to_file, + change_column_contents, + sample_1_h5mu, + sample_2_h5mu, + test_value_1, + test_value_2, + expected, + random_h5mu_path, +): + """ + Test joining column with different dtypes to make sure that they are writable. + The default path is to convert all non-na values to strings and wrap the column into a categorical dtype. + Here, we test on the level of a column that is added to a global annotation matrix. + """ + change_column_contents( + sample_1_h5mu, "var", "test_col", {"mod1": test_value_1, "mod2": test_value_1} + ) + change_column_contents( + sample_2_h5mu, "var", "test_col", {"mod1": test_value_2, "mod2": test_value_2} + ) + sample1_mod1_names = sample_2_h5mu["mod1"].var_names + # Here, we avoid a conflict between sample 1 and sample 2 by making sure there is no overlap in features + # between sample 1 and sample 2 (no shared var_names). 
If this change would not be done, a different + # value for sample 1 and sample 2 would be found by the concat component for the var feature + # 'overlapping_var_mod1' for modality 'mod1'. The concat component would move the column for mod1 to + # .varm because of this conflict, and in the global .var column of the concatenated object, only + # a 'mod2:test_col' column would be present. But here, we want to test the column that is populated by + # both 'mod1' and 'mod2' + assert "overlapping_var_mod1" in sample1_mod1_names + new_names = sample1_mod1_names.where( + ~sample1_mod1_names.isin(["overlapping_var_mod1"]), "non_overlapping" + ) + sample_2_h5mu["mod1"].var_names = new_names + sample_2_h5mu.update() + output_file = random_h5mu_path() + run_component( + [ + "--input_id", + "sample1;sample2", + "--input", + write_mudata_to_file(sample_1_h5mu), + "--input", + write_mudata_to_file(sample_2_h5mu), + "--output", + output_file, + "--other_axis_mode", + "move", + ] + ) + concatenated_data = md.read(output_file) + assert concatenated_data.var["test_col"].dtype == expected + + +def test_non_overlapping_modalities( + run_component, sample_2_h5mu, sample_3_h5mu, random_h5mu_path, write_mudata_to_file +): + """ + Test that the component does not fail when the modalities are not shared between samples. 
+ """ + output_path = random_h5mu_path() + input_file_2 = write_mudata_to_file(sample_2_h5mu) + input_file_3 = write_mudata_to_file(sample_3_h5mu) + + run_component( + [ + "--input_id", + "sample2;sample3", + "--input", + input_file_2, + "--input", + input_file_3, + "--output", + output_path, + "--other_axis_mode", + "move", + ] + ) + output_data = md.read(output_path) + assert set(output_data.mod.keys()) == {"mod1", "mod2", "mod3"} + + +def test_resolve_annotation_conflict_missing_column( + run_component, + sample_1_h5mu, + sample_2_h5mu, + sample_3_h5mu, + write_mudata_to_file, + random_h5mu_path, +): + """ + Test using mode 'move' and resolving a conflict in metadata between the samples, + but the metadata column is missing in one of the samples. + """ + output_path = random_h5mu_path() + input_file_1 = write_mudata_to_file(sample_1_h5mu) + input_file_2 = write_mudata_to_file(sample_2_h5mu) + input_file_3 = write_mudata_to_file(sample_3_h5mu) + + run_component( + [ + "--input_id", + "sample1;sample2;sample3", + "--input", + input_file_1, + "--input", + input_file_2, + "--input", + input_file_3, + "--output", + output_path, + "--other_axis_mode", + "move", + ] + ) + + concatenated_data = md.read(output_path) + # 'Shared_feat' is defined for mod1 in sample 1 and 2 and there is a conflict + assert "conflict_Shared_feat" in concatenated_data["mod1"].varm + # 'Shared_feat' is defined for mod2 in sample 1 and 2 and there is no conflict + assert "Shared_feat" in concatenated_data["mod2"].var.columns + # 'Shared_feat' is not defined in any of the samples samples for modality 3 + assert "Shared_feat" not in concatenated_data["mod3"].var.columns + assert "Shared_feat" not in concatenated_data["mod3"].varm + + +def test_mode_move( + run_component, sample_1_h5mu, sample_2_h5mu, random_h5mu_path, write_mudata_to_file +): + """ + Test that in case of a conflict, the conflicting columns are move to the multidimensional annotation slot + (.varm and .obsm). 
The key of the datafame in the slot should start with 'conflict_' followed by the name + of the column and the columns of the dataframe should contain the sample names. + """ + output_path = random_h5mu_path() + run_component( + [ + "--input_id", + "sample1;sample2", + "--input", + write_mudata_to_file(sample_1_h5mu), + "--input", + write_mudata_to_file(sample_2_h5mu), + "--output", + output_path, + "--other_axis_mode", + "move", + ] + ) + assert output_path.is_file() + concatenated_data = md.read(output_path) + + # Check if observations from all of the samples are present + assert concatenated_data.n_obs == sample_1_h5mu.n_obs + sample_2_h5mu.n_obs + + # Check if all modalities are present + sample1_mods, sample2_mods = ( + set(sample_1_h5mu.mod.keys()), + set(sample_2_h5mu.mod.keys()), + ) + concatentated_mods = set(concatenated_data.mod.keys()) + assert (sample1_mods | sample2_mods) == concatentated_mods + + varm_check = { + "mod1": ({"conflict_Shared_feat": ("sample1", "sample2")}), + "mod2": {}, + } + + # Check if all features are present + for mod_name in ("mod1", "mod2"): + concatenated_mod = concatenated_data.mod[mod_name] + sample_1_mod = sample_1_h5mu.mod[mod_name] + sample_2_mod = sample_2_h5mu.mod[mod_name] + original_varm_keys = set( + list(sample_1_mod.varm.keys()) + list(sample_2_mod.varm.keys()) + ) + original_var_keys = ( + set(sample_1_mod.var_keys() + sample_2_mod.var_keys()) | original_varm_keys + ) + + assert original_var_keys == set( + column_name.removeprefix("conflict_") + for column_name in concatenated_mod.varm.keys() + ) | set(concatenated_mod.var.columns.tolist()) + + varm_expected = varm_check[mod_name] + assert set(concatenated_mod.varm.keys()) == set( + varm_expected.keys() | original_varm_keys + ) + for varm_key, expected_columns in varm_expected.items(): + assert tuple(concatenated_mod.varm[varm_key].columns) == expected_columns + if not varm_expected: + assert set(concatenated_mod.varm.keys()) == original_varm_keys + assert 
concatenated_mod.obsm == {} + + +# Execute this test multiple times, anndata.concat sometimes returns the observations in a different order +@pytest.mark.parametrize("_", range(10)) +def test_concat_var_obs_names_order( + run_component, + sample_1_h5mu, + sample_2_h5mu, + write_mudata_to_file, + random_h5mu_path, + _, +): + """ + Test that the var_names and obs_names are still linked to the correct count data. + """ + output_path = random_h5mu_path() + sample_1_h5mu["mod1"].obs["sample_id"] = "sample1" + sample_1_h5mu["mod2"].obs["sample_id"] = "sample1" + sample_2_h5mu["mod1"].obs["sample_id"] = "sample2" + sample_2_h5mu["mod2"].obs["sample_id"] = "sample2" + run_component( + [ + "--input_id", + "sample1;sample2", + "--input", + write_mudata_to_file(sample_1_h5mu), + "--input", + write_mudata_to_file(sample_2_h5mu), + "--output", + output_path, + "--other_axis_mode", + "move", + ] + ) + assert output_path.is_file() + for sample_name, sample_h5mu in { + "sample1": sample_1_h5mu, + "sample2": sample_2_h5mu, + }.items(): + for mod_name in ["mod1", "mod2"]: + data_sample = sample_h5mu[mod_name].copy() + processed_data_ad = md.read_h5ad(output_path, mod=mod_name) + processed_data_ad = processed_data_ad[ + processed_data_ad.obs["sample_id"] == sample_name + ] + processed_data_ad = processed_data_ad[:, data_sample.var_names] + processed_data = pd.DataFrame( + processed_data_ad.X, + index=processed_data_ad.obs_names, + columns=processed_data_ad.var_names, + ) + data_sample = pd.DataFrame( + data_sample.X, + index=data_sample.obs_names, + columns=data_sample.var_names, + ).reindex_like(processed_data) + pd.testing.assert_frame_equal( + processed_data, data_sample, check_dtype=False + ) + + +def test_keep_uns( + run_component, sample_1_h5mu, sample_2_h5mu, write_mudata_to_file, random_h5mu_path +): + sample_1_h5mu.uns["global_uns_sample1"] = "dolor" + sample_1_h5mu.uns["overlapping_global"] = "sed" + sample_2_h5mu.uns["global_uns_sample2"] = "amet" + 
sample_2_h5mu.uns["overlapping_global"] = "elit" + output_path = random_h5mu_path() + run_component( + [ + "--input_id", + "sample1;sample2", + "--input", + write_mudata_to_file(sample_1_h5mu), + "--input", + write_mudata_to_file(sample_2_h5mu), + "--output", + output_path, + "--other_axis_mode", + "move", + "--uns_merge_mode", + "make_unique", + ] + ) + assert output_path.is_file() + concatenated_data = md.read(output_path) + mod1 = concatenated_data.mod["mod1"] + mod2 = concatenated_data.mod["mod2"] + assert set(concatenated_data.uns.keys()) == set( + [ + "global_uns_sample1", + "global_uns_sample2", + "sample1_overlapping_global", + "sample2_overlapping_global", + ] + ) + assert set(mod1.uns.keys()) == set( + [ + "sample1_overlapping_uns_key", + "uns_unique_to_sample1", + "sample2_overlapping_uns_key", + "uns_unique_to_sample2", + ] + ) + assert set(mod2.uns.keys()) == set() def test_subset_modalities( diff --git a/src/neighbors/spatial_neighborhood_graph/config.vsh.yaml b/src/neighbors/spatial_neighborhood_graph/config.vsh.yaml index 507b9e4..77c2c36 100644 --- a/src/neighbors/spatial_neighborhood_graph/config.vsh.yaml +++ b/src/neighbors/spatial_neighborhood_graph/config.vsh.yaml @@ -1,5 +1,5 @@ -name: neighbors -namespace: spatial_neighborhood_graph +name: spatial_neighborhood_graph +namespace: neighbors scope: public description: Calculates a spatial neighborhood graph. diff --git a/src/nichecompass/gene_program_mask/config.vsh.yaml b/src/nichecompass/gene_program_mask/config.vsh.yaml new file mode 100644 index 0000000..8f1ac29 --- /dev/null +++ b/src/nichecompass/gene_program_mask/config.vsh.yaml @@ -0,0 +1,186 @@ +name: gene_program_mask +namespace: nichecompass +scope: public +description: Generation of a prior knowledge gene program mask for NicheCompass. 
+ +authors: + - __merge__: /src/authors/dorien_roosen.yaml + roles: [ maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: "--input_gene_orthologs_mapping_file" + type: file + required: false + description: | + Path to a CSV file mapping human genes to mouse orthologs. + Required for the OmniPath and NicheNet masks if `--species mouse`. + - name: "--input_metabolite_enzymes" + type: file + required: false + description: | + Path to the MeBocost metabolite-enzymes TSV file. + Required for generating the MeBocost gene program mask. + - name: "--input_metabolite_sensors" + type: file + required: false + description: | + Path to the MeBocost metabolite-sensors TSV file. + Required for generating the MeBocost gene program mask. + + - name: Parameters + arguments: + - name: "--species" + type: string + choices: ["human", "mouse"] + default: "human" + description: Species of the organism (human or mouse). + - name: "--create_omnipath_gene_program_mask" + type: boolean + default: true + description: Whether to create the OmniPath gene program mask. + - name: "--create_nichenet_gene_program_mask" + type: boolean + default: true + description: Whether to create the NicheNet gene program mask. + - name: "--create_mebocost_gene_program_mask" + type: boolean + default: true + description: Whether to create the MeBocost gene program mask. + - name: "--create_collectri_tf_gene_program_mask" + type: boolean + default: true + description: Whether to create the CollecTRI TF gene program mask. + - name: "--overlap_thresh_target_genes" + type: double + default: 1.0 + min: 0.0 + max: 1.0 + description: | + The minimum ratio of target genes that need to overlap between a GP without source genes and another GP for the GP to be dropped. + Gene programs with different source genes are never combined or dropped. 
+ + - name: Omnipath Parameters + arguments: + - name: "--omnipath_min_curation_effort" + type: integer + default: 2 + description: Minimum number of times an interaction has to be described in a paper and mentioned in a database to be included in the OmniPath gene programs. + + - name: NicheNet Parameters + arguments: + - name: "--nichenet_version" + type: string + choices: ["v1", "v2"] + default: "v2" + description: | + Version of the NicheNet ligand receptor network and ligand target gene regulatory potential matrix. + `v2` is an improved version of `v1`, and has separate files for mouse and human. + - name: "--nichenet_keep_target_genes_ratio" + type: double + default: 1.0 + description: | + Ratio of target genes that are kept compared to total target genes. + This ratio is applied over the entire matrix (not on gene program level), and determines the `all_gps_score_keep_threshold`, which will be used to filter target genes according to their regulatory potential scores. + - name: "--nichenet_max_n_target_genes_per_gp" + type: integer + default: 250 + description: | + Maximum number of target genes per gene program. If a gene program has more target genes than `max_n_target_genes_per_gp`, only the `max_n_target_genes_per_gp` target genes with the highest regulatory potential scores will be kept. + Default value is chosen based on MultiNicheNet specification (see Browaeys, R. et al. MultiNicheNet: a flexible framework for differential cell-cell communication analysis from multi-sample multi-condition single-cell transcriptomics data. bioRxiv (2023) doi:10.1101/2023.06.13.544751). + + - name: Outputs + arguments: + - name: "--output" + type: file + direction: output + required: true + description: Path to the output gene program mask JSON file. + example: gp_mask.json + - name: "--output_omnipath_lr_network" + type: file + direction: output + required: false + description: Path to the output OmniPath ligand-receptor network CSV file.
+ example: omnipath_lr_network.csv + - name: "--output_nichenet_lr_network" + type: file + direction: output + required: false + description: Path to the output NicheNet ligand-receptor network CSV file. + example: nichenet_lr_network.csv + - name: "--output_nichenet_ligand_target_matrix" + type: file + direction: output + required: false + description: Path to the output NicheNet ligand-target gene regulatory potential matrix file. + example: nichenet_ligand_target_matrix.csv + - name: "--output_collectri_tf_network" + type: file + direction: output + required: false + description: Path to the output CollecTRI TF-target gene regulatory potential network CSV file. + example: collectri_tf_network.csv + - name: "--output_omnipath_gp_gene_count_distributions" + type: file + direction: output + required: false + description: Path to save the OmniPath gene program gene count distributions plot. + example: omnipath_gp_gene_count_distributions.svg + - name: "--output_nichenet_gp_gene_count_distributions" + type: file + direction: output + required: false + description: Path to save the NicheNet gene program gene count distributions plot. + example: nichenet_gp_gene_count_distributions.svg + - name: "--output_mebocost_gp_gene_count_distributions" + type: file + direction: output + required: false + description: Path to save the MeBocost gene program gene count distributions plot. + example: mebocost_gp_gene_count_distributions.svg + - name: "--output_collectri_tf_gp_gene_count_distributions" + type: file + direction: output + required: false + description: Path to save the CollecTRI TF gene program gene count distributions plot. 
"""Build a combined prior-knowledge gene program (GP) mask for NicheCompass.

Depending on the ``create_*_gene_program_mask`` flags in ``par``, gene program
dictionaries are extracted from OmniPath, NicheNet, MeBocost and CollecTRI.
They are then merged with ``filter_and_combine_gp_dict_gps_v2`` and written to
``par["output"]`` as JSON.
"""

import json
import os
import shutil
import sys

from nichecompass.utils import (
    extract_gp_dict_from_collectri_tf_network,
    extract_gp_dict_from_mebocost_ms_interactions,
    extract_gp_dict_from_nichenet_lrt_interactions,
    extract_gp_dict_from_omnipath_lr_interactions,
    filter_and_combine_gp_dict_gps_v2,
)


## VIASH START
par = {
    "species": "mouse",
    "create_omnipath_gene_program_mask": True,
    "create_nichenet_gene_program_mask": True,
    "create_mebocost_gene_program_mask": True,
    "create_collectri_tf_gene_program_mask": False,
    # omnipath params
    "input_gene_orthologs_mapping_file": "resources_test/niche/human_mouse_gene_orthologs.csv",
    "omnipath_min_curation_effort": 2,
    # nichenet params
    "nichenet_version": "v2",
    "nichenet_keep_target_genes_ratio": 1.0,
    "nichenet_max_n_target_genes_per_gp": 250,
    # mebocost_gene_program_mask
    "input_metabolite_enzymes": "resources_test/niche/mouse_metabolite_enzymes.tsv",
    "input_metabolite_sensors": "resources_test/niche/mouse_metabolite_sensors.tsv",
    # filter and combine programs
    "overlap_thresh_target_genes": 1.0,
    # output paths
    "output": "prior_knowledge_gene_program_mask.json",
    "output_omnipath_lr_network": "omnipath_lr_network.csv",
    "output_nichenet_lr_network": "nichenet_lr_network.csv",
    "output_nichenet_ligand_target_matrix": "nichenet_ligand_target_matrix_v2_mouse.csv",
    "output_collectri_tf_network": "collectri_tf_network.csv",
    "output_omnipath_gp_gene_count_distributions": "omnipath_gp_gene_count_distributions.svg",
    "output_nichenet_gp_gene_count_distributions": "nichenet_gp_gene_count_distributions.svg",
    "output_mebocost_gp_gene_count_distributions": "mebocost_gp_gene_count_distributions.svg",
    "output_collectri_tf_gp_gene_count_distributions": "collectri_tf_gp_gene_count_distributions.svg",
}

meta = {"temp_dir": "tmp/", "resources_dir": "src/utils/"}
## VIASH END
# setup_logger.py is shipped as a component resource, so it only becomes
# importable once the resources directory is on the module search path.
sys.path.append(meta["resources_dir"])
from setup_logger import setup_logger

logger = setup_logger()

# Validate that inputs are provided correctly
if not any(
    [
        par["create_omnipath_gene_program_mask"],
        par["create_nichenet_gene_program_mask"],
        par["create_mebocost_gene_program_mask"],
        par["create_collectri_tf_gene_program_mask"],
    ]
):
    raise ValueError("At least one gene program mask must be set to True")

needs_orthologs = (
    par["species"] == "mouse" and not par["input_gene_orthologs_mapping_file"]
)
if par["create_omnipath_gene_program_mask"] and needs_orthologs:
    raise ValueError(
        "For mouse species, a --input_gene_orthologs_mapping_file file must be provided for generating the omnipath mask."
    )
if par["create_nichenet_gene_program_mask"] and needs_orthologs:
    raise ValueError(
        "For mouse species, a --input_gene_orthologs_mapping_file file must be provided for generating the nichenet mask."
    )
if par["create_mebocost_gene_program_mask"] and (
    not par["input_metabolite_enzymes"] or not par["input_metabolite_sensors"]
):
    raise ValueError(
        "For mebocost gene program mask, both --input_metabolite_enzymes and --input_metabolite_sensors files must be provided."
    )

# Assemble gene program dictionaries
gp_dicts = []

if par["create_omnipath_gene_program_mask"]:
    logger.info("Generating Omnipath gene program mask...")

    plot_gp_gene_count_distributions = bool(
        par["output_omnipath_gp_gene_count_distributions"]
    )
    # Only persist the ligand-receptor network when an output path was given.
    save_to_disk = bool(par["output_omnipath_lr_network"])

    omnipath_gp_dict = extract_gp_dict_from_omnipath_lr_interactions(
        species=par["species"],
        min_curation_effort=par["omnipath_min_curation_effort"],
        load_from_disk=False,
        # Previously hard-coded to True, which ignored the flag computed above
        # and requested a save even when no output path was provided.
        save_to_disk=save_to_disk,
        lr_network_file_path=par["output_omnipath_lr_network"],
        gene_orthologs_mapping_file_path=par["input_gene_orthologs_mapping_file"],
        plot_gp_gene_count_distributions=plot_gp_gene_count_distributions,
        gp_gene_count_distributions_save_path=par[
            "output_omnipath_gp_gene_count_distributions"
        ],
    )

    gp_dicts.append(omnipath_gp_dict)

if par["create_nichenet_gene_program_mask"]:
    logger.info("Generating NicheNet gene program mask...")

    plot_gp_gene_count_distributions = bool(
        par["output_nichenet_gp_gene_count_distributions"]
    )
    # NOTE(review): saving is enabled as soon as either NicheNet output path is
    # set, so the other path may still be None - confirm that the NicheCompass
    # helper tolerates that.
    save_to_disk = bool(
        par["output_nichenet_lr_network"]
        or par["output_nichenet_ligand_target_matrix"]
    )

    nichenet_gp_dict = extract_gp_dict_from_nichenet_lrt_interactions(
        species=par["species"],
        version=par["nichenet_version"],
        keep_target_genes_ratio=par["nichenet_keep_target_genes_ratio"],
        max_n_target_genes_per_gp=par["nichenet_max_n_target_genes_per_gp"],
        load_from_disk=False,
        save_to_disk=save_to_disk,
        lr_network_file_path=par["output_nichenet_lr_network"],
        ligand_target_matrix_file_path=par["output_nichenet_ligand_target_matrix"],
        gene_orthologs_mapping_file_path=par["input_gene_orthologs_mapping_file"],
        plot_gp_gene_count_distributions=plot_gp_gene_count_distributions,
        gp_gene_count_distributions_save_path=par[
            "output_nichenet_gp_gene_count_distributions"
        ],
    )

    gp_dicts.append(nichenet_gp_dict)

if par["create_mebocost_gene_program_mask"]:
    logger.info("Generating MeBocost gene program mask...")

    # The MeBocost helper is given a directory and a species rather than file
    # paths, so the inputs are copied to "<species>_metabolite_*.tsv" inside
    # the temp dir. Create the dir first: copy2 fails if it does not exist.
    os.makedirs(meta["temp_dir"], exist_ok=True)
    shutil.copy2(
        par["input_metabolite_enzymes"],
        os.path.join(meta["temp_dir"], f"{par['species']}_metabolite_enzymes.tsv"),
    )
    shutil.copy2(
        par["input_metabolite_sensors"],
        os.path.join(meta["temp_dir"], f"{par['species']}_metabolite_sensors.tsv"),
    )
    plot_gp_gene_count_distributions = bool(
        par["output_mebocost_gp_gene_count_distributions"]
    )

    mebocost_gp_dict = extract_gp_dict_from_mebocost_ms_interactions(
        dir_path=meta["temp_dir"],
        species=par["species"],
        plot_gp_gene_count_distributions=plot_gp_gene_count_distributions,
        gp_gene_count_distributions_save_path=par[
            "output_mebocost_gp_gene_count_distributions"
        ],
    )

    gp_dicts.append(mebocost_gp_dict)

if par["create_collectri_tf_gene_program_mask"]:
    logger.info("Generating CollecTRI TF gene program mask...")

    plot_gp_gene_count_distributions = bool(
        par["output_collectri_tf_gp_gene_count_distributions"]
    )
    save_to_disk = bool(par["output_collectri_tf_network"])

    collectri_gp_dict = extract_gp_dict_from_collectri_tf_network(
        species=par["species"],
        save_to_disk=save_to_disk,
        tf_network_file_path=par["output_collectri_tf_network"],
        plot_gp_gene_count_distributions=plot_gp_gene_count_distributions,
        gp_gene_count_distributions_save_path=par[
            "output_collectri_tf_gp_gene_count_distributions"
        ],
    )

    gp_dicts.append(collectri_gp_dict)

# Filter and combine GPs
# Explicit raise instead of `assert`, which is stripped when running with -O.
if not gp_dicts:
    raise RuntimeError("No gene program dictionaries were created.")

combined_gp_dict = filter_and_combine_gp_dict_gps_v2(
    gp_dicts,
    overlap_thresh_target_genes=par["overlap_thresh_target_genes"],
    verbose=True,
)

logger.info("Gene program mask generation completed.")
logger.info(
    f"Number of gene programs after filtering and combining: {len(combined_gp_dict)}."
)

logger.info(f"Saving combined gene program mask to: {par['output']}")
with open(par["output"], "w") as f:
    json.dump(combined_gp_dict, f)
"ligand_receptor_target_gene_GP", + "metabolite_enzyme_sensor_GP", + "TF_target_genes_GP", + "combined_GP", + ] + matching_gp = [] + for key in expected_gp_keys: + assert any(key in gp for gp in gp_mask.keys()), ( + f"No gene programs containing '{key}' found" + ) + + gp = next(gp for gp in gp_mask.keys() if key in gp) + matching_gp.append(gp) + + for gp in matching_gp: + expected_keys = [ + "sources", + "targets", + "sources_categories", + "targets_categories", + ] + assert all([key in gp_mask[gp] for key in expected_keys]), ( + f"Gene program {gp} is missing expected keys" + ) + + +def test_outputs(run_component, tmp_path): + output = tmp_path / "output.json" + omnipath_lr = tmp_path / "omnipath_lr_network.tsv" + nichenet_lr = tmp_path / "nichenet_lr_network.tsv" + nichenet_lt = tmp_path / "nichenet_ligand_target_matrix.csv" + collectri_tf = tmp_path / "output_collectri_tf_network.csv" + omnipath_distr = tmp_path / "omnipath_distr.svg" + nichenet_distr = tmp_path / "nichenet_distr.svg" + mebocost_distr = tmp_path / "mebocost_distr.svg" + collectri_distr = tmp_path / "collectri_distr.svg" + + args = [ + "--input_gene_orthologs_mapping_file", + ortholog_file, + "--input_metabolite_enzymes", + enzymes_file, + "--input_metabolite_sensors", + sensors_file, + "--species", + "mouse", + "--output", + output, + "--output_omnipath_lr_network", + omnipath_lr, + "--output_nichenet_lr_network", + nichenet_lr, + "--output_nichenet_ligand_target_matrix", + nichenet_lt, + "--output_collectri_tf_network", + collectri_tf, + "--output_omnipath_gp_gene_count_distributions", + omnipath_distr, + "--output_nichenet_gp_gene_count_distributions", + nichenet_distr, + "--output_mebocost_gp_gene_count_distributions", + mebocost_distr, + "--output_collectri_tf_gp_gene_count_distributions", + collectri_distr, + ] + + run_component(args) + + expected_outputs = [ + omnipath_lr, + nichenet_lr, + nichenet_lt, + collectri_tf, + omnipath_distr, + nichenet_distr, + mebocost_distr, + collectri_distr, 
+ ] + + for output in expected_outputs: + assert output.is_file(), f"Expected output file {output} does not exist" + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__])) diff --git a/src/nichecompass/nichecompass/config.vsh.yaml b/src/nichecompass/nichecompass/config.vsh.yaml index d085948..01e7e82 100644 --- a/src/nichecompass/nichecompass/config.vsh.yaml +++ b/src/nichecompass/nichecompass/config.vsh.yaml @@ -42,30 +42,6 @@ argument_groups: multiple: true description: "Keys of the adata.obs fields to use as covariates." - - name: "Spatial Neighbors Calculation" - arguments: - - name: "--coord_type" - type: string - choices: ["generic", "grid"] - description: | - Type of coordinate system. Valid options are: - `grid` - grid coordinates. - `generic` - generic coordinates. - If not provided, `grid` is used if `--input_obsm_spatial_coords` is in --input .uns with `--n_neighs` = 6 (Visium), otherwise `generic` is used. - - name: "--n_spatial_neighbors" - type: integer - default: 6 - description: | - Depending on `--coord_type`: - `grid` - number of neighboring tiles. - `generic` - number of neighborhoods for non-grid data. Only used when `--delaunay False`. - - name: "--delaunay" - type: boolean - default: false - description: | - Whether to use Delaunay triangulation to determine spatial neighborhood graph. - Only used when `--coord_type generic`. - - name: Gene Program Mask arguments: - name: "--min_genes_per_gp" @@ -416,9 +392,9 @@ argument_groups: default: nichecompass_active_gp_names description: | Key of the uns field where the active gene program names will be stored. - - name: "--output_uns_gene_index" + - name: "--output_uns_genes_index" type: string - default: nichecompass_gene_idx + default: nichecompass_genes_idx description: | Key of the uns field where the index of a concatenated vector of target and source genes that are in the gene program masks will be stored. 
- name: "--output_uns_target_genes_index" @@ -447,6 +423,7 @@ argument_groups: default: nichecompass_agg_weights description: | Key of the obsp field where the aggregation weights of the node label aggregator will be stored. + __merge__: [., /src/base/h5_compression_argument.yaml] resources: - type: python_script diff --git a/src/nichecompass/nichecompass/script.py b/src/nichecompass/nichecompass/script.py index 61974bf..7cfba48 100644 --- a/src/nichecompass/nichecompass/script.py +++ b/src/nichecompass/nichecompass/script.py @@ -9,7 +9,7 @@ from torch.cuda import is_available as cuda_is_available ## VIASH START par = { # Inputs - "input": "resources_test/cosmx/Lung5_Rep2_tiny.h5mu", + "input": "work/8c/d3f8f50ac967abac81eabcda42798e/_viash_par/input_1/merged.obsp_block_concatenation.output.h5mu", "modality": "rna", "layer": None, "input_gp_mask": "resources_test/niche/prior_knowledge_gp_mask.json", @@ -98,21 +98,19 @@ logger.info("GPU enabled? %s", use_gpu) ## Read in data adata = mu.read_h5ad(par["input"], mod=par["modality"]) -# ## Compute spatial neighbor graph -# logger.info("Computing spatial neighbor graph...") -# # Compute connectivities and distances -# sq.gr.spatial_neighbors( -# adata, -# coord_type=par["coord_type"], -# spatial_key=par["input_obsm_spatial_coords"], -# n_neighs=par["n_spatial_neighbors"], -# delaunay=par["delaunay"], -# ) - -# # Making the connectivity matrix symmetric -# adata.obsp["spatial_connectivities"] = adata.obsp["spatial_connectivities"].maximum( -# adata.obsp["spatial_connectivities"].T -# ) +# Counts need to be float32 to be processed by nichecompass model +# See https://discuss.pytorch.org/t/runtimeerror-mat1-and-mat2-must-have-the-same-dtype/166759 +counts_dtype = ( + adata.layers[par["layer"]].dtype if par["layer"] is not None else adata.X.dtype +) +if counts_dtype != "float32": + logger.info( + f"Converting count data to float32 from {counts_dtype} for model compatibility..." 
+ ) + if par["layer"] is not None: + adata.layers[par["layer"]] = adata.layers[par["layer"]].astype("float32") + else: + adata.X = adata.X.astype("float32") ## Add GP mask to data logger.info("Adding prior knowledge gene program mask to data...") @@ -125,7 +123,7 @@ add_gps_from_gp_dict_to_adata( gp_targets_mask_key=par["output_varm_gp_targets_mask"], gp_sources_mask_key=par["output_varm_gp_sources_mask"], gp_names_key=par["output_uns_gp_names"], - genes_idx_key=par["output_uns_gene_index"], + genes_idx_key=par["output_uns_genes_index"], target_genes_idx_key=par["output_uns_target_genes_index"], source_genes_idx_key=par["output_uns_source_genes_index"], min_genes_per_gp=par["min_genes_per_gp"], @@ -148,12 +146,12 @@ model = NicheCompass( gp_sources_mask_key=par["output_varm_gp_sources_mask"], latent_key=par["output_obsm_embedding"], cat_covariates_keys=par["input_obs_covariates"], - cat_covariates_no_edges=par["covariates_edges"], + cat_covariates_no_edges=par["covariate_edges"], cat_covariates_embeds_keys=par["output_uns_covariate_embeddings"], - cat_covariates_embeds_injection_layers=par["covariate_embedding_injection_layers"], - gene_idx_key=par["output_uns_gene_index"], - target_gene_idx_key=par["output_uns_target_genes_index"], - source_gene_idx_key=par["output_uns_source_genes_index"], + cat_covariates_embeds_injection=par["covariate_embedding_injection_layers"], + genes_idx_key=par["output_uns_genes_index"], + target_genes_idx_key=par["output_uns_target_genes_index"], + source_genes_idx_key=par["output_uns_source_genes_index"], recon_adj_key=par["output_obsp_reconstructed_adj_edge_proba"], agg_weights_key=par["output_obsp_agg_weights"], include_edge_recon_loss=par["include_edge_recon_loss"], @@ -174,7 +172,6 @@ model = NicheCompass( encoder_use_bn=par["encoder_use_bn"], dropout_rate_encoder=par["dropout_rate_encoder"], dropout_rate_graph_decoder=par["dropout_rate_graph_decoder"], - cat_covariates_cats=par["cat_covariates_cats"], n_addon_gp=par["n_addon_gp"], 
cat_covariates_embeds_nums=par["cat_covariates_embeds_nums"], seed=par["random_state"], @@ -212,6 +209,6 @@ model.train( ## Save model and data logger.info("Saving NicheCompass model and data...") mdata = mu.MuData({par["modality"]: adata}) -mdata.write_h5mu(par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) model.save(par["output_model"], save_adata=False) diff --git a/src/nichecompass/nichecompass/test.py b/src/nichecompass/nichecompass/test.py index 995cc0f..6bed03f 100644 --- a/src/nichecompass/nichecompass/test.py +++ b/src/nichecompass/nichecompass/test.py @@ -1,5 +1,6 @@ import pytest import mudata as mu +import sys ## VIASH START meta = { @@ -24,13 +25,16 @@ def test_simple_execution_xenium(run_component, tmp_path): gp_mask, "--n_epochs", "1", - "n_epochs_all_gps", + "--n_epochs_all_gps", "0", - "n_epochs_no_edge_recon", + "--n_epochs_no_edge_recon", "0", - "n_epochs_no_cat_covariates_contrastive", - "0--output", + "--n_epochs_no_cat_covariates_contrastive", + "0", + "--output", str(output), + "--output_model", + "test_model", "--output_compression", "gzip", ] @@ -50,17 +54,15 @@ def test_simple_execution_xenium(run_component, tmp_path): "nichecompass_gp_names", "nichecompass_active_gp_names", ] - assert all([uns in expected_uns_keys for uns in adata.uns.keys()]) + assert all([uns in adata.uns.keys() for uns in expected_uns_keys]), ( + f"Expected uns keys: {expected_uns_keys}, found: {list(adata.uns.keys())}" + ) assert len(adata.uns["nichecompass_gp_names"]) > len( adata.uns["nichecompass_active_gp_names"] ), "Expected less active GP names than total GP names" - assert adata.uns["nichecompass_genes_idx"] == ( - adata.uns["nichecompass_source_genes_idx"] - + adata.uns["nichecompass_target_genes_idx"] - ), "Expected genes idx to be union of source and target genes idx" expected_obsm_keys = ["nichecompass_latent"] - assert all([obsm in expected_obsm_keys for obsm in adata.obsm.keys()]), ( + assert all([obsm in 
adata.obsm.keys() for obsm in expected_obsm_keys]), ( "Not all expected obsm keys found" ) assert all(adata.obsm[obsm].dtype.kind == "f" for obsm in expected_obsm_keys), ( @@ -73,7 +75,7 @@ def test_simple_execution_xenium(run_component, tmp_path): "nichecompass_gp_sources_categories", "nichecompass_gp_targets_categories", ] - assert all([varm in expected_varm_keys for varm in adata.varm.keys()]), ( + assert all([varm in adata.varm.keys() for varm in expected_varm_keys]), ( "Not all expected varm keys found" ) assert ( @@ -83,4 +85,4 @@ def test_simple_execution_xenium(run_component, tmp_path): if __name__ == "__main__": - pytest.main([__file__]) + sys.exit(pytest.main([__file__])) diff --git a/src/workflows/niche/nichecompass_leiden/config.vsh.yaml b/src/workflows/niche/nichecompass_leiden/config.vsh.yaml new file mode 100644 index 0000000..b8a255c --- /dev/null +++ b/src/workflows/niche/nichecompass_leiden/config.vsh.yaml @@ -0,0 +1,371 @@ +name: "nichecompass_leiden" +namespace: "workflows/niche" +scope: "public" +description: "A pipeline to compute the spatial neighborhood graph, perform nichecompass embedding followed by Leiden clustering." +authors: + - __merge__: /src/authors/dorien_roosen.yaml + roles: [ author, maintainer ] + - __merge__: /src/authors/weiwei_schultz.yaml + roles: [ contributor ] +info: + test_dependencies: + - name: nichecompass_leiden_test + namespace: test_workflows/niche +argument_groups: + - name: Inputs + arguments: + - name: "--id" + required: true + type: string + description: ID of the sample. + example: foo + - name: "--input" + alternatives: [-i] + description: Path to the sample. 
+ required: true + example: input.h5mu + type: file + - name: "--input_gp_mask" + type: file + required: true + description: | + JSON file containing a nested dictionary containing the gene programs, + with keys being gene program names and values being dictionaries with keys `targets` and `sources`, + where `targets` contains a list of the names of genes in the gene program for the reconstruction of the gene expression of the node itself (receiving node) + and `sources` contains a list of the names of genes in the gene program for the reconstruction of the gene expression of the node's neighbors (transmitting nodes). + example: prior_knowledge_gp_mask.json + - name: "--modality" + description: Which modality to process. + type: string + default: "rna" + required: false + - name: "--layer" + description: "Use specified layer for calculation of qc metrics. If not specified, adata.X is used." + type: string + example: "raw_counts" + required: false + - name: "--input_obs_covariates" + type: string + multiple: true + default: ["sample_id"] + description: "Keys of the adata.obs fields to use as covariates." + - name: "--input_obsm_spatial_coords" + type: string + default: "spatial" + description: "Key in adata.obsm where spatial coordinates are stored" + + - name: "Sample ID options" + description: | + Options for adding the id to .obs on the MuData object. Having a sample + id present is a requirement of several components for this pipeline. + arguments: + - name: "--include_sample_as_covariate" + description: | + Whether to include the sample information as a categorical covariate for the + NicheCompass model. + type: boolean + default: true + - name: "--add_id_to_obs" + description: "Add the value passed with --id to .obs." + type: boolean + default: true + - name: --add_id_obs_output + description: | + .obs column to add the sample IDs to.
Required and only used when + --add_id_to_obs is set to 'true' + type: string + default: "sample_id" + - name: "--add_id_make_observation_keys_unique" + type: boolean + description: | + Join the id to the .obs index (.obs_names). + Only used when --add_id_to_obs is set to 'true'. + default: true + + - name: "Spatial Neighbors Calculation" + description: | + Options for the calculation of the spatial neighborhood graph. + arguments: + - name: "--coord_type" + type: string + choices: ["generic", "grid"] + description: | + Type of coordinate system provided by `--input_obsm_spatial_coords`. Valid options are: + `grid` - grid coordinates. + `generic` - generic coordinates. + If not provided, `grid` is used if `--input_obsm_spatial_coords` is in --input .uns with `--n_spatial_neighbors` = 6 (Visium), otherwise `generic` is used. + - name: "--n_spatial_neighbors" + type: integer + default: 6 + description: | + Depending on `--coord_type`: + `grid` - number of neighboring tiles. + `generic` - number of neighborhoods for non-grid data. Only used when `--delaunay False`. + - name: "--delaunay" + type: boolean + default: false + description: | + Whether to use Delaunay triangulation to determine spatial neighborhood graph. + Only used when `--coord_type generic`. + + - name: Gene Program Mask + description: Options for filtering gene programs based on the number of genes available in the data. + arguments: + - name: "--min_genes_per_gp" + type: integer + default: 1 + min: 0 + description: | + Minimum number of genes in a gene program including both target and source genes that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded. + - name: "--min_source_genes_per_gp" + type: integer + default: 0 + min: 0 + description: | + Minimum number of source genes in a gene program that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded.
+ - name: "--min_target_genes_per_gp" + type: integer + default: 0 + min: 0 + description: | + Minimum number of target genes in a gene program that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded. + - name: "--max_genes_per_gp" + type: integer + min: 1 + description: | + Maximum number of genes in a gene program including both target and source genes that can be available in the input data (gene expression has been probed) for a gene program not to be discarded. + - name: "--max_source_genes_per_gp" + type: integer + min: 1 + description: | + Maximum number of source genes in a gene program that can be available in the input data (gene expression has been probed) for a gene program not to be discarded. + - name: "--max_target_genes_per_gp" + type: integer + min: 1 + description: | + Maximum number of target genes in a gene program that can be available in the input data (gene expression has been probed) for a gene program not to be discarded. + - name: "--filter_genes_not_in_masks" + type: boolean_true + description: | + Whether to remove the genes that are not in the gp masks from the input data. + + - name: NicheCompass Model Architecture + description: Options for the NicheCompass model architecture. + arguments: + - name: "--covariate_edges" + type: boolean + multiple: true + description: | + List of booleans that indicate whether there can be edges between different categories of the categorical covariates. + If this is `True` for a specific categorical covariate, this covariate will be excluded from the edge reconstruction loss. + Needs to match the length and order of `--input_obs_covariates`. + - name: "--gene_expr_recon_dist" + type: string + choices: ["nb", "zinb"] + default: "nb" + description: | + The distribution used for gene expression reconstruction. + If `nb`, uses a negative binomial distribution. + If `zinb`, uses a zero-inflated negative binomial distribution.
+ - name: "--log_variational" + type: boolean + default: true + description: | + Whether to transform x by log(x+1) prior to encoding for numerical stability (not for normalization). + - name: "--node_label_method" + type: string + choices: ["one-hop-norm", "two-hop-norm", "one-hop-attention"] + default: "one-hop-norm" + description: | + Node label method that will be used for omics reconstruction. + If `one-hop-sum`, uses a concatenation of the node's input features with the sum of the input features of all nodes in the node's one-hop neighborhood. + If `one-hop-norm`, uses a concatenation of the node's input features with the node's one-hop neighbors input features normalized as per Kipf, T. N. & Welling, M. Semi-Supervised Classification with Graph Convolutional Networks. arXiv [cs.LG] (2016). + If `one-hop-attention`, uses a concatenation of the node's input features with the node's one-hop neighbors input features weighted by an attention mechanism. + - name: "--active_gp_thresh_ratio" + type: double + default: 0.1 + min: 0.0 + max: 1.0 + description: | + Ratio that determines which gene programs are considered active and are used in the latent representation after model training. + All inactive gene programs will be dropped during model training after a determined number of epochs. + Aggregations of the absolute values of the gene weights of the gene expression decoder per gene program are calculated. + The maximum value, i.e. the value of the gene program with the highest aggregated value will be used as a benchmark and all gene programs whose aggregated value is smaller than `--active_gp_thresh_ratio` times this maximum value will be set to inactive. + If set to 0, all gene programs will be considered active. + - name: "--active_gp_type" + type: string + choices: ["mixed", "separate"] + default: "separate" + description: | + Type to determine active gene programs. 
+ Can be `mixed`, in which case active gene programs are determined across prior and add-on gene programs jointly, + or `separate` in which case they are determined separately for prior and add-on gene programs. + - name: "--n_addon_gp" + type: integer + default: 100 + min: 0 + description: | + Number of addon gene programs (i.e. gene programs that are not included in masks but can be learned de novo). + - name: "--cat_covariates_embeds_nums" + type: integer + multiple: true + description: | + Number of embedding nodes for all categorical covariates. + Must be the same length as `--input_obs_covariates`. + - name: "--random_state" + default: 0 + type: integer + min: 0 + description: | + Random seed for reproducibility. + + - name: NicheCompass Training Parameters + description: Options for training the NicheCompass model. + arguments: + - name: "--n_epochs" + type: integer + min: 1 + default: 100 + description: Number of training epochs + - name: "--n_epochs_all_gps" + type: integer + min: 0 + default: 25 + description: | + Number of epochs during which all gene programs are used for model training. + After that only active gene programs are retained. + - name: "--n_epochs_no_edge_recon" + type: integer + default: 0 + min: 0 + description: | + Number of epochs during which the edge reconstruction loss is excluded from backpropagation for pretraining using the other loss components. + - name: "--n_epochs_no_cat_covariates_contrastive" + type: integer + default: 5 + min: 0 + description: | + Number of epochs during which the categorical covariates contrastive loss is excluded from backpropagation for pretraining using the other loss components. + - name: "--lr" + type: double + default: 0.001 + min: 0.0 + max: 1.0 + description: Learning rate + - name: "--weight_decay" + type: double + default: 0.001 + description: Weight decay (L2 penalty). 
+ - name: "--edge_val_ratio" + type: double + default: 0.1 + min: 0.0 + max: 1.0 + description: | + Fraction of the data that is used as validation set on edge-level. The rest of the data will be used as training set on edge-level. + - name: "--node_val_ratio" + type: double + default: 0.1 + min: 0.0 + max: 1.0 + description: | + Fraction of the data that is used as validation set on node-level. The rest of the data will be used as training set on node-level. + - name: "--edge_batch_size" + type: integer + min: 1 + default: 256 + description: | + Batch size for the edge-level dataloaders. + - name: "--node_batch_size" + type: integer + min: 1 + description: | + Batch size for the node-level dataloaders. + If not provided, is automatically determined based on `--edge_batch_size`. + - name: "--n_sampled_neighbors" + type: integer + default: -1 + min: -1 + description: | + Number of neighbors that are sampled during model training from the spatial neighborhood graph. + If set to -1, all direct neighbors are included. + + - name: Clustering options + arguments: + - name: "--obs_cluster" + type: string + description: | + Prefix for the .obs keys under which to add the cluster labels. Newly created columns in .obs will + be created from the specified value for '--obs_cluster' suffixed with an underscore and one of the + resolutions specified in '--leiden_resolution'. + default: "nichecompass_leiden" + - name: "--leiden_resolution" + type: double + description: Control the coarseness of the clustering. Higher values lead to more clusters. + default: [1] + multiple: true + + - name: Umap options + arguments: + - name: "--obsm_umap" + type: string + default: "X_leiden_nichecompass_umap" + required: false + description: "In which .obsm slot to store the resulting UMAP embedding."
+ + - name: Neighbour calculation + arguments: + - name: "--uns_neighbors" + type: string + default: nichecompass_neighbors + description: In which .uns slot to store various neighbor output objects. + - name: "--obsp_neighbor_distances" + type: string + default: "nichecompass_distances" + description: "In which .obsp slot to store the distance matrix between the resulting neighbors." + - name: "--obsp_neighbor_connectivities" + type: string + default: "nichecompass_connectivities" + description: "In which .obsp slot to store the connectivities matrix between the resulting neighbors." + + - name: "Outputs" + arguments: + - name: "--output" + type: file + required: true + direction: output + description: Destination path to the output. + example: output.h5mu + - name: "--output_model" + type: file + required: true + direction: output + description: Directory to save the trained NicheCompass model. + - name: "--output_obsm_embedding" + type: string + default: nichecompass_latent + description: | + Key of the obsm field where the latent / gene program representation of active gene programs will be stored after NicheCompass model training. 
+ +dependencies: + - name: dataflow/obsp_block_concatenation + - name: neighbors/spatial_neighborhood_graph + - name: nichecompass/nichecompass + - name: metadata/add_id + repository: openpipeline + - name: workflows/multiomics/neighbors_leiden_umap + repository: openpipeline + +resources: + - type: nextflow_script + path: main.nf + entrypoint: run_wf +test_resources: + - type: nextflow_script + path: test.nf + entrypoint: test_wf + - path: /resources_test/xenium/xenium_tiny.h5mu + - path: /resources_test/cosmx/Lung5_Rep2_tiny.h5mu + - path: /resources_test/niche/prior_knowledge_gp_mask.json +runners: + - type: nextflow diff --git a/src/workflows/niche/nichecompass_leiden/integration_test.sh b/src/workflows/niche/nichecompass_leiden/integration_test.sh new file mode 100755 index 0000000..8bdaaf1 --- /dev/null +++ b/src/workflows/niche/nichecompass_leiden/integration_test.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# get the root of the directory +REPO_ROOT=$(git rev-parse --show-toplevel) + +# ensure that the command below is run from the root of the repository +cd "$REPO_ROOT" + +nextflow \ + run . \ + -main-script src/workflows/niche/nichecompass_leiden/test.nf \ + -entry test_wf \ + -resume \ + -profile docker,no_publish \ + -c src/workflows/utils/labels_ci.config \ + -c src/workflows/utils/integration_tests.config diff --git a/src/workflows/niche/nichecompass_leiden/main.nf b/src/workflows/niche/nichecompass_leiden/main.nf new file mode 100644 index 0000000..f37cc83 --- /dev/null +++ b/src/workflows/niche/nichecompass_leiden/main.nf @@ -0,0 +1,162 @@ +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + + | map { id, state -> + [id, state + [ + workflow_output: state.output, + _meta: [join_id: id] + ]] + } + // If requested, add the id of the events (samples) to a column in .obs. + // Also allows to make .obs_names (the .obs index) unique, by prefixing the values with an unique id per .h5mu file. 
+ // The latter is usefull to avoid duplicate observations during concatenation. + | add_id.run( + filter: {id, state -> state.add_id_to_obs }, + fromState: {id, state -> + def newState = [ + "input": state.input, + "input_id": id, + "make_observation_keys_unique": state.add_id_make_observation_keys_unique, + "obs_output": state.add_id_obs_output, + "add_id_to_obs": state.add_id_to_obs + ] + newState + }, + toState: {id, output, state -> + def keysToRemove = ["add_id_to_obs", "add_id_obs_output", "add_id_make_observation_keys_unique"] + def newState = state.findAll{it.key !in keysToRemove} + newState + ["input": output.output] + } + ) + + | spatial_neighborhood_graph.run( + fromState: {id, state -> [ + "input": state.input, + "modality": state.modality, + "layer": state.layer, + "input_obsm_spatial_coords": state.input_obsm_spatial_coords, + "coord_type": state.coord_type, + "n_spatial_neighbors": state.n_spatial_neighbors, + "delaunay": state.delaunay + ]}, + toState: {id, output, state -> + def keysToRemove = ["input_obsm_spatial_coords", "coord_type", "n_spatial_neighbors", "delaunay"] + def newState = state.findAll{it.key !in keysToRemove} + newState + ["input": output.output] + } + ) + + | joinStates { ids, states -> + def newId = "merged" + // gather keys with unique values across states that should be combined + def new_state_non_unique_values = [ + input: states.collect{it.input}, + input_id: ids, + _meta: [join_id: ids[0]] + ] + // gather keys from different states + def all_state_keys = states.inject([].toSet()){ current_keys, state -> + def new_keys = current_keys + state.keySet() + return new_keys + }.minus(["output", "id", "input", "_meta"]) + // Create the new state from the keys, values should be the same across samples + def new_state = all_state_keys.inject([:]){ old_state, argument_name -> + argument_values = states.collect{it.get(argument_name)}.unique() + assert argument_values.size() == 1, "Arguments should be the same across samples. 
Argument name: $argument_name, \ + argument value: $argument_values" + // take the unique value from the set (there is only one) + def argument_value + argument_values.each { argument_value = it } + def current_state = old_state + [(argument_name): argument_value] + return current_state + } + def data_state = new_state_non_unique_values + new_state + [ newId, data_state ] + } + + | obsp_block_concatenation.run( + fromState: { id, state -> [ + "input": state.input, + "modality": state.modality, + "input_id": state.input_id + ]}, + toState: {id, output, state -> + def keysToRemove = ["input_id"] + def newState = state.findAll{it.key !in keysToRemove} + newState + ["input": output.output] + } + ) + + | nichecompass.run( + fromState: {id, state -> [ + "input": state.input, + "input_gp_mask": state.input_gp_mask, + "input_obs_covariates": state.input_obs_covariates, + "modality": state.modality, + "layer": state.layer, + "min_genes_per_gp": state.min_genes_per_gp, + "min_source_genes_per_gp": state.min_source_genes_per_gp, + "min_target_genes_per_gp": state.min_target_genes_per_gp, + "max_genes_per_gp": state.max_genes_per_gp, + "max_source_genes_per_gp": state.max_source_genes_per_gp, + "max_target_genes_per_gp": state.max_target_genes_per_gp, + "filter_genes_not_in_masks": state.filter_genes_not_in_masks, + "covariate_edges": state.covariate_edges, + "gene_expr_recon_distribution": state.gene_expr_recon_dist, + "log_variational": state.log_variational, + "node_label_method": state.node_label_method, + "active_gp_thresh_ratio": state.active_gp_thresh_ratio, + "active_gp_type": state.active_gp_type, + "n_addon_gp": state.n_addon_gp, + "cat_covariates_embeds_nums": state.cat_covariates_embeds_nums, + "random_state": state.random_state, + "n_epochs": state.n_epochs, + "n_epochs_all_gps": state.n_epochs_all_gps, + "n_epochs_no_edge_recon": state.n_epochs_no_edge_recon, + "n_epochs_no_cat_covariates_contrastive_loss": state.n_epochs_no_cat_covariates_contrastive_loss, + 
"lr": state.lr, + "weight_decay": state.weight_decay, + "edge_val_ratio": state.edge_val_ratio, + "node_val_ratio": state.node_val_ratio, + "edge_batch_size": state.edge_batch_size, + "node_batch_size": state.node_batch_size, + "n_sampled_neighbors": state.n_sampled_neighbors, + "output_obsm_embedding": state.output_obsm_embedding, + "output_model": state.output_model + ]}, + args: [ + "input_obsm_spatial_connectivities": "spatial_connectivities" + ], + toState: [ + "input": "output", + "output_model": "output_model" + ] + ) + + | neighbors_leiden_umap.run( + fromState: { id, state -> [ + "input": state.input, + "modality": state.modality, + "obsm_input": state.output_obsm_embedding, + "output": state.workflow_output, + "uns_neighbors": state.uns_neighbors, + "obsp_neighbor_distances": state.obsp_neighbor_distances, + "obsp_neighbor_connectivities": state.obsp_neighbor_connectivities, + "leiden_resolution": state.leiden_resolution, + "obs_cluster": state.obs_cluster, + "obsm_umap": state.obsm_umap, + ]}, + toState: ["output": "output"] + ) + + | setState(["output": "output", "output_model": "output_model", "_meta": "_meta"]) + + | view() + + emit: + output_ch +} \ No newline at end of file diff --git a/src/workflows/niche/nichecompass_leiden/nextflow.config b/src/workflows/niche/nichecompass_leiden/nextflow.config new file mode 100644 index 0000000..8108bc2 --- /dev/null +++ b/src/workflows/niche/nichecompass_leiden/nextflow.config @@ -0,0 +1,10 @@ +manifest { + nextflowVersion = '!>=20.12.1-edge' +} + +params { + rootDir = java.nio.file.Paths.get("$projectDir/../../../../").toAbsolutePath().normalize().toString() +} + +// include common settings +includeConfig("${params.rootDir}/src/workflows/utils/labels.config") \ No newline at end of file diff --git a/src/workflows/niche/nichecompass_leiden/test.nf b/src/workflows/niche/nichecompass_leiden/test.nf new file mode 100644 index 0000000..ccb0578 --- /dev/null +++ b/src/workflows/niche/nichecompass_leiden/test.nf @@ 
-0,0 +1,70 @@ +nextflow.enable.dsl=2 + +include { nichecompass_leiden } from params.rootDir + "/target/nextflow/workflows/niche/nichecompass_leiden/main.nf" +include { nichecompass_leiden_test } from params.rootDir + "/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/main.nf" + +params.resources_test = params.rootDir + "/resources_test" + +workflow test_wf { + + resources_test = file(params.resources_test) + + output_ch = + Channel.fromList([ + [ + id: "xenium", + input: resources_test.resolve("xenium/xenium_tiny.h5mu"), + input_gp_mask: resources_test.resolve("niche/prior_knowledge_gp_mask.json"), + n_epochs: 1, + n_epochs_all_gps: 0, + n_epochs_no_edge_recon: 0, + n_epochs_no_cat_covariates_contrastive_loss: 0, + output_model: "simple_execution_test_model" + ], + [ + id: "cosmx", + input: resources_test.resolve("cosmx/Lung5_Rep2_tiny.h5mu"), + input_gp_mask: resources_test.resolve("niche/prior_knowledge_gp_mask.json"), + n_epochs: 1, + n_epochs_all_gps: 0, + n_epochs_no_edge_recon: 0, + n_epochs_no_cat_covariates_contrastive_loss: 0, + output_model: "simple_execution_test_model" + ] + ]) + | map { state -> [state.id, state] } + | nichecompass_leiden.run( + toState: { id, output, state -> output + [og_input: state.input] } + ) + + | view { output -> + assert output.size() == 2 : "Outputs should contain two elements; [id, state]" + + // check id + def id = output[0] + assert id == "merged" + + // check output + def state = output[1] + assert state instanceof Map : "State should be a map. Found: ${state}" + assert state.containsKey("output") : "Output should contain key 'output'." + assert state.output.isFile() : "'output' should be a file." + assert state.output.toString().endsWith(".h5mu") : "Output file should end with '.h5mu'. Found: ${state.output}" + + // check model_output + assert state.containsKey("output_model") : "Output should contain key 'output_model'." 
+ assert state.output_model.isDirectory() : "'output_model' should be a directory." + assert state.output_model.toString().endsWith("_model") : "Model output directory should end with '_model'. Found: ${state.output_model}" + + "Output: $output" + } + | nichecompass_leiden_test.run( + fromState: [ + "input": "output" + ] + ) + | toSortedList({a, b -> a[0] <=> b[0]}) + | map { output_list -> + assert output_list.size() == 1 : "output channel should contain 1 events" + } +} \ No newline at end of file diff --git a/src/workflows/test_workflows/niche/nichecompass_leiden/config.vsh.yaml b/src/workflows/test_workflows/niche/nichecompass_leiden/config.vsh.yaml new file mode 100644 index 0000000..0d13e8f --- /dev/null +++ b/src/workflows/test_workflows/niche/nichecompass_leiden/config.vsh.yaml @@ -0,0 +1,25 @@ +name: "nichecompass_leiden_test" +namespace: "test_workflows/niche" +scope: "test" +description: "This component tests the output of nichecompass leiden workflow." +authors: + - __merge__: /src/authors/dorien_roosen.yaml +argument_groups: + - name: Inputs + arguments: + - name: "--input" + type: file + required: true + description: Path to h5mu output. 
+ example: foo.h5mu +resources: + - type: python_script + path: script.py + - path: /src/utils/setup_logger.py +engines: + - type: docker + image: python:3.12-slim + __merge__: /src/base/requirements/testworkflows_setup.yaml +runners: + - type: executable + - type: nextflow diff --git a/src/workflows/test_workflows/niche/nichecompass_leiden/script.py b/src/workflows/test_workflows/niche/nichecompass_leiden/script.py new file mode 100644 index 0000000..5265a33 --- /dev/null +++ b/src/workflows/test_workflows/niche/nichecompass_leiden/script.py @@ -0,0 +1,64 @@ +from mudata import read_h5mu +import sys +import pytest + +##VIASH START +par = {"input": "nichecompass_leiden/output.h5mu"} + +meta = {"resources_dir": "resources_test"} +##VIASH END + + +def test_run(): + input_mudata = read_h5mu(par["input"]) + + expected_mod = ["rna"] + expected_obsm = ["X_leiden_nichecompass_umap", "nichecompass_latent"] + expected_obs = ["sample_id", "nichecompass_leiden_1.0"] + expected_obsp = [ + "spatial_distances", + "spatial_connectivities", + "nichecompass_connectivities", + "nichecompass_distances" + ] + expected_varm = [ + "nichecompass_gp_sources", + "nichecompass_gp_targets", + "nichecompass_gp_sources_categories", + "nichecompass_gp_targets_categories" + ] + expected_uns = [ + "nichecompass_sources_categories_label_encoder", + "nichecompass_targets_categories_label_encoder", + "nichecompass_source_genes_idx", + "nichecompass_target_genes_idx", + "nichecompass_genes_idx", + "nichecompass_gp_names", + "nichecompass_active_gp_names", + "nichecompass_neighbors", + "spatial", + "xenium_spatial_neighbors" + ] + + assert all(key in list(input_mudata.mod) for key in expected_mod), ( + f"Input modalities should be: {expected_mod}, found: {input_mudata.mod.keys()}." + ) + assert all(key in list(input_mudata.mod["rna"].obsm) for key in expected_obsm), ( + f"Input mod['rna'] obsm columns should be: {expected_obsm}, found: {input_mudata.mod['rna'].obsm.keys()}." 
+ ) + assert all(key in list(input_mudata.mod["rna"].obs) for key in expected_obs), ( + f"Input mod['rna'] obs columns should be: {expected_obs}, found: {input_mudata.mod['rna'].obs.keys()}." + ) + assert all(key in list(input_mudata.mod["rna"].obsp) for key in expected_obsp), ( + f"Input mod['rna'] obsp columns should be: {expected_obsp}, found: {input_mudata.mod['rna'].obsp.keys()}." + ) + assert all(key in list(input_mudata.mod["rna"].uns) for key in expected_uns), ( + f"Input mod['rna'] uns columns should be: {expected_uns}, found: {input_mudata.mod['rna'].uns.keys()}." + ) + assert all(key in list(input_mudata.mod["rna"].varm) for key in expected_varm), ( + f"Input mod['rna'] varm columns should be: {expected_varm}, found: {input_mudata.mod['rna'].varm.keys()}." + ) + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "--import-mode=importlib"])) diff --git a/target/_private/executable/filter/subset_cosmx/.config.vsh.yaml b/target/_private/executable/filter/subset_cosmx/.config.vsh.yaml index 65f0f75..d605686 100644 --- a/target/_private/executable/filter/subset_cosmx/.config.vsh.yaml +++ b/target/_private/executable/filter/subset_cosmx/.config.vsh.yaml @@ -228,7 +228,7 @@ build_info: output: "target/_private/executable/filter/subset_cosmx" executable: "target/_private/executable/filter/subset_cosmx/subset_cosmx" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/_private/executable/filter/subset_cosmx/subset_cosmx b/target/_private/executable/filter/subset_cosmx/subset_cosmx index 1e9cf12..8dde96f 100755 --- a/target/_private/executable/filter/subset_cosmx/subset_cosmx +++ b/target/_private/executable/filter/subset_cosmx/subset_cosmx @@ -458,9 +458,9 @@ RUN pip install --upgrade pip && \ LABEL 
org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz" LABEL org.opencontainers.image.description="Companion container for running component filter subset_cosmx" -LABEL org.opencontainers.image.created="2025-12-08T20:39:05Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:07Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL org.opencontainers.image.version="niche-compass" VIASHDOCKER diff --git a/target/_private/nextflow/filter/subset_cosmx/.config.vsh.yaml b/target/_private/nextflow/filter/subset_cosmx/.config.vsh.yaml index 86828d9..b05e35d 100644 --- a/target/_private/nextflow/filter/subset_cosmx/.config.vsh.yaml +++ b/target/_private/nextflow/filter/subset_cosmx/.config.vsh.yaml @@ -228,7 +228,7 @@ build_info: output: "target/_private/nextflow/filter/subset_cosmx" executable: "target/_private/nextflow/filter/subset_cosmx/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/_private/nextflow/filter/subset_cosmx/main.nf b/target/_private/nextflow/filter/subset_cosmx/main.nf index 04dd822..2634a2f 100644 --- a/target/_private/nextflow/filter/subset_cosmx/main.nf +++ b/target/_private/nextflow/filter/subset_cosmx/main.nf @@ -3334,7 +3334,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/_private/nextflow/filter/subset_cosmx", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { 
diff --git a/target/_test/executable/test_workflows/niche/nichecompass_leiden_test/.config.vsh.yaml b/target/_test/executable/test_workflows/niche/nichecompass_leiden_test/.config.vsh.yaml new file mode 100644 index 0000000..01a6376 --- /dev/null +++ b/target/_test/executable/test_workflows/niche/nichecompass_leiden_test/.config.vsh.yaml @@ -0,0 +1,185 @@ +name: "nichecompass_leiden_test" +namespace: "test_workflows/niche" +version: "niche-compass" +authors: +- name: "Dorien Roosen" + info: + role: "Core Team Member" + links: + email: "dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Path to h5mu output." + info: null + example: + - "foo.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +- type: "file" + path: "setup_logger.py" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "This component tests the output of nichecompass leiden workflow." 
+info: null +status: "enabled" +scope: + image: "test" + target: "test" +repositories: +- type: "vsh" + name: "openpipeline" + repo: "openpipeline" + tag: "v3.0.0" +links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + 
cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python:3.12-slim" + target_registry: "images.viash-hub.com" + target_tag: "niche-compass" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + - "git" + interactive: false + - type: "python" + user: false + packages: + - "anndata~=0.11.1" + - "mudata~=0.3.1" + - "viashpy==0.9.0" + github: + - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" + script: + - "exec(\"try:\\n import awkward\\nexcept ModuleNotFoundError:\\n exit(0)\\\ + nelse: exit(1)\")" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/workflows/test_workflows/niche/nichecompass_leiden/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/_test/executable/test_workflows/niche/nichecompass_leiden_test" + executable: "target/_test/executable/test_workflows/niche/nichecompass_leiden_test/nichecompass_leiden_test" + viash_version: "0.9.4" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" + git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" +package_config: + name: "openpipeline_spatial" + version: "niche-compass" + info: + test_resources: + - type: "s3" + path: "s3://openpipelines-bio/openpipeline_spatial/resources_test" + dest: "resources_test" + repositories: + - type: "vsh" + name: "openpipeline" + repo: "openpipeline" + tag: "v3.0.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 
'includeConfig(\"nextflow_labels.config\"\ + )'" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'niche-compass'" + organization: "vsh" + links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" diff --git a/target/executable/spatial_neighborhood_graph/neighbors/nextflow_labels.config b/target/_test/executable/test_workflows/niche/nichecompass_leiden_test/nextflow_labels.config similarity index 100% rename from target/executable/spatial_neighborhood_graph/neighbors/nextflow_labels.config rename to target/_test/executable/test_workflows/niche/nichecompass_leiden_test/nextflow_labels.config diff --git a/target/_test/executable/test_workflows/niche/nichecompass_leiden_test/nichecompass_leiden_test b/target/_test/executable/test_workflows/niche/nichecompass_leiden_test/nichecompass_leiden_test new file mode 100755 index 0000000..0d4d8e3 --- /dev/null +++ b/target/_test/executable/test_workflows/niche/nichecompass_leiden_test/nichecompass_leiden_test @@ -0,0 +1,1134 @@ +#!/usr/bin/env bash + +# nichecompass_leiden_test niche-compass +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+#
+# Component authors:
+#  * Dorien Roosen
+
+set -e
+
+if [ -z "$VIASH_TEMP" ]; then
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TMP}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
+  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
+  VIASH_TEMP=${VIASH_TEMP:-/tmp}
+fi
+
+# define helper functions
+# ViashQuote: put quotes around non flag values
+# $1     : unquoted string
+# return : possibly quoted string
+# examples:
+#   ViashQuote --foo      # returns --foo
+#   ViashQuote bar        # returns 'bar'
+#   ViashQuote --foo=bar  # returns --foo='bar'
+function ViashQuote {
+  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
+    echo "$1" | sed "s#=\(.*\)#='\1'#"
+  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
+    echo "$1"
+  else
+    echo "'$1'"
+  fi
+}
+# ViashRemoveFlags: Remove leading flag
+# $1     : string with a possible leading flag
+# return : string without possible leading flag
+# examples:
+#   ViashRemoveFlags --foo=bar  # returns bar
+function ViashRemoveFlags {
+  echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
+}
+# ViashSourceDir: return the path of a bash file, following symlinks
+# usage   : ViashSourceDir ${BASH_SOURCE[0]}
+# $1      : Should always be set to ${BASH_SOURCE[0]}
+# returns : The absolute path of the directory containing the bash file
+function ViashSourceDir {
+  local source="$1"
+  while [ -h "$source" ]; do
+    local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )"
+    source="$(readlink "$source")"
+    [[ $source != /* ]] && source="$dir/$source"
+  done
+  cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd
+}
+# ViashFindTargetDir: find the closest ancestor directory that contains a '.build.yaml' file
+# usage   : ViashFindTargetDir 'ScriptPath'
+# $1      : The location from where to start the upward search
+# returns : The directory containing '.build.yaml', or an empty string when none is found
+function ViashFindTargetDir {
+  local source="$1"
+  while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do
+    source=${source%/*}
+  done
+  echo "$source"  # quoted: the path may contain spaces or glob characters
+}
+# see https://en.wikipedia.org/wiki/Syslog#Severity_level
+VIASH_LOGCODE_EMERGENCY=0
+VIASH_LOGCODE_ALERT=1
+VIASH_LOGCODE_CRITICAL=2
+VIASH_LOGCODE_ERROR=3
+VIASH_LOGCODE_WARNING=4
+VIASH_LOGCODE_NOTICE=5
+VIASH_LOGCODE_INFO=6
+VIASH_LOGCODE_DEBUG=7
+VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE
+
+# ViashLog: Log events depending on the verbosity level
+# usage: ViashLog 1 alert Oh no something went wrong!
+# $1: required verbosity level
+# $2: display tag
+# $3+: messages to display
+# stderr: Your input, prepended by '[$2] '.
+function ViashLog {
+  local required_level="$1"
+  local display_tag="$2"
+  shift 2
+  if [ "$VIASH_VERBOSITY" -ge "$required_level" ]; then
+    >&2 echo "[$display_tag]" "$@"
+  fi
+}
+
+# ViashEmergency: log events when the system is unstable
+# usage: ViashEmergency Oh no something went wrong.
+# stderr: Your input, prepended by '[emergency] '.
+function ViashEmergency {
+  ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@"
+}
+
+# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
+# usage: ViashAlert Oh no something went wrong.
+# stderr: Your input, prepended by '[alert] '.
+function ViashAlert {
+  ViashLog $VIASH_LOGCODE_ALERT alert "$@"
+}
+
+# ViashCritical: log events when a critical condition occurs
+# usage: ViashCritical Oh no something went wrong.
+# stderr: Your input, prepended by '[critical] '.
+function ViashCritical {
+  ViashLog $VIASH_LOGCODE_CRITICAL critical "$@"
+}
+
+# ViashError: log events when an error condition occurs
+# usage: ViashError Oh no something went wrong.
+# stderr: Your input, prepended by '[error] '.
+function ViashError {
+  ViashLog $VIASH_LOGCODE_ERROR error "$@"
+}
+
+# ViashWarning: log potentially abnormal events
+# usage: ViashWarning Something may have gone wrong.
+# stderr: Your input, prepended by '[warning] '.
+function ViashWarning {
+  ViashLog $VIASH_LOGCODE_WARNING warning "$@"
+}
+
+# ViashNotice: log significant but normal events
+# usage: ViashNotice This just happened.
+# stderr: Your input, prepended by '[notice] '.
+function ViashNotice {
+  ViashLog $VIASH_LOGCODE_NOTICE notice "$@"
+}
+
+# ViashInfo: log normal events
+# usage: ViashInfo This just happened.
+# stderr: Your input, prepended by '[info] '.
+function ViashInfo {
+  ViashLog $VIASH_LOGCODE_INFO info "$@"
+}
+
+# ViashDebug: log all events, for debugging purposes
+# usage: ViashDebug This just happened.
+# stderr: Your input, prepended by '[debug] '.
+function ViashDebug {
+  ViashLog $VIASH_LOGCODE_DEBUG debug "$@"
+}
+
+# find source folder of this component
+VIASH_META_RESOURCES_DIR=`ViashSourceDir "${BASH_SOURCE[0]}"`
+
+# find the root of the built components & dependencies
+VIASH_TARGET_DIR=`ViashFindTargetDir "$VIASH_META_RESOURCES_DIR"`
+
+# define meta fields
+VIASH_META_NAME="nichecompass_leiden_test"
+VIASH_META_FUNCTIONALITY_NAME="nichecompass_leiden_test"
+VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME"
+VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
+VIASH_META_TEMP_DIR="$VIASH_TEMP"
+
+
+
+# initialise variables
+VIASH_MODE='run'
+VIASH_ENGINE_ID='docker'
+
+######## Helper functions for setting up Docker images for viash ########
+# expects: ViashDockerBuild
+
+# ViashDockerInstallationCheck: check whether Docker is installed correctly
+# Exits with status 1 when the docker CLI is not on PATH or the daemon cannot be reached.
+# examples:
+#   ViashDockerInstallationCheck
+function ViashDockerInstallationCheck {
+  ViashDebug "Checking whether Docker is installed"
+  if ! command -v docker &> /dev/null; then  # negate the command itself; '[ ! command ... ]' is a malformed test that never fires
+    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
+    exit 1
+  fi
+
+  ViashDebug "Checking whether the Docker daemon is running"
+  local save=$-; set +e
+  local docker_version; docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)  # declare first: 'local x=$(cmd)' would report the status of 'local'
+  local out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
+    ViashCritical "- Try running 'dockerd' in the command line"
+    ViashCritical "- See https://docs.docker.com/config/daemon/"
+    exit 1
+  fi
+}
+
+# ViashDockerRemoteTagCheck: check whether a Docker image is available
+# on a remote. Assumes `docker login` has been performed, if relevant.
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   ViashDockerRemoteTagCheck python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerRemoteTagCheck sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerRemoteTagCheck {
+  docker manifest inspect "$1" > /dev/null 2> /dev/null
+}
+
+# ViashDockerLocalTagCheck: check whether a Docker image is available locally
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   docker pull python:latest
+#   ViashDockerLocalTagCheck python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerLocalTagCheck sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerLocalTagCheck {
+  [ -n "$(docker images -q "$1")" ]
+}
+
+# ViashDockerPull: pull a Docker image
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the image was found
+# examples:
+#   ViashDockerPull python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerPull sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerPull {
+  ViashNotice "Checking if Docker image is available at '$1'"
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker pull "$1" && return 0 || return 1
+  else
+    local save=$-; set +e
+    docker pull "$1" 2> /dev/null > /dev/null
+    local out=$?
+    [[ $save =~ e ]] && set -e
+    if [ $out -ne 0 ]; then
+      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
+    fi
+    return $out
+  fi
+}
+
+# ViashDockerPush: push a Docker image
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# exit code $?        : whether or not the push succeeded
+# examples:
+#   ViashDockerPush python:latest
+#   echo $?                                     # returns '0'
+#   ViashDockerPush sdaizudceahifu
+#   echo $?                                     # returns '1'
+function ViashDockerPush {
+  ViashNotice "Pushing image to '$1'"
+  local save=$-; set +e
+  local out
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    docker push "$1"
+    out=$?
+  else
+    docker push "$1" 2> /dev/null > /dev/null
+    out=$?
+  fi
+  [[ $save =~ e ]] && set -e
+  if [ $out -eq 0 ]; then
+    ViashNotice "Container '$1' push succeeded."
+  else
+    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
+  fi
+  return $out
+}
+
+# ViashDockerPullElseBuild: pull a Docker image, else build it
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
+# examples:
+#   ViashDockerPullElseBuild mynewcomponent
+function ViashDockerPullElseBuild {
+  local save=$-; set +e
+  ViashDockerPull "$1"
+  local out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashDockerBuild $@
+  fi
+}
+
+# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# $2                  : docker setup strategy, see DockerSetupStrategy.scala
+# $VIASH_ENGINE_ID    : engine identifier used to look up extra docker build arguments
+# examples:
+#   ViashDockerSetup mynewcomponent alwaysbuild
+function ViashDockerSetup {
+  local image_id="$1"
+  local setup_strategy="$2"
+  if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then
+    ViashDockerBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
+  elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then
+    ViashDockerPull "$image_id"
+  elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then
+    ViashDockerPullElseBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
+  elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then
+    ViashDockerPullElseBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
+  elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then
+    ViashDockerBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
+  elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then
+    local save=$-; set +e
+    ViashDockerLocalTagCheck "$image_id"
+    local outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashInfo "Image $image_id already exists"
+    elif [ "$setup_strategy" == "ifneedbebuild" ]; then
+      ViashDockerBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
+    elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then
+      ViashDockerBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
+    elif [ "$setup_strategy" == "ifneedbepull" ]; then
+      ViashDockerPull "$image_id"
+    elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then
+      ViashDockerPullElseBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
+    elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then
+      ViashDockerPullElseBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
+    else
+      ViashError "Unrecognised Docker strategy: $setup_strategy"
+      exit 1
+    fi
+  elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then
+    ViashDockerPush "$image_id"
+  elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then
+    local save=$-; set +e
+    ViashDockerRemoteTagCheck "$image_id"
+    local outCheck=$?
+    [[ $save =~ e ]] && set -e
+    if [ $outCheck -eq 0 ]; then
+      ViashNotice "Container '$image_id' exists, doing nothing."
+    else
+      ViashNotice "Container '$image_id' does not yet exist."
+      ViashDockerPush "$image_id"
+    fi
+  elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then
+    ViashNotice "Skipping setup."
+  else
+    ViashError "Unrecognised Docker strategy: $setup_strategy"
+    exit 1
+  fi
+}
+
+# ViashDockerCheckCommands: Check whether a docker container has the required commands
+#
+# $1                  : image identifier with format `[registry/]image[:tag]`
+# $@                  : commands to verify being present
+# examples:
+#   ViashDockerCheckCommands bash:4.0 bash ps foo
+function ViashDockerCheckCommands {
+  local image_id="$1"
+  shift 1
+  local commands="$@"
+  local save=$-; set +e
+  local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0'
+  missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
+  local outCheck=$?
+  [[ $save =~ e ]] && set -e
+  if [ $outCheck -ne 0 ]; then
+    ViashError "Docker container '$image_id' does not contain command '$missing'."
+    exit 1
+  fi
+}
+
+# ViashDockerBuild: build a docker image
+# $1                               : image identifier with format `[registry/]image[:tag]`
+# $...                             : additional arguments to pass to docker build
+# $VIASH_META_TEMP_DIR             : temporary directory to store dockerfile & optional resources in
+# $VIASH_META_NAME                 : name of the component
+# $VIASH_META_RESOURCES_DIR        : directory containing the resources
+# $VIASH_VERBOSITY                 : verbosity level
+# exit code $?                     : whether or not the image was built successfully
+function ViashDockerBuild {
+  local image_id="$1"
+  shift 1
+
+  # create temporary directory to store dockerfile & optional resources in
+  local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX")
+  local dockerfile="$tmpdir/Dockerfile"
+  function clean_up {
+    rm -rf "$tmpdir"
+  }
+  trap clean_up EXIT
+
+  # store dockerfile and resources
+  ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile"
+
+  # generate the build command
+  local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'"
+
+  # build the container
+  ViashNotice "Building container '$image_id' with Dockerfile"
+  ViashInfo "$docker_build_cmd"
+  local save=$-; set +e
+  if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then
+    eval $docker_build_cmd
+  else
+    eval $docker_build_cmd &> "$tmpdir/docker_build.log"
+  fi
+
+  # check exit code
+  local out=$?
+  [[ $save =~ e ]] && set -e
+  if [ $out -ne 0 ]; then
+    ViashError "Error occurred while building container '$image_id'"
+    if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then
+      ViashError "Transcript: --------------------------------"
+      cat "$tmpdir/docker_build.log"
+      ViashError "End of transcript --------------------------"
+    fi
+    exit 1
+  fi
+}
+
+######## End of helper functions for setting up Docker images for viash ########
+
+# ViashDockerfile: print the dockerfile to stdout
+# $1                  : engine identifier
+# return              : dockerfile required to run this component
+# examples:
+#   ViashDockerfile docker
+function ViashDockerfile {
+  local engine_id="$1"
+
+  if [[ "$engine_id" == "docker" ]]; then
+    cat << 'VIASHDOCKER'
+FROM python:3.12-slim
+ENTRYPOINT []
+RUN apt-get update && \
+  DEBIAN_FRONTEND=noninteractive apt-get install -y procps git && \
+  rm -rf /var/lib/apt/lists/*
+
+RUN pip install --upgrade pip && \
+  pip install --upgrade --no-cache-dir "anndata~=0.11.1" "mudata~=0.3.1" "viashpy==0.9.0" && \
+  pip install --upgrade --no-cache-dir "git+https://github.com/openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" && \
+  python -c 'exec("try:\n import awkward\nexcept ModuleNotFoundError:\n exit(0)\nelse: exit(1)")'
+
+LABEL org.opencontainers.image.authors="Dorien Roosen"
+LABEL org.opencontainers.image.description="Companion container for running component test_workflows/niche nichecompass_leiden_test"
+LABEL org.opencontainers.image.created="2025-12-13T13:27:08Z"
+LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
+LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
+LABEL org.opencontainers.image.version="niche-compass"
+
+VIASHDOCKER
+  fi
+}
+
+# ViashDockerBuildArgs: return the arguments to pass to docker build
+# $1                  : engine identifier
+# return              : arguments to pass to docker build
+function ViashDockerBuildArgs {
+  local engine_id="$1"
+
+  if [[ "$engine_id" == "docker" ]]; then
+    echo ""
+  fi
+}
+
+# ViashAbsolutePath: generate absolute path from relative path
+# borrowed from https://stackoverflow.com/a/21951256
+# $1     : relative filename
+# return : absolute path
+# examples:
+#   ViashAbsolutePath some_file.txt   # returns /path/to/some_file.txt
+#   ViashAbsolutePath /foo/bar/..     # returns /foo
+function ViashAbsolutePath {
+  local thePath
+  local parr
+  local outp
+  local len
+  if [[ ! "$1" =~ ^/ ]]; then
+    thePath="$PWD/$1"
+  else
+    thePath="$1"
+  fi
+  echo "$thePath" | (
+    IFS=/
+    read -r -a parr  # -r: keep backslashes in path components literal
+    declare -a outp
+    for i in "${parr[@]}"; do
+      case "$i" in
+      ''|.) continue ;;
+      ..)
+        len=${#outp[@]}
+        if ((len==0)); then
+          continue
+        else
+          unset "outp[$((len-1))]"  # quoted so the subscript is not treated as a glob
+        fi
+        ;;
+      *)
+        len=${#outp[@]}
+        outp[$len]="$i"
+        ;;
+      esac
+    done
+    echo /"${outp[*]}"
+  )
+}
+# ViashDockerAutodetectMount: auto configuring docker mounts from parameters
+# $1                             : The parameter value
+# returns                        : New parameter
+# $VIASH_DIRECTORY_MOUNTS        : Added another parameter to be passed to docker
+# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts
+# examples:
+#   ViashDockerAutodetectMount /path/to/bar      # returns '/viash_automount/path/to/bar'
+#   ViashDockerAutodetectMountArg /path/to/bar   # returns '--volume="/path/to:/viash_automount/path/to"'
+function ViashDockerAutodetectMount {
+  local abs_path=$(ViashAbsolutePath "$1")
+  local mount_source
+  local base_name
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source"
+  if [ -z "$base_name" ]; then
+    echo "$mount_target"
+  else
+    echo "$mount_target/$base_name"
+  fi
+}
+function ViashDockerAutodetectMountArg {
+  local abs_path=$(ViashAbsolutePath "$1")
+  local mount_source
+  local base_name
+  if [ -d "$abs_path" ]; then
+    mount_source="$abs_path"
+    base_name=""
+  else
+    mount_source=`dirname "$abs_path"`
+    base_name=`basename "$abs_path"`
+  fi
+  local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source"
+  ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target"
+  echo "--volume=\"$mount_source:$mount_target\""
+}
+function ViashDockerStripAutomount {
+  local abs_path=$(ViashAbsolutePath "$1")
+  echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}"
+}
+# initialise variables
+VIASH_DIRECTORY_MOUNTS=()
+
+# configure default docker automount prefix if it is unset
+if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then
+  VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount"
+fi
+
+# initialise docker variables
+VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "nichecompass_leiden_test niche-compass" + echo "" + echo "This component tests the output of nichecompass leiden workflow." + echo "" + echo "Inputs:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example: foo.h5mu" + echo " Path to h5mu output." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "nichecompass_leiden_test niche-compass" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." 
+ exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/openpipeline_spatial/test_workflows/niche/nichecompass_leiden_test:niche-compass' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'bash' +fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 
1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. 
+ exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! 
-z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-nichecompass_leiden_test-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +from mudata import read_h5mu +import sys +import pytest + +##VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! 
-z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! 
def test_run():
    """Check that the workflow output carries every expected NicheCompass/Leiden slot.

    The h5mu file at ``par["input"]`` is loaded and, in order, its modalities
    and the ``obsm``, ``obs``, ``obsp``, ``uns`` and ``varm`` slots of the
    ``rna`` modality are checked. Each check only requires the expected keys
    to be present; additional keys are tolerated.
    """
    input_mudata = read_h5mu(par["input"])

    def _absent(expected, container):
        # Expected keys that iterating over `container` does not yield.
        available = list(container)
        return [key for key in expected if key not in available]

    expected_mod = ["rna"]
    assert not _absent(expected_mod, input_mudata.mod), (
        f"Input modalities should be: {expected_mod}, found: {input_mudata.mod.keys()}."
    )

    # Only dereferenced once the modality check above has passed.
    rna = input_mudata.mod["rna"]

    expected_obsm = ["X_leiden_nichecompass_umap", "nichecompass_latent"]
    assert not _absent(expected_obsm, rna.obsm), (
        f"Input mod['rna'] obsm columns should be: {expected_obsm}, found: {input_mudata.mod['rna'].obsm.keys()}."
    )

    expected_obs = ["sample_id", "nichecompass_leiden_1.0"]
    assert not _absent(expected_obs, rna.obs), (
        f"Input mod['rna'] obs columns should be: {expected_obs}, found: {input_mudata.mod['rna'].obs.keys()}."
    )

    expected_obsp = [
        "spatial_distances",
        "spatial_connectivities",
        "nichecompass_connectivities",
        "nichecompass_distances",
    ]
    assert not _absent(expected_obsp, rna.obsp), (
        f"Input mod['rna'] obsp columns should be: {expected_obsp}, found: {input_mudata.mod['rna'].obsp.keys()}."
    )

    expected_uns = [
        "nichecompass_sources_categories_label_encoder",
        "nichecompass_targets_categories_label_encoder",
        "nichecompass_source_genes_idx",
        "nichecompass_target_genes_idx",
        "nichecompass_genes_idx",
        "nichecompass_gp_names",
        "nichecompass_active_gp_names",
        "nichecompass_neighbors",
        "spatial",
        "xenium_spatial_neighbors",
    ]
    assert not _absent(expected_uns, rna.uns), (
        f"Input mod['rna'] uns columns should be: {expected_uns}, found: {input_mudata.mod['rna'].uns.keys()}."
    )

    expected_varm = [
        "nichecompass_gp_sources",
        "nichecompass_gp_targets",
        "nichecompass_gp_sources_categories",
        "nichecompass_gp_targets_categories",
    ]
    assert not _absent(expected_varm, rna.varm), (
        f"Input mod['rna'] varm columns should be: {expected_varm}, found: {input_mudata.mod['rna'].varm.keys()}."
    )
"dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input" + description: "Path to h5mu output." + info: null + example: + - "foo.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +- type: "file" + path: "setup_logger.py" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "This component tests the output of nichecompass leiden workflow." +info: null +status: "enabled" +scope: + image: "test" + target: "test" +repositories: +- type: "vsh" + name: "openpipeline" + repo: "openpipeline" + tag: "v3.0.0" +links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory 
= 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "python:3.12-slim" + target_registry: "images.viash-hub.com" + target_tag: "niche-compass" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "procps" + - "git" + interactive: false + - type: "python" + user: false + packages: + - "anndata~=0.11.1" + - "mudata~=0.3.1" + - "viashpy==0.9.0" + github: + - "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" + script: + - "exec(\"try:\\n import awkward\\nexcept ModuleNotFoundError:\\n exit(0)\\\ + nelse: exit(1)\")" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/workflows/test_workflows/niche/nichecompass_leiden/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test" + executable: 
"target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/main.nf" + viash_version: "0.9.4" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" + git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" +package_config: + name: "openpipeline_spatial" + version: "niche-compass" + info: + test_resources: + - type: "s3" + path: "s3://openpipelines-bio/openpipeline_spatial/resources_test" + dest: "resources_test" + repositories: + - type: "vsh" + name: "openpipeline" + repo: "openpipeline" + tag: "v3.0.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'niche-compass'" + organization: "vsh" + links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" diff --git a/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/main.nf b/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/main.nf new file mode 100644 index 0000000..1f38b51 --- /dev/null +++ b/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/main.nf @@ -0,0 +1,3877 @@ +// nichecompass_leiden_test niche-compass +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. 
// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf'
/**
 * Raised when an argument value does not have the type its parameter
 * definition asks for. The pieces of the message are kept as properties so
 * that callers can build a derived message (e.g. for list-typed arguments).
 */
class UnexpectedArgumentTypeException extends Exception {
  String errorIdentifier
  String stage
  String plainName
  String expectedClass
  String foundClass

  // errorIdentifier is typically of the form:
  // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""}
  UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) {
    super(describe(errorIdentifier, stage, plainName, expectedClass, foundClass))
    this.errorIdentifier = errorIdentifier
    this.stage = stage
    this.plainName = plainName
    this.expectedClass = expectedClass
    this.foundClass = foundClass
  }

  // Assemble the exception message; empty or null identifier/stage are left out.
  private static String describe(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) {
    def text = new StringBuilder("Error")
    if (errorIdentifier) {
      text << " " << errorIdentifier
    }
    text << ":"
    if (stage) {
      text << " " << stage
    }
    text << " argument '" << plainName << "' has the wrong type. "
    text << "Expected type: " << expectedClass << ". Found type: " << foundClass
    return text.toString()
  }
}
/**
 * Checks if the given value is of the expected type. If not, an exception is thrown.
 *
 * Values are converted where the parameter type allows it (GString to String,
 * String/File to Path, values castable to a number or boolean). For parameters
 * with `multiple: true` the value is wrapped in a list if needed, strings are
 * split on `par.multiple_sep`, and every element is checked individually.
 *
 * @param stage The stage of the argument (input or output)
 * @param par The parameter definition
 * @param value The value to check
 * @param errorIdentifier The identifier to use in the error message
 * @return The (possibly converted) value, if it is of the expected type
 * @throws UnexpectedArgumentTypeException If the value is not of the expected type
 */
def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) {
  // expectedClass will only be != null if value is not of the expected type
  def expectedClass = null
  def foundClass = null

  // todo: split if need be

  if (!par.required && value == null) {
    // optional arguments may be left unset
    expectedClass = null
  } else if (par.multiple) {
    if (value !instanceof Collection) {
      value = [value]
    }

    // split strings
    value = value.collectMany{ val ->
      if (val instanceof String) {
        // collect() to ensure that the result is a List and not simply an array
        val.split(par.multiple_sep).collect()
      } else {
        [val]
      }
    }

    // process globs
    if (par.type == "file" && par.direction == "input") {
      value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten()
    }

    // check types of elements in list
    try {
      value = value.collect { listVal ->
        _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier)
      }
    } catch (UnexpectedArgumentTypeException e) {
      // report the failure in terms of the list type
      expectedClass = "List[${e.expectedClass}]"
      foundClass = "List[${e.foundClass}]"
    }
  } else if (par.type == "string") {
    // cast to string if need be. only cast if the value is a GString
    if (value instanceof GString) {
      value = value as String
    }
    expectedClass = value instanceof String ? null : "String"
  } else if (par.type == "integer") {
    // cast to integer if need be
    if (value !instanceof Integer) {
      try {
        value = value as Integer
      } catch (Exception e) {
        // any failed conversion (not only NumberFormatException) is a type
        // error, in line with the boolean and string branches below
        expectedClass = "Integer"
      }
    }
  } else if (par.type == "long") {
    // cast to long if need be
    if (value !instanceof Long) {
      try {
        value = value as Long
      } catch (Exception e) {
        expectedClass = "Long"
      }
    }
  } else if (par.type == "double") {
    // cast to double if need be
    if (value !instanceof Double) {
      try {
        value = value as Double
      } catch (Exception e) {
        expectedClass = "Double"
      }
    }
  } else if (par.type == "float") {
    // cast to float if need be
    if (value !instanceof Float) {
      try {
        value = value as Float
      } catch (Exception e) {
        expectedClass = "Float"
      }
    }
  } else if (par.type == "boolean" || par.type == "boolean_true" || par.type == "boolean_false") {
    // cast to boolean if need be
    if (value !instanceof Boolean) {
      try {
        value = value as Boolean
      } catch (Exception e) {
        expectedClass = "Boolean"
      }
    }
  } else if (par.type == "file" && (par.direction == "input" || stage == "output")) {
    // cast to path if need be
    if (value instanceof String) {
      value = file(value, hidden: true)
    }
    if (value instanceof File) {
      value = value.toPath()
    }
    expectedClass = value instanceof Path ? null : "Path"
  } else if (par.type == "file" && stage == "input" && par.direction == "output") {
    // output files passed as input are kept as strings
    // cast to string if need be
    if (value !instanceof String) {
      try {
        value = value as String
      } catch (Exception e) {
        expectedClass = "String"
      }
    }
  } else {
    // didn't find a match for par.type
    expectedClass = par.type
  }

  if (expectedClass != null) {
    if (foundClass == null) {
      foundClass = value.getClass().getName()
    }
    throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass)
  }

  return value
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf'
/**
 * Validate the inputs passed to a module and convert them to their declared types.
 *
 * Skipped entirely during a stub run.
 *
 * @param inputs Map of argument name to value
 * @param config The Viash config of the module; `config.allArguments` is consulted
 * @param id The id of the event, used in error messages
 * @param key The module key, used in error messages
 * @return The inputs with every value passed through _checkArgumentType
 */
Map _processInputValues(Map inputs, Map config, String id, String key) {
  if (!workflow.stubRun) {
    // every required input argument must be present and non-null
    config.allArguments.each { arg ->
      if (arg.required && arg.direction == "input") {
        assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null :
          "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing"
      }
    }

    inputs = inputs.collectEntries { name, value ->
      // file arguments are accepted regardless of direction
      def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") }
      assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument"

      value = _checkArgumentType("input", par, value, "in module '$key' id '$id'")

      [ name, value ]
    }
  }
  return inputs
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf'
/**
 * Check that every entry of `outputs` is a declared output argument and
 * convert its value to the declared type. Skipped during a stub run.
 *
 * @param outputs Map of argument name to value
 * @param config The Viash config of the module
 * @param id The id of the event, used in error messages
 * @param key The module key, used in error messages
 * @return The outputs with every value passed through _checkArgumentType
 */
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
  if (!workflow.stubRun) {
    outputs = outputs.collectEntries { name, value ->
      def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
      assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"

      value = _checkArgumentType("output", par, value, "in module '$key' id '$id'")

      [ name, value ]
    }
  }
  return outputs
}

/**
 * Assert that every required output argument is present and non-null in
 * `outputs`. Skipped during a stub run.
 */
void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
  if (!workflow.stubRun) {
    config.allArguments.each { arg ->
      if (arg.direction == "output" && arg.required) {
        assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
          "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
      }
    }
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf'
/**
 * Set of ids seen so far; access is guarded by Groovy's read/write lock
 * annotations.
 */
class IDChecker {
  final def items = [] as Set

  /** Record `item`; returns false if it had been observed before. */
  @groovy.transform.WithWriteLock
  boolean observe(String item) {
    if (items.contains(item)) {
      return false
    } else {
      items << item
      return true
    }
  }

  /** Whether `item` has been observed. */
  @groovy.transform.WithReadLock
  boolean contains(String item) {
    return items.contains(item)
  }

  /** A copy of the observed ids. */
  @groovy.transform.WithReadLock
  Set getItems() {
    return items.clone()
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf'

/**
 * Check if the ids are unique across parameter sets
 *
 * @param parameterSets a list of parameter sets; the id is the first element of each.
 */
private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) {
  def ppIds = parameterSets.collect{it[0]}
  // unique(false) returns a de-duplicated copy; the default unique() would
  // strip the duplicates from ppIds itself before the message below is built
  def uniqueIds = ppIds.unique(false)
  assert ppIds.size() == uniqueIds.size() : "All argument sets should have unique ids. Detected ids: $ppIds"
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf'

// helper functions for reading params from file //
/**
 * Resolve `child` against the directory of `parent`.
 *
 * @param parent Path of the file that `child` is relative to
 * @param child A path or URL
 * @return `child` unchanged when it contains "://" or is an absolute path,
 *   otherwise the absolute path of `parent` with its last component
 *   replaced by `child`
 */
def _getChild(parent, child) {
  if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) {
    child
  } else {
    def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString()
    // drop the file name of the parent, keep the trailing slash
    parentAbsolute.replaceAll('/[^/]*$', "/") + child
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf'
/**
 * Figure out the param list format based on the file extension
 *
 * @param param_list A String containing the path to the parameter list file,
 *   a yaml blob, or a non-String value.
 *
 * @return A String containing the format of the parameter list file:
 *   "asis" for anything that is not a String, "csv", "json" or "yaml" based
 *   on the extension, and "yaml_blob" for any other String.
 */
def _paramListGuessFormat(param_list) {
  if (param_list !instanceof String) {
    "asis"
  } else if (param_list.endsWith(".csv")) {
    "csv"
  } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) {
    "json"
  } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) {
    "yaml"
  } else {
    "yaml_blob"
  }
}


/**
 * Read the param list
 *
 * @param param_list One of the following:
 *   - A String containing the path to the parameter list file (csv, json or yaml),
 *   - A yaml blob of a list of maps (yaml_blob),
 *   - Or a groovy list of maps (asis).
 * @param config A Map of the Viash configuration.
 *
 * @return A List of [id, Map] pairs, one per parameter set.
 */
def _parseParamList(param_list, Map config) {
  // first determine format by extension
  def paramListFormat = _paramListGuessFormat(param_list)

  // only file-based formats have a path; it is used further down to resolve
  // relative input paths
  def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ?
    file(param_list, hidden: true) :
    null

  // get the correct parser function for the detected params_list format
  def paramSets = []
  if (paramListFormat == "asis") {
    paramSets = param_list
  } else if (paramListFormat == "yaml_blob") {
    paramSets = readYamlBlob(param_list)
  } else if (paramListFormat == "yaml") {
    paramSets = readYaml(paramListPath)
  } else if (paramListFormat == "json") {
    paramSets = readJson(paramListPath)
  } else if (paramListFormat == "csv") {
    paramSets = readCsv(paramListPath)
  } else {
    error "Format of provided --param_list not recognised.\n" +
    "Found: '$paramListFormat'.\n" +
    "Expected: a csv file, a json file, a yaml file,\n" +
    "a yaml blob or a groovy list of maps."
  }

  // data checks
  assert paramSets instanceof List: "--param_list should contain a list of maps"
  for (value in paramSets) {
    assert value instanceof Map: "--param_list should contain a list of maps"
  }

  // id is argument
  def idIsArgument = config.allArguments.any{it.plainName == "id"}

  // Reformat from a List of Maps to a List of [id, Map] pairs by adding the ID
  // as first element. The 'id' key is removed from the map unless the
  // component itself declares an argument named 'id'.
  paramSets = paramSets.collect({ data ->
    def id = data.id
    if (!idIsArgument) {
      data = data.findAll{k, v -> k != "id"}
    }
    [id, data]
  })

  // Split parameters with 'multiple: true'
  paramSets = paramSets.collect({ id, data ->
    data = _splitParams(data, config)
    [id, data]
  })

  // The paths of input files inside a param_list file may have been specified relatively to the
  // location of the param_list file. These paths must be made absolute.
  if (paramListPath) {
    paramSets = paramSets.collect({ id, data ->
      def new_data = data.collectEntries{ parName, parValue ->
        def par = config.allArguments.find{it.plainName == parName}
        if (par && par.type == "file" && par.direction == "input") {
          if (parValue instanceof Collection) {
            parValue = parValue.collectMany{path ->
              // NOTE(review): _resolveSiblingIfNotAbsolute is defined elsewhere;
              // it apparently may return either one value or a collection
              def x = _resolveSiblingIfNotAbsolute(path, paramListPath)
              x instanceof Collection ? x : [x]
            }
          } else {
            parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath)
          }
        }
        [parName, parValue]
      }
      [id, new_data]
    })
  }

  return paramSets
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf'
/**
 * Split parameters for arguments that accept multiple values using their separator
 *
 * @param parValues A Map containing parameters to split.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *               using the readConfig() function.
 *
 * @return A Map of parameters where the parameter values have been split into a list using
 *         their separator. Values of unknown arguments are returned unchanged.
 */
Map _splitParams(Map parValues, Map config){
  def parsedParamValues = parValues.collectEntries { parName, parValue ->
    def parameterSettings = config.allArguments.find({it.plainName == parName})

    if (!parameterSettings) {
      // if argument is not found, do not alter
      return [parName, parValue]
    }
    if (parameterSettings.multiple) { // Check if parameter can accept multiple values
      if (parValue instanceof Collection) {
        // split every String element; other elements are kept as they are
        parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it }
      } else if (parValue instanceof String) {
        parValue = parValue.split(parameterSettings.multiple_sep)
      } else if (parValue == null) {
        parValue = []
      } else {
        parValue = [ parValue ]
      }
      // merge the pieces produced by split() into one flat list
      parValue = parValue.flatten()
    }
    // For all parameters check if multiple values are only passed for
    // arguments that allow it. Quietly simplify lists of length 1.
    if (!parameterSettings.multiple && parValue instanceof Collection) {
      assert parValue.size() == 1 :
      "Error: argument ${parName} has too many values.\n" +
      " Expected amount: 1. Found: ${parValue.size()}"
      parValue = parValue[0]
    }
    [parName, parValue]
  }
  return parsedParamValues
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf'
/**
 * Parse nextflow parameters based on settings defined in a viash config.
 * Return a list of parameter sets, each parameter set corresponding to
 * an event in a nextflow channel. The output from this function can be used
 * with Channel.fromList to create a nextflow channel with Vdsl3 formatted
 * events.
 *
 * This function performs:
 *   - A filtering of the params which can be found in the config file.
 *   - Process the param_list argument which allows a user to initialise
 *     a Vdsl3 channel with multiple parameter sets. Possible formats are
 *     csv, json, yaml, or simply a yaml_blob. A csv should have column names
 *     which correspond to the different arguments of this pipeline. A json or a yaml
 *     file should be a list of maps, each of which has keys corresponding to the
 *     arguments of the pipeline. A yaml blob can also be passed directly as a parameter.
 *     When passing a csv, json or yaml, relative path names are resolved against the
 *     location of the parameter file.
 *   - Combine the parameter sets into a vdsl3 Channel.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *                 provides a list of arguments that can be split up into multiple events
 *                 in the output channel. Possible formats of param_list are: a csv file,
 *                 json file, a yaml file or a yaml blob. Each parameter set (event) must
 *                 have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *                 using the readConfig() function.
 *
 * @return A list of parameters with the first element of the event being
 *          the event ID and the second element containing a map of the parsed parameters.
 */

private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){
  // todo: fetch key from run args
  def key_ = config.name

  /* parse regular parameters (not in param_list)  */
  /*************************************************/
  def globalParams = config.allArguments
    .findAll { params.containsKey(it.plainName) }
    .collectEntries { [ it.plainName, params[it.plainName] ] }
  def globalID = params.get("id", null)

  /* process params_list arguments */
  /*********************************/
  def paramList = params.containsKey("param_list") && params.param_list != null ?
    params.param_list : []
  // if (paramList instanceof String) {
  //   paramList = [paramList]
  // }
  // def paramSets = paramList.collectMany{ _parseParamList(it, config) }
  // TODO: be able to process param_list when it is a list of strings
  def paramSets = _parseParamList(paramList, config)
  if (paramSets.isEmpty()) {
    // no param_list: a single parameter set built from the global params only
    paramSets = [[null, [:]]]
  }

  /* combine arguments into channel */
  /**********************************/
  def processedParams = paramSets.indexed().collect{ index, tup ->
    // Process ID: the id of the parameter set wins over the global id
    def id = tup[0] ?: globalID

    if (workflow.stubRun && !id) {
      // if stub run, explicitly add an id if missing
      id = "stub${index}"
    }
    assert id != null: "Each parameter set should have at least an 'id'"

    // Process params: values from the parameter set override the global ones
    def parValues = globalParams + tup[1]
    // // Remove parameters which are null, if the default is also null
    // parValues = parValues.collectEntries{paramName, paramValue ->
    //   parameterSettings = config.functionality.allArguments.find({it.plainName == paramName})
    //   if ( paramValue != null || parameterSettings.get("default", null) != null ) {
    //     [paramName, paramValue]
    //   }
    // }
    parValues = parValues.collectEntries { name, value ->
      def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") }
      // the assert already rejects unknown arguments, so no separate
      // null branch is needed afterwards
      assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument"

      value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'")

      [ name, value ]
    }

    [id, parValues]
  }

  // Check if ids (first element of each list) is unique
  _checkUniqueIds(processedParams)
  return processedParams
}
/**
 * Parse nextflow parameters based on settings defined in a viash config
 * and return a nextflow channel.
 *
 * @param params Input parameters. Can optionally contain a 'param_list' key that
 *                 provides a list of arguments that can be split up into multiple events
 *                 in the output channel. Possible formats of param_list are: a csv file,
 *                 json file, a yaml file or a yaml blob. Each parameter set (event) must
 *                 have a unique ID.
 * @param config A Map of the Viash configuration. This Map can be generated from the config file
 *                 using the readConfig() function.
 *
 * @return A nextflow Channel with events. Events are formatted as a tuple that
 *          contains the ID of the event as first element and a parameter map as second element.
 */
def channelFromParams(Map params, Map config) {
  def processedParams = _paramsToParamSets(params, config)
  return Channel.fromList(processedParams)
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf'
/**
 * Channel operator that detects events whose id (first tuple element) was
 * already seen earlier in the channel.
 *
 * @param args Options:
 *   - stopOnError: when true (the default) a duplicate id raises an error;
 *     when false the duplicate event is dropped with a warning.
 *
 * @return The filtered channel.
 */
def checkUniqueIds(Map args) {
  // Use the caller's value when given and fall back to true otherwise. The
  // operands used to be swapped, which ignored an explicit `false` and
  // turned an unset option into null.
  def stopOnError = args.stopOnError == null ? true : args.stopOnError

  def idChecker = new IDChecker()

  return filter { tup ->
    // observe() returns false when the id had been recorded before
    if (!idChecker.observe(tup[0])) {
      if (stopOnError) {
        error "Duplicate id: ${tup[0]}"
      } else {
        log.warn "Duplicate id: ${tup[0]}, removing duplicate entry"
        return false
      }
    }
    return true
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf'
// This helper file will be deprecated soon
// Script-level flag so that the deprecation warning is printed only once.
preprocessInputsDeprecationWarningPrinted = false

/** Print the preprocessInputs() deprecation warning to stderr, once per run. */
def preprocessInputsDeprecationWarning() {
  if (!preprocessInputsDeprecationWarningPrinted) {
    preprocessInputsDeprecationWarningPrinted = true
    System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.")
  }
}
/**
 * Generate a nextflow operator that processes a channel of
 * Vdsl3 formatted events and applies a Viash config to them:
 *    - Gather the default values of the arguments in the Viash config.
 *    - Merge them with the parameters of each event; values supplied by the
 *      event take precedence over the defaults.
 *    - Check and convert every value that corresponds to an input (or file)
 *      argument with _checkArgumentType; other entries are passed on as is.
 *
 * NOTE(review): earlier documentation also listed a check that event IDs are
 * unique; no such check is performed in this function.
 *
 * The events in the channel are formatted as tuples, with the
 * first element of the tuples being a unique id of the parameter set,
 * and the second element containing the parameters themselves.
 * Optional extra elements of the tuples will be passed to the output as is.
 *
 * @param args A map that must contain a 'config' key that points
 *              to a parsed config (see readConfig()). Optionally, a
 *              'key' key can be provided which is used in error messages
 *              (defaults to the name in the config).
 *
 * @return The result of applying `map` with the processing closure, to be
 *          used on a channel of Vdsl3 formatted events.
 */
def preprocessInputs(Map args) {
  preprocessInputsDeprecationWarning()

  def config = args.config
  assert config instanceof Map :
    "Error in preprocessInputs: config must be a map. " +
    "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
  def key_ = args.key ?: config.name

  // Get different parameter types (used throughout this function)
  def defaultArgs = config.allArguments
    .findAll { it.containsKey("default") }
    .collectEntries { [ it.plainName, it.default ] }

  map { tup ->
    def id = tup[0]
    def data = tup[1]
    // anything after [id, data] is carried along untouched
    def passthrough = tup.drop(2)

    // right-hand operand wins on key clashes, so event data overrides defaults
    def new_data = (defaultArgs + data).collectEntries { name, value ->
      def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") }

      // unknown names are tolerated here and left unchecked
      if (par != null) {
        value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'")
      }

      [ name, value ]
    }

    [ id, new_data ] + passthrough
  }
}
*
 * @param components: list of Viash VDSL3 modules to run
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component config.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component config.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component config.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component config.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runComponents(Map args) {
  log.warn("runComponents is deprecated, use runEach instead")
  assert args.components: "runComponents should be passed a list of components to run"

  // a single component is shorthand for a one-element list
  def components_ = args.components instanceof List ? args.components : [ args.components ]
  assert components_.size() > 0: "pass at least one component to runComponents"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def id_ = args.id

  // derive the component input from the incoming state
  def selectInput = { id, state, compConfig ->
    if (fromState_ instanceof Map) {
      return fromState_.collectEntries{ newKey, origKey -> [newKey, state[origKey]] }
    }
    if (fromState_ instanceof List) {
      return fromState_.collectEntries{ key -> [key, state[key]] }
    }
    if (fromState_ instanceof Closure) {
      return fromState_(id, state, compConfig)
    }
    // no (recognised) fromState: hand the state over unchanged
    return state
  }

  // fold the component output back into the previous state
  def mergeOutput = { id, output, oldState, compConfig ->
    if (toState_ instanceof Map) {
      return oldState + toState_.collectEntries{ newKey, outKey -> [newKey, output[outKey]] }
    }
    if (toState_ instanceof List) {
      return oldState + toState_.collectEntries{ key -> [key, output[key]] }
    }
    if (toState_ instanceof Closure) {
      return toState_(id, output, oldState, compConfig)
    }
    // toState of any other type yields a null state
    return null
  }

  workflow runComponentsWf {
    take: input_ch
    main:

    // generate one channel per component
    out_chs = components_.collect{ comp_ ->
      def comp_config = comp_.config

      // optional filtering of the input events
      def filter_ch
      if (filter_) {
        filter_ch = input_ch | filter{ tup -> filter_(tup[0], tup[1], comp_config) }
      } else {
        filter_ch = input_ch
      }

      // optional renaming of the event id
      def id_ch
      if (id_) {
        id_ch = filter_ch | map{ tup ->
          def new_id =
            id_ instanceof String ? id_ :
            id_ instanceof Closure ? id_(tup[0], tup[1], comp_config) :
            tup[0]
          [new_id] + tup.drop(1)
        }
      } else {
        id_ch = filter_ch
      }

      // insert the component input right after the id; the old state follows it
      def data_ch = id_ch | map{ tup ->
        tup.take(1) + [selectInput(tup[0], tup[1], comp_config)] + tup.drop(1)
      }

      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )

      // optional merge of the output (tup[1]) with the old state (tup[2])
      def post_ch
      if (toState_) {
        post_ch = out_ch | map{ tup ->
          [tup[0], mergeOutput(tup[0], tup[1], tup[2], comp_config)] + tup.drop(3)
        }
      } else {
        post_ch = out_ch
      }

      post_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runComponentsWf
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf'
/**
 * Run a list of components on a stream of data.
 *
 * @param components: list of Viash VDSL3 modules to run
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component itself.
* @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component itself.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component itself.
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runEach(Map args) {
  assert args.components: "runEach should be passed a list of components to run"

  // a single component is shorthand for a one-element list
  def components_ = args.components instanceof List ? args.components : [ args.components ]
  assert components_.size() > 0: "pass at least one component to runEach"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def runIf_ = args.runIf
  def id_ = args.id

  assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf."

  // derive the component input from the incoming state
  def selectInput = { id, state, comp ->
    if (fromState_ instanceof Map) {
      return fromState_.collectEntries{ newKey, origKey -> [newKey, state[origKey]] }
    }
    if (fromState_ instanceof List) {
      return fromState_.collectEntries{ key -> [key, state[key]] }
    }
    if (fromState_ instanceof Closure) {
      return fromState_(id, state, comp)
    }
    // no (recognised) fromState: hand the state over unchanged
    return state
  }

  // fold the component output back into the previous state
  def mergeOutput = { id, output, oldState, comp ->
    if (toState_ instanceof Map) {
      return oldState + toState_.collectEntries{ newKey, outKey -> [newKey, output[outKey]] }
    }
    if (toState_ instanceof List) {
      return oldState + toState_.collectEntries{ key -> [key, output[key]] }
    }
    if (toState_ instanceof Closure) {
      return toState_(id, output, oldState, comp)
    }
    // toState of any other type yields a null state
    return null
  }

  workflow runEachWf {
    take: input_ch
    main:

    // generate one channel per component
    out_chs = components_.collect{ comp_ ->
      // optional filtering of the input events
      def filter_ch
      if (filter_) {
        filter_ch = input_ch | filter{ tup -> filter_(tup[0], tup[1], comp_) }
      } else {
        filter_ch = input_ch
      }

      // optional renaming of the event id; the result must be a String
      def id_ch
      if (id_) {
        id_ch = filter_ch | map{ tup ->
          def new_id = id_ instanceof Closure ? id_(tup[0], tup[1], comp_) : id_
          assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}"
          [new_id] + tup.drop(1)
        }
      } else {
        id_ch = filter_ch
      }

      // events for which runIf is false bypass the component entirely
      def chRun = id_ch
      def chPassthrough = null
      if (runIf_) {
        def idRunIfBranch = id_ch.branch{ tup ->
          run: runIf_(tup[0], tup[1], comp_)
          passthrough: true
        }
        chRun = idRunIfBranch.run
        chPassthrough = idRunIfBranch.passthrough
      } else {
        chPassthrough = Channel.empty()
      }

      // insert the component input right after the id; the old state follows it
      def data_ch = chRun | map{ tup ->
        tup.take(1) + [selectInput(tup[0], tup[1], comp_)] + tup.drop(1)
      }

      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )

      // optional merge of the output (tup[1]) with the old state (tup[2])
      def post_ch
      if (toState_) {
        post_ch = out_ch | map{ tup ->
          [tup[0], mergeOutput(tup[0], tup[1], tup[2], comp_)] + tup.drop(3)
        }
      } else {
        post_ch = out_ch
      }

      // append the bypassed events after the processed ones
      def return_ch = post_ch
        | concat(chPassthrough)

      return_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runEachWf
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf'
/**
 * Join sourceChannel to targetChannel
 *
 * This function joins the sourceChannel to the targetChannel.
 * However, each id in the targetChannel must be present in the
 * sourceChannel.
* If _meta.join_id exists in the targetChannel, that is
 * used as an id instead. If the id doesn't match any id in the sourceChannel,
 * an error is thrown.
 */

def safeJoin(targetChannel, sourceChannel, key) {
  def sourceIDs = new IDChecker()

  // record every id seen in the source channel
  def sourceCheck = sourceChannel
    | map { tup ->
      sourceIDs.observe(tup[0])
      tup
    }
  // NOTE(review): targetCheck is built here but the join below consumes
  // targetChannel directly, so this id check does not appear to take effect.
  // Left unchanged because wiring it in may be order-sensitive -- confirm.
  def targetCheck = targetChannel
    | map { tup ->
      def id = tup[0]

      if (!sourceIDs.contains(id)) {
        error (
          "Error in module '${key}' when merging output with original state.\n" +
          " Reason: output with id '${id}' could not be joined with source channel.\n" +
          " If the IDs in the output channel differ from the input channel,\n" +
          " please set `tup[1]._meta.join_id to the original ID.\n" +
          " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" +
          " Unexpected ID in the output channel: '${id}'.\n" +
          " Example input event: [\"id\", [input: file(...)]],\n" +
          " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]"
        )
      }
      // TODO: add link to our documentation on how to fix this

      tup
    }

  // append the remaining source elements (everything but the id) to each target event
  sourceCheck.cross(targetChannel)
    | map{ left, right ->
      right + left.drop(1)
    }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf'
/**
 * Fill in the default settings of a single argument definition.
 *
 * The argument map is modified in place and also returned.
 *
 * @param arg A map describing one argument of a Viash config.
 *
 * @return The same map, with multiple/required/direction/multiple_sep/plainName
 *   (and, for files, must_exist/create_parent and an output default) filled in.
 */
def _processArgument(arg) {
  arg.multiple = arg.multiple != null ? arg.multiple : false
  arg.required = arg.required != null ? arg.required : false
  arg.direction = arg.direction != null ? arg.direction : "input"
  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";"
  arg.plainName = arg.name.replaceAll("^-*", "")

  if (arg.type == "file") {
    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
  }

  // add default values to output files which haven't already got a default
  if (arg.type == "file" && arg.direction == "output" && arg.default == null) {
    def mult = arg.multiple ? "_*" : ""
    // the default is known to be null here, so only the example can
    // provide a file extension (the former `arg.default != null` branch
    // was unreachable and has been removed)
    def extSearch = arg.example != null ? arg.example : ""
    if (extSearch instanceof List) {
      extSearch = extSearch[0]
    }
    def extSearchResult = extSearch.find("\\.[^\\.]+\$")
    def ext = extSearchResult != null ? extSearchResult : ""
    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
    if (arg.multiple) {
      arg.default = [arg.default]
    }
  }

  // single-valued arguments should not carry list defaults/examples
  if (!arg.multiple) {
    if (arg.default != null && arg.default instanceof List) {
      arg.default = arg.default[0]
    }
    if (arg.example != null && arg.example instanceof List) {
      arg.example = arg.example[0]
    }
  }

  if (arg.type == "boolean_true") {
    arg.default = false
  }
  if (arg.type == "boolean_false") {
    arg.default = true
  }

  arg
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf'
/**
 * Merge the Nextflow-level arguments (--publish_dir, --param_list) into a
 * config and run the result through processConfig().
 *
 * @param config A Viash config map.
 *
 * @return The processed, merged config.
 */
def addGlobalArguments(config) {
  def localConfig = [
    "argument_groups": [
      [
        "name": "Nextflow input-output arguments",
        "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
        "arguments" : [
          [
            'name': '--publish_dir',
            'required': true,
            'type': 'string',
            'description': 'Path to an output directory.',
            'example': 'output/',
            'multiple': false
          ],
          [
            'name': '--param_list',
            'required': false,
            'type': 'string',
            'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.
            |
            |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`.
            |* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.
            |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`.
            |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`.
            |
            |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
            'example': 'my_params.yaml',
            'multiple': false,
            'hidden': true
          ]
          // TODO: allow multiple: true in param_list?
          // TODO: allow to specify a --param_list_regex to filter the param_list?
          // TODO: allow to specify a --param_list_from_state to remap entries in the param_list?
        ]
      ]
    ]
  ]

  return processConfig(_mergeMap(config, localConfig))
}

/**
 * Recursively merge rhs into a shallow copy of lhs.
 *
 * Nested maps are merged recursively, collections are concatenated and
 * any other value in rhs overrides the value in lhs.
 *
 * @param lhs The base map (must support clone()).
 * @param rhs The map whose entries are merged on top.
 *
 * @return The merged map.
 */
def _mergeMap(Map lhs, Map rhs) {
  return rhs.inject(lhs.clone()) { map, entry ->
    if (map[entry.key] instanceof Map && entry.value instanceof Map) {
      map[entry.key] = _mergeMap(map[entry.key], entry.value)
    } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) {
      map[entry.key] += entry.value
    } else {
      map[entry.key] = entry.value
    }
    return map
  }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf'
/**
 * Render the help text of a single (processed) argument.
 *
 * @param param A processed argument map (see _processArgument).
 *
 * @return The help text for this argument, as a String.
 */
def _generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  // defaults and examples are rendered the same way: lists are joined with
  // the argument's separator, anything else is stringified
  def renderValue = { value ->
    if (value == null) {
      return null
    }
    value instanceof List
      ? value.join(param.multiple_sep != null ? param.multiple_sep : ", ")
      : value.toString()
  }
  def dflt = renderValue(param.default)
  def example = renderValue(param.example)
  def min = param.min?.toString()
  def max = param.max?.toString()

  // quote a choice when it contains a comma or needed escaping
  def escapeChoice = { choice ->
    def s1 = choice.replaceAll("\\n", "\\\\n")
    def s2 = s1.replaceAll("\"", """\\\"""")
    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}

// Based on Helper.generateHelp() in Helper.scala
/**
 * Render the full help text of a processed config.
 *
 * @param config A processed config (see processConfig).
 *
 * @return The help text, as a String.
 */
def _generateHelp(config) {
  def fun = config

  // PART 1: NAME AND VERSION
  def nameStr = fun.name +
    (fun.version == null ? "" : " " + fun.version)

  // PART 2: DESCRIPTION
  def descrStr = fun.description == null ?
    "" :
    "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n")

  // PART 3: Usage
  def usageStr = fun.usage == null ?
    "" :
    "\n\nUsage:\n" + fun.usage.trim()

  // PART 4: Options
  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
    def name = argGroup.name
    def descriptionStr = argGroup.description == null ?
      "" :
      "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
    // group members may be given by name; resolve those against allArguments
    def arguments = argGroup.arguments.collect{arg ->
      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
    }.findAll{it != null}
    def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)}

    "\n\n$name:" +
      descriptionStr +
      argumentStrs.join("\n")
  }

  // FINAL: combine
  def out = nameStr +
    descrStr +
    usageStr +
    argGroupStrs.join("")

  return out
}

// based on Format._paragraphWrap
/**
 * Greedily wrap a text to a maximum line length.
 *
 * Each newline-separated paragraph is wrapped on its own; a word longer
 * than maxLength is placed on a line of its own.
 *
 * @param str The text to wrap.
 * @param maxLength The maximum length of an output line.
 *
 * @return A list of wrapped lines.
 */
def _paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    def words = par.split("\\s").toList()

    // a whitespace-only paragraph splits into zero words; keep it as an
    // empty line instead of failing on the empty list
    if (words.isEmpty()) {
      outLines.add("")
      return
    }

    // head()/tail() iterate front to back regardless of the Groovy version,
    // unlike List.pop()
    def line = words.head()
    for (word in words.tail()) {
      if (line.length() + word.length() + 1 <= maxLength) {
        line = line + " " + word
      } else {
        outLines.add(line)
        line = word
      }
    }
    outLines.add(line)
  }
  return outLines
}

/**
 * Print the help text and exit when the 'help' parameter is set.
 *
 * @param config A processed config (see processConfig).
 */
def helpMessage(config) {
  if (params.containsKey("help") && params.help) {
    def mergedConfig = addGlobalArguments(config)
    def helpStr = _generateHelp(mergedConfig)
    println(helpStr)
    exit 0
  }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf'
/**
 * Fill in argument defaults and derive the combined argument lists.
 *
 * The config is modified in place and also returned. Adds:
 *   - allArguments: top-level arguments followed by all group arguments
 *   - allArgumentGroups: the argument groups, with the top-level arguments
 *     placed in a group named "Arguments"
 *
 * @param config A Viash config map.
 *
 * @return The same config map, processed.
 */
def processConfig(config) {
  // set defaults for arguments
  config.arguments =
    (config.arguments ?: []).collect{_processArgument(it)}

  // set defaults for argument_group arguments
  config.argument_groups =
    (config.argument_groups ?: []).collect{grp ->
      grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)}
      grp
    }

  // create combined arguments list
  config.allArguments =
    config.arguments +
    config.argument_groups.collectMany{it.arguments}

  // add missing argument groups (based on Functionality::allArgumentGroups())
  def argGroups = config.argument_groups
  if (argGroups.any{it.name.toLowerCase() == "arguments"}) {
    argGroups = argGroups.collect{ grp ->
      if (grp.name.toLowerCase() == "arguments") {
        grp = grp + [
          arguments: grp.arguments + config.arguments
        ]
      }
      grp
    }
  } else {
    argGroups = argGroups + [
      name: "Arguments",
      arguments: config.arguments
    ]
  }
  config.allArgumentGroups = argGroups

  config
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf'

/**
 * Read a Viash config from a yaml file and process it.
 *
 * @param file Path of the config; when not set, 'config.vsh.yaml' in the
 *   module directory is used.
 *
 * @return The processed config.
 */
def readConfig(file) {
  def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml"))
  processConfig(config)
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf'
/**
 * Resolve a path relative to the current file.
 *
 * @param str The path to resolve, as a String.
 * @param parentPath The path to resolve relative to, as a Path.
 *
 * @return The path that may have been resolved, as a Path. Values that are
 *   not Strings are returned unchanged.
 */
def _resolveSiblingIfNotAbsolute(str, parentPath) {
  if (str !instanceof String) {
    return str
  }
  if (!_stringIsAbsolutePath(str)) {
    return parentPath.resolveSibling(str)
  } else {
    return file(str, hidden: true)
  }
}

// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf'
/**
 * Check whether a path as a string is absolute.
 *
 * In the past, we tried using `file(., relative: true).isAbsolute()`,
 * but the 'relative' option was added in 22.10.0.
 *
 * @param path The path to check, as a String.
 *
 * @return Whether the path is absolute, as a boolean.
+ */ +def _stringIsAbsolutePath(path) { + def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/ + + assert path instanceof String + return _resolve_URL_PROTOCOL.matcher(path).matches() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf' +class CustomTraceObserver implements nextflow.trace.TraceObserver { + List traces + + CustomTraceObserver(List traces) { + this.traces = traces + } + + @Override + void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } + + @Override + void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) { + def trace2 = trace.store.clone() + trace2.script = null + traces.add(trace2) + } +} + +def collectTraces() { + def traces = Collections.synchronizedList([]) + + // add custom trace observer which stores traces in the traces object + session.observers.add(new CustomTraceObserver(traces)) + + traces +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf' +/** + * Performs a deep clone of the given object. + * @param x an object + */ +def deepClone(x) { + iterateMap(x, {it instanceof Cloneable ? it.clone() : it}) +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf' +def getPublishDir() { + return params.containsKey("publish_dir") ? params.publish_dir : + params.containsKey("publishDir") ? 
params.publishDir : + null +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf' + +// Recurse upwards until we find a '.build.yaml' file +def _findBuildYamlFile(pathPossiblySymlink) { + def path = pathPossiblySymlink.toRealPath() + def child = path.resolve(".build.yaml") + if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) { + return child + } else { + def parent = path.getParent() + if (parent == null) { + return null + } else { + return _findBuildYamlFile(parent) + } + } +} + +// get the root of the target folder +def getRootDir() { + def dir = _findBuildYamlFile(meta.resources_dir) + assert dir != null: "Could not find .build.yaml in the folder structure" + dir.getParent() +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf' +/** + * Recursively apply a function over the leaves of an object. + * @param obj The object to iterate over. + * @param fun The function to apply to each value. + * @return The object with the function applied to each value. + */ +def iterateMap(obj, fun) { + if (obj instanceof List && obj !instanceof String) { + return obj.collect{item -> + iterateMap(item, fun) + } + } else if (obj instanceof Map) { + return obj.collectEntries{key, item -> + [key.toString(), iterateMap(item, fun)] + } + } else { + return fun(obj) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf' +/** + * A view for printing the event of each channel as a YAML blob. + * This is useful for debugging. + */ +def niceView() { + workflow niceViewWf { + take: input + main: + output = input + | view{toYamlBlob(it)} + emit: output + } + return niceViewWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf' + +def readCsv(file_path) { + def output = [] + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + + // todo: allow escaped quotes in string + // todo: allow single quotes? + def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''') + def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''') + + def br = java.nio.file.Files.newBufferedReader(inputFile) + + def row = -1 + def header = null + while (br.ready() && header == null) { + def line = br.readLine() + row++ + if (!line.startsWith("#")) { + header = splitRegex.split(line, -1).collect{field -> + m = removeQuote.matcher(field) + m.find() ? m.replaceFirst('$1') : field + } + } + } + assert header != null: "CSV file should contain a header" + + while (br.ready()) { + def line = br.readLine() + row++ + if (line == null) { + br.close() + break + } + + if (!line.startsWith("#")) { + def predata = splitRegex.split(line, -1) + def data = predata.collect{field -> + if (field == "") { + return null + } + def m = removeQuote.matcher(field) + if (m.find()) { + return m.replaceFirst('$1') + } else { + return field + } + } + assert header.size() == data.size(): "Row $row should contain the same number as fields as the header" + + def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null} + output.add(dataMap) + } + } + + output +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf' +def readJson(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parse(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf' +def readJsonBlob(str) { + def jsonSlurper = new groovy.json.JsonSlurper() + jsonSlurper.parseText(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf' +// Custom constructor to modify how certain objects are parsed from YAML +class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor { + Path root + + class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct { + public Object construct(org.yaml.snakeyaml.nodes.Node node) { + String filename = (String) constructScalar(node); + if (root != null) { + return root.resolve(filename); + } + return java.nio.file.Paths.get(filename); + } + } + + CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) { + super(options) + this.root = root + // Handling !file tag and parse it back to a File type + this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath()) + } +} + +def readTaggedYaml(Path path) { + def options = new org.yaml.snakeyaml.LoaderOptions() + def constructor = new CustomConstructor(options, path.getParent()) + def yaml = new org.yaml.snakeyaml.Yaml(constructor) + return yaml.load(path.text) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf' +def readYaml(file_path) { + def inputFile = file_path !instanceof Path ? 
file(file_path, hidden: true) : file_path + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(inputFile) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf' +def readYamlBlob(str) { + def yamlSlurper = new org.yaml.snakeyaml.Yaml() + yamlSlurper.load(str) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf' +String toJsonBlob(data) { + return groovy.json.JsonOutput.toJson(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf' +// Custom representer to modify how certain objects are represented in YAML +class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer { + Path relativizer + + class RepresentPath implements org.yaml.snakeyaml.representer.Represent { + public String getFileName(Object obj) { + if (obj instanceof File) { + obj = ((File) obj).toPath(); + } + if (obj !instanceof Path) { + throw new IllegalArgumentException("Object: " + obj + " is not a Path or File"); + } + def path = (Path) obj; + + if (relativizer != null) { + return relativizer.relativize(path).toString() + } else { + return path.toString() + } + } + + public org.yaml.snakeyaml.nodes.Node representData(Object data) { + String filename = getFileName(data); + def tag = new org.yaml.snakeyaml.nodes.Tag("!file"); + return representScalar(tag, filename); + } + } + CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) { + super(options) + this.relativizer = relativizer + this.representers.put(sun.nio.fs.UnixPath, new RepresentPath()) + this.representers.put(Path, new RepresentPath()) + this.representers.put(File, new RepresentPath()) + } +} + +String toTaggedYamlBlob(data) { + return toRelativeTaggedYamlBlob(data, null) +} +String toRelativeTaggedYamlBlob(data, Path relativizer) { + def options = new org.yaml.snakeyaml.DumperOptions() + 
options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + def representer = new CustomRepresenter(options, relativizer) + def yaml = new org.yaml.snakeyaml.Yaml(representer, options) + return yaml.dump(data) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf' +String toYamlBlob(data) { + def options = new org.yaml.snakeyaml.DumperOptions() + options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK) + options.setPrettyFlow(true) + def yaml = new org.yaml.snakeyaml.Yaml(options) + def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it }) + return yaml.dump(cleanData) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf' +void writeJson(data, file) { + assert data: "writeJson: data should not be null" + assert file: "writeJson: file should not be null" + file.write(toJsonBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf' +void writeYaml(data, file) { + assert data: "writeYaml: data should not be null" + assert file: "writeYaml: file should not be null" + file.write(toYamlBlob(data)) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf' +def findStates(Map params, Map config) { + def auto_config = deepClone(config) + def auto_params = deepClone(params) + + auto_config = auto_config.clone() + // override arguments + auto_config.argument_groups = [] + auto_config.arguments = [ + [ + type: "string", + name: "--id", + description: "A dummy identifier", + required: false + ], + [ + type: "file", + name: "--input_states", + example: "/path/to/input/directory/**/state.yaml", + description: "Path to input directory containing the datasets to be integrated.", + required: true, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--filter", + example: "foo/.*/state.yaml", + description: "Regex to filter state files by path.", + 
required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + 
return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
outputFiles : [outputFiles] + ] + .transpose() + .collectMany{infile, outfile -> + if (infile.toString() != outfile.toString()) { + [ + "[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"", + "cp -r '${infile.toString()}' '${outfile.toString()}'" + ] + } else { + // no need to copy if infile is the same as outfile + [] + } + } + """ + echo "Copying output files to destination folder" + ${copyCommands.join("\n ")} + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishFilesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishFilesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishFilesByConfig: key must be specified" + + workflow publishFilesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. 
[output: '$id.$key.foo.h5ad'] + + + // the processed state is a list of [key, value, inputPath, outputFilename] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - inputPath is a List[Path] + // - outputFilename is a List[String] + // - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml) + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output OR multiple channels were emitted + // and the output was just not added to using the channel + // that is now being parsed + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[inputPath: [], outputFilename: []]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert 
filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def inputPath = val instanceof File ? val.toPath() : val + [inputPath: inputPath, outputFilename: filename_ix] + } + def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key -> + [key, outputPerFile.collect{dic -> dic[key]}] + } + return [[key: plainName_] + transposedOutputs] + } else { + def value_ = java.nio.file.Paths.get(filename) + def inputPath = value instanceof File ? value.toPath() : value + return [[inputPath: [inputPath], outputFilename: [filename]]] + } + } + + def inputPaths = processedState.collectMany{it.inputPath} + def outputFilenames = processedState.collectMany{it.outputFilename} + + + [id_, inputPaths, outputFilenames] + } + | publishFilesProc + emit: input_ch + } + return publishFilesSimpleWf +} + + + + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishStates.nf' +def collectFiles(obj) { + if (obj instanceof java.io.File || obj instanceof Path) { + return [obj] + } else if (obj instanceof List && obj !instanceof String) { + return obj.collectMany{item -> + collectFiles(item) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectFiles(item) + } + } else { + return [] + } +} + +/** + * Recurse through a state and collect all input files and their target output filenames. + * @param obj The state to recurse through. + * @param prefix The prefix to prepend to the output filenames. + */ +def collectInputOutputPaths(obj, prefix) { + if (obj instanceof File || obj instanceof Path) { + def path = obj instanceof Path ? 
obj : obj.toPath() + def ext = path.getFileName().toString().find("\\.[^\\.]+\$") ?: "" + def newFilename = prefix + ext + return [[obj, newFilename]] + } else if (obj instanceof List && obj !instanceof String) { + return obj.withIndex().collectMany{item, ix -> + collectInputOutputPaths(item, prefix + "_" + ix) + } + } else if (obj instanceof Map) { + return obj.collectMany{key, item -> + collectInputOutputPaths(item, prefix + "." + key) + } + } else { + return [] + } +} + +def publishStates(Map args) { + def key_ = args.get("key") + def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml')) + + assert key_ != null : "publishStates: key must be specified" + + workflow publishStatesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + + def yamlFilename = yamlTemplate_ + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + + // TODO: do the pathnames in state_ match up with the outputFilenames_? + + // convert state to yaml blob + def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename)) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesWf +} +process publishStatesProc { + // todo: check publishpath? 
+ publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), val(yamlBlob), val(yamlFile) + output: + tuple val(id), path{[yamlFile]} + script: + """ + mkdir -p "\$(dirname '${yamlFile}')" + echo "Storing state as yaml" + cat > '${yamlFile}' << HERE +${yamlBlob} +HERE + """ +} + + +// this assumes that the state contains no other values other than those specified in the config +def publishStatesByConfig(Map args) { + def config = args.get("config") + assert config != null : "publishStatesByConfig: config must be specified" + + def key_ = args.get("key", config.name) + assert key_ != null : "publishStatesByConfig: key must be specified" + + workflow publishStatesSimpleWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10] + def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad'] + + // TODO: allow overriding the state.yaml template + // TODO TODO: if auto.publish == "state", add output_state as an argument + def yamlTemplate = params.containsKey("output_state") ? 
params.output_state : '$id.$key.state.yaml' + def yamlFilename = yamlTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent() + + // the processed state is a list of [key, value] tuples, where + // - key is a String + // - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path) + // - (key, value) are the tuples that will be saved to the state.yaml file + def processedState = + config.allArguments + .findAll { it.direction == "output" } + .collectMany { par -> + def plainName_ = par.plainName + // if the state does not contain the key, it's an + // optional argument for which the component did + // not generate any output + if (!state_.containsKey(plainName_)) { + return [] + } + def value = state_[plainName_] + // if the parameter is not a file, it should be stored + // in the state as-is, but is not something that needs + // to be copied from the source path to the dest path + if (par.type != "file") { + return [[key: plainName_, value: value]] + } + // if the orig state does not contain this filename, + // it's an optional argument for which the user specified + // that it should not be returned as a state + if (!origState_.containsKey(plainName_)) { + return [] + } + def filenameTemplate = origState_[plainName_] + // if the pararameter is multiple: true, fetch the template + if (par.multiple && filenameTemplate instanceof List) { + filenameTemplate = filenameTemplate[0] + } + // instantiate the template + def filename = filenameTemplate + .replaceAll('\\$id', id_) + .replaceAll('\\$\\{id\\}', id_) + .replaceAll('\\$key', key_) + .replaceAll('\\$\\{key\\}', key_) + if (par.multiple) { + // if the parameter is multiple: true, the filename + // should contain a wildcard '*' that is replaced with + // the index of the file + assert filename.contains("*") : 
"Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}" + def outputPerFile = value.withIndex().collect{ val, ix -> + def filename_ix = filename.replace("*", ix.toString()) + def value_ = java.nio.file.Paths.get(filename_ix) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + return value_ + } + return [["key": plainName_, "value": outputPerFile]] + } else { + def value_ = java.nio.file.Paths.get(filename) + // if id contains a slash + if (yamlDir != null) { + value_ = yamlDir.relativize(value_) + } + def inputPath = value instanceof File ? value.toPath() : value + return [["key": plainName_, value: value_]] + } + } + + + def updatedState_ = processedState.collectEntries{[it.key, it.value]} + + // convert state to yaml blob + def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_) + + [id_, yamlBlob_, yamlFilename] + } + | publishStatesProc + emit: input_ch + } + return publishStatesSimpleWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/setState.nf' +def setState(fun) { + assert fun instanceof Closure || fun instanceof Map || fun instanceof List : + "Error in setState: Expected process argument to be a Closure, a Map, or a List. 
Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript 
instanceof Boolean, "auto.transcript must be a boolean" + + // check auto.publish + assert auto.publish instanceof Boolean || auto.publish == "state", "auto.publish must be a boolean or 'state'" + + return auto.subMap(expectedKeys) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processDirectives.nf' +// Assert that 'map' is a Map, that each of its keys is listed in 'expectedKeys', and that it contains every key in 'requiredKeys'; 'mapName' only labels the error messages. +def assertMapKeys(map, expectedKeys, requiredKeys, mapName) { + assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}" + map.forEach { key, val -> + assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map" + } + requiredKeys.forEach { requiredKey -> + // report the missing key itself; 'key' from the loop above is not in scope in this closure + assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map" + } +} + +// TODO: unit test processDirectives +def processDirectives(Map drctv) { + // remove null values + drctv = drctv.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = [ + "accelerator", "afterScript", "beforeScript", "cache", "conda", "container", "containerOptions", "cpus", "disk", "echo", "errorStrategy", "executor", "machineType", "maxErrors", "maxForks", "maxRetries", "memory", "module", "penv", "pod", "publishDir", "queue", "label", "scratch", "storeDir", "stageInMode", "stageOutMode", "tag", "time" + ] + def unexpectedKeys = drctv.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Unexpected keys in process directive: '${unexpectedKeys.join("', '")}'" + + /* DIRECTIVE accelerator + accepted examples: + - [ limit: 4, type: "nvidia-tesla-k80" ] + */ + if (drctv.containsKey("accelerator")) { + assertMapKeys(drctv["accelerator"], ["type", "limit", "request", "runtime"], [], "accelerator") + } + + /* DIRECTIVE afterScript + accepted examples: + - "source /cluster/bin/cleanup" + */ + if (drctv.containsKey("afterScript")) { + assert drctv["afterScript"] instanceof CharSequence + } + + /* DIRECTIVE beforeScript + accepted examples: + - "source
/cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? 
":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE 
maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
+ } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? 
params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." 
+ } + } + + // check fromState + workflowArgs["fromState"] = _processFromState(workflowArgs.get("fromState"), key, meta.config) + + // check toState + workflowArgs["toState"] = _processToState(workflowArgs.get("toState"), key, meta.config) + + // return output + return workflowArgs +} + +// Normalise the 'fromState' workflow argument: null is returned as-is, a List of Strings becomes an identity Map, and a Map of Strings becomes a Closure that builds the module input from the state (tuple element 1). 'key_' only labels error messages; 'config_' is the module config. +def _processFromState(fromState, key_, config_) { + assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List : + "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}" + if (fromState == null) { + return null + } + + // if fromState is a List, convert to map + if (fromState instanceof List) { + // check whether fromstate is a list[string] + assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings" + fromState = fromState.collectEntries{[it, it]} + } + + // if fromState is a map, convert to closure + if (fromState instanceof Map) { + // check whether fromstate is a map[string, string] + assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings" + assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings" + def fromStateMap = fromState.clone() + // use the config passed in by the caller rather than a script-level 'meta' global + // NOTE(review): other checks in this file compare direction against lowercase "input"; confirm whether "Input" can ever match here + def requiredInputNames = config_.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName} + // turn the map into a closure to be used later on + fromState = { it -> + def state = it[1] + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def data = fromStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (state.containsKey(origkey)) { + [[newkey, state[origkey]]] + } else if (!requiredInputNames.contains(origkey)) { + [] + } else { + throw new
Exception("Error in module '$key_': fromState key '$origkey' not found in current state") + } + }.collectEntries() + data + } + } + + return fromState +} + +def _processToState(toState, key_, config_) { + if (toState == null) { + toState = { tup -> tup[1] } + } + + // toState should be a closure, map[string, string], or list[string] + assert toState instanceof Closure || toState instanceof Map || toState instanceof List : + "Error in module '$key_': Expected process argument 'toState' to be a Closure, a Map, or a List. Found: class ${toState.getClass()}" + + // if toState is a List, convert to map + if (toState instanceof List) { + // check whether toState is a list[string] + assert toState.every{it instanceof CharSequence} : "Error in module '$key_': toState is a List, but not all elements are Strings" + toState = toState.collectEntries{[it, it]} + } + + // if toState is a map, convert to closure + if (toState instanceof Map) { + // check whether toState is a map[string, string] + assert toState.values().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all values are Strings" + assert toState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': toState is a Map, but not all keys are Strings" + def toStateMap = toState.clone() + def requiredOutputNames = config_.allArguments.findAll{it.required && it.direction == "Output"}.collect{it.plainName} + // turn the map into a closure to be used later on + toState = { it -> + def output = it[1] + def state = it[2] + assert output instanceof Map : "Error in module '$key_': the output is not a Map" + assert state instanceof Map : "Error in module '$key_': the state is not a Map" + def extraEntries = toStateMap.collectMany{newkey, origkey -> + // check whether newkey corresponds to a required argument + if (output.containsKey(origkey)) { + [[newkey, output[origkey]]] + } else if (!requiredOutputNames.contains(origkey)) { + [] + } else { + throw new Exception("Error in 
module '$key_': toState key '$origkey' not found in current output") + } + }.collectEntries() + state + extraEntries + } + } + + return toState +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf' +def _debug(workflowArgs, debugKey) { + if (workflowArgs.debug) { + view { "process '${workflowArgs.key}' $debugKey tuple: $it" } + } else { + map { it } + } +} + +// depends on: innerWorkflowFactory +def workflowFactory(Map args, Map defaultWfArgs, Map meta) { + def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta) + def key_ = workflowArgs["key"] + def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName} + + workflow workflowInstance { + take: input_ + + main: + def chModified = input_ + | checkUniqueIds([:]) + | _debug(workflowArgs, "input") + | map { tuple -> + tuple = deepClone(tuple) + + if (workflowArgs.map) { + tuple = workflowArgs.map(tuple) + } + if (workflowArgs.mapId) { + tuple[0] = workflowArgs.mapId(tuple[0]) + } + if (workflowArgs.mapData) { + tuple[1] = workflowArgs.mapData(tuple[1]) + } + if (workflowArgs.mapPassthrough) { + tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2)) + } + + // check tuple + assert tuple instanceof List : + "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. 
Found: tuple.getClass() is ${tuple.getClass()}" + assert tuple.size() >= 2 : + "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: tuple.size() == ${tuple.size()}" + + // check id field + if (tuple[0] instanceof GString) { + tuple[0] = tuple[0].toString() + } + assert tuple[0] instanceof CharSequence : + "Error in module '${key_}': first element of tuple in channel should be a String\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Found: ${tuple[0]}" + + // match file to input file + if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) { + def inputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + + assert inputFiles.size() == 1 : + "Error in module '${key_}' id '${tuple[0]}'.\n" + + " Anonymous file inputs are only allowed when the process has exactly one file input.\n" + + " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}" + + tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries() + } + + // check data field + assert tuple[1] instanceof Map : + "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // rename keys of data field in tuple + if (workflowArgs.renameKeys) { + assert workflowArgs.renameKeys instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}" + assert tuple[1] instanceof Map : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Expected class: Map. 
Found: tuple[1].getClass() is ${tuple[1].getClass()}" + + // TODO: allow renameKeys to be a function? + workflowArgs.renameKeys.each { newKey, oldKey -> + assert newKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}" + assert oldKey instanceof CharSequence : + "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" + + " Example: renameKeys: ['new_key': 'old_key'].\n" + + " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}" + assert tuple[1].containsKey(oldKey) : + "Error renaming data keys in module '${key}' id '${tuple[0]}'.\n" + + " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'" + tuple[1].put(newKey, tuple[1][oldKey]) + } + tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey }) + } + tuple + } + + + def chRun = null + def chPassthrough = null + if (workflowArgs.runIf) { + def runIfBranch = chModified.branch{ tup -> + run: workflowArgs.runIf(tup[0], tup[1]) + passthrough: true + } + chRun = runIfBranch.run + chPassthrough = runIfBranch.passthrough + } else { + chRun = chModified + chPassthrough = Channel.empty() + } + + def chRunFiltered = workflowArgs.filter ? + chRun | filter{workflowArgs.filter(it)} : + chRun + + def chArgs = workflowArgs.fromState ? 
+ chRunFiltered | map{ + def new_data = workflowArgs.fromState(it.take(2)) + [it[0], new_data] + } : + chRunFiltered | map {tup -> tup.take(2)} + + // fill in defaults + def chArgsWithDefaults = chArgs + | map { tuple -> + def id_ = tuple[0] + def data_ = tuple[1] + + // TODO: could move fromState to here + + // fetch default params from functionality + def defaultArgs = meta.config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + // fetch overrides in params + def paramArgs = meta.config.allArguments + .findAll { par -> + def argKey = key_ + "__" + par.plainName + params.containsKey(argKey) + } + .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] } + + // fetch overrides in data + def dataArgs = meta.config.allArguments + .findAll { data_.containsKey(it.plainName) } + .collectEntries { [ it.plainName, data_[it.plainName] ] } + + // combine params + def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs + + // remove arguments with explicit null values + combinedArgs + .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"} + + combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_) + + [id_, combinedArgs] + tuple.drop(2) + } + + // TODO: move some of the _meta.join_id wrangling to the safeJoin() function. + def chInitialOutputMulti = chArgsWithDefaults + | _debug(workflowArgs, "processed") + // run workflow + | innerWorkflowFactory(workflowArgs) + def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti] + assert chInitialOutputList.size() > 0: "should have emitted at least one output channel" + // Add a channel ID to the events, which designates the channel the event was emitted from as a running number + // This number is used to sort the events later when the events are gathered from across the channels. 
+ def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex -> + def newChannel = channel + | map {tuple -> + assert tuple instanceof List : + "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" + + " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" + + " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}" + + def newEvent = [channelIndex] + tuple + return newEvent + } + return newChannel + } + // Put the events into 1 channel, cover case where there is only one channel is emitted + def chInitialOutput = chInitialOutputList.size() > 1 ? \ + chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \ + chInitialOutputListWithIndexedEvents[0] + def chInitialOutputProcessed = chInitialOutput + | map { tuple -> + def channelId = tuple[0] + def id_ = tuple[1] + def output_ = tuple[2] + + // see if output map contains metadata + def meta_ = + output_ instanceof Map && output_.containsKey("_meta") ? + output_["_meta"] : + [:] + def join_id = meta_.join_id ?: id_ + + // remove metadata + output_ = output_.findAll{k, v -> k != "_meta"} + + // check value types + output_ = _checkValidOutputArgument(output_, meta.config, id_, key_) + + [join_id, channelId, id_, output_] + } + // | view{"chInitialOutput: ${it.take(3)}"} + + // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...] + def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_) + // input tuple format: [join_id, channel_id, id, output, prev_state, ...] + // output tuple format: [join_id, channel_id, id, new_state, ...] 
+ | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(2).take(3)) + tup.take(3) + [new_state] + tup.drop(5) + } + if (workflowArgs.auto.publish == "state") { + def chPublishFiles = chPublishWithPreviousState + // input tuple format: [join_id, channel_id, id, new_state, ...] + // output tuple format: [join_id, channel_id, id, new_state] + | map{ tup -> + tup.take(4) + } + + safeJoin(chPublishFiles, chArgsWithDefaults, key_) + // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(2).take(3) + } + | publishFilesByConfig(key: key_, config: meta.config) + } + // Join the state from the events that were emitted from different channels + def chJoined = chInitialOutputProcessed + | map {tuple -> + def join_id = tuple[0] + def channel_id = tuple[1] + def id = tuple[2] + def other = tuple.drop(3) + // Below, groupTuple is used to join the events. To make sure resuming a workflow + // keeps working, the output state must be deterministic. This means the state needs to be + // sorted with groupTuple's has a 'sort' argument. This argument can be set to 'hash', + // but hashing the state when it is large can be problematic in terms of performance. + // Therefore, a custom comparator function is provided. We add the channel ID to the + // states so that we can use the channel ID to sort the items. + def stateWithChannelID = [[channel_id] * other.size(), other].transpose() + // A comparator that is provided to groupTuple's 'sort' argument is applied + // to all elements of the event tuple (that is not the 'id'). The comparator + // closure that is used below expects the input to be List. So the join_id and + // channel_id must also be wrapped in a list. 
+ [[join_id], [channel_id], id] + stateWithChannelID + } + | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true) + | map {join_ids, _, id, statesWithChannelID -> + // Remove the channel IDs from the states + def states = statesWithChannelID.collect{it[1]} + def newJoinId = join_ids.flatten().unique{a, b -> a <=> b} + assert newJoinId.size() == 1: "Multiple events were emitted for '$id'." + def newJoinIdUnique = newJoinId[0] + + // Merge the states from the different channels + def newState = states.inject([:]){ old_state, state_to_add -> + return old_state + state_to_add.collectEntries{k, v -> + if (!multipleArgs.contains(k)) { + // if the key is not a multiple argument, we expect only one value + if (old_state.containsKey(k)) { + assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted." + } + [k, v] + } else { + // if the key is a multiple argument, append the different values into one list + def prevValue = old_state.getOrDefault(k, []) + def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue] + [k, prevValueAsList + v] + } + } + } + + _checkAllRequiredOuputsPresent(newState, meta.config, id, key_) + + // simplify output if need be + if (workflowArgs.auto.simplifyOutput && newState.size() == 1) { + newState = newState.values()[0] + } + + return [newJoinIdUnique, id, newState] + } + + // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...] + def chNewState = safeJoin(chJoined, chRunFiltered, key_) + // input tuple format: [join_id, id, output, prev_state, ...] + // output tuple format: [join_id, id, new_state, ...] + | map{ tup -> + def new_state = workflowArgs.toState(tup.drop(1).take(3)) + tup.take(2) + [new_state] + tup.drop(4) + } + + if (workflowArgs.auto.publish == "state") { + def chPublishStates = chNewState + // input tuple format: [join_id, id, new_state, ...] 
+ // output tuple format: [join_id, id, new_state] + | map{ tup -> + tup.take(3) + } + + safeJoin(chPublishStates, chArgsWithDefaults, key_) + // input tuple format: [join_id, id, new_state, orig_state, ...] + // output tuple format: [id, new_state, orig_state] + | map { tup -> + tup.drop(1).take(3) + } + | publishStatesByConfig(key: key_, config: meta.config) + } + chReturn = chNewState + | map { tup -> + // input tuple format: [join_id, id, new_state, ...] + // output tuple format: [id, new_state, ...] + tup.drop(1) + } + | _debug(workflowArgs, "output") + | concat(chPassthrough) + + emit: chReturn + } + + def wf = workflowInstance.cloneWithName(key_) + + // add factory function + wf.metaClass.run = { runArgs -> + workflowFactory(runArgs, workflowArgs, meta) + } + // add config to module for later introspection + wf.metaClass.config = meta.config + + return wf +} + +nextflow.enable.dsl=2 + +// START COMPONENT-SPECIFIC CODE + +// create meta object +meta = [ + "resources_dir": moduleDir.toRealPath().normalize(), + "config": processConfig(readJsonBlob('''{ + "name" : "nichecompass_leiden_test", + "namespace" : "test_workflows/niche", + "version" : "niche-compass", + "authors" : [ + { + "name" : "Dorien Roosen", + "info" : { + "role" : "Core Team Member", + "links" : { + "email" : "dorien@data-intuitive.com", + "github" : "dorien-er", + "linkedin" : "dorien-roosen" + }, + "organizations" : [ + { + "name" : "Data Intuitive", + "href" : "https://www.data-intuitive.com", + "role" : "Data Scientist" + } + ] + } + } + ], + "argument_groups" : [ + { + "name" : "Inputs", + "arguments" : [ + { + "type" : "file", + "name" : "--input", + "description" : "Path to h5mu output.", + "example" : [ + "foo.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true + }, + { + 
"type" : "file", + "path" : "/src/utils/setup_logger.py" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "This component tests the output of nichecompass leiden workflow.", + "status" : "enabled", + "scope" : { + "image" : "test", + "target" : "test" + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "openpipeline", + "repo" : "openpipeline", + "tag" : "v3.0.0" + } + ], + "links" : { + "repository" : "https://github.com/openpipelines-bio/openpipeline_spatial", + "docker_registry" : "ghcr.io" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + 
"mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "python:3.12-slim", + "target_registry" : "images.viash-hub.com", + "target_tag" : "niche-compass", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "procps", + "git" + ], + "interactive" : false + }, + { + "type" : "python", + "user" : false, + "packages" : [ + "anndata~=0.11.1", + "mudata~=0.3.1", + "viashpy==0.9.0" + ], + "github" : [ + "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils" + ], + "script" : [ + "exec(\\"try:\\\\n import awkward\\\\nexcept ModuleNotFoundError:\\\\n exit(0)\\\\nelse: exit(1)\\")" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/workflows/test_workflows/niche/nichecompass_leiden/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "/workdir/root/repo/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test", + "viash_version" : 
"0.9.4", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" + }, + "package_config" : { + "name" : "openpipeline_spatial", + "version" : "niche-compass", + "info" : { + "test_resources" : [ + { + "type" : "s3", + "path" : "s3://openpipelines-bio/openpipeline_spatial/resources_test", + "dest" : "resources_test" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "openpipeline", + "repo" : "openpipeline", + "tag" : "v3.0.0" + } + ], + "viash_version" : "0.9.4", + "source" : "/workdir/root/repo/src", + "target" : "/workdir/root/repo/target", + "config_mods" : [ + ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'niche-compass'" + ], + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/openpipelines-bio/openpipeline_spatial", + "docker_registry" : "ghcr.io" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.py" +cat > "$tempscript" << VIASHMAIN +from mudata import read_h5mu +import sys +import pytest + +##VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! 
-z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! 
-z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +dep = { + +} + +##VIASH END + + +def test_run(): + input_mudata = read_h5mu(par["input"]) + + expected_mod = ["rna"] + expected_obsm = ["X_leiden_nichecompass_umap", "nichecompass_latent"] + expected_obs = ["sample_id", "nichecompass_leiden_1.0"] + expected_obsp = [ + "spatial_distances", + "spatial_connectivities", + "nichecompass_connectivities", + "nichecompass_distances" + ] + expected_varm = [ + "nichecompass_gp_sources", + "nichecompass_gp_targets", + "nichecompass_gp_sources_categories", + "nichecompass_gp_targets_categories" + ] + expected_uns = [ + "nichecompass_sources_categories_label_encoder", + "nichecompass_targets_categories_label_encoder", + "nichecompass_source_genes_idx", + "nichecompass_target_genes_idx", + "nichecompass_genes_idx", + "nichecompass_gp_names", + "nichecompass_active_gp_names", + "nichecompass_neighbors", + "spatial", + "xenium_spatial_neighbors" + ] + + assert all(key in list(input_mudata.mod) for key in expected_mod), ( + f"Input modalities should be: {expected_mod}, found: {input_mudata.mod.keys()}." + ) + assert all(key in list(input_mudata.mod["rna"].obsm) for key in expected_obsm), ( + f"Input mod['rna'] obsm columns should be: {expected_obsm}, found: {input_mudata.mod['rna'].obsm.keys()}." 
+ ) + assert all(key in list(input_mudata.mod["rna"].obs) for key in expected_obs), ( + f"Input mod['rna'] obs columns should be: {expected_obs}, found: {input_mudata.mod['rna'].obs.keys()}." + ) + assert all(key in list(input_mudata.mod["rna"].obsp) for key in expected_obsp), ( + f"Input mod['rna'] obsp columns should be: {expected_obsp}, found: {input_mudata.mod['rna'].obsp.keys()}." + ) + assert all(key in list(input_mudata.mod["rna"].uns) for key in expected_uns), ( + f"Input mod['rna'] uns columns should be: {expected_uns}, found: {input_mudata.mod['rna'].uns.keys()}." + ) + assert all(key in list(input_mudata.mod["rna"].varm) for key in expected_varm), ( + f"Input mod['rna'] varm columns should be: {expected_varm}, found: {input_mudata.mod['rna'].varm.keys()}." + ) + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "--import-mode=importlib"])) +VIASHMAIN +python -B "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? 
data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!out instanceof List || out.size() <= 1) { + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + 
} + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { + log.warn "Key for module '${wfKey}' is duplicated.\n", + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" 
&& it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! 
+ def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? 
'/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { 
java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/openpipeline_spatial/test_workflows/niche/nichecompass_leiden_test", + "tag" : "niche-compass" + }, + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. 
+ // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/nextflow.config b/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/nextflow.config new file mode 100644 index 0000000..6c56735 --- /dev/null +++ b/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'test_workflows/niche/nichecompass_leiden_test' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'niche-compass' + description = 'This component tests the output of nichecompass leiden 
workflow.' + author = 'Dorien Roosen' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: 
mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/spatial_neighborhood_graph/neighbors/nextflow_labels.config b/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/nextflow_labels.config similarity index 100% rename from target/nextflow/spatial_neighborhood_graph/neighbors/nextflow_labels.config rename to 
target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/nextflow_labels.config diff --git a/target/nextflow/spatial_neighborhood_graph/neighbors/setup_logger.py b/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/setup_logger.py similarity index 100% rename from target/nextflow/spatial_neighborhood_graph/neighbors/setup_logger.py rename to target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/setup_logger.py diff --git a/target/executable/convert/from_cells2stats_to_h5mu/.config.vsh.yaml b/target/executable/convert/from_cells2stats_to_h5mu/.config.vsh.yaml index 48711db..d301ebc 100644 --- a/target/executable/convert/from_cells2stats_to_h5mu/.config.vsh.yaml +++ b/target/executable/convert/from_cells2stats_to_h5mu/.config.vsh.yaml @@ -301,7 +301,7 @@ build_info: output: "target/executable/convert/from_cells2stats_to_h5mu" executable: "target/executable/convert/from_cells2stats_to_h5mu/from_cells2stats_to_h5mu" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_cells2stats_to_h5mu/from_cells2stats_to_h5mu b/target/executable/convert/from_cells2stats_to_h5mu/from_cells2stats_to_h5mu index 7a732d0..ca727b0 100755 --- a/target/executable/convert/from_cells2stats_to_h5mu/from_cells2stats_to_h5mu +++ b/target/executable/convert/from_cells2stats_to_h5mu/from_cells2stats_to_h5mu @@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component convert from_cells2stats_to_h5mu" -LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:08Z" LABEL 
org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL org.opencontainers.image.version="niche-compass" VIASHDOCKER diff --git a/target/executable/convert/from_cosmx_to_h5mu/.config.vsh.yaml b/target/executable/convert/from_cosmx_to_h5mu/.config.vsh.yaml index c1803a2..ca8ce9f 100644 --- a/target/executable/convert/from_cosmx_to_h5mu/.config.vsh.yaml +++ b/target/executable/convert/from_cosmx_to_h5mu/.config.vsh.yaml @@ -238,7 +238,7 @@ build_info: output: "target/executable/convert/from_cosmx_to_h5mu" executable: "target/executable/convert/from_cosmx_to_h5mu/from_cosmx_to_h5mu" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_cosmx_to_h5mu/from_cosmx_to_h5mu b/target/executable/convert/from_cosmx_to_h5mu/from_cosmx_to_h5mu index 954bce8..1c685e2 100755 --- a/target/executable/convert/from_cosmx_to_h5mu/from_cosmx_to_h5mu +++ b/target/executable/convert/from_cosmx_to_h5mu/from_cosmx_to_h5mu @@ -460,9 +460,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz" LABEL org.opencontainers.image.description="Companion container for running component convert from_cosmx_to_h5mu" -LABEL org.opencontainers.image.created="2025-12-08T20:39:07Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" 
LABEL org.opencontainers.image.version="niche-compass" VIASHDOCKER diff --git a/target/executable/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml b/target/executable/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml index b4cad01..dbdd8ee 100644 --- a/target/executable/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml +++ b/target/executable/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml @@ -234,7 +234,7 @@ build_info: output: "target/executable/convert/from_cosmx_to_spatialexperiment" executable: "target/executable/convert/from_cosmx_to_spatialexperiment/from_cosmx_to_spatialexperiment" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_cosmx_to_spatialexperiment/from_cosmx_to_spatialexperiment b/target/executable/convert/from_cosmx_to_spatialexperiment/from_cosmx_to_spatialexperiment index 5ad0a23..3ee6c23 100755 --- a/target/executable/convert/from_cosmx_to_spatialexperiment/from_cosmx_to_spatialexperiment +++ b/target/executable/convert/from_cosmx_to_spatialexperiment/from_cosmx_to_spatialexperiment @@ -457,9 +457,9 @@ RUN Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component convert from_cosmx_to_spatialexperiment" -LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:08Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL 
org.opencontainers.image.version="niche-compass" VIASHDOCKER diff --git a/target/executable/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml b/target/executable/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml index f232995..8b538af 100644 --- a/target/executable/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml +++ b/target/executable/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml @@ -224,7 +224,7 @@ build_info: output: "target/executable/convert/from_h5mu_to_spatialexperiment" executable: "target/executable/convert/from_h5mu_to_spatialexperiment/from_h5mu_to_spatialexperiment" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_h5mu_to_spatialexperiment/from_h5mu_to_spatialexperiment b/target/executable/convert/from_h5mu_to_spatialexperiment/from_h5mu_to_spatialexperiment index 8b7c23d..8356b23 100755 --- a/target/executable/convert/from_h5mu_to_spatialexperiment/from_h5mu_to_spatialexperiment +++ b/target/executable/convert/from_h5mu_to_spatialexperiment/from_h5mu_to_spatialexperiment @@ -458,9 +458,9 @@ RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TR LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component convert from_h5mu_to_spatialexperiment" -LABEL org.opencontainers.image.created="2025-12-08T20:39:07Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL 
org.opencontainers.image.version="niche-compass" VIASHDOCKER diff --git a/target/executable/convert/from_spatialdata_to_h5mu/.config.vsh.yaml b/target/executable/convert/from_spatialdata_to_h5mu/.config.vsh.yaml index e9d88d4..8a3ee54 100644 --- a/target/executable/convert/from_spatialdata_to_h5mu/.config.vsh.yaml +++ b/target/executable/convert/from_spatialdata_to_h5mu/.config.vsh.yaml @@ -221,7 +221,7 @@ build_info: output: "target/executable/convert/from_spatialdata_to_h5mu" executable: "target/executable/convert/from_spatialdata_to_h5mu/from_spatialdata_to_h5mu" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_spatialdata_to_h5mu/from_spatialdata_to_h5mu b/target/executable/convert/from_spatialdata_to_h5mu/from_spatialdata_to_h5mu index d41c143..f8d0215 100755 --- a/target/executable/convert/from_spatialdata_to_h5mu/from_spatialdata_to_h5mu +++ b/target/executable/convert/from_spatialdata_to_h5mu/from_spatialdata_to_h5mu @@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz" LABEL org.opencontainers.image.description="Companion container for running component convert from_spatialdata_to_h5mu" -LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:07Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL org.opencontainers.image.version="niche-compass" VIASHDOCKER diff --git a/target/executable/convert/from_xenium_to_h5mu/.config.vsh.yaml 
b/target/executable/convert/from_xenium_to_h5mu/.config.vsh.yaml index 88a8148..d8a93e7 100644 --- a/target/executable/convert/from_xenium_to_h5mu/.config.vsh.yaml +++ b/target/executable/convert/from_xenium_to_h5mu/.config.vsh.yaml @@ -244,7 +244,7 @@ build_info: output: "target/executable/convert/from_xenium_to_h5mu" executable: "target/executable/convert/from_xenium_to_h5mu/from_xenium_to_h5mu" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_xenium_to_h5mu/from_xenium_to_h5mu b/target/executable/convert/from_xenium_to_h5mu/from_xenium_to_h5mu index 8fc3ebb..32af287 100755 --- a/target/executable/convert/from_xenium_to_h5mu/from_xenium_to_h5mu +++ b/target/executable/convert/from_xenium_to_h5mu/from_xenium_to_h5mu @@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component convert from_xenium_to_h5mu" -LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:08Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL org.opencontainers.image.version="niche-compass" VIASHDOCKER diff --git a/target/executable/convert/from_xenium_to_spatialdata/.config.vsh.yaml b/target/executable/convert/from_xenium_to_spatialdata/.config.vsh.yaml index 7e329de..25971f3 100644 --- a/target/executable/convert/from_xenium_to_spatialdata/.config.vsh.yaml +++ b/target/executable/convert/from_xenium_to_spatialdata/.config.vsh.yaml @@ 
-326,7 +326,7 @@ build_info: output: "target/executable/convert/from_xenium_to_spatialdata" executable: "target/executable/convert/from_xenium_to_spatialdata/from_xenium_to_spatialdata" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_xenium_to_spatialdata/from_xenium_to_spatialdata b/target/executable/convert/from_xenium_to_spatialdata/from_xenium_to_spatialdata index abde1f5..44c7e6b 100755 --- a/target/executable/convert/from_xenium_to_spatialdata/from_xenium_to_spatialdata +++ b/target/executable/convert/from_xenium_to_spatialdata/from_xenium_to_spatialdata @@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz" LABEL org.opencontainers.image.description="Companion container for running component convert from_xenium_to_spatialdata" -LABEL org.opencontainers.image.created="2025-12-08T20:39:05Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL org.opencontainers.image.version="niche-compass" VIASHDOCKER diff --git a/target/executable/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml b/target/executable/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml index 4d0584e..344d43d 100644 --- a/target/executable/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml +++ b/target/executable/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml @@ -224,7 +224,7 @@ build_info: output: "target/executable/convert/from_xenium_to_spatialexperiment" executable: 
"target/executable/convert/from_xenium_to_spatialexperiment/from_xenium_to_spatialexperiment" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/convert/from_xenium_to_spatialexperiment/from_xenium_to_spatialexperiment b/target/executable/convert/from_xenium_to_spatialexperiment/from_xenium_to_spatialexperiment index d8b5425..04d24c9 100755 --- a/target/executable/convert/from_xenium_to_spatialexperiment/from_xenium_to_spatialexperiment +++ b/target/executable/convert/from_xenium_to_spatialexperiment/from_xenium_to_spatialexperiment @@ -457,9 +457,9 @@ RUN Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component convert from_xenium_to_spatialexperiment" -LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:08Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL org.opencontainers.image.version="niche-compass" VIASHDOCKER diff --git a/target/executable/dataflow/obsp_block_concatenation/.config.vsh.yaml b/target/executable/dataflow/obsp_block_concatenation/.config.vsh.yaml index 26b7e74..d7589a6 100644 --- a/target/executable/dataflow/obsp_block_concatenation/.config.vsh.yaml +++ b/target/executable/dataflow/obsp_block_concatenation/.config.vsh.yaml @@ -296,7 +296,7 @@ build_info: output: "target/executable/dataflow/obsp_block_concatenation" executable: 
"target/executable/dataflow/obsp_block_concatenation/obsp_block_concatenation" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/dataflow/obsp_block_concatenation/obsp_block_concatenation b/target/executable/dataflow/obsp_block_concatenation/obsp_block_concatenation index 71c0bc9..7939c8d 100755 --- a/target/executable/dataflow/obsp_block_concatenation/obsp_block_concatenation +++ b/target/executable/dataflow/obsp_block_concatenation/obsp_block_concatenation @@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dries Schaumont, Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component dataflow obsp_block_concatenation" -LABEL org.opencontainers.image.created="2025-12-08T20:39:07Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL org.opencontainers.image.version="niche-compass" VIASHDOCKER diff --git a/target/executable/mapping/spaceranger_count/.config.vsh.yaml b/target/executable/mapping/spaceranger_count/.config.vsh.yaml index 08340f1..4b97972 100644 --- a/target/executable/mapping/spaceranger_count/.config.vsh.yaml +++ b/target/executable/mapping/spaceranger_count/.config.vsh.yaml @@ -426,7 +426,7 @@ build_info: output: "target/executable/mapping/spaceranger_count" executable: "target/executable/mapping/spaceranger_count/spaceranger_count" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: 
"9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/mapping/spaceranger_count/spaceranger_count b/target/executable/mapping/spaceranger_count/spaceranger_count index df713aa..1bbe694 100755 --- a/target/executable/mapping/spaceranger_count/spaceranger_count +++ b/target/executable/mapping/spaceranger_count/spaceranger_count @@ -453,9 +453,9 @@ apt upgrade -y && apt install -y procps && rm -rf /var/lib/apt/lists/* LABEL org.opencontainers.image.authors="Jakub Majercik" LABEL org.opencontainers.image.description="Companion container for running component mapping spaceranger_count" -LABEL org.opencontainers.image.created="2025-12-08T20:39:07Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL org.opencontainers.image.version="niche-compass" VIASHDOCKER diff --git a/target/executable/spatial_neighborhood_graph/neighbors/.config.vsh.yaml b/target/executable/neighbors/spatial_neighborhood_graph/.config.vsh.yaml similarity index 96% rename from target/executable/spatial_neighborhood_graph/neighbors/.config.vsh.yaml rename to target/executable/neighbors/spatial_neighborhood_graph/.config.vsh.yaml index f52145a..43810f2 100644 --- a/target/executable/spatial_neighborhood_graph/neighbors/.config.vsh.yaml +++ b/target/executable/neighbors/spatial_neighborhood_graph/.config.vsh.yaml @@ -1,5 +1,5 @@ -name: "neighbors" -namespace: "spatial_neighborhood_graph" +name: "spatial_neighborhood_graph" +namespace: "neighbors" version: "niche-compass" authors: - name: "Dorien Roosen" @@ -269,10 +269,10 @@ build_info: config: 
"src/neighbors/spatial_neighborhood_graph/config.vsh.yaml" runner: "executable" engine: "docker|native" - output: "target/executable/spatial_neighborhood_graph/neighbors" - executable: "target/executable/spatial_neighborhood_graph/neighbors/neighbors" + output: "target/executable/neighbors/spatial_neighborhood_graph" + executable: "target/executable/neighbors/spatial_neighborhood_graph/spatial_neighborhood_graph" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/neighbors/spatial_neighborhood_graph/nextflow_labels.config b/target/executable/neighbors/spatial_neighborhood_graph/nextflow_labels.config new file mode 100644 index 0000000..541aaad --- /dev/null +++ b/target/executable/neighbors/spatial_neighborhood_graph/nextflow_labels.config @@ -0,0 +1,68 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // CPU resources + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 10 } + withLabel: highcpu { cpus = 20 } + + // Memory resources + withLabel: lowmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } } + + // Disk space + // Nextflow apparently can't handle empty directives, i.e. 
+ // withLabel: lowdisk {} + // so for that reason we have to add a dummy directive + withLabel: lowdisk { + dummyDirective = "dummyValue" + } + withLabel: middisk { + dummyDirective = "dummyValue" + } + withLabel: highdisk { + dummyDirective = "dummyValue" + } + withLabel: veryhighdisk { + dummyDirective = "dummyValue" + } + // NOTE: The above labels intentionally do not have an effect by default. + // The user should set the disk space requirements by adding the following + // to the compute environment: + // + // withLabel: lowdisk { disk = { 20.GB * task.attempt } } + // withLabel: middisk { disk = { 100.GB * task.attempt } } + // withLabel: highdisk { disk = { 200.GB * task.attempt } } + // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" 
+ System.exit(1) + } +} diff --git a/target/executable/neighbors/spatial_neighborhood_graph/setup_logger.py b/target/executable/neighbors/spatial_neighborhood_graph/setup_logger.py new file mode 100644 index 0000000..3ca1cdb --- /dev/null +++ b/target/executable/neighbors/spatial_neighborhood_graph/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger diff --git a/target/executable/spatial_neighborhood_graph/neighbors/neighbors b/target/executable/neighbors/spatial_neighborhood_graph/spatial_neighborhood_graph similarity index 98% rename from target/executable/spatial_neighborhood_graph/neighbors/neighbors rename to target/executable/neighbors/spatial_neighborhood_graph/spatial_neighborhood_graph index 14b3545..05ced42 100755 --- a/target/executable/spatial_neighborhood_graph/neighbors/neighbors +++ b/target/executable/neighbors/spatial_neighborhood_graph/spatial_neighborhood_graph @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# neighbors niche-compass +# spatial_neighborhood_graph niche-compass # # This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative # work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data @@ -165,8 +165,8 @@ VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` # define meta fields -VIASH_META_NAME="neighbors" -VIASH_META_FUNCTIONALITY_NAME="neighbors" +VIASH_META_NAME="spatial_neighborhood_graph" +VIASH_META_FUNCTIONALITY_NAME="spatial_neighborhood_graph" VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" VIASH_META_TEMP_DIR="$VIASH_TEMP" @@ -457,10 +457,10 @@ RUN pip install --upgrade pip && \ python -c 'exec("try:\n import awkward\nexcept ModuleNotFoundError:\n exit(0)\nelse: exit(1)")' LABEL org.opencontainers.image.authors="Dorien Roosen" -LABEL org.opencontainers.image.description="Companion container for running component spatial_neighborhood_graph neighbors" -LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z" +LABEL org.opencontainers.image.description="Companion container for running component neighbors spatial_neighborhood_graph" +LABEL org.opencontainers.image.created="2025-12-13T13:27:08Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL org.opencontainers.image.version="niche-compass" VIASHDOCKER @@ -578,7 +578,7 @@ VIASH_DOCKER_RUN_ARGS=(-i --rm) # ViashHelp: Display helpful explanation about this executable function ViashHelp { - echo "neighbors niche-compass" + echo "spatial_neighborhood_graph niche-compass" echo "" echo "Calculates a spatial neighborhood graph." 
echo "" @@ -687,7 +687,7 @@ while [[ $# -gt 0 ]]; do shift 1 ;; --version) - echo "neighbors niche-compass" + echo "spatial_neighborhood_graph niche-compass" exit ;; --input) @@ -877,7 +877,7 @@ if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then # determine docker image id if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/openpipeline_spatial/spatial_neighborhood_graph/neighbors:niche-compass' + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/openpipeline_spatial/neighbors/spatial_neighborhood_graph:niche-compass' fi # print dockerfile @@ -1207,7 +1207,7 @@ fi ViashDebug "Running command: $(echo $VIASH_CMD)" cat << VIASHEOF | eval $VIASH_CMD set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-neighbors-XXXXXX").py +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-spatial_neighborhood_graph-XXXXXX").py function clean_up { rm "\$tempscript" } diff --git a/target/executable/nichecompass/gene_program_mask/.config.vsh.yaml b/target/executable/nichecompass/gene_program_mask/.config.vsh.yaml new file mode 100644 index 0000000..ec0be43 --- /dev/null +++ b/target/executable/nichecompass/gene_program_mask/.config.vsh.yaml @@ -0,0 +1,469 @@ +name: "gene_program_mask" +namespace: "nichecompass" +version: "niche-compass" +authors: +- name: "Dorien Roosen" + roles: + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input_gene_orthologs_mapping_file" + description: "Path to a CSV file mapping human genes to mouse orthologs.\nRequired\ + \ for the OmniPath and NicheNet masks if `--species mouse`.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + 
name: "--input_metabolite_enzymes" + description: "Path to the MeBocost metabolite-enzymes TSV file.\nRequired for\ + \ generating the MeBocost gene program mask.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_metabolite_sensors" + description: "Path to the MeBocost metabolite-sensors TSV file.\nRequired for\ + \ generating the MeBocost gene program mask.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Parameters" + arguments: + - type: "string" + name: "--species" + description: "Species of the organism (human or mouse)." + info: null + default: + - "human" + required: false + choices: + - "human" + - "mouse" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--create_omnipath_gene_program_mask" + description: "Whether to create the OmniPath gene program mask." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--create_nichenet_gene_program_mask" + description: "Whether to create the NicheNet gene program mask." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--create_mebocost_gene_program_mask" + description: "Whether to create the MeBocost gene program mask." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--create_collectri_tf_gene_program_mask" + description: "Whether to create the CollecTRI TF gene program mask." 
+ info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--overlap_thresh_target_genes" + description: "The minimum ratio of target genes that need to overlap between a\ + \ GP without source genes and another GP for the GP to be dropped.\nGene programs\ + \ with different source genes are never combined or dropped.\n" + info: null + default: + - 1.0 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Omnipath Parameters" + arguments: + - type: "integer" + name: "--omnipath_min_curation_effort" + description: "Minimum number of times an interaction has to be described in a\ + \ paper and mentioned in a database to be included in the OmniPath gene programs." + info: null + default: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "NicheNet Parameters" + arguments: + - type: "string" + name: "--nichenet_version" + description: "Version of the NicheNet ligand receptor network and ligand target\ + \ gene regulatory potential matrix.\n´v2´ is an improved version of ´v1´, and\ + \ has separate files for mouse and human.\n" + info: null + default: + - "v2" + required: false + choices: + - "v1" + - "v2" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--nichenet_keep_target_genes_ratio" + description: "Ratio of target genes that are kept compared to total target genes.\n\ + This ratio is applied over the entire matrix (not on gene program level), and\ + \ determines the ´all_gps_score_keep_threshold´, which will be used to filter\ + \ target genes according to their regulatory potential scores.\n" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--nichenet_max_n_target_genes_per_gp" + description: "Maximum number of target genes per gene program. 
If a gene program\ + \ has more target genes than ´max_n_target_genes_per_gp´, only the ´max_n_target_genes_per_gp´\ + \ gene programs with the highest regulatory potential scores will be kept.\n\ + Default value is chosen based on MultiNicheNet specification (s. Browaeys, R.\ + \ et al. MultiNicheNet: a flexible framework for differential cell-cell communication\ + \ analysis from multi-sample multi-condition single-cell transcriptomics data.\ + \ bioRxiv (2023) doi:10.1101/2023.06.13.544751).\n" + info: null + default: + - 250 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Path to the output gene program mask JSON file." + info: null + example: + - "gp_mask.json" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_omnipath_lr_network" + description: "Path to the output OmniPath ligand-receptor network CSV file." + info: null + example: + - "omnipath_lr_network.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_nichenet_lr_network" + description: "Path to the output NicheNet ligand-receptor network CSV file." + info: null + example: + - "nichenet_lr_network.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_nichenet_ligand_target_matrix" + description: "Path to the output NicheNet ligand-target gene regulatory potential\ + \ matrix file." 
+ info: null + example: + - "nichenet_ligand_target_matrix.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_collectri_tf_network" + description: "Path to the output CollecTRI TF-target gene regulatory potential\ + \ network CSV file." + info: null + example: + - "collectri_tf_network.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_omnipath_gp_gene_count_distributions" + description: "Path to save the OmniPath gene program gene count distributions\ + \ plot." + info: null + example: + - "omnipath_gp_gene_count_distributions.svg" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_nichenet_gp_gene_count_distributions" + description: "Path to save the NicheNet gene program gene count distributions\ + \ plot." + info: null + example: + - "nichenet_gp_gene_count_distributions.svg" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_mebocost_gp_gene_count_distributions" + description: "Path to save the MeBocost gene program gene count distributions\ + \ plot." + info: null + example: + - "mebocost_gp_gene_count_distributions.svg" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_collectri_tf_gp_gene_count_distributions" + description: "Path to save the CollecTRI TF gene program gene count distributions\ + \ plot." 
+ info: null + example: + - "collectri_tf_gp_gene_count_distributions.svg" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +- type: "file" + path: "setup_logger.py" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Generation of a prior knowledge gene program mask for NicheCompass." +test_resources: +- type: "python_script" + path: "test.py" + is_executable: true +- type: "file" + path: "niche" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +repositories: +- type: "vsh" + name: "openpipeline" + repo: "openpipeline" + tag: "v3.0.0" +links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowcpu" + - "lowmem" + - "lowdisk" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: 
"memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04" + target_registry: "images.viash-hub.com" + target_tag: "niche-compass" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "libhdf5-dev" + - "python3-pip" + - "python3-dev" + - "python-is-python3" + interactive: false + - type: "docker" + run: + - "pip install torch --index-url https://download.pytorch.org/whl/cu124 \\\n&&\ + \ pip install pyg_lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.6.0+cu124.html\ + \ \n" + - type: "python" + user: false + packages: + - "numpy<2" + - "nichecompass" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.9.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/nichecompass/gene_program_mask/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "target/executable/nichecompass/gene_program_mask" + executable: 
"target/executable/nichecompass/gene_program_mask/gene_program_mask" + viash_version: "0.9.4" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" + git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" +package_config: + name: "openpipeline_spatial" + version: "niche-compass" + info: + test_resources: + - type: "s3" + path: "s3://openpipelines-bio/openpipeline_spatial/resources_test" + dest: "resources_test" + repositories: + - type: "vsh" + name: "openpipeline" + repo: "openpipeline" + tag: "v3.0.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'niche-compass'" + organization: "vsh" + links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" diff --git a/target/executable/nichecompass/gene_program_mask/gene_program_mask b/target/executable/nichecompass/gene_program_mask/gene_program_mask new file mode 100755 index 0000000..1d72d9e --- /dev/null +++ b/target/executable/nichecompass/gene_program_mask/gene_program_mask @@ -0,0 +1,1930 @@ +#!/usr/bin/env bash + +# gene_program_mask niche-compass +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+# +# Component authors: +# * Dorien Roosen (maintainer) + +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. 
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="gene_program_mask" +VIASH_META_FUNCTIONALITY_NAME="gene_program_mask" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
+ fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." 
+ else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? 
: whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 +ENTRYPOINT [] +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y libhdf5-dev python3-pip python3-dev python-is-python3 && \ + rm -rf /var/lib/apt/lists/* + +RUN pip install torch --index-url https://download.pytorch.org/whl/cu124 \ +&& pip install pyg_lib 
torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.6.0+cu124.html + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "numpy<2" "nichecompass" + +LABEL org.opencontainers.image.authors="Dorien Roosen" +LABEL org.opencontainers.image.description="Companion container for running component nichecompass gene_program_mask" +LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z" +LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" +LABEL org.opencontainers.image.version="niche-compass" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! "$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) 
+ len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables 
+VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "gene_program_mask niche-compass" + echo "" + echo "Generation of a prior knowledge gene program mask for NicheCompass." + echo "" + echo "Inputs:" + echo " --input_gene_orthologs_mapping_file" + echo " type: file, file must exist" + echo " Path to a CSV file mapping human genes to mouse orthologs." + echo " Required for the OmniPath and NicheNet masks if \`--species mouse\`." + echo "" + echo " --input_metabolite_enzymes" + echo " type: file, file must exist" + echo " Path to the MeBocost metabolite-enzymes TSV file." + echo " Required for generating the MeBocost gene program mask." + echo "" + echo " --input_metabolite_sensors" + echo " type: file, file must exist" + echo " Path to the MeBocost metabolite-sensors TSV file." + echo " Required for generating the MeBocost gene program mask." + echo "" + echo "Parameters:" + echo " --species" + echo " type: string" + echo " default: human" + echo " choices: [ human, mouse ]" + echo " Species of the organism (human or mouse)." + echo "" + echo " --create_omnipath_gene_program_mask" + echo " type: boolean" + echo " default: true" + echo " Whether to create the OmniPath gene program mask." + echo "" + echo " --create_nichenet_gene_program_mask" + echo " type: boolean" + echo " default: true" + echo " Whether to create the NicheNet gene program mask." + echo "" + echo " --create_mebocost_gene_program_mask" + echo " type: boolean" + echo " default: true" + echo " Whether to create the MeBocost gene program mask." + echo "" + echo " --create_collectri_tf_gene_program_mask" + echo " type: boolean" + echo " default: true" + echo " Whether to create the CollecTRI TF gene program mask." 
+ echo "" + echo " --overlap_thresh_target_genes" + echo " type: double" + echo " default: 1.0" + echo " min: 0.0" + echo " max: 1.0" + echo " The minimum ratio of target genes that need to overlap between a GP" + echo " without source genes and another GP for the GP to be dropped." + echo " Gene programs with different source genes are never combined or dropped." + echo "" + echo "Omnipath Parameters:" + echo " --omnipath_min_curation_effort" + echo " type: integer" + echo " default: 2" + echo " Minimum number of times an interaction has to be described in a paper" + echo " and mentioned in a database to be included in the OmniPath gene" + echo " programs." + echo "" + echo "NicheNet Parameters:" + echo " --nichenet_version" + echo " type: string" + echo " default: v2" + echo " choices: [ v1, v2 ]" + echo " Version of the NicheNet ligand receptor network and ligand target gene" + echo " regulatory potential matrix." + echo " ´v2´ is an improved version of ´v1´, and has separate files for mouse" + echo " and human." + echo "" + echo " --nichenet_keep_target_genes_ratio" + echo " type: double" + echo " default: 1.0" + echo " Ratio of target genes that are kept compared to total target genes." + echo " This ratio is applied over the entire matrix (not on gene program" + echo " level), and determines the ´all_gps_score_keep_threshold´, which will be" + echo " used to filter target genes according to their regulatory potential" + echo " scores." + echo "" + echo " --nichenet_max_n_target_genes_per_gp" + echo " type: integer" + echo " default: 250" + echo " Maximum number of target genes per gene program. If a gene program has" + echo " more target genes than ´max_n_target_genes_per_gp´, only the" + echo " ´max_n_target_genes_per_gp´ gene programs with the highest regulatory" + echo " potential scores will be kept." + echo " Default value is chosen based on MultiNicheNet specification (s." + echo " Browaeys, R. et al. 
MultiNicheNet: a flexible framework for differential" + echo " cell-cell communication analysis from multi-sample multi-condition" + echo " single-cell transcriptomics data. bioRxiv (2023)" + echo " doi:10.1101/2023.06.13.544751)." + echo "" + echo "Outputs:" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example: gp_mask.json" + echo " Path to the output gene program mask JSON file." + echo "" + echo " --output_omnipath_lr_network" + echo " type: file, output, file must exist" + echo " example: omnipath_lr_network.csv" + echo " Path to the output OmniPath ligand-receptor network CSV file." + echo "" + echo " --output_nichenet_lr_network" + echo " type: file, output, file must exist" + echo " example: nichenet_lr_network.csv" + echo " Path to the output NicheNet ligand-receptor network CSV file." + echo "" + echo " --output_nichenet_ligand_target_matrix" + echo " type: file, output, file must exist" + echo " example: nichenet_ligand_target_matrix.csv" + echo " Path to the output NicheNet ligand-target gene regulatory potential" + echo " matrix file." + echo "" + echo " --output_collectri_tf_network" + echo " type: file, output, file must exist" + echo " example: collectri_tf_network.csv" + echo " Path to the output CollecTRI TF-target gene regulatory potential network" + echo " CSV file." + echo "" + echo " --output_omnipath_gp_gene_count_distributions" + echo " type: file, output, file must exist" + echo " example: omnipath_gp_gene_count_distributions.svg" + echo " Path to save the OmniPath gene program gene count distributions plot." + echo "" + echo " --output_nichenet_gp_gene_count_distributions" + echo " type: file, output, file must exist" + echo " example: nichenet_gp_gene_count_distributions.svg" + echo " Path to save the NicheNet gene program gene count distributions plot." 
+ echo "" + echo " --output_mebocost_gp_gene_count_distributions" + echo " type: file, output, file must exist" + echo " example: mebocost_gp_gene_count_distributions.svg" + echo " Path to save the MeBocost gene program gene count distributions plot." + echo "" + echo " --output_collectri_tf_gp_gene_count_distributions" + echo " type: file, output, file must exist" + echo " example: collectri_tf_gp_gene_count_distributions.svg" + echo " Path to save the CollecTRI TF gene program gene count distributions" + echo " plot." + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "gene_program_mask niche-compass" + exit + ;; + --input_gene_orthologs_mapping_file) + [ -n "$VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE" ] && ViashError Bad arguments for option \'--input_gene_orthologs_mapping_file\': \'$VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_gene_orthologs_mapping_file. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_gene_orthologs_mapping_file=*) + [ -n "$VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE" ] && ViashError Bad arguments for option \'--input_gene_orthologs_mapping_file=*\': \'$VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_metabolite_enzymes) + [ -n "$VIASH_PAR_INPUT_METABOLITE_ENZYMES" ] && ViashError Bad arguments for option \'--input_metabolite_enzymes\': \'$VIASH_PAR_INPUT_METABOLITE_ENZYMES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_METABOLITE_ENZYMES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_metabolite_enzymes. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --input_metabolite_enzymes=*) + [ -n "$VIASH_PAR_INPUT_METABOLITE_ENZYMES" ] && ViashError Bad arguments for option \'--input_metabolite_enzymes=*\': \'$VIASH_PAR_INPUT_METABOLITE_ENZYMES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_METABOLITE_ENZYMES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --input_metabolite_sensors) + [ -n "$VIASH_PAR_INPUT_METABOLITE_SENSORS" ] && ViashError Bad arguments for option \'--input_metabolite_sensors\': \'$VIASH_PAR_INPUT_METABOLITE_SENSORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_METABOLITE_SENSORS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input_metabolite_sensors. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input_metabolite_sensors=*) + [ -n "$VIASH_PAR_INPUT_METABOLITE_SENSORS" ] && ViashError Bad arguments for option \'--input_metabolite_sensors=*\': \'$VIASH_PAR_INPUT_METABOLITE_SENSORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT_METABOLITE_SENSORS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --species) + [ -n "$VIASH_PAR_SPECIES" ] && ViashError Bad arguments for option \'--species\': \'$VIASH_PAR_SPECIES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SPECIES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --species. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --species=*) + [ -n "$VIASH_PAR_SPECIES" ] && ViashError Bad arguments for option \'--species=*\': \'$VIASH_PAR_SPECIES\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_SPECIES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --create_omnipath_gene_program_mask) + [ -n "$VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK" ] && ViashError Bad arguments for option \'--create_omnipath_gene_program_mask\': \'$VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --create_omnipath_gene_program_mask. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --create_omnipath_gene_program_mask=*) + [ -n "$VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK" ] && ViashError Bad arguments for option \'--create_omnipath_gene_program_mask=*\': \'$VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK=$(ViashRemoveFlags "$1") + shift 1 + ;; + --create_nichenet_gene_program_mask) + [ -n "$VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK" ] && ViashError Bad arguments for option \'--create_nichenet_gene_program_mask\': \'$VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --create_nichenet_gene_program_mask. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --create_nichenet_gene_program_mask=*) + [ -n "$VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK" ] && ViashError Bad arguments for option \'--create_nichenet_gene_program_mask=*\': \'$VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK=$(ViashRemoveFlags "$1") + shift 1 + ;; + --create_mebocost_gene_program_mask) + [ -n "$VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK" ] && ViashError Bad arguments for option \'--create_mebocost_gene_program_mask\': \'$VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --create_mebocost_gene_program_mask. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --create_mebocost_gene_program_mask=*) + [ -n "$VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK" ] && ViashError Bad arguments for option \'--create_mebocost_gene_program_mask=*\': \'$VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK=$(ViashRemoveFlags "$1") + shift 1 + ;; + --create_collectri_tf_gene_program_mask) + [ -n "$VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK" ] && ViashError Bad arguments for option \'--create_collectri_tf_gene_program_mask\': \'$VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --create_collectri_tf_gene_program_mask. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --create_collectri_tf_gene_program_mask=*) + [ -n "$VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK" ] && ViashError Bad arguments for option \'--create_collectri_tf_gene_program_mask=*\': \'$VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK=$(ViashRemoveFlags "$1") + shift 1 + ;; + --overlap_thresh_target_genes) + [ -n "$VIASH_PAR_OVERLAP_THRESH_TARGET_GENES" ] && ViashError Bad arguments for option \'--overlap_thresh_target_genes\': \'$VIASH_PAR_OVERLAP_THRESH_TARGET_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OVERLAP_THRESH_TARGET_GENES="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --overlap_thresh_target_genes. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --overlap_thresh_target_genes=*) + [ -n "$VIASH_PAR_OVERLAP_THRESH_TARGET_GENES" ] && ViashError Bad arguments for option \'--overlap_thresh_target_genes=*\': \'$VIASH_PAR_OVERLAP_THRESH_TARGET_GENES\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OVERLAP_THRESH_TARGET_GENES=$(ViashRemoveFlags "$1") + shift 1 + ;; + --omnipath_min_curation_effort) + [ -n "$VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT" ] && ViashError Bad arguments for option \'--omnipath_min_curation_effort\': \'$VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --omnipath_min_curation_effort. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --omnipath_min_curation_effort=*) + [ -n "$VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT" ] && ViashError Bad arguments for option \'--omnipath_min_curation_effort=*\': \'$VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --nichenet_version) + [ -n "$VIASH_PAR_NICHENET_VERSION" ] && ViashError Bad arguments for option \'--nichenet_version\': \'$VIASH_PAR_NICHENET_VERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NICHENET_VERSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --nichenet_version. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --nichenet_version=*) + [ -n "$VIASH_PAR_NICHENET_VERSION" ] && ViashError Bad arguments for option \'--nichenet_version=*\': \'$VIASH_PAR_NICHENET_VERSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NICHENET_VERSION=$(ViashRemoveFlags "$1") + shift 1 + ;; + --nichenet_keep_target_genes_ratio) + [ -n "$VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO" ] && ViashError Bad arguments for option \'--nichenet_keep_target_genes_ratio\': \'$VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --nichenet_keep_target_genes_ratio. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --nichenet_keep_target_genes_ratio=*) + [ -n "$VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO" ] && ViashError Bad arguments for option \'--nichenet_keep_target_genes_ratio=*\': \'$VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO=$(ViashRemoveFlags "$1") + shift 1 + ;; + --nichenet_max_n_target_genes_per_gp) + [ -n "$VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP" ] && ViashError Bad arguments for option \'--nichenet_max_n_target_genes_per_gp\': \'$VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --nichenet_max_n_target_genes_per_gp. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --nichenet_max_n_target_genes_per_gp=*) + [ -n "$VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP" ] && ViashError Bad arguments for option \'--nichenet_max_n_target_genes_per_gp=*\': \'$VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_omnipath_lr_network) + [ -n "$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK" ] && ViashError Bad arguments for option \'--output_omnipath_lr_network\': \'$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_omnipath_lr_network. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_omnipath_lr_network=*) + [ -n "$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK" ] && ViashError Bad arguments for option \'--output_omnipath_lr_network=*\': \'$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_nichenet_lr_network) + [ -n "$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK" ] && ViashError Bad arguments for option \'--output_nichenet_lr_network\': \'$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_nichenet_lr_network. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_nichenet_lr_network=*) + [ -n "$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK" ] && ViashError Bad arguments for option \'--output_nichenet_lr_network=*\': \'$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_nichenet_ligand_target_matrix) + [ -n "$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX" ] && ViashError Bad arguments for option \'--output_nichenet_ligand_target_matrix\': \'$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_nichenet_ligand_target_matrix. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_nichenet_ligand_target_matrix=*) + [ -n "$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX" ] && ViashError Bad arguments for option \'--output_nichenet_ligand_target_matrix=*\': \'$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_collectri_tf_network) + [ -n "$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK" ] && ViashError Bad arguments for option \'--output_collectri_tf_network\': \'$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_collectri_tf_network. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_collectri_tf_network=*) + [ -n "$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK" ] && ViashError Bad arguments for option \'--output_collectri_tf_network=*\': \'$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_omnipath_gp_gene_count_distributions) + [ -n "$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS" ] && ViashError Bad arguments for option \'--output_omnipath_gp_gene_count_distributions\': \'$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_omnipath_gp_gene_count_distributions. Use "--help" to get more information on the parameters. 
&& exit 1 + shift 2 + ;; + --output_omnipath_gp_gene_count_distributions=*) + [ -n "$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS" ] && ViashError Bad arguments for option \'--output_omnipath_gp_gene_count_distributions=*\': \'$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_nichenet_gp_gene_count_distributions) + [ -n "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS" ] && ViashError Bad arguments for option \'--output_nichenet_gp_gene_count_distributions\': \'$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_nichenet_gp_gene_count_distributions. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_nichenet_gp_gene_count_distributions=*) + [ -n "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS" ] && ViashError Bad arguments for option \'--output_nichenet_gp_gene_count_distributions=*\': \'$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_mebocost_gp_gene_count_distributions) + [ -n "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS" ] && ViashError Bad arguments for option \'--output_mebocost_gp_gene_count_distributions\': \'$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_mebocost_gp_gene_count_distributions. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_mebocost_gp_gene_count_distributions=*) + [ -n "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS" ] && ViashError Bad arguments for option \'--output_mebocost_gp_gene_count_distributions=*\': \'$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output_collectri_tf_gp_gene_count_distributions) + [ -n "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS" ] && ViashError Bad arguments for option \'--output_collectri_tf_gp_gene_count_distributions\': \'$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_collectri_tf_gp_gene_count_distributions. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_collectri_tf_gp_gene_count_distributions=*) + [ -n "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS" ] && ViashError Bad arguments for option \'--output_collectri_tf_gp_gene_count_distributions=*\': \'$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # the positional args are eval'd below, so shell-escape each one with printf %q; wrapping in bare single quotes breaks on (or executes) arguments that contain a single quote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS $(printf '%q' "$1")" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/openpipeline_spatial/nichecompass/gene_program_mask:niche-compass' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'bash' 
+fi + +# setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # memory string could not be parsed: unset memory_b by NAME (unset of the empty expansion is a no-op) so later set/unset checks treat it as absent + unset VIASH_META_MEMORY_B + fi +fi +# unset cpus by NAME if the string is empty, so later set/unset checks treat it as absent instead of passing an empty value on +if [ -z "$VIASH_META_CPUS" ]; then + unset VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. 
Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_SPECIES+x} ]; then + VIASH_PAR_SPECIES="human" +fi +if [ -z ${VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK+x} ]; then + VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK="true" +fi +if [ -z ${VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK+x} ]; then + VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK="true" +fi +if [ -z ${VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK+x} ]; then + VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK="true" +fi +if [ -z ${VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK+x} ]; then + VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK="true" +fi +if [ -z ${VIASH_PAR_OVERLAP_THRESH_TARGET_GENES+x} ]; then + VIASH_PAR_OVERLAP_THRESH_TARGET_GENES="1.0" +fi +if [ -z ${VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT+x} ]; then + VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT="2" +fi +if [ -z ${VIASH_PAR_NICHENET_VERSION+x} ]; then + VIASH_PAR_NICHENET_VERSION="v2" +fi +if [ -z ${VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO+x} ]; then + VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO="1.0" +fi +if [ -z ${VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP+x} ]; then + VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP="250" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE" ] && [ ! -e "$VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE" ]; then + ViashError "Input file '$VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_INPUT_METABOLITE_ENZYMES" ] && [ ! -e "$VIASH_PAR_INPUT_METABOLITE_ENZYMES" ]; then + ViashError "Input file '$VIASH_PAR_INPUT_METABOLITE_ENZYMES' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_INPUT_METABOLITE_SENSORS" ] && [ ! -e "$VIASH_PAR_INPUT_METABOLITE_SENSORS" ]; then + ViashError "Input file '$VIASH_PAR_INPUT_METABOLITE_SENSORS' does not exist." 
+ exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK" ]]; then + if ! [[ "$VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--create_omnipath_gene_program_mask' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK" ]]; then + if ! [[ "$VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--create_nichenet_gene_program_mask' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK" ]]; then + if ! [[ "$VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--create_mebocost_gene_program_mask' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK" ]]; then + if ! [[ "$VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then + ViashError '--create_collectri_tf_gene_program_mask' has to be a boolean. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_OVERLAP_THRESH_TARGET_GENES" ]]; then + if ! [[ "$VIASH_PAR_OVERLAP_THRESH_TARGET_GENES" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--overlap_thresh_target_genes' has to be a double. Use "--help" to get more information on the parameters. + exit 1 + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_OVERLAP_THRESH_TARGET_GENES '>=' 0.0 | bc` -eq 1 ]]; then + ViashError '--overlap_thresh_target_genes' has be more than or equal to 0.0. 
Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_OVERLAP_THRESH_TARGET_GENES -v n2=0.0 'BEGIN { print (n1 >= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--overlap_thresh_target_genes' has be more than or equal to 0.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--overlap_thresh_target_genes' specifies a minimum value but the value was not verified as neither \'bc\' or \`awk\` are present on the system. + fi + if command -v bc &> /dev/null; then + if ! [[ `echo $VIASH_PAR_OVERLAP_THRESH_TARGET_GENES '<=' 1.0 | bc` -eq 1 ]]; then + ViashError '--overlap_thresh_target_genes' has to be less than or equal to 1.0. Use "--help" to get more information on the parameters. + exit 1 + fi + elif command -v awk &> /dev/null; then + if ! [[ `awk -v n1=$VIASH_PAR_OVERLAP_THRESH_TARGET_GENES -v n2=1.0 'BEGIN { print (n1 <= n2) ? "1" : "0" }'` -eq 1 ]]; then + ViashError '--overlap_thresh_target_genes' has be less than or equal to 1.0. Use "--help" to get more information on the parameters. + exit 1 + fi + else + ViashWarning '--overlap_thresh_target_genes' specifies a maximum value but the value was not verified as neither \'bc\' or \'awk\' are present on the system. + fi +fi +if [[ -n "$VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT" ]]; then + if ! [[ "$VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--omnipath_min_curation_effort' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO" ]]; then + if ! [[ "$VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO" =~ ^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$ ]]; then + ViashError '--nichenet_keep_target_genes_ratio' has to be a double. Use "--help" to get more information on the parameters. 
+ exit 1 + fi +fi +if [[ -n "$VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP" ]]; then + if ! [[ "$VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--nichenet_max_n_target_genes_per_gp' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. 
Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# check whether value is belongs to a set of choices +if [ ! -z "$VIASH_PAR_SPECIES" ]; then + VIASH_PAR_SPECIES_CHOICES=("human;mouse") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_SPECIES_CHOICES[*]};" =~ ";$VIASH_PAR_SPECIES;" ]]; then + ViashError '--species' specified value of \'$VIASH_PAR_SPECIES\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +if [ ! -z "$VIASH_PAR_NICHENET_VERSION" ]; then + VIASH_PAR_NICHENET_VERSION_CHOICES=("v1;v2") + IFS=';' + set -f + if ! [[ ";${VIASH_PAR_NICHENET_VERSION_CHOICES[*]};" =~ ";$VIASH_PAR_NICHENET_VERSION;" ]]; then + ViashError '--nichenet_version' specified value of \'$VIASH_PAR_NICHENET_VERSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS")" +fi +if [ ! -z "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS" ] && [ ! 
-d "$(dirname "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE")" ) + VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE") +fi +if [ ! -z "$VIASH_PAR_INPUT_METABOLITE_ENZYMES" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT_METABOLITE_ENZYMES")" ) + VIASH_PAR_INPUT_METABOLITE_ENZYMES=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT_METABOLITE_ENZYMES") +fi +if [ ! -z "$VIASH_PAR_INPUT_METABOLITE_SENSORS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT_METABOLITE_SENSORS")" ) + VIASH_PAR_INPUT_METABOLITE_SENSORS=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT_METABOLITE_SENSORS") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK")" ) + VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK" ) +fi +if [ ! 
-z "$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK")" ) + VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX")" ) + VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK")" ) + VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS")" ) + VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS")" ) + VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS" ) +fi +if [ ! 
-z "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS")" ) + VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS" ) +fi +if [ ! -z "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS")" ) + VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS" ) +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! 
-z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! -z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-gene_program_mask-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." 
+ exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +import os +import sys +import shutil +import json + +from nichecompass.utils import ( + extract_gp_dict_from_mebocost_ms_interactions, + extract_gp_dict_from_nichenet_lrt_interactions, + extract_gp_dict_from_omnipath_lr_interactions, + filter_and_combine_gp_dict_gps_v2, + extract_gp_dict_from_collectri_tf_network, +) + + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input_gene_orthologs_mapping_file': $( if [ ! -z ${VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE+x} ]; then echo "r'${VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_metabolite_enzymes': $( if [ ! -z ${VIASH_PAR_INPUT_METABOLITE_ENZYMES+x} ]; then echo "r'${VIASH_PAR_INPUT_METABOLITE_ENZYMES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'input_metabolite_sensors': $( if [ ! -z ${VIASH_PAR_INPUT_METABOLITE_SENSORS+x} ]; then echo "r'${VIASH_PAR_INPUT_METABOLITE_SENSORS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'species': $( if [ ! -z ${VIASH_PAR_SPECIES+x} ]; then echo "r'${VIASH_PAR_SPECIES//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'create_omnipath_gene_program_mask': $( if [ ! -z ${VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK+x} ]; then echo "r'${VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'create_nichenet_gene_program_mask': $( if [ ! -z ${VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK+x} ]; then echo "r'${VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'create_mebocost_gene_program_mask': $( if [ ! -z ${VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK+x} ]; then echo "r'${VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'create_collectri_tf_gene_program_mask': $( if [ ! 
-z ${VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK+x} ]; then echo "r'${VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), + 'overlap_thresh_target_genes': $( if [ ! -z ${VIASH_PAR_OVERLAP_THRESH_TARGET_GENES+x} ]; then echo "float(r'${VIASH_PAR_OVERLAP_THRESH_TARGET_GENES//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'omnipath_min_curation_effort': $( if [ ! -z ${VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT+x} ]; then echo "int(r'${VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'nichenet_version': $( if [ ! -z ${VIASH_PAR_NICHENET_VERSION+x} ]; then echo "r'${VIASH_PAR_NICHENET_VERSION//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'nichenet_keep_target_genes_ratio': $( if [ ! -z ${VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO+x} ]; then echo "float(r'${VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'nichenet_max_n_target_genes_per_gp': $( if [ ! -z ${VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_omnipath_lr_network': $( if [ ! -z ${VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_nichenet_lr_network': $( if [ ! -z ${VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK+x} ]; then echo "r'${VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_nichenet_ligand_target_matrix': $( if [ ! -z ${VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_collectri_tf_network': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_omnipath_gp_gene_count_distributions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_nichenet_gp_gene_count_distributions': $( if [ ! -z ${VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_mebocost_gp_gene_count_distributions': $( if [ ! -z ${VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_collectri_tf_gp_gene_count_distributions': $( if [ ! -z ${VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS//\'/\'\"\'\"r\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! 
-z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! 
## VIASH END
# Entry point of the gene_program_mask component.
#
# Reads the injected par/meta dictionaries, builds one prior-knowledge gene
# program (GP) dictionary per enabled source (Omnipath, NicheNet, MeBocost,
# CollecTRI), combines them with filter_and_combine_gp_dict_gps_v2 and writes
# the combined dictionary to par["output"] as JSON.
#
# NOTE: this script is embedded in a shell heredoc by the build, so comments
# deliberately avoid shell-special characters.

# Make the helper modules shipped in the resources directory importable.
sys.path.append(meta["resources_dir"])
from setup_logger import setup_logger

logger = setup_logger()

# Validate that inputs are provided correctly
if not any(
    [
        par["create_omnipath_gene_program_mask"],
        par["create_nichenet_gene_program_mask"],
        par["create_mebocost_gene_program_mask"],
        par["create_collectri_tf_gene_program_mask"],
    ]
):
    raise ValueError("At least one gene program mask must be set to True")
if (
    par["create_omnipath_gene_program_mask"]
    and par["species"] == "mouse"
    and not par["input_gene_orthologs_mapping_file"]
):
    raise ValueError(
        "For mouse species, a --input_gene_orthologs_mapping_file file must be provided for generating the omnipath mask."
    )
if (
    par["create_nichenet_gene_program_mask"]
    and par["species"] == "mouse"
    and not par["input_gene_orthologs_mapping_file"]
):
    raise ValueError(
        "For mouse species, a --input_gene_orthologs_mapping_file file must be provided for generating the nichenet mask."
    )
if par["create_mebocost_gene_program_mask"] and (
    not par["input_metabolite_enzymes"] or not par["input_metabolite_sensors"]
):
    raise ValueError(
        "For mebocost gene program mask, both --input_metabolite_enzymes and --input_metabolite_sensors files must be provided."
    )

# Assemble gene program dictionaries
gp_dicts = []

if par["create_omnipath_gene_program_mask"]:
    logger.info("Generating Omnipath gene program mask...")

    # Optional side outputs are only requested when a path was provided.
    plot_gp_gene_count_distributions = bool(
        par["output_omnipath_gp_gene_count_distributions"]
    )
    save_to_disk = bool(par["output_omnipath_lr_network"])

    omnipath_gp_dict = extract_gp_dict_from_omnipath_lr_interactions(
        species=par["species"],
        min_curation_effort=par["omnipath_min_curation_effort"],
        load_from_disk=False,
        # Previously hard-coded to True, which ignored the flag computed above
        # and requested a save even when no network output path was given.
        save_to_disk=save_to_disk,
        lr_network_file_path=par["output_omnipath_lr_network"],
        gene_orthologs_mapping_file_path=par["input_gene_orthologs_mapping_file"],
        plot_gp_gene_count_distributions=plot_gp_gene_count_distributions,
        gp_gene_count_distributions_save_path=par[
            "output_omnipath_gp_gene_count_distributions"
        ],
    )

    gp_dicts.append(omnipath_gp_dict)

if par["create_nichenet_gene_program_mask"]:
    logger.info("Generating NicheNet gene program mask...")

    plot_gp_gene_count_distributions = bool(
        par["output_nichenet_gp_gene_count_distributions"]
    )
    # Saving is enabled as soon as either NicheNet side output is requested.
    save_to_disk = bool(
        par["output_nichenet_lr_network"]
        or par["output_nichenet_ligand_target_matrix"]
    )

    nichenet_gp_dict = extract_gp_dict_from_nichenet_lrt_interactions(
        species=par["species"],
        version=par["nichenet_version"],
        keep_target_genes_ratio=par["nichenet_keep_target_genes_ratio"],
        max_n_target_genes_per_gp=par["nichenet_max_n_target_genes_per_gp"],
        load_from_disk=False,
        save_to_disk=save_to_disk,
        lr_network_file_path=par["output_nichenet_lr_network"],
        ligand_target_matrix_file_path=par["output_nichenet_ligand_target_matrix"],
        gene_orthologs_mapping_file_path=par["input_gene_orthologs_mapping_file"],
        plot_gp_gene_count_distributions=plot_gp_gene_count_distributions,
        gp_gene_count_distributions_save_path=par[
            "output_nichenet_gp_gene_count_distributions"
        ],
    )

    gp_dicts.append(nichenet_gp_dict)

if par["create_mebocost_gene_program_mask"]:
    logger.info("Generating MeBocost gene program mask...")

    # The extractor is given a directory rather than file paths, so both
    # inputs are staged in the temp dir under species-prefixed names.
    # NOTE(review): presumably these are the file names the extractor looks
    # for inside dir_path - confirm against the nichecompass documentation.
    shutil.copy2(
        par["input_metabolite_enzymes"],
        os.path.join(meta["temp_dir"], f"{par['species']}_metabolite_enzymes.tsv"),
    )
    shutil.copy2(
        par["input_metabolite_sensors"],
        os.path.join(meta["temp_dir"], f"{par['species']}_metabolite_sensors.tsv"),
    )
    plot_gp_gene_count_distributions = bool(
        par["output_mebocost_gp_gene_count_distributions"]
    )

    mebocost_gp_dict = extract_gp_dict_from_mebocost_ms_interactions(
        dir_path=meta["temp_dir"],
        species=par["species"],
        plot_gp_gene_count_distributions=plot_gp_gene_count_distributions,
        gp_gene_count_distributions_save_path=par[
            "output_mebocost_gp_gene_count_distributions"
        ],
    )

    gp_dicts.append(mebocost_gp_dict)

if par["create_collectri_tf_gene_program_mask"]:
    logger.info("Generating CollecTRI TF gene program mask...")

    plot_gp_gene_count_distributions = bool(
        par["output_collectri_tf_gp_gene_count_distributions"]
    )
    save_to_disk = bool(par["output_collectri_tf_network"])

    collectri_gp_dict = extract_gp_dict_from_collectri_tf_network(
        species=par["species"],
        save_to_disk=save_to_disk,
        tf_network_file_path=par["output_collectri_tf_network"],
        plot_gp_gene_count_distributions=plot_gp_gene_count_distributions,
        gp_gene_count_distributions_save_path=par[
            "output_collectri_tf_gp_gene_count_distributions"
        ],
    )

    gp_dicts.append(collectri_gp_dict)

# Filter and combine GPs
# Explicit raise instead of assert: assert statements are removed when Python
# runs with -O, which would silently disable this guard.
if not gp_dicts:
    raise ValueError("No gene program dictionaries were created.")

combined_gp_dict = filter_and_combine_gp_dict_gps_v2(
    gp_dicts,
    overlap_thresh_target_genes=par["overlap_thresh_target_genes"],
    verbose=True,
)

logger.info("Gene program mask generation completed.")
logger.info(
    f"Number of gene programs after filtering and combining: {len(combined_gp_dict)}."
)

logger.info(f"Saving combined gene program mask to: {par['output']}")
with open(par["output"], "w") as f:
    json.dump(combined_gp_dict, f)
-z "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS" ]; then + VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS" ]; then + VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS") + fi + if [ ! -z "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS" ]; then + VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK" ] && [ ! -e "$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK" ] && [ ! -e "$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX" ] && [ ! 
-e "$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK" ] && [ ! -e "$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS" ] && [ ! -e "$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS" ] && [ ! -e "$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS" ] && [ ! -e "$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS" ] && [ ! -e "$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS' does not exist." + exit 1 +fi + + +exit 0 diff --git a/target/executable/nichecompass/gene_program_mask/nextflow_labels.config b/target/executable/nichecompass/gene_program_mask/nextflow_labels.config new file mode 100644 index 0000000..541aaad --- /dev/null +++ b/target/executable/nichecompass/gene_program_mask/nextflow_labels.config @@ -0,0 +1,68 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 
// Clamp a requested memory amount to the optional process.maxMemory ceiling.
//
// to_compare: the memory amount requested by a label (e.g. 50.GB * task.attempt).
// Returns to_compare unchanged when process.maxMemory is unset or falsy.
// Otherwise returns process.maxMemory on the attempt whose number equals
// process.maxRetries, or whenever to_compare exceeds the ceiling, and
// to_compare in all remaining cases.
// Any failure while evaluating the settings prints an error and exits with
// status 1.
def get_memory(to_compare) {
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // compareTo only guarantees the sign of its result, so test "> 0"
    // rather than "== 1".
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Was "max_memory", an undefined name: the clamp branch always threw,
      // got swallowed by the catch below and terminated the run.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}
Only used when `--delaunay\ - \ False`.\n" - info: null - default: - - 6 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--delaunay" - description: "Whether to use Delaunay triangulation to determine spatial neighborhood\ - \ graph.\nOnly used when `--coord_type generic`.\n" - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ";" - name: "Gene Program Mask" arguments: - type: "integer" @@ -807,12 +768,12 @@ argument_groups: multiple: false multiple_sep: ";" - type: "string" - name: "--output_uns_gene_index" + name: "--output_uns_genes_index" description: "Key of the uns field where the index of a concatenated vector of\ \ target and source genes that are in the gene program masks will be stored.\n" info: null default: - - "nichecompass_gene_idx" + - "nichecompass_genes_idx" required: false direction: "input" multiple: false @@ -870,6 +831,20 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" + - type: "string" + name: "--output_compression" + description: "Compression format to use for the output AnnData and/or Mudata objects.\n\ + By default no compression is applied.\n" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ";" resources: - type: "python_script" path: "script.py" @@ -1028,7 +1003,7 @@ build_info: output: "target/executable/nichecompass/nichecompass" executable: "target/executable/nichecompass/nichecompass/nichecompass" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/executable/nichecompass/nichecompass/nichecompass b/target/executable/nichecompass/nichecompass/nichecompass index c677f5b..8bdb46b 100755 --- 
a/target/executable/nichecompass/nichecompass/nichecompass +++ b/target/executable/nichecompass/nichecompass/nichecompass @@ -464,9 +464,9 @@ RUN pip install --upgrade pip && \ LABEL org.opencontainers.image.authors="Dorien Roosen" LABEL org.opencontainers.image.description="Companion container for running component nichecompass nichecompass" -LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z" +LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z" LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial" -LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" +LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e" LABEL org.opencontainers.image.version="niche-compass" VIASHDOCKER @@ -626,32 +626,6 @@ function ViashHelp { echo " type: string, multiple values allowed" echo " Keys of the adata.obs fields to use as covariates." echo "" - echo "Spatial Neighbors Calculation:" - echo " --coord_type" - echo " type: string" - echo " choices: [ generic, grid ]" - echo " Type of coordinate system. Valid options are:" - echo " \`grid\` - grid coordinates." - echo " \`generic\` - generic coordinates." - echo " If not provided, \`grid\` is used if \`--input_obsm_spatial_coords\` is in" - echo " --input .uns with \`--n_neighs\` = 6 (Visium), otherwise \`generic\` is" - echo " used." - echo "" - echo " --n_spatial_neighbors" - echo " type: integer" - echo " default: 6" - echo " Depending on \`--coord_type\`:" - echo " \`grid\` - number of neighboring tiles." - echo " \`generic\` - number of neighborhoods for non-grid data. Only used when" - echo " \`--delaunay False\`." - echo "" - echo " --delaunay" - echo " type: boolean" - echo " default: false" - echo " Whether to use Delaunay triangulation to determine spatial neighborhood" - echo " graph." - echo " Only used when \`--coord_type generic\`." 
- echo "" echo "Gene Program Mask:" echo " --min_genes_per_gp" echo " type: integer" @@ -1063,9 +1037,9 @@ function ViashHelp { echo " default: nichecompass_active_gp_names" echo " Key of the uns field where the active gene program names will be stored." echo "" - echo " --output_uns_gene_index" + echo " --output_uns_genes_index" echo " type: string" - echo " default: nichecompass_gene_idx" + echo " default: nichecompass_genes_idx" echo " Key of the uns field where the index of a concatenated vector of target" echo " and source genes that are in the gene program masks will be stored." echo "" @@ -1098,6 +1072,13 @@ function ViashHelp { echo " Key of the obsp field where the aggregation weights of the node label" echo " aggregator will be stored." echo "" + echo " --output_compression" + echo " type: string" + echo " example: gzip" + echo " choices: [ gzip, lzf ]" + echo " Compression format to use for the output AnnData and/or Mudata objects." + echo " By default no compression is applied." + echo "" echo "Viash built in Computational Requirements:" echo " ---cpus=INT" echo " Number of CPUs to use" @@ -1220,39 +1201,6 @@ while [[ $# -gt 0 ]]; do fi shift 1 ;; - --coord_type) - [ -n "$VIASH_PAR_COORD_TYPE" ] && ViashError Bad arguments for option \'--coord_type\': \'$VIASH_PAR_COORD_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_COORD_TYPE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --coord_type. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --coord_type=*) - [ -n "$VIASH_PAR_COORD_TYPE" ] && ViashError Bad arguments for option \'--coord_type=*\': \'$VIASH_PAR_COORD_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_COORD_TYPE=$(ViashRemoveFlags "$1") - shift 1 - ;; - --n_spatial_neighbors) - [ -n "$VIASH_PAR_N_SPATIAL_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_spatial_neighbors\': \'$VIASH_PAR_N_SPATIAL_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_SPATIAL_NEIGHBORS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --n_spatial_neighbors. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --n_spatial_neighbors=*) - [ -n "$VIASH_PAR_N_SPATIAL_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_spatial_neighbors=*\': \'$VIASH_PAR_N_SPATIAL_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_N_SPATIAL_NEIGHBORS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --delaunay) - [ -n "$VIASH_PAR_DELAUNAY" ] && ViashError Bad arguments for option \'--delaunay\': \'$VIASH_PAR_DELAUNAY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DELAUNAY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --delaunay. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --delaunay=*) - [ -n "$VIASH_PAR_DELAUNAY" ] && ViashError Bad arguments for option \'--delaunay=*\': \'$VIASH_PAR_DELAUNAY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_DELAUNAY=$(ViashRemoveFlags "$1") - shift 1 - ;; --min_genes_per_gp) [ -n "$VIASH_PAR_MIN_GENES_PER_GP" ] && ViashError Bad arguments for option \'--min_genes_per_gp\': \'$VIASH_PAR_MIN_GENES_PER_GP\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 VIASH_PAR_MIN_GENES_PER_GP="$2" @@ -1893,15 +1841,15 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES=$(ViashRemoveFlags "$1") shift 1 ;; - --output_uns_gene_index) - [ -n "$VIASH_PAR_OUTPUT_UNS_GENE_INDEX" ] && ViashError Bad arguments for option \'--output_uns_gene_index\': \'$VIASH_PAR_OUTPUT_UNS_GENE_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_UNS_GENE_INDEX="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_gene_index. Use "--help" to get more information on the parameters. && exit 1 + --output_uns_genes_index) + [ -n "$VIASH_PAR_OUTPUT_UNS_GENES_INDEX" ] && ViashError Bad arguments for option \'--output_uns_genes_index\': \'$VIASH_PAR_OUTPUT_UNS_GENES_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_UNS_GENES_INDEX="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_genes_index. Use "--help" to get more information on the parameters. && exit 1 shift 2 ;; - --output_uns_gene_index=*) - [ -n "$VIASH_PAR_OUTPUT_UNS_GENE_INDEX" ] && ViashError Bad arguments for option \'--output_uns_gene_index=*\': \'$VIASH_PAR_OUTPUT_UNS_GENE_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT_UNS_GENE_INDEX=$(ViashRemoveFlags "$1") + --output_uns_genes_index=*) + [ -n "$VIASH_PAR_OUTPUT_UNS_GENES_INDEX" ] && ViashError Bad arguments for option \'--output_uns_genes_index=*\': \'$VIASH_PAR_OUTPUT_UNS_GENES_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_OUTPUT_UNS_GENES_INDEX=$(ViashRemoveFlags "$1") shift 1 ;; --output_uns_target_genes_index) @@ -1965,6 +1913,17 @@ while [[ $# -gt 0 ]]; do VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS=$(ViashRemoveFlags "$1") shift 1 ;; + --output_compression) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output_compression=*) + [ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1") + shift 1 + ;; ---engine) VIASH_ENGINE_ID="$2" shift 2 @@ -2185,12 +2144,6 @@ fi if [ -z ${VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES+x} ]; then VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES="spatial_connectivities" fi -if [ -z ${VIASH_PAR_N_SPATIAL_NEIGHBORS+x} ]; then - VIASH_PAR_N_SPATIAL_NEIGHBORS="6" -fi -if [ -z ${VIASH_PAR_DELAUNAY+x} ]; then - VIASH_PAR_DELAUNAY="false" -fi if [ -z ${VIASH_PAR_MIN_GENES_PER_GP+x} ]; then VIASH_PAR_MIN_GENES_PER_GP="1" fi @@ -2326,8 +2279,8 @@ fi if [ -z ${VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES+x} ]; then VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES="nichecompass_active_gp_names" fi -if [ -z ${VIASH_PAR_OUTPUT_UNS_GENE_INDEX+x} ]; then - VIASH_PAR_OUTPUT_UNS_GENE_INDEX="nichecompass_gene_idx" +if [ -z ${VIASH_PAR_OUTPUT_UNS_GENES_INDEX+x} ]; then + VIASH_PAR_OUTPUT_UNS_GENES_INDEX="nichecompass_genes_idx" fi if [ -z ${VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX+x} ]; then VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX="nichecompass_target_genes_idx" @@ -2353,18 +2306,6 @@ if [ ! 
-z "$VIASH_PAR_INPUT_GP_MASK" ] && [ ! -e "$VIASH_PAR_INPUT_GP_MASK" ]; t fi # check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_N_SPATIAL_NEIGHBORS" ]]; then - if ! [[ "$VIASH_PAR_N_SPATIAL_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--n_spatial_neighbors' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_DELAUNAY" ]]; then - if ! [[ "$VIASH_PAR_DELAUNAY" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then - ViashError '--delaunay' has to be a boolean. Use "--help" to get more information on the parameters. - exit 1 - fi -fi if [[ -n "$VIASH_PAR_MIN_GENES_PER_GP" ]]; then if ! [[ "$VIASH_PAR_MIN_GENES_PER_GP" =~ ^[-+]?[0-9]+$ ]]; then ViashError '--min_genes_per_gp' has to be an integer. Use "--help" to get more information on the parameters. @@ -2989,18 +2930,6 @@ if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then fi # check whether value is belongs to a set of choices -if [ ! -z "$VIASH_PAR_COORD_TYPE" ]; then - VIASH_PAR_COORD_TYPE_CHOICES=("generic;grid") - IFS=';' - set -f - if ! [[ ";${VIASH_PAR_COORD_TYPE_CHOICES[*]};" =~ ";$VIASH_PAR_COORD_TYPE;" ]]; then - ViashError '--coord_type' specified value of \'$VIASH_PAR_COORD_TYPE\' is not in the list of allowed values. Use "--help" to get more information on the parameters. - exit 1 - fi - set +f - unset IFS -fi - if [ ! -z "$VIASH_PAR_COVARIATE_EMBEDDING_INJECTION_LAYERS" ]; then VIASH_PAR_COVARIATE_EMBEDDING_INJECTION_LAYERS_CHOICES=("encoder;gene_expr_decoder;chrom_access_decoder") IFS=';' @@ -3063,6 +2992,18 @@ if [ ! -z "$VIASH_PAR_CONV_LAYER_ENCODER" ]; then unset IFS fi +if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then + VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip;lzf") + IFS=';' + set -f + if ! 
[[ ";${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]};" =~ ";$VIASH_PAR_OUTPUT_COMPRESSION;" ]]; then + ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters. + exit 1 + fi + set +f + unset IFS +fi + # create parent directories of output files, if so desired if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" @@ -3185,9 +3126,6 @@ par = { 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ), 'input_obsm_spatial_connectivities': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES//\'/\'\"\'\"r\'}'"; else echo None; fi ), 'input_obs_covariates': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_COVARIATES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_COVARIATES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), - 'coord_type': $( if [ ! -z ${VIASH_PAR_COORD_TYPE+x} ]; then echo "r'${VIASH_PAR_COORD_TYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'n_spatial_neighbors': $( if [ ! -z ${VIASH_PAR_N_SPATIAL_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_SPATIAL_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'delaunay': $( if [ ! -z ${VIASH_PAR_DELAUNAY+x} ]; then echo "r'${VIASH_PAR_DELAUNAY//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ), 'min_genes_per_gp': $( if [ ! -z ${VIASH_PAR_MIN_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES_PER_GP//\'/\'\"\'\"r\'}')"; else echo None; fi ), 'min_source_genes_per_gp': $( if [ ! -z ${VIASH_PAR_MIN_SOURCE_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_SOURCE_GENES_PER_GP//\'/\'\"\'\"r\'}')"; else echo None; fi ), 'min_target_genes_per_gp': $( if [ ! 
-z ${VIASH_PAR_MIN_TARGET_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_TARGET_GENES_PER_GP//\'/\'\"\'\"r\'}')"; else echo None; fi ), @@ -3244,12 +3182,13 @@ par = { 'output_varm_gp_sources_mask': $( if [ ! -z ${VIASH_PAR_OUTPUT_VARM_GP_SOURCES_MASK+x} ]; then echo "r'${VIASH_PAR_OUTPUT_VARM_GP_SOURCES_MASK//\'/\'\"\'\"r\'}'"; else echo None; fi ), 'output_uns_gp_names': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_GP_NAMES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GP_NAMES//\'/\'\"\'\"r\'}'"; else echo None; fi ), 'output_uns_active_gp_names': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_uns_gene_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_GENE_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GENE_INDEX//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_uns_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GENES_INDEX//\'/\'\"\'\"r\'}'"; else echo None; fi ), 'output_uns_target_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX//\'/\'\"\'\"r\'}'"; else echo None; fi ), 'output_uns_source_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_SOURCE_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_SOURCE_GENES_INDEX//\'/\'\"\'\"r\'}'"; else echo None; fi ), 'output_uns_covariate_embeddings': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_COVARIATE_EMBEDDINGS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_COVARIATE_EMBEDDINGS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ), 'output_obsp_reconstructed_adj_edge_proba': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBSP_RECONSTRUCTED_ADJ_EDGE_PROBA+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_RECONSTRUCTED_ADJ_EDGE_PROBA//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output_obsp_agg_weights': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS//\'/\'\"\'\"r\'}'"; else echo None; fi ) + 'output_obsp_agg_weights': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi ) } meta = { 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), @@ -3288,21 +3227,19 @@ logger.info("GPU enabled? %s", use_gpu) ## Read in data adata = mu.read_h5ad(par["input"], mod=par["modality"]) -# ## Compute spatial neighbor graph -# logger.info("Computing spatial neighbor graph...") -# # Compute connectivities and distances -# sq.gr.spatial_neighbors( -# adata, -# coord_type=par["coord_type"], -# spatial_key=par["input_obsm_spatial_coords"], -# n_neighs=par["n_spatial_neighbors"], -# delaunay=par["delaunay"], -# ) - -# # Making the connectivity matrix symmetric -# adata.obsp["spatial_connectivities"] = adata.obsp["spatial_connectivities"].maximum( -# adata.obsp["spatial_connectivities"].T -# ) +# Counts need to be float32 to be processed by nichecompass model +# See https://discuss.pytorch.org/t/runtimeerror-mat1-and-mat2-must-have-the-same-dtype/166759 +counts_dtype = ( + adata.layers[par["layer"]].dtype if par["layer"] is not None else adata.X.dtype +) +if counts_dtype != "float32": + logger.info( + f"Converting count data to float32 from {counts_dtype} for model compatibility..." 
+ ) + if par["layer"] is not None: + adata.layers[par["layer"]] = adata.layers[par["layer"]].astype("float32") + else: + adata.X = adata.X.astype("float32") ## Add GP mask to data logger.info("Adding prior knowledge gene program mask to data...") @@ -3315,7 +3252,7 @@ add_gps_from_gp_dict_to_adata( gp_targets_mask_key=par["output_varm_gp_targets_mask"], gp_sources_mask_key=par["output_varm_gp_sources_mask"], gp_names_key=par["output_uns_gp_names"], - genes_idx_key=par["output_uns_gene_index"], + genes_idx_key=par["output_uns_genes_index"], target_genes_idx_key=par["output_uns_target_genes_index"], source_genes_idx_key=par["output_uns_source_genes_index"], min_genes_per_gp=par["min_genes_per_gp"], @@ -3338,12 +3275,12 @@ model = NicheCompass( gp_sources_mask_key=par["output_varm_gp_sources_mask"], latent_key=par["output_obsm_embedding"], cat_covariates_keys=par["input_obs_covariates"], - cat_covariates_no_edges=par["covariates_edges"], + cat_covariates_no_edges=par["covariate_edges"], cat_covariates_embeds_keys=par["output_uns_covariate_embeddings"], - cat_covariates_embeds_injection_layers=par["covariate_embedding_injection_layers"], - gene_idx_key=par["output_uns_gene_index"], - target_gene_idx_key=par["output_uns_target_genes_index"], - source_gene_idx_key=par["output_uns_source_genes_index"], + cat_covariates_embeds_injection=par["covariate_embedding_injection_layers"], + genes_idx_key=par["output_uns_genes_index"], + target_genes_idx_key=par["output_uns_target_genes_index"], + source_genes_idx_key=par["output_uns_source_genes_index"], recon_adj_key=par["output_obsp_reconstructed_adj_edge_proba"], agg_weights_key=par["output_obsp_agg_weights"], include_edge_recon_loss=par["include_edge_recon_loss"], @@ -3364,7 +3301,6 @@ model = NicheCompass( encoder_use_bn=par["encoder_use_bn"], dropout_rate_encoder=par["dropout_rate_encoder"], dropout_rate_graph_decoder=par["dropout_rate_graph_decoder"], - cat_covariates_cats=par["cat_covariates_cats"], 
n_addon_gp=par["n_addon_gp"], cat_covariates_embeds_nums=par["cat_covariates_embeds_nums"], seed=par["random_state"], @@ -3402,7 +3338,7 @@ model.train( ## Save model and data logger.info("Saving NicheCompass model and data...") mdata = mu.MuData({par["modality"]: adata}) -mdata.write_h5mu(par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) model.save(par["output_model"], save_adata=False) VIASHMAIN diff --git a/target/nextflow/convert/from_cells2stats_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_cells2stats_to_h5mu/.config.vsh.yaml index 9f0d1aa..556435d 100644 --- a/target/nextflow/convert/from_cells2stats_to_h5mu/.config.vsh.yaml +++ b/target/nextflow/convert/from_cells2stats_to_h5mu/.config.vsh.yaml @@ -301,7 +301,7 @@ build_info: output: "target/nextflow/convert/from_cells2stats_to_h5mu" executable: "target/nextflow/convert/from_cells2stats_to_h5mu/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_cells2stats_to_h5mu/main.nf b/target/nextflow/convert/from_cells2stats_to_h5mu/main.nf index e979919..d1259da 100644 --- a/target/nextflow/convert/from_cells2stats_to_h5mu/main.nf +++ b/target/nextflow/convert/from_cells2stats_to_h5mu/main.nf @@ -3399,7 +3399,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_cells2stats_to_h5mu", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_cosmx_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_cosmx_to_h5mu/.config.vsh.yaml index 
b2dd1b3..8549c38 100644 --- a/target/nextflow/convert/from_cosmx_to_h5mu/.config.vsh.yaml +++ b/target/nextflow/convert/from_cosmx_to_h5mu/.config.vsh.yaml @@ -238,7 +238,7 @@ build_info: output: "target/nextflow/convert/from_cosmx_to_h5mu" executable: "target/nextflow/convert/from_cosmx_to_h5mu/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_cosmx_to_h5mu/main.nf b/target/nextflow/convert/from_cosmx_to_h5mu/main.nf index 864dc9b..0772fb9 100644 --- a/target/nextflow/convert/from_cosmx_to_h5mu/main.nf +++ b/target/nextflow/convert/from_cosmx_to_h5mu/main.nf @@ -3350,7 +3350,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_cosmx_to_h5mu", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml b/target/nextflow/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml index 292187c..f1c56a6 100644 --- a/target/nextflow/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml +++ b/target/nextflow/convert/from_cosmx_to_spatialexperiment/.config.vsh.yaml @@ -234,7 +234,7 @@ build_info: output: "target/nextflow/convert/from_cosmx_to_spatialexperiment" executable: "target/nextflow/convert/from_cosmx_to_spatialexperiment/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git 
a/target/nextflow/convert/from_cosmx_to_spatialexperiment/main.nf b/target/nextflow/convert/from_cosmx_to_spatialexperiment/main.nf index 1cd041f..6d3d93f 100644 --- a/target/nextflow/convert/from_cosmx_to_spatialexperiment/main.nf +++ b/target/nextflow/convert/from_cosmx_to_spatialexperiment/main.nf @@ -3326,7 +3326,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_cosmx_to_spatialexperiment", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml b/target/nextflow/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml index e58f1cd..2350628 100644 --- a/target/nextflow/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml +++ b/target/nextflow/convert/from_h5mu_to_spatialexperiment/.config.vsh.yaml @@ -224,7 +224,7 @@ build_info: output: "target/nextflow/convert/from_h5mu_to_spatialexperiment" executable: "target/nextflow/convert/from_h5mu_to_spatialexperiment/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_h5mu_to_spatialexperiment/main.nf b/target/nextflow/convert/from_h5mu_to_spatialexperiment/main.nf index be53f9b..12a0047 100644 --- a/target/nextflow/convert/from_h5mu_to_spatialexperiment/main.nf +++ b/target/nextflow/convert/from_h5mu_to_spatialexperiment/main.nf @@ -3332,7 +3332,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_h5mu_to_spatialexperiment", "viash_version" : "0.9.4", - "git_commit" : 
"0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_spatialdata_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_spatialdata_to_h5mu/.config.vsh.yaml index 157b722..f2f7ed2 100644 --- a/target/nextflow/convert/from_spatialdata_to_h5mu/.config.vsh.yaml +++ b/target/nextflow/convert/from_spatialdata_to_h5mu/.config.vsh.yaml @@ -221,7 +221,7 @@ build_info: output: "target/nextflow/convert/from_spatialdata_to_h5mu" executable: "target/nextflow/convert/from_spatialdata_to_h5mu/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_spatialdata_to_h5mu/main.nf b/target/nextflow/convert/from_spatialdata_to_h5mu/main.nf index b1b4880..5158603 100644 --- a/target/nextflow/convert/from_spatialdata_to_h5mu/main.nf +++ b/target/nextflow/convert/from_spatialdata_to_h5mu/main.nf @@ -3331,7 +3331,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_spatialdata_to_h5mu", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_xenium_to_h5mu/.config.vsh.yaml b/target/nextflow/convert/from_xenium_to_h5mu/.config.vsh.yaml index ede593b..a817c81 100644 --- a/target/nextflow/convert/from_xenium_to_h5mu/.config.vsh.yaml +++ b/target/nextflow/convert/from_xenium_to_h5mu/.config.vsh.yaml @@ -244,7 +244,7 @@ build_info: output: 
"target/nextflow/convert/from_xenium_to_h5mu" executable: "target/nextflow/convert/from_xenium_to_h5mu/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_xenium_to_h5mu/main.nf b/target/nextflow/convert/from_xenium_to_h5mu/main.nf index 2279c1f..6939853 100644 --- a/target/nextflow/convert/from_xenium_to_h5mu/main.nf +++ b/target/nextflow/convert/from_xenium_to_h5mu/main.nf @@ -3348,7 +3348,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_xenium_to_h5mu", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_xenium_to_spatialdata/.config.vsh.yaml b/target/nextflow/convert/from_xenium_to_spatialdata/.config.vsh.yaml index 1a77ac0..0442746 100644 --- a/target/nextflow/convert/from_xenium_to_spatialdata/.config.vsh.yaml +++ b/target/nextflow/convert/from_xenium_to_spatialdata/.config.vsh.yaml @@ -326,7 +326,7 @@ build_info: output: "target/nextflow/convert/from_xenium_to_spatialdata" executable: "target/nextflow/convert/from_xenium_to_spatialdata/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_xenium_to_spatialdata/main.nf b/target/nextflow/convert/from_xenium_to_spatialdata/main.nf index 314f098..c11ba2e 100644 --- a/target/nextflow/convert/from_xenium_to_spatialdata/main.nf +++ 
b/target/nextflow/convert/from_xenium_to_spatialdata/main.nf @@ -3443,7 +3443,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_xenium_to_spatialdata", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml b/target/nextflow/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml index 1857483..43706f7 100644 --- a/target/nextflow/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml +++ b/target/nextflow/convert/from_xenium_to_spatialexperiment/.config.vsh.yaml @@ -224,7 +224,7 @@ build_info: output: "target/nextflow/convert/from_xenium_to_spatialexperiment" executable: "target/nextflow/convert/from_xenium_to_spatialexperiment/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/convert/from_xenium_to_spatialexperiment/main.nf b/target/nextflow/convert/from_xenium_to_spatialexperiment/main.nf index a2a0a31..b437d39 100644 --- a/target/nextflow/convert/from_xenium_to_spatialexperiment/main.nf +++ b/target/nextflow/convert/from_xenium_to_spatialexperiment/main.nf @@ -3315,7 +3315,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/convert/from_xenium_to_spatialexperiment", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git 
a/target/nextflow/dataflow/obsp_block_concatenation/.config.vsh.yaml b/target/nextflow/dataflow/obsp_block_concatenation/.config.vsh.yaml index f555882..9cd35ef 100644 --- a/target/nextflow/dataflow/obsp_block_concatenation/.config.vsh.yaml +++ b/target/nextflow/dataflow/obsp_block_concatenation/.config.vsh.yaml @@ -296,7 +296,7 @@ build_info: output: "target/nextflow/dataflow/obsp_block_concatenation" executable: "target/nextflow/dataflow/obsp_block_concatenation/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/dataflow/obsp_block_concatenation/main.nf b/target/nextflow/dataflow/obsp_block_concatenation/main.nf index c2e78c9..66973fd 100644 --- a/target/nextflow/dataflow/obsp_block_concatenation/main.nf +++ b/target/nextflow/dataflow/obsp_block_concatenation/main.nf @@ -3400,7 +3400,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/dataflow/obsp_block_concatenation", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/mapping/spaceranger_count/.config.vsh.yaml b/target/nextflow/mapping/spaceranger_count/.config.vsh.yaml index dde8507..fe726c1 100644 --- a/target/nextflow/mapping/spaceranger_count/.config.vsh.yaml +++ b/target/nextflow/mapping/spaceranger_count/.config.vsh.yaml @@ -426,7 +426,7 @@ build_info: output: "target/nextflow/mapping/spaceranger_count" executable: "target/nextflow/mapping/spaceranger_count/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" 
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/mapping/spaceranger_count/main.nf b/target/nextflow/mapping/spaceranger_count/main.nf index f97066b..32d5684 100644 --- a/target/nextflow/mapping/spaceranger_count/main.nf +++ b/target/nextflow/mapping/spaceranger_count/main.nf @@ -3548,7 +3548,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/mapping/spaceranger_count", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/spatial_neighborhood_graph/neighbors/.config.vsh.yaml b/target/nextflow/neighbors/spatial_neighborhood_graph/.config.vsh.yaml similarity index 96% rename from target/nextflow/spatial_neighborhood_graph/neighbors/.config.vsh.yaml rename to target/nextflow/neighbors/spatial_neighborhood_graph/.config.vsh.yaml index ef627c8..2ae1816 100644 --- a/target/nextflow/spatial_neighborhood_graph/neighbors/.config.vsh.yaml +++ b/target/nextflow/neighbors/spatial_neighborhood_graph/.config.vsh.yaml @@ -1,5 +1,5 @@ -name: "neighbors" -namespace: "spatial_neighborhood_graph" +name: "spatial_neighborhood_graph" +namespace: "neighbors" version: "niche-compass" authors: - name: "Dorien Roosen" @@ -269,10 +269,10 @@ build_info: config: "src/neighbors/spatial_neighborhood_graph/config.vsh.yaml" runner: "nextflow" engine: "docker|native" - output: "target/nextflow/spatial_neighborhood_graph/neighbors" - executable: "target/nextflow/spatial_neighborhood_graph/neighbors/main.nf" + output: "target/nextflow/neighbors/spatial_neighborhood_graph" + executable: "target/nextflow/neighbors/spatial_neighborhood_graph/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: 
"9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/spatial_neighborhood_graph/neighbors/main.nf b/target/nextflow/neighbors/spatial_neighborhood_graph/main.nf similarity index 99% rename from target/nextflow/spatial_neighborhood_graph/neighbors/main.nf rename to target/nextflow/neighbors/spatial_neighborhood_graph/main.nf index 014c4b9..3cccf0c 100644 --- a/target/nextflow/spatial_neighborhood_graph/neighbors/main.nf +++ b/target/nextflow/neighbors/spatial_neighborhood_graph/main.nf @@ -1,4 +1,4 @@ -// neighbors niche-compass +// spatial_neighborhood_graph niche-compass // // This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative // work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data @@ -3033,8 +3033,8 @@ nextflow.enable.dsl=2 meta = [ "resources_dir": moduleDir.toRealPath().normalize(), "config": processConfig(readJsonBlob('''{ - "name" : "neighbors", - "namespace" : "spatial_neighborhood_graph", + "name" : "spatial_neighborhood_graph", + "namespace" : "neighbors", "version" : "niche-compass", "authors" : [ { @@ -3380,9 +3380,9 @@ meta = [ "config" : "/workdir/root/repo/src/neighbors/spatial_neighborhood_graph/config.vsh.yaml", "runner" : "nextflow", "engine" : "docker|native", - "output" : "/workdir/root/repo/target/nextflow/spatial_neighborhood_graph/neighbors", + "output" : "/workdir/root/repo/target/nextflow/neighbors/spatial_neighborhood_graph", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { @@ -3884,7 +3884,7 @@ meta["defaults"] = [ directives: readJsonBlob('''{ "container" : { "registry" : "images.viash-hub.com", - "image" : 
"vsh/openpipeline_spatial/spatial_neighborhood_graph/neighbors", + "image" : "vsh/openpipeline_spatial/neighbors/spatial_neighborhood_graph", "tag" : "niche-compass" }, "label" : [ diff --git a/target/nextflow/spatial_neighborhood_graph/neighbors/nextflow.config b/target/nextflow/neighbors/spatial_neighborhood_graph/nextflow.config similarity index 98% rename from target/nextflow/spatial_neighborhood_graph/neighbors/nextflow.config rename to target/nextflow/neighbors/spatial_neighborhood_graph/nextflow.config index 9026248..37ae911 100644 --- a/target/nextflow/spatial_neighborhood_graph/neighbors/nextflow.config +++ b/target/nextflow/neighbors/spatial_neighborhood_graph/nextflow.config @@ -1,5 +1,5 @@ manifest { - name = 'spatial_neighborhood_graph/neighbors' + name = 'neighbors/spatial_neighborhood_graph' mainScript = 'main.nf' nextflowVersion = '!>=20.12.1-edge' version = 'niche-compass' diff --git a/target/nextflow/neighbors/spatial_neighborhood_graph/nextflow_labels.config b/target/nextflow/neighbors/spatial_neighborhood_graph/nextflow_labels.config new file mode 100644 index 0000000..541aaad --- /dev/null +++ b/target/nextflow/neighbors/spatial_neighborhood_graph/nextflow_labels.config @@ -0,0 +1,68 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 
'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // CPU resources + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 10 } + withLabel: highcpu { cpus = 20 } + + // Memory resources + withLabel: lowmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } } + + // Disk space + // Nextflow apparently can't handle empty directives, i.e. + // withLabel: lowdisk {} + // so for that reason we have to add a dummy directive + withLabel: lowdisk { + dummyDirective = "dummyValue" + } + withLabel: middisk { + dummyDirective = "dummyValue" + } + withLabel: highdisk { + dummyDirective = "dummyValue" + } + withLabel: veryhighdisk { + dummyDirective = "dummyValue" + } + // NOTE: The above labels intentionally do not have an effect by default. + // The user should set the disk space requirements by adding the following + // to the compute environment: + // + // withLabel: lowdisk { disk = { 20.GB * task.attempt } } + // withLabel: middisk { disk = { 100.GB * task.attempt } } + // withLabel: highdisk { disk = { 200.GB * task.attempt } } + // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } } +} + +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) == 1) { + return max_memory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. 
Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} diff --git a/target/nextflow/spatial_neighborhood_graph/neighbors/nextflow_schema.json b/target/nextflow/neighbors/spatial_neighborhood_graph/nextflow_schema.json similarity index 98% rename from target/nextflow/spatial_neighborhood_graph/neighbors/nextflow_schema.json rename to target/nextflow/neighbors/spatial_neighborhood_graph/nextflow_schema.json index 6221159..c722633 100644 --- a/target/nextflow/spatial_neighborhood_graph/neighbors/nextflow_schema.json +++ b/target/nextflow/neighbors/spatial_neighborhood_graph/nextflow_schema.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "neighbors", + "title": "spatial_neighborhood_graph", "description": "Calculates a spatial neighborhood graph.", "type": "object", "$defs": { diff --git a/target/nextflow/neighbors/spatial_neighborhood_graph/setup_logger.py b/target/nextflow/neighbors/spatial_neighborhood_graph/setup_logger.py new file mode 100644 index 0000000..3ca1cdb --- /dev/null +++ b/target/nextflow/neighbors/spatial_neighborhood_graph/setup_logger.py @@ -0,0 +1,12 @@ +def setup_logger(): + import logging + from sys import stdout + + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler(stdout) + logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s") + console_handler.setFormatter(logFormatter) + logger.addHandler(console_handler) + + return logger diff --git a/target/nextflow/nichecompass/gene_program_mask/.config.vsh.yaml b/target/nextflow/nichecompass/gene_program_mask/.config.vsh.yaml new file mode 100644 index 0000000..f84e92b --- /dev/null +++ b/target/nextflow/nichecompass/gene_program_mask/.config.vsh.yaml @@ -0,0 +1,469 @@ +name: "gene_program_mask" +namespace: "nichecompass" +version: "niche-compass" +authors: +- name: "Dorien Roosen" + roles: + - 
"maintainer" + info: + role: "Core Team Member" + links: + email: "dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +argument_groups: +- name: "Inputs" + arguments: + - type: "file" + name: "--input_gene_orthologs_mapping_file" + description: "Path to a CSV file mapping human genes to mouse orthologs.\nRequired\ + \ for the OmniPath and NicheNet masks if `--species mouse`.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_metabolite_enzymes" + description: "Path to the MeBocost metabolite-enzymes TSV file.\nRequired for\ + \ generating the MeBocost gene program mask.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_metabolite_sensors" + description: "Path to the MeBocost metabolite-sensors TSV file.\nRequired for\ + \ generating the MeBocost gene program mask.\n" + info: null + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Parameters" + arguments: + - type: "string" + name: "--species" + description: "Species of the organism (human or mouse)." + info: null + default: + - "human" + required: false + choices: + - "human" + - "mouse" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--create_omnipath_gene_program_mask" + description: "Whether to create the OmniPath gene program mask." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--create_nichenet_gene_program_mask" + description: "Whether to create the NicheNet gene program mask." 
+ info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--create_mebocost_gene_program_mask" + description: "Whether to create the MeBocost gene program mask." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--create_collectri_tf_gene_program_mask" + description: "Whether to create the CollecTRI TF gene program mask." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--overlap_thresh_target_genes" + description: "The minimum ratio of target genes that need to overlap between a\ + \ GP without source genes and another GP for the GP to be dropped.\nGene programs\ + \ with different source genes are never combined or dropped.\n" + info: null + default: + - 1.0 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Omnipath Parameters" + arguments: + - type: "integer" + name: "--omnipath_min_curation_effort" + description: "Minimum number of times an interaction has to be described in a\ + \ paper and mentioned in a database to be included in the OmniPath gene programs." 
+ info: null + default: + - 2 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "NicheNet Parameters" + arguments: + - type: "string" + name: "--nichenet_version" + description: "Version of the NicheNet ligand receptor network and ligand target\ + \ gene regulatory potential matrix.\n´v2´ is an improved version of ´v1´, and\ + \ has separate files for mouse and human.\n" + info: null + default: + - "v2" + required: false + choices: + - "v1" + - "v2" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--nichenet_keep_target_genes_ratio" + description: "Ratio of target genes that are kept compared to total target genes.\n\ + This ratio is applied over the entire matrix (not on gene program level), and\ + \ determines the ´all_gps_score_keep_threshold´, which will be used to filter\ + \ target genes according to their regulatory potential scores.\n" + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--nichenet_max_n_target_genes_per_gp" + description: "Maximum number of target genes per gene program. If a gene program\ + \ has more target genes than ´max_n_target_genes_per_gp´, only the ´max_n_target_genes_per_gp´\ + \ gene programs with the highest regulatory potential scores will be kept.\n\ + Default value is chosen based on MultiNicheNet specification (s. Browaeys, R.\ + \ et al. MultiNicheNet: a flexible framework for differential cell-cell communication\ + \ analysis from multi-sample multi-condition single-cell transcriptomics data.\ + \ bioRxiv (2023) doi:10.1101/2023.06.13.544751).\n" + info: null + default: + - 250 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Path to the output gene program mask JSON file." 
+ info: null + example: + - "gp_mask.json" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_omnipath_lr_network" + description: "Path to the output OmniPath ligand-receptor network CSV file." + info: null + example: + - "omnipath_lr_network.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_nichenet_lr_network" + description: "Path to the output NicheNet ligand-receptor network CSV file." + info: null + example: + - "nichenet_lr_network.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_nichenet_ligand_target_matrix" + description: "Path to the output NicheNet ligand-target gene regulatory potential\ + \ matrix file." + info: null + example: + - "nichenet_ligand_target_matrix.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_collectri_tf_network" + description: "Path to the output CollecTRI TF-target gene regulatory potential\ + \ network CSV file." + info: null + example: + - "collectri_tf_network.csv" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_omnipath_gp_gene_count_distributions" + description: "Path to save the OmniPath gene program gene count distributions\ + \ plot." + info: null + example: + - "omnipath_gp_gene_count_distributions.svg" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_nichenet_gp_gene_count_distributions" + description: "Path to save the NicheNet gene program gene count distributions\ + \ plot." 
+ info: null + example: + - "nichenet_gp_gene_count_distributions.svg" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_mebocost_gp_gene_count_distributions" + description: "Path to save the MeBocost gene program gene count distributions\ + \ plot." + info: null + example: + - "mebocost_gp_gene_count_distributions.svg" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_collectri_tf_gp_gene_count_distributions" + description: "Path to save the CollecTRI TF gene program gene count distributions\ + \ plot." + info: null + example: + - "collectri_tf_gp_gene_count_distributions.svg" + must_exist: true + create_parent: true + required: false + direction: "output" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +- type: "file" + path: "setup_logger.py" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "Generation of a prior knowledge gene program mask for NicheCompass." 
+test_resources: +- type: "python_script" + path: "test.py" + is_executable: true +- type: "file" + path: "niche" +info: null +status: "enabled" +scope: + image: "public" + target: "public" +repositories: +- type: "vsh" + name: "openpipeline" + repo: "openpipeline" + tag: "v3.0.0" +links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "lowcpu" + - "lowmem" + - "lowdisk" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + 
mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04" + target_registry: "images.viash-hub.com" + target_tag: "niche-compass" + namespace_separator: "/" + setup: + - type: "apt" + packages: + - "libhdf5-dev" + - "python3-pip" + - "python3-dev" + - "python-is-python3" + interactive: false + - type: "docker" + run: + - "pip install torch --index-url https://download.pytorch.org/whl/cu124 \\\n&&\ + \ pip install pyg_lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.6.0+cu124.html\ + \ \n" + - type: "python" + user: false + packages: + - "numpy<2" + - "nichecompass" + upgrade: true + test_setup: + - type: "python" + user: false + packages: + - "viashpy==0.9.0" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/nichecompass/gene_program_mask/config.vsh.yaml" + runner: "nextflow" + engine: "docker|native" + output: "target/nextflow/nichecompass/gene_program_mask" + executable: "target/nextflow/nichecompass/gene_program_mask/main.nf" + viash_version: "0.9.4" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" + git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" +package_config: + name: "openpipeline_spatial" + version: "niche-compass" + info: + test_resources: + - type: "s3" + path: "s3://openpipelines-bio/openpipeline_spatial/resources_test" + dest: "resources_test" + repositories: + - type: "vsh" + name: "openpipeline" + repo: "openpipeline" + tag: "v3.0.0" + 
viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'niche-compass'" + organization: "vsh" + links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" diff --git a/target/nextflow/nichecompass/gene_program_mask/main.nf b/target/nextflow/nichecompass/gene_program_mask/main.nf new file mode 100644 index 0000000..87d8e37 --- /dev/null +++ b/target/nextflow/nichecompass/gene_program_mask/main.nf @@ -0,0 +1,4343 @@ +// gene_program_mask niche-compass +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dorien Roosen (maintainer) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? 
" $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. + * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. 
only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? 
null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + 
} + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets + * + * @param parameterSets a list of parameter sets. + */ +private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique().size() : "All argument sets should have unique ids. Detected ids: $ppIds" +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file.
+ * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." 
+ } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param parValues A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their separator.
+ */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to initialise + * a Vdsl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline.
A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameter set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ?
+ params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. 
Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A nextflow Channel with events. Events are formatted as a tuple that + * first contains the ID of the event and as second element holds a parameter map. + * + * + */ +def channelFromParams(Map params, Map config) { + def processedParams = _paramsToParamSets(params, config) + return Channel.fromList(processedParams) +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf' +def checkUniqueIds(Map args) { + def stopOnError = args.stopOnError == null ? true : args.stopOnError // default: fail on duplicate ids; an explicit false downgrades to warn-and-drop + + def idChecker = new IDChecker() + + return filter { tup -> + if (!idChecker.observe(tup[0])) { + if (stopOnError) { + error "Duplicate id: ${tup[0]}" + } else { + log.warn "Duplicate id: ${tup[0]}, removing duplicate entry" + return false + } + } + return true + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf' +// This helper file will be deprecated soon +preprocessInputsDeprecationWarningPrinted = false + +def preprocessInputsDeprecationWarning() { + if (!preprocessInputsDeprecationWarningPrinted) { + preprocessInputsDeprecationWarningPrinted = true + System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.") + } +} + +/** + * Generate a nextflow Workflow that allows processing a channel of + * Vdsl3 formatted events and apply a Viash config to them: + * - Gather default parameters from the Viash config and make + * sure that they are correctly formatted (see applyConfig method).
+ * - Format the input parameters (also using the applyConfig method). + * - Apply the default parameter to the input parameters. + * - Do some assertions: + * ~ Check if the event IDs in the channel are unique. + * + * The events in the channel are formatted as tuples, with the + * first element of the tuples being a unique id of the parameter set, + * and the second element containing the parameters themselves. + * Optional extra elements of the tuples will be passed to the output as is. + * + * @param args A map that must contain a 'config' key that points + * to a parsed config (see readConfig()). Optionally, a + * 'key' key can be provided which can be used to create a unique + * name for the workflow process. + * + * @return A workflow that allows processing a channel of Vdsl3 formatted events + * and apply a Viash config to them. + */ +def preprocessInputs(Map args) { + preprocessInputsDeprecationWarning() + + def config = args.config + assert config instanceof Map : + "Error in preprocessInputs: config must be a map. " + + "Expected class: Map. Found: config.getClass() is ${config.getClass()}" + def key_ = args.key ?: config.name + + // Get different parameter types (used throughout this function) + def defaultArgs = config.allArguments + .findAll { it.containsKey("default") } + .collectEntries { [ it.plainName, it.default ] } + + map { tup -> + def id = tup[0] + def data = tup[1] + def passthrough = tup.drop(2) + + def new_data = (defaultArgs + data).collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + + if (par != null) { + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + } + + [ name, value ] + } + + [ id, new_data ] + passthrough + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf' +/** + * Run a list of components on a stream of data.
+ * + * @param components: list of Viash VDSL3 modules to run + * @param fromState: a closure, a map or a list of keys to extract from the input data. + * If a closure, it will be called with the id, the data and the component config. + * @param toState: a closure, a map or a list of keys to extract from the output data + * If a closure, it will be called with the id, the output data, the old state and the component config. + * @param filter: filter function to apply to the input. + * It will be called with the id, the data and the component config. + * @param id: id to use for the output data + * If a closure, it will be called with the id, the data and the component config. + * @param auto: auto options to pass to the components + * + * @return: a workflow that runs the components + **/ +def runComponents(Map args) { + log.warn("runComponents is deprecated, use runEach instead") + assert args.components: "runComponents should be passed a list of components to run" + + def components_ = args.components + if (components_ !instanceof List) { + components_ = [ components_ ] + } + assert components_.size() > 0: "pass at least one component to runComponents" + + def fromState_ = args.fromState + def toState_ = args.toState + def filter_ = args.filter + def id_ = args.id + + workflow runComponentsWf { + take: input_ch + main: + + // generate one channel per method + out_chs = components_.collect{ comp_ -> + def comp_config = comp_.config + + def filter_ch = filter_ + ? input_ch | filter{tup -> + filter_(tup[0], tup[1], comp_config) + } + : input_ch + def id_ch = id_ + ? 
// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf'
/**
 * Run a list of components on a stream of data.
 *
 * Each component gets its own branch derived from the input channel
 * (filter -> id -> runIf -> fromState -> run -> toState); the branches of
 * all components are mixed into a single output channel.
 *
 * @param components: list of Viash VDSL3 modules to run.
 *   A single (non-List) value is wrapped in a list.
 * @param fromState: a closure, a map or a list of keys to extract from the input data.
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param toState: a closure, a map or a list of keys to extract from the output data
 *   If a closure, it will be called with the id, the output data, the old state and the component itself.
 * @param filter: filter function to apply to the input.
 *   It will be called with the id, the data and the component itself.
 * @param runIf: optional closure, called with the id, the data and the component itself.
 *   Events for which it returns a truthy value are run through the component;
 *   all other events skip the component and are concatenated to its output
 *   (after `id` has been applied).
 * @param id: id to use for the output data
 *   If a closure, it will be called with the id, the data and the component itself.
 * @param auto: auto options to pass to the components
 *
 * @return: a workflow that runs the components
 **/
def runEach(Map args) {
  assert args.components: "runEach should be passed a list of components to run"

  def components_ = args.components
  if (components_ !instanceof List) {
    components_ = [ components_ ]
  }
  assert components_.size() > 0: "pass at least one component to runEach"

  def fromState_ = args.fromState
  def toState_ = args.toState
  def filter_ = args.filter
  def runIf_ = args.runIf
  def id_ = args.id

  assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf."

  workflow runEachWf {
    take: input_ch
    main:

    // generate one channel per method
    out_chs = components_.collect{ comp_ ->
      // keep only the events accepted by the filter (if any)
      def filter_ch = filter_
        ? input_ch | filter{tup ->
          filter_(tup[0], tup[1], comp_)
        }
        : input_ch
      // replace the event id (first tuple element) when an id was given
      def id_ch = id_
        ? filter_ch | map{tup ->
          def new_id = id_
          if (new_id instanceof Closure) {
            new_id = new_id(tup[0], tup[1], comp_)
          }
          assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}"
          [new_id] + tup.drop(1)
        }
        : filter_ch
      // split into events to run and events that bypass the component
      def chPassthrough = null
      def chRun = null
      if (runIf_) {
        def idRunIfBranch = id_ch.branch{ tup ->
          run: runIf_(tup[0], tup[1], comp_)
          passthrough: true
        }
        chPassthrough = idRunIfBranch.passthrough
        chRun = idRunIfBranch.run
      } else {
        chRun = id_ch
        chPassthrough = Channel.empty()
      }
      // derive the component input from the state; the new data is inserted
      // after the id and the original tuple tail (including the state) is kept
      def data_ch = chRun | map{tup ->
        def new_data = tup[1]
        if (fromState_ instanceof Map) {
          // key0: name in the component input, key1: name in the state
          new_data = fromState_.collectEntries{ key0, key1 ->
            [key0, new_data[key1]]
          }
        } else if (fromState_ instanceof List) {
          new_data = fromState_.collectEntries{ key ->
            [key, new_data[key]]
          }
        } else if (fromState_ instanceof Closure) {
          new_data = fromState_(tup[0], new_data, comp_)
        }
        tup.take(1) + [new_data] + tup.drop(1)
      }
      def out_ch = data_ch
        | comp_.run(
          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
        )
      // merge the component output (tup[1]) back into the old state (tup[2])
      def post_ch = toState_
        ? out_ch | map{tup ->
          def output = tup[1]
          def old_state = tup[2]
          def new_state = null
          if (toState_ instanceof Map) {
            // key0: name in the new state, key1: name in the component output
            new_state = old_state + toState_.collectEntries{ key0, key1 ->
              [key0, output[key1]]
            }
          } else if (toState_ instanceof List) {
            new_state = old_state + toState_.collectEntries{ key ->
              [key, output[key]]
            }
          } else if (toState_ instanceof Closure) {
            new_state = toState_(tup[0], output, old_state, comp_)
          }
          [tup[0], new_state] + tup.drop(3)
        }
        : out_ch

      // events that did not satisfy runIf are appended to this component's output
      def return_ch = post_ch
        | concat(chPassthrough)

      return_ch
    }

    // mix all results
    output_ch =
      (out_chs.size == 1)
        ? out_chs[0]
        : out_chs[0].mix(*out_chs.drop(1))

    emit: output_ch
  }

  return runEachWf
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf'
/**
 * Fill in the implicit defaults of a single argument definition.
 *
 * The map is modified in place and also returned.
 *
 * @param arg The argument definition, as a Map with at least `name` and `type`.
 *
 * @return The same map, with `multiple`, `required`, `direction`,
 *   `multiple_sep` and `plainName` set; file arguments additionally get
 *   `must_exist` and `create_parent`, output files without a default get a
 *   templated default filename, and boolean flags get their fixed default.
 */
def _processArgument(arg) {
  arg.multiple = arg.multiple != null ? arg.multiple : false
  arg.required = arg.required != null ? arg.required : false
  arg.direction = arg.direction != null ? arg.direction : "input"
  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";"
  // argument name without its leading dashes
  arg.plainName = arg.name.replaceAll("^-*", "")

  if (arg.type == "file") {
    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
  }

  // add default values to output files which haven't already got a default
  if (arg.type == "file" && arg.direction == "output" && arg.default == null) {
    def mult = arg.multiple ? "_*" : ""
    // arg.default is null in this branch, so only the example can supply
    // the file extension
    def extSearch = arg.example
    if (extSearch instanceof List) {
      // an empty example list simply yields no extension
      extSearch = extSearch ? extSearch[0] : null
    }
    def ext = extSearch?.toString()?.find("\\.[^\\.]+\$") ?: ""
    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
    if (arg.multiple) {
      arg.default = [arg.default]
    }
  }

  // single-valued arguments should not carry list-valued defaults/examples
  if (!arg.multiple) {
    if (arg.default != null && arg.default instanceof List) {
      arg.default = arg.default[0]
    }
    if (arg.example != null && arg.example instanceof List) {
      arg.example = arg.example[0]
    }
  }

  if (arg.type == "boolean_true") {
    arg.default = false
  }
  if (arg.type == "boolean_false") {
    arg.default = true
  }

  arg
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf'
/**
 * Render the help text of a single (processed) argument.
 *
 * @param param The argument definition; reads `plainName`, `type`,
 *   `direction`, `required`, `multiple`, `must_exist`, `multiple_sep`,
 *   `default`, `example`, `choices`, `min`, `max` and `description`.
 *
 * @return A String starting with a newline and the `--name` of the argument,
 *   followed by one line per non-empty property and the wrapped description.
 */
def _generateArgumentHelp(param) {
  // alternatives are not supported
  // def names = param.alternatives ::: List(param.name)

  def unnamedProps = [
    ["required parameter", param.required],
    ["multiple values allowed", param.multiple],
    ["output", param.direction.toLowerCase() == "output"],
    ["file must exist", param.type == "file" && param.must_exist]
  ].findAll{it[1]}.collect{it[0]}

  // render a default/example value; lists are joined with the argument's separator
  def asText = { value ->
    if (value == null) {
      return null
    }
    value instanceof List ?
      value.join(param.multiple_sep != null ? param.multiple_sep : ", ") :
      value.toString()
  }
  def dflt = asText(param.default)
  def example = asText(param.example)
  def min = param.min?.toString()
  def max = param.max?.toString()

  // Literal String.replace() is used on purpose: with replaceAll() the
  // backslash in the replacement string escapes the next character, which
  // turned a newline into "n" and left double quotes unescaped.
  def escapeChoice = { choice ->
    def escaped = choice.replace("\n", "\\n").replace("\"", "\\\"")
    escaped.contains(",") || escaped != choice ? "\"" + escaped + "\"" : escaped
  }
  def choices = param.choices == null ?
    null :
    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"

  def namedPropsStr = [
    ["type", ([param.type] + unnamedProps).join(", ")],
    ["default", dflt],
    ["example", example],
    ["choices", choices],
    ["min", min],
    ["max", max]
  ]
    .findAll{it[1]}
    // keep every property on a single line: show newlines as a literal \n
    .collect{"\n " + it[0] + ": " + it[1].replace("\n", "\\n")}
    .join("")

  def descStr = param.description == null ?
    "" :
    _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")

  "\n --" + param.plainName +
    namedPropsStr +
    descStr
}
// based on Format._paragraphWrap
/**
 * Wrap a text into lines of at most `maxLength` characters.
 *
 * The text is split into paragraphs on newlines; each paragraph is split
 * into words on single whitespace characters and the words are greedily
 * packed into lines. A word longer than `maxLength` is kept on its own line.
 *
 * @param str The text to wrap, as a String.
 * @param maxLength The maximum line length.
 *
 * @return A List of lines. An empty or whitespace-only paragraph yields one
 *   empty line.
 */
def _paragraphWrap(str, maxLength) {
  def outLines = []
  str.split("\n").each{par ->
    def words = par.split("\\s").toList()

    // Index-based iteration instead of List.pop(): a whitespace-only
    // paragraph splits into an empty list, on which pop() would throw.
    def line = words.isEmpty() ? "" : words[0]
    for (word in words.drop(1)) {
      if (line.length() + word.length() + 1 <= maxLength) {
        line = line + " " + word
      } else {
        outLines.add(line)
        line = word
      }
    }
    outLines.add(line)
  }
  return outLines
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf'
/**
 * Resolve a path relative to the current file.
 *
 * Values that are not Strings are returned untouched. Absolute strings are
 * converted with `file(..., hidden: true)`; all other strings are resolved
 * as a sibling of `parentPath`.
 *
 * @param str The path to resolve, as a String.
 * @param parentPath The path to resolve relative to, as a Path.
 *
 * @return The path that may have been resolved, as a Path.
 */
def _resolveSiblingIfNotAbsolute(str, parentPath) {
  if (str instanceof String) {
    return _stringIsAbsolutePath(str) ?
      file(str, hidden: true) :
      parentPath.resolveSibling(str)
  }
  return str
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf'
/**
 * Trace observer that appends a copy of every completed or cached task's
 * trace record to a caller-supplied list.
 */
class CustomTraceObserver implements nextflow.trace.TraceObserver {
  List traces

  CustomTraceObserver(List traces) {
    this.traces = traces
  }

  // Copy the record's store, clear its 'script' entry and keep the copy.
  private void storeTrace(nextflow.trace.TraceRecord trace) {
    def record = trace.store.clone()
    record.script = null
    traces.add(record)
  }

  @Override
  void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) {
    storeTrace(trace)
  }

  @Override
  void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) {
    storeTrace(trace)
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf'

/**
 * Read a csv file into a list of maps.
 *
 * Lines starting with '#' are skipped. The first remaining line is the
 * header. Fields are split on commas outside double quotes and one pair of
 * surrounding double quotes is removed. Empty fields are left out of the
 * resulting map.
 *
 * @param file_path The csv file, as a Path or as anything `file()` accepts.
 *
 * @return A List with one Map (header name -> field value) per data row.
 */
def readCsv(file_path) {
  def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path

  // todo: allow escaped quotes in string
  // todo: allow single quotes?
  def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
  def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')

  // `m` is declared locally so it does not leak into the script binding
  def unquote = { field ->
    def m = removeQuote.matcher(field)
    m.find() ? m.replaceFirst('$1') : field
  }

  def output = []
  def br = java.nio.file.Files.newBufferedReader(inputFile)
  try {
    def row = -1
    def header = null
    def line = null
    while ((line = br.readLine()) != null) {
      row++
      if (line.startsWith("#")) {
        continue
      }
      def fields = splitRegex.split(line, -1)
      if (header == null) {
        header = fields.collect{ unquote(it) }
        continue
      }
      // empty fields become null and are dropped from the row map below
      def data = fields.collect{ it == "" ? null : unquote(it) }
      assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"

      def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
      output.add(dataMap)
    }
    assert header != null: "CSV file should contain a header"
  } finally {
    // close the reader on every path, including failed assertions
    br.close()
  }

  output
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf'
// Custom constructor to modify how certain objects are parsed from YAML
class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor {
  // directory against which '!file' scalars are resolved; may be null
  Path root

  // Turns the scalar of a '!file' node into a Path.
  class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct {
    public Object construct(org.yaml.snakeyaml.nodes.Node node) {
      String filename = (String) constructScalar(node);
      // without a root the filename is used as-is
      return root != null ?
        root.resolve(filename) :
        java.nio.file.Paths.get(filename);
    }
  }

  CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) {
    super(options)
    this.root = root
    // Handling !file tag and parse it back to a File type
    def fileTag = new org.yaml.snakeyaml.nodes.Tag("!file")
    this.yamlConstructors.put(fileTag, new ConstructPath())
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf'
// Custom representer to modify how certain objects are represented in YAML.
// Path and File values are written as scalars tagged with '!file'.
class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer {
  // path against which file paths are relativized; null keeps paths as-is
  Path relativizer

  class RepresentPath implements org.yaml.snakeyaml.representer.Represent {
    // Convert a File or Path into the string that is stored in the YAML.
    // Throws IllegalArgumentException for any other type.
    public String getFileName(Object obj) {
      if (obj instanceof File) {
        obj = ((File) obj).toPath();
      }
      if (obj !instanceof Path) {
        throw new IllegalArgumentException("Object: " + obj + " is not a Path or File");
      }
      def path = (Path) obj;

      if (relativizer != null) {
        // NOTE(review): Path.relativize requires both paths to be of the
        // same kind (both absolute or both relative) -- confirm callers
        // guarantee this.
        return relativizer.relativize(path).toString()
      } else {
        return path.toString()
      }
    }

    // Represent the value as a scalar node carrying the '!file' tag.
    public org.yaml.snakeyaml.nodes.Node representData(Object data) {
      String filename = getFileName(data);
      def tag = new org.yaml.snakeyaml.nodes.Tag("!file");
      return representScalar(tag, filename);
    }
  }
  CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) {
    super(options)
    this.relativizer = relativizer
    // NOTE(review): sun.nio.fs.UnixPath is a JDK-internal, platform-specific
    // class; presumably registered because representers are looked up by
    // concrete class -- verify this resolves on all target platforms.
    this.representers.put(sun.nio.fs.UnixPath, new RepresentPath())
    this.representers.put(Path, new RepresentPath())
    this.representers.put(File, new RepresentPath())
  }
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf'
/**
 * Create a workflow that reads state yaml files and emits them as events.
 *
 * The argument definitions of `config` are replaced by a fixed set
 * (`--id`, `--input_states`, `--filter`, `--rename_keys`, `--settings`);
 * the workflow then parses `params` against that set.
 *
 * @param params The parameters; a deep clone is used and an `id` of "auto"
 *   is added when none is present.
 * @param config The component config; a deep clone is used.
 *
 * @return A workflow emitting one `[id, state]` event per state file that
 *   passes the filter and (when `--rename_keys` is given) contains all
 *   requested keys.
 */
def findStates(Map params, Map config) {
  def auto_config = deepClone(config)
  def auto_params = deepClone(params)

  auto_config = auto_config.clone()
  // override arguments
  auto_config.argument_groups = []
  auto_config.arguments = [
    [
      type: "string",
      name: "--id",
      description: "A dummy identifier",
      required: false
    ],
    [
      type: "file",
      name: "--input_states",
      example: "/path/to/input/directory/**/state.yaml",
      description: "Path to input directory containing the datasets to be integrated.",
      required: true,
      multiple: true,
      multiple_sep: ";"
    ],
    [
      type: "string",
      name: "--filter",
      example: "foo/.*/state.yaml",
      description: "Regex to filter state files by path.",
      required: false
    ],
    // to do: make this a yaml blob?
    [
      type: "string",
      name: "--rename_keys",
      example: ["newKey1:oldKey1", "newKey2:oldKey2"],
      description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.",
      required: false,
      multiple: true,
      multiple_sep: ";"
    ],
    [
      type: "string",
      name: "--settings",
      example: '{"output_dataset": "dataset.h5ad", "k": 10}',
      description: "Global arguments as a JSON glob to be passed to all components.",
      required: false
    ]
  ]
  if (!(auto_params.containsKey("id"))) {
    auto_params["id"] = "auto"
  }

  // run auto config through processConfig once more
  auto_config = processConfig(auto_config)

  workflow findStatesWf {
    helpMessage(auto_config)

    output_ch =
      channelFromParams(auto_params, auto_config)
        | flatMap { autoId, args ->

          def globalSettings = args.settings ? readYamlBlob(args.settings) : [:]

          // look for state files in input dir
          def stateFiles = args.input_states

          // filter state files by regex
          // (the regex must match the whole path, see matches())
          if (args.filter) {
            stateFiles = stateFiles.findAll{ stateFile ->
              def stateFileStr = stateFile.toString()
              def matcher = stateFileStr =~ args.filter
              matcher.matches()}
          }

          // read in states
          // the event id is taken from the 'id' entry of each state file
          def states = stateFiles.collect { stateFile ->
            def state_ = readTaggedYaml(stateFile)
            [state_.id, state_]
          }

          // construct renameMap
          // NOTE(review): globalSettings is only merged into the states inside
          // this branch; without --rename_keys the settings are not applied --
          // confirm this is intended.
          if (args.rename_keys) {
            def renameMap = args.rename_keys.collectEntries{renameString ->
              def split = renameString.split(":")
              assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values"
              split
            }

            // rename keys in state, only let states through which have all keys
            // also add global settings
            states = states.collectMany{id, state ->
              def newState = [:]

              for (key in renameMap.keySet()) {
                def origKey = renameMap[key]
                if (!(state.containsKey(origKey))) {
                  // drop the whole state when a requested key is missing
                  return []
                }
                newState[key] = state[origKey]
              }

              [[id, globalSettings + newState]]
            }
          }

          states
        }
    emit:
    output_ch
  }

  return findStatesWf
}
// this assumes that the state contains no other values other than those specified in the config
/**
 * Create a workflow that publishes the output files described by a config.
 *
 * For each `[id, state, origState]` event, the output file arguments of the
 * config are looked up in the state, their target filenames are derived from
 * the templates in `origState`, and the resulting file lists are passed to
 * `publishFilesProc`. The workflow emits its input channel unchanged.
 *
 * @param args A Map with
 *   - config: the processed component config (required),
 *   - key: the module key used in filename templates (defaults to config.name).
 *
 * @return The publishing workflow.
 */
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // id and key are inserted as literal text; without quoting, a '$'
          // or '\' in them would be interpreted by replaceAll()
          def idRepl_ = java.util.regex.Matcher.quoteReplacement(id_.toString())
          def keyRepl_ = java.util.regex.Matcher.quoteReplacement(key_.toString())

          // the processed state is a list of maps, where
          // - inputPath is a List[Path]
          // - outputFilename is a List[String]
          // - (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's an
                // optional argument for which the component did
                // not generate any output OR multiple channels were emitted
                // and the output was just not added to using the channel
                // that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it should be stored
                // in the state as-is, but is not something that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', idRepl_)
                  .replaceAll('\\$\\{id\\}', idRepl_)
                  .replaceAll('\\$key', keyRepl_)
                  .replaceAll('\\$\\{key\\}', keyRepl_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // turn the list of per-file maps into one map of lists
                  def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
                    [key, outputPerFile.collect{dic -> dic[key]}]
                  }
                  return [[key: plainName_] + transposedOutputs]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
/**
 * Create a workflow that stores the state of every channel item as a yaml file.
 *
 * @param args Named arguments:
 *   - key (required): module name; substituted for '$key' / '${key}' in the
 *     yaml filename template.
 *   - output_state or outputState (optional): yaml filename template,
 *     defaults to '$id.$key.state.yaml'.
 * @return The workflow 'publishStatesWf'. It takes a channel of
 *   [id, state, ...] tuples, passes [id, yamlBlob, yamlFilename] to
 *   publishStatesProc and emits its input channel.
 */
def publishStates(Map args) {
  def key_ = args.get("key")
  def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml'))

  assert key_ != null : "publishStates: key must be specified"

  workflow publishStatesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1]

          // String.replaceAll interprets '$' and '\' in the replacement string
          // as group references / escapes, so the substituted values must be
          // quoted to be inserted literally.
          def idRepl_ = java.util.regex.Matcher.quoteReplacement(id_.toString())
          def keyRepl_ = java.util.regex.Matcher.quoteReplacement(key_.toString())

          def yamlFilename = yamlTemplate_
            .replaceAll('\\$id', idRepl_)
            .replaceAll('\\$\\{id\\}', idRepl_)
            .replaceAll('\\$key', keyRepl_)
            .replaceAll('\\$\\{key\\}', keyRepl_)

          // TODO: verify that the paths in state_ match up with the published filenames

          // convert state to yaml blob
          def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))

          [id_, yamlBlob_, yamlFilename]
        }
        | publishStatesProc
    emit: input_ch
  }
  return publishStatesWf
}
/**
 * Create a workflow that stores the output state of a component as a yaml
 * file, using the component config to decide which entries are written.
 *
 * This assumes that the state contains no other values other than those
 * specified in the config.
 *
 * @param args Named arguments:
 *   - config (required): the component config; its 'allArguments' with
 *     direction "output" determine the entries of the yaml.
 *   - key (optional): module name, defaults to config.name.
 * @return The workflow 'publishStatesSimpleWf'. It takes a channel of
 *   [id, state, origState] tuples, passes [id, yamlBlob, yamlFilename] to
 *   publishStatesProc and emits its input channel.
 */
def publishStatesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishStatesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishStatesByConfig: key must be specified"

  workflow publishStatesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // String.replaceAll interprets '$' and '\' in the replacement string
          // as group references / escapes, so the substituted values must be
          // quoted to be inserted literally.
          def idRepl_ = java.util.regex.Matcher.quoteReplacement(id_.toString())
          def keyRepl_ = java.util.regex.Matcher.quoteReplacement(key_.toString())

          // fill in the $id / ${id} / $key / ${key} placeholders of a template
          def instantiate = { template ->
            template
              .replaceAll('\\$id', idRepl_)
              .replaceAll('\\$\\{id\\}', idRepl_)
              .replaceAll('\\$key', keyRepl_)
              .replaceAll('\\$\\{key\\}', keyRepl_)
          }

          // TODO: allow overriding the state.yaml template
          // TODO TODO: if auto.publish == "state", add output_state as an argument
          def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml'
          def yamlFilename = instantiate(yamlTemplate)
          def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()

          // turn an output filename into the path stored in the yaml; when the
          // yaml lives in a subdirectory (e.g. the id contains a slash) the
          // path is made relative to that directory
          def toStatePath = { fname ->
            def path_ = java.nio.file.Paths.get(fname)
            yamlDir != null ? yamlDir.relativize(path_) : path_
          }

          // the processed state is a list of [key, value] tuples, where
          //   - key is a String
          //   - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
          //   - (key, value) are the tuples that will be saved to the state.yaml file
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's an
                // optional argument for which the component did
                // not generate any output
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it should be stored
                // in the state as-is, but is not something that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[key: plainName_, value: value]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = instantiate(filenameTemplate)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    toStatePath(filename.replace("*", ix.toString()))
                  }
                  return [["key": plainName_, "value": outputPerFile]]
                } else {
                  return [["key": plainName_, value: toStatePath(filename)]]
                }
              }

          def updatedState_ = processedState.collectEntries{[it.key, it.value]}

          // convert state to yaml blob
          def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)

          [id_, yamlBlob_, yamlFilename]
        }
        | publishStatesProc
    emit: input_ch
  }
  return publishStatesSimpleWf
}
Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript 
/**
 * Assert that a map only contains expected keys and has all required keys.
 *
 * @param map The object to check; must be a Map.
 * @param expectedKeys Collection of keys that are allowed to occur in the map.
 * @param requiredKeys Collection of keys that must occur in the map.
 * @param mapName Name used in the error messages; may be null or empty.
 * @throws AssertionError if 'map' is not a Map, contains a key that is not
 *   in 'expectedKeys', or lacks one of the 'requiredKeys'.
 */
def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
  map.forEach { key, val ->
    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
  }
  requiredKeys.forEach { requiredKey ->
    // report the key that is actually missing; the closure parameter is
    // 'requiredKey', there is no 'key' variable in this scope
    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
  }
}
/cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? 
":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE 
maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
+ } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? 
params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." 
/**
 * Normalise the 'fromState' workflow argument into a closure (or null).
 *
 * @param fromState null, a Closure, a Map[String, String] of
 *   [argument name: state key], or a List[String] of names that are used
 *   both as argument name and as state key.
 * @param key_ Name of the module, only used in error messages.
 * @param config_ The component config; its 'allArguments' are consulted to
 *   determine the required input arguments.
 * @return null if 'fromState' is null; the closure itself if a Closure was
 *   passed; otherwise a closure that takes a channel tuple, reads the state
 *   from element 1 and returns a Map with the selected entries.
 * @throws AssertionError if 'fromState' has an unsupported type, or if a
 *   List / Map does not consist of Strings only.
 */
def _processFromState(fromState, key_, config_) {
  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
    "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
  if (fromState == null) {
    return null
  }

  // if fromState is a List, convert to map
  if (fromState instanceof List) {
    // check whether fromstate is a list[string]
    assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings"
    fromState = fromState.collectEntries{[it, it]}
  }

  // if fromState is a map, convert to closure
  if (fromState instanceof Map) {
    // check whether fromstate is a map[string, string]
    assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings"
    assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings"
    def fromStateMap = fromState.clone()
    // use the config that was passed in rather than a 'meta' variable from
    // the enclosing script, which is not a parameter of this function
    // NOTE(review): other code in this file compares direction against the
    // lowercase values "input" / "output"; with "Input" this list presumably
    // always ends up empty, so the exception below is never thrown. Left
    // unchanged to preserve current behaviour -- confirm the intended casing.
    def requiredInputNames = config_.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName}
    // turn the map into a closure to be used later on
    fromState = { it ->
      def state = it[1]
      assert state instanceof Map : "Error in module '$key_': the state is not a Map"
      def data = fromStateMap.collectMany{newkey, origkey ->
        if (state.containsKey(origkey)) {
          // copy the value from the state under the new name
          [[newkey, state[origkey]]]
        } else if (!requiredInputNames.contains(origkey)) {
          // absent and not required: silently skip
          []
        } else {
          throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state")
        }
      }.collectEntries()
      data
    }
  }

  return fromState
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf'

/**
 * Channel step used inside a pipe chain: when `workflowArgs.debug` is set it
 * applies `view` to print every tuple (prefixed with the module key and
 * `debugKey`), otherwise it applies an identity `map`.
 *
 * @param workflowArgs processed workflow arguments (`debug` and `key` are read)
 * @param debugKey     label included in the printed line
 */
def _debug(workflowArgs, debugKey) {
  if (workflowArgs.debug) {
    view { "process '${workflowArgs.key}' $debugKey tuple: $it" }
  } else {
    map { it }
  }
}

/**
 * Build the Nextflow workflow that wraps this component.
 *
 * The returned workflow validates and normalises incoming `[id, data, ...]`
 * tuples, optionally branches (`runIf`) and filters them, derives the
 * arguments via `fromState`, fills in defaults and overrides, runs the inner
 * workflow, merges the events from all emitted channels per id, folds the
 * output back into the state via `toState`, optionally publishes, and finally
 * emits `[id, new_state, ...]` concatenated with the passthrough branch.
 *
 * depends on: innerWorkflowFactory
 *
 * @param args          user supplied workflow arguments
 * @param defaultWfArgs defaults the user arguments are combined with
 * @param meta          component meta map; `meta.config` is read
 * @return the workflow, cloned under the module key, with `run` and `config`
 *         attached through its metaClass
 */
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
  def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
  def key_ = workflowArgs["key"]
  def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}

  workflow workflowInstance {
    take: input_

    main:
    def chModified = input_
      | checkUniqueIds([:])
      | _debug(workflowArgs, "input")
      | map { tuple ->
        tuple = deepClone(tuple)

        if (workflowArgs.map) {
          tuple = workflowArgs.map(tuple)
        }
        if (workflowArgs.mapId) {
          tuple[0] = workflowArgs.mapId(tuple[0])
        }
        if (workflowArgs.mapData) {
          tuple[1] = workflowArgs.mapData(tuple[1])
        }
        if (workflowArgs.mapPassthrough) {
          tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2))
        }

        // check tuple
        assert tuple instanceof List :
          "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
        assert tuple.size() >= 2 :
          "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: tuple.size() == ${tuple.size()}"

        // check id field
        if (tuple[0] instanceof GString) {
          tuple[0] = tuple[0].toString()
        }
        assert tuple[0] instanceof CharSequence :
          "Error in module '${key_}': first element of tuple in channel should be a String\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: ${tuple[0]}"

        // match file to input file
        if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) {
          def inputFiles = meta.config.allArguments
            .findAll { it.type == "file" && it.direction == "input" }

          assert inputFiles.size() == 1 :
              "Error in module '${key_}' id '${tuple[0]}'.\n" +
              " Anonymous file inputs are only allowed when the process has exactly one file input.\n" +
              " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}"

          tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries()
        }

        // check data field
        assert tuple[1] instanceof Map :
          "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

        // rename keys of data field in tuple
        if (workflowArgs.renameKeys) {
          assert workflowArgs.renameKeys instanceof Map :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}"
          assert tuple[1] instanceof Map :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

          // TODO: allow renameKeys to be a function?
          workflowArgs.renameKeys.each { newKey, oldKey ->
            assert newKey instanceof CharSequence :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}"
            assert oldKey instanceof CharSequence :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}"
            // The message must interpolate key_ (the module key defined above).
            // It previously referenced an undefined `key`, so a failing
            // assertion would have raised a missing-property error instead of
            // reporting which data key is absent.
            assert tuple[1].containsKey(oldKey) :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'"
            tuple[1].put(newKey, tuple[1][oldKey])
          }
          // drop the old names only after all new names have been written
          tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey })
        }
        tuple
      }


    // split into tuples that are processed and tuples that bypass the module
    def chRun = null
    def chPassthrough = null
    if (workflowArgs.runIf) {
      def runIfBranch = chModified.branch{ tup ->
        run: workflowArgs.runIf(tup[0], tup[1])
        passthrough: true
      }
      chRun = runIfBranch.run
      chPassthrough = runIfBranch.passthrough
    } else {
      chRun = chModified
      chPassthrough = Channel.empty()
    }

    def chRunFiltered = workflowArgs.filter ?
      chRun | filter{workflowArgs.filter(it)} :
      chRun

    def chArgs = workflowArgs.fromState ?
      chRunFiltered | map{
        def new_data = workflowArgs.fromState(it.take(2))
        [it[0], new_data]
      } :
      chRunFiltered | map {tup -> tup.take(2)}

    // fill in defaults
    def chArgsWithDefaults = chArgs
      | map { tuple ->
        def id_ = tuple[0]
        def data_ = tuple[1]

        // TODO: could move fromState to here

        // fetch default params from functionality
        def defaultArgs = meta.config.allArguments
          .findAll { it.containsKey("default") }
          .collectEntries { [ it.plainName, it.default ] }

        // fetch overrides in params
        def paramArgs = meta.config.allArguments
          .findAll { par ->
            def argKey = key_ + "__" + par.plainName
            params.containsKey(argKey)
          }
          .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] }

        // fetch overrides in data
        def dataArgs = meta.config.allArguments
          .findAll { data_.containsKey(it.plainName) }
          .collectEntries { [ it.plainName, data_[it.plainName] ] }

        // combine params; entries further to the right take precedence
        def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs

        // remove arguments with explicit null values
        combinedArgs
          .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"}

        combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_)

        [id_, combinedArgs] + tuple.drop(2)
      }

    // TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
    def chInitialOutputMulti = chArgsWithDefaults
      | _debug(workflowArgs, "processed")
      // run workflow
      | innerWorkflowFactory(workflowArgs)
    def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
    assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
    // Add a channel ID to the events, which designates the channel the event was emitted from as a running number
    // This number is used to sort the events later when the events are gathered from across the channels.
    def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
      def newChannel = channel
        | map {tuple ->
          assert tuple instanceof List :
            "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
            " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
            " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"

          def newEvent = [channelIndex] + tuple
          return newEvent
        }
      return newChannel
    }
    // Put the events into one channel; also covers the case where only one channel is emitted
    def chInitialOutput = chInitialOutputList.size() > 1 ? \
      chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
      chInitialOutputListWithIndexedEvents[0]
    def chInitialOutputProcessed = chInitialOutput
      | map { tuple ->
        def channelId = tuple[0]
        def id_ = tuple[1]
        def output_ = tuple[2]

        // see if output map contains metadata
        def meta_ =
          output_ instanceof Map && output_.containsKey("_meta") ?
          output_["_meta"] :
          [:]
        def join_id = meta_.join_id ?: id_

        // remove metadata
        output_ = output_.findAll{k, v -> k != "_meta"}

        // check value types
        output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)

        [join_id, channelId, id_, output_]
      }
      // | view{"chInitialOutput: ${it.take(3)}"}

    // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
    def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
      // input tuple format: [join_id, channel_id, id, output, prev_state, ...]
      // output tuple format: [join_id, channel_id, id, new_state, ...]
      | map{ tup ->
        def new_state = workflowArgs.toState(tup.drop(2).take(3))
        tup.take(3) + [new_state] + tup.drop(5)
      }
    if (workflowArgs.auto.publish == "state") {
      def chPublishFiles = chPublishWithPreviousState
        // input tuple format: [join_id, channel_id, id, new_state, ...]
        // output tuple format: [join_id, channel_id, id, new_state]
        | map{ tup ->
          tup.take(4)
        }

      safeJoin(chPublishFiles, chArgsWithDefaults, key_)
        // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
        // output tuple format: [id, new_state, orig_state]
        | map { tup ->
          tup.drop(2).take(3)
        }
        | publishFilesByConfig(key: key_, config: meta.config)
    }
    // Join the state from the events that were emitted from different channels
    def chJoined = chInitialOutputProcessed
      | map {tuple ->
        def join_id = tuple[0]
        def channel_id = tuple[1]
        def id = tuple[2]
        def other = tuple.drop(3)
        // Below, groupTuple is used to join the events. To make sure resuming a workflow
        // keeps working, the output state must be deterministic. This means the state needs to be
        // sorted; groupTuple has a 'sort' argument for this. This argument can be set to 'hash',
        // but hashing the state when it is large can be problematic in terms of performance.
        // Therefore, a custom comparator function is provided. We add the channel ID to the
        // states so that we can use the channel ID to sort the items.
        def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
        // A comparator that is provided to groupTuple's 'sort' argument is applied
        // to all elements of the event tuple (that is not the 'id'). The comparator
        // closure that is used below expects the input to be List. So the join_id and
        // channel_id must also be wrapped in a list.
        [[join_id], [channel_id], id] + stateWithChannelID
      }
      | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
      | map {join_ids, _, id, statesWithChannelID ->
        // Remove the channel IDs from the states
        def states = statesWithChannelID.collect{it[1]}
        def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
        assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
        def newJoinIdUnique = newJoinId[0]

        // Merge the states from the different channels
        def newState = states.inject([:]){ old_state, state_to_add ->
          return old_state + state_to_add.collectEntries{k, v ->
            if (!multipleArgs.contains(k)) {
              // if the key is not a multiple argument, we expect only one value
              if (old_state.containsKey(k)) {
                assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
              }
              [k, v]
            } else {
              // if the key is a multiple argument, append the different values into one list
              def prevValue = old_state.getOrDefault(k, [])
              def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
              [k, prevValueAsList + v]
            }
          }
        }

        _checkAllRequiredOuputsPresent(newState, meta.config, id, key_)

        // simplify output if need be
        if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
          newState = newState.values()[0]
        }

        return [newJoinIdUnique, id, newState]
      }

    // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
    def chNewState = safeJoin(chJoined, chRunFiltered, key_)
      // input tuple format: [join_id, id, output, prev_state, ...]
      // output tuple format: [join_id, id, new_state, ...]
      | map{ tup ->
        def new_state = workflowArgs.toState(tup.drop(1).take(3))
        tup.take(2) + [new_state] + tup.drop(4)
      }

    if (workflowArgs.auto.publish == "state") {
      def chPublishStates = chNewState
        // input tuple format: [join_id, id, new_state, ...]
        // output tuple format: [join_id, id, new_state]
        | map{ tup ->
          tup.take(3)
        }

      safeJoin(chPublishStates, chArgsWithDefaults, key_)
        // input tuple format: [join_id, id, new_state, orig_state, ...]
        // output tuple format: [id, new_state, orig_state]
        | map { tup ->
          tup.drop(1).take(3)
        }
        | publishStatesByConfig(key: key_, config: meta.config)
    }
    // NOTE(review): chReturn is intentionally assigned without `def`; the
    // `emit:` section presumably resolves the name from the workflow binding
    // rather than from local variables -- confirm before "tidying" this.
    chReturn = chNewState
      | map { tup ->
        // input tuple format: [join_id, id, new_state, ...]
        // output tuple format: [id, new_state, ...]
        tup.drop(1)
      }
      | _debug(workflowArgs, "output")
      | concat(chPassthrough)

    emit: chReturn
  }

  def wf = workflowInstance.cloneWithName(key_)

  // add factory function
  wf.metaClass.run = { runArgs ->
    workflowFactory(runArgs, workflowArgs, meta)
  }
  // add config to module for later introspection
  wf.metaClass.config = meta.config

  return wf
}
+ "type" : "file", + "name" : "--input_metabolite_enzymes", + "description" : "Path to the MeBocost metabolite-enzymes TSV file.\nRequired for generating the MeBocost gene program mask.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_metabolite_sensors", + "description" : "Path to the MeBocost metabolite-sensors TSV file.\nRequired for generating the MeBocost gene program mask.\n", + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--species", + "description" : "Species of the organism (human or mouse).", + "default" : [ + "human" + ], + "required" : false, + "choices" : [ + "human", + "mouse" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--create_omnipath_gene_program_mask", + "description" : "Whether to create the OmniPath gene program mask.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--create_nichenet_gene_program_mask", + "description" : "Whether to create the NicheNet gene program mask.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--create_mebocost_gene_program_mask", + "description" : "Whether to create the MeBocost gene program mask.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--create_collectri_tf_gene_program_mask", + "description" : "Whether to create the CollecTRI TF gene program mask.", + "default" : [ 
+ true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--overlap_thresh_target_genes", + "description" : "The minimum ratio of target genes that need to overlap between a GP without source genes and another GP for the GP to be dropped.\nGene programs with different source genes are never combined or dropped.\n", + "default" : [ + 1.0 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Omnipath Parameters", + "arguments" : [ + { + "type" : "integer", + "name" : "--omnipath_min_curation_effort", + "description" : "Minimum number of times an interaction has to be described in a paper and mentioned in a database to be included in the OmniPath gene programs.", + "default" : [ + 2 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "NicheNet Parameters", + "arguments" : [ + { + "type" : "string", + "name" : "--nichenet_version", + "description" : "Version of the NicheNet ligand receptor network and ligand target gene regulatory potential matrix.\n´v2´ is an improved version of ´v1´, and has separate files for mouse and human.\n", + "default" : [ + "v2" + ], + "required" : false, + "choices" : [ + "v1", + "v2" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--nichenet_keep_target_genes_ratio", + "description" : "Ratio of target genes that are kept compared to total target genes.\nThis ratio is applied over the entire matrix (not on gene program level), and determines the ´all_gps_score_keep_threshold´, which will be used to filter target genes according to their regulatory potential scores.\n", + "default" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", 
+ "name" : "--nichenet_max_n_target_genes_per_gp", + "description" : "Maximum number of target genes per gene program. If a gene program has more target genes than ´max_n_target_genes_per_gp´, only the ´max_n_target_genes_per_gp´ gene programs with the highest regulatory potential scores will be kept.\nDefault value is chosen based on MultiNicheNet specification (s. Browaeys, R. et al. MultiNicheNet: a flexible framework for differential cell-cell communication analysis from multi-sample multi-condition single-cell transcriptomics data. bioRxiv (2023) doi:10.1101/2023.06.13.544751).\n", + "default" : [ + 250 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Path to the output gene program mask JSON file.", + "example" : [ + "gp_mask.json" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_omnipath_lr_network", + "description" : "Path to the output OmniPath ligand-receptor network CSV file.", + "example" : [ + "omnipath_lr_network.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_nichenet_lr_network", + "description" : "Path to the output NicheNet ligand-receptor network CSV file.", + "example" : [ + "nichenet_lr_network.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_nichenet_ligand_target_matrix", + "description" : "Path to the output NicheNet ligand-target gene regulatory potential matrix file.", + "example" : [ + "nichenet_ligand_target_matrix.csv" + ], + 
"must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_collectri_tf_network", + "description" : "Path to the output CollecTRI TF-target gene regulatory potential network CSV file.", + "example" : [ + "collectri_tf_network.csv" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_omnipath_gp_gene_count_distributions", + "description" : "Path to save the OmniPath gene program gene count distributions plot.", + "example" : [ + "omnipath_gp_gene_count_distributions.svg" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_nichenet_gp_gene_count_distributions", + "description" : "Path to save the NicheNet gene program gene count distributions plot.", + "example" : [ + "nichenet_gp_gene_count_distributions.svg" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_mebocost_gp_gene_count_distributions", + "description" : "Path to save the MeBocost gene program gene count distributions plot.", + "example" : [ + "mebocost_gp_gene_count_distributions.svg" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_collectri_tf_gp_gene_count_distributions", + "description" : "Path to save the CollecTRI TF gene program gene count distributions plot.", + "example" : [ + "collectri_tf_gp_gene_count_distributions.svg" + ], + "must_exist" : true, + "create_parent" : true, + "required" : false, + 
"direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "python_script", + "path" : "script.py", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/src/utils/setup_logger.py" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "Generation of a prior knowledge gene program mask for NicheCompass.", + "test_resources" : [ + { + "type" : "python_script", + "path" : "test.py", + "is_executable" : true + }, + { + "type" : "file", + "path" : "/resources_test/niche/" + } + ], + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "openpipeline", + "repo" : "openpipeline", + "tag" : "v3.0.0" + } + ], + "links" : { + "repository" : "https://github.com/openpipelines-bio/openpipeline_spatial", + "docker_registry" : "ghcr.io" + }, + "runners" : [ + { + "type" : "executable", + "id" : "executable", + "docker_setup_strategy" : "ifneedbepullelsecachedbuild" + }, + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "label" : [ + "lowcpu", + "lowmem", + "lowdisk" + ], + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 
50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + "mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "docker", + "id" : "docker", + "image" : "nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04", + "target_registry" : "images.viash-hub.com", + "target_tag" : "niche-compass", + "namespace_separator" : "/", + "setup" : [ + { + "type" : "apt", + "packages" : [ + "libhdf5-dev", + "python3-pip", + "python3-dev", + "python-is-python3" + ], + "interactive" : false + }, + { + "type" : "docker", + "run" : [ + "pip install torch --index-url https://download.pytorch.org/whl/cu124 \\\\\n&& pip install pyg_lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.6.0+cu124.html \n" + ] + 
}, + { + "type" : "python", + "user" : false, + "packages" : [ + "numpy<2", + "nichecompass" + ], + "upgrade" : true + } + ], + "test_setup" : [ + { + "type" : "python", + "user" : false, + "packages" : [ + "viashpy==0.9.0" + ], + "upgrade" : true + } + ] + }, + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/nichecompass/gene_program_mask/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "docker|native", + "output" : "/workdir/root/repo/target/nextflow/nichecompass/gene_program_mask", + "viash_version" : "0.9.4", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" + }, + "package_config" : { + "name" : "openpipeline_spatial", + "version" : "niche-compass", + "info" : { + "test_resources" : [ + { + "type" : "s3", + "path" : "s3://openpipelines-bio/openpipeline_spatial/resources_test", + "dest" : "resources_test" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "openpipeline", + "repo" : "openpipeline", + "tag" : "v3.0.0" + } + ], + "viash_version" : "0.9.4", + "source" : "/workdir/root/repo/src", + "target" : "/workdir/root/repo/target", + "config_mods" : [ + ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'niche-compass'" + ], + "organization" : "vsh", + "links" : { + "repository" : "https://github.com/openpipelines-bio/openpipeline_spatial", + "docker_registry" : "ghcr.io" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) + + +// inner workflow +// inner workflow hook +def innerWorkflowFactory(args) { + def rawScript = '''set -e +tempscript=".viash_script.py" +cat > "$tempscript" << 
VIASHMAIN +import os +import sys +import shutil +import json + +from nichecompass.utils import ( + extract_gp_dict_from_mebocost_ms_interactions, + extract_gp_dict_from_nichenet_lrt_interactions, + extract_gp_dict_from_omnipath_lr_interactions, + filter_and_combine_gp_dict_gps_v2, + extract_gp_dict_from_collectri_tf_network, +) + + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input_gene_orthologs_mapping_file': $( if [ ! -z ${VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE+x} ]; then echo "r'${VIASH_PAR_INPUT_GENE_ORTHOLOGS_MAPPING_FILE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_metabolite_enzymes': $( if [ ! -z ${VIASH_PAR_INPUT_METABOLITE_ENZYMES+x} ]; then echo "r'${VIASH_PAR_INPUT_METABOLITE_ENZYMES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'input_metabolite_sensors': $( if [ ! -z ${VIASH_PAR_INPUT_METABOLITE_SENSORS+x} ]; then echo "r'${VIASH_PAR_INPUT_METABOLITE_SENSORS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'species': $( if [ ! -z ${VIASH_PAR_SPECIES+x} ]; then echo "r'${VIASH_PAR_SPECIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'create_omnipath_gene_program_mask': $( if [ ! -z ${VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK+x} ]; then echo "r'${VIASH_PAR_CREATE_OMNIPATH_GENE_PROGRAM_MASK//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'create_nichenet_gene_program_mask': $( if [ ! -z ${VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK+x} ]; then echo "r'${VIASH_PAR_CREATE_NICHENET_GENE_PROGRAM_MASK//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'create_mebocost_gene_program_mask': $( if [ ! -z ${VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK+x} ]; then echo "r'${VIASH_PAR_CREATE_MEBOCOST_GENE_PROGRAM_MASK//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'create_collectri_tf_gene_program_mask': $( if [ ! 
-z ${VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK+x} ]; then echo "r'${VIASH_PAR_CREATE_COLLECTRI_TF_GENE_PROGRAM_MASK//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), + 'overlap_thresh_target_genes': $( if [ ! -z ${VIASH_PAR_OVERLAP_THRESH_TARGET_GENES+x} ]; then echo "float(r'${VIASH_PAR_OVERLAP_THRESH_TARGET_GENES//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'omnipath_min_curation_effort': $( if [ ! -z ${VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT+x} ]; then echo "int(r'${VIASH_PAR_OMNIPATH_MIN_CURATION_EFFORT//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'nichenet_version': $( if [ ! -z ${VIASH_PAR_NICHENET_VERSION+x} ]; then echo "r'${VIASH_PAR_NICHENET_VERSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'nichenet_keep_target_genes_ratio': $( if [ ! -z ${VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO+x} ]; then echo "float(r'${VIASH_PAR_NICHENET_KEEP_TARGET_GENES_RATIO//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'nichenet_max_n_target_genes_per_gp': $( if [ ! -z ${VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_NICHENET_MAX_N_TARGET_GENES_PER_GP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_omnipath_lr_network': $( if [ ! -z ${VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OMNIPATH_LR_NETWORK//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_nichenet_lr_network': $( if [ ! -z ${VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK+x} ]; then echo "r'${VIASH_PAR_OUTPUT_NICHENET_LR_NETWORK//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_nichenet_ligand_target_matrix': $( if [ ! -z ${VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_NICHENET_LIGAND_TARGET_MATRIX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_collectri_tf_network': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COLLECTRI_TF_NETWORK//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_omnipath_gp_gene_count_distributions': $( if [ ! -z ${VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OMNIPATH_GP_GENE_COUNT_DISTRIBUTIONS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_nichenet_gp_gene_count_distributions': $( if [ ! -z ${VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_NICHENET_GP_GENE_COUNT_DISTRIBUTIONS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_mebocost_gp_gene_count_distributions': $( if [ ! -z ${VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_MEBOCOST_GP_GENE_COUNT_DISTRIBUTIONS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_collectri_tf_gp_gene_count_distributions': $( if [ ! -z ${VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COLLECTRI_TF_GP_GENE_COUNT_DISTRIBUTIONS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! 
-z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! 
-z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ) +} +dep = { + +} + +## VIASH END +sys.path.append(meta["resources_dir"]) +from setup_logger import setup_logger + +logger = setup_logger() + +# Validate that inputs are provided correctly +if not any( + [ + par["create_omnipath_gene_program_mask"], + par["create_nichenet_gene_program_mask"], + par["create_mebocost_gene_program_mask"], + par["create_collectri_tf_gene_program_mask"], + ] +): + raise ValueError("At least one gene program mask must be set to True") +if ( + par["create_omnipath_gene_program_mask"] + and par["species"] == "mouse" + and not par["input_gene_orthologs_mapping_file"] +): + raise ValueError( + "For mouse species, a --input_gene_orthologs_mapping_file file must be provided for generating the omnipath mask." + ) +if ( + par["create_nichenet_gene_program_mask"] + and par["species"] == "mouse" + and not par["input_gene_orthologs_mapping_file"] +): + raise ValueError( + "For mouse species, a --input_gene_orthologs_mapping_file file must be provided for generating the nichenet mask." + ) +if par["create_mebocost_gene_program_mask"] and ( + not par["input_metabolite_enzymes"] or not par["input_metabolite_sensors"] +): + raise ValueError( + "For mebocost gene program mask, both --input_metabolite_enzymes and --input_metabolite_sensors files must be provided." 
+ ) + +# Assemble gene program dictionaries (one dict is appended per enabled source) +gp_dicts = [] + +if par["create_omnipath_gene_program_mask"]: + logger.info("Generating Omnipath gene program mask...") + + plot_gp_gene_count_distributions = ( + True if par["output_omnipath_gp_gene_count_distributions"] else False + ) + save_to_disk = True if par["output_omnipath_lr_network"] else False + + omnipath_gp_dict = extract_gp_dict_from_omnipath_lr_interactions( + species=par["species"], + min_curation_effort=par["omnipath_min_curation_effort"], + load_from_disk=False, + save_to_disk=save_to_disk,  # only write the LR network when --output_omnipath_lr_network is set (was hard-coded True) + lr_network_file_path=par["output_omnipath_lr_network"], + gene_orthologs_mapping_file_path=par["input_gene_orthologs_mapping_file"], + plot_gp_gene_count_distributions=plot_gp_gene_count_distributions, + gp_gene_count_distributions_save_path=par[ + "output_omnipath_gp_gene_count_distributions" + ], + ) + + gp_dicts.append(omnipath_gp_dict) + +if par["create_nichenet_gene_program_mask"]: + logger.info("Generating NicheNet gene program mask...") + + plot_gp_gene_count_distributions = ( + True if par["output_nichenet_gp_gene_count_distributions"] else False + ) + save_to_disk = ( + True + if ( + par["output_nichenet_lr_network"] + or par["output_nichenet_ligand_target_matrix"] + ) + else False + ) + + nichenet_gp_dict = extract_gp_dict_from_nichenet_lrt_interactions( + species=par["species"], + version=par["nichenet_version"], + keep_target_genes_ratio=par["nichenet_keep_target_genes_ratio"], + max_n_target_genes_per_gp=par["nichenet_max_n_target_genes_per_gp"], + load_from_disk=False, + save_to_disk=save_to_disk, + lr_network_file_path=par["output_nichenet_lr_network"], + ligand_target_matrix_file_path=par["output_nichenet_ligand_target_matrix"], + gene_orthologs_mapping_file_path=par["input_gene_orthologs_mapping_file"], + 
plot_gp_gene_count_distributions=plot_gp_gene_count_distributions, + gp_gene_count_distributions_save_path=par[ + "output_nichenet_gp_gene_count_distributions" + ], + ) + + 
gp_dicts.append(nichenet_gp_dict) + +if par["create_mebocost_gene_program_mask"]: + logger.info("Generating MeBocost gene program mask...") + + shutil.copy2( + par["input_metabolite_enzymes"], + os.path.join(meta["temp_dir"], f"{par['species']}_metabolite_enzymes.tsv"), + ) + shutil.copy2( + par["input_metabolite_sensors"], + os.path.join(meta["temp_dir"], f"{par['species']}_metabolite_sensors.tsv"), + ) + plot_gp_gene_count_distributions = ( + True if par["output_mebocost_gp_gene_count_distributions"] else False + ) + + mebocost_gp_dict = extract_gp_dict_from_mebocost_ms_interactions( + dir_path=meta["temp_dir"], + species=par["species"], + plot_gp_gene_count_distributions=plot_gp_gene_count_distributions, + gp_gene_count_distributions_save_path=par[ + "output_mebocost_gp_gene_count_distributions" + ], + ) + + gp_dicts.append(mebocost_gp_dict) + +if par["create_collectri_tf_gene_program_mask"]: + logger.info("Generating CollecTRI TF gene program mask...") + + plot_gp_gene_count_distributions = ( + True if par["output_collectri_tf_gp_gene_count_distributions"] else False + ) + save_to_disk = True if par["output_collectri_tf_network"] else False + + collectri_gp_dict = extract_gp_dict_from_collectri_tf_network( + species=par["species"], + save_to_disk=save_to_disk, + tf_network_file_path=par["output_collectri_tf_network"], + plot_gp_gene_count_distributions=plot_gp_gene_count_distributions, + gp_gene_count_distributions_save_path=par[ + "output_collectri_tf_gp_gene_count_distributions" + ], + ) + + gp_dicts.append(collectri_gp_dict) + +# Filter and combine GPs +assert len(gp_dicts) > 0, "No gene program dictionaries were created." + +combined_gp_dict = filter_and_combine_gp_dict_gps_v2( + gp_dicts, + overlap_thresh_target_genes=par["overlap_thresh_target_genes"], + verbose=True, +) + +logger.info("Gene program mask generation completed.") +logger.info( + f"Number of gene programs after filtering and combining: {len(combined_gp_dict)}." 
+) + +logger.info(f"Saving combined gene program mask to: {par['output']}") +with open(par["output"], "w") as f: + json.dump(combined_gp_dict, f) +VIASHMAIN +python -B "$tempscript" +''' + + return vdsl3WorkflowFactory(args, meta, rawScript) +} + + + +/** + * Generate a workflow for VDSL3 modules. + * + * This function is called by the workflowFactory() function. + * + * Input channel: [id, input_map] + * Output channel: [id, output_map] + * + * Internally, this workflow will convert the input channel + * to a format which the Nextflow module will be able to handle. + */ +def vdsl3WorkflowFactory(Map args, Map meta, String rawScript) { + def key = args["key"] + def processObj = null + + workflow processWf { + take: input_ + main: + + if (processObj == null) { + processObj = _vdsl3ProcessFactory(args, meta, rawScript) + } + + output_ = input_ + | map { tuple -> + def id = tuple[0] + def data_ = tuple[1] + + if (workflow.stubRun) { + // add id if missing + data_ = [id: 'stub'] + data_ + } + + // process input files separately + def inputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "input" } + .collect { par -> + def val = data_.containsKey(par.plainName) ? 
data_[par.plainName] : [] + def inputFiles = [] + if (val == null) { + inputFiles = [] + } else if (val instanceof List) { + inputFiles = val + } else if (val instanceof Path) { + inputFiles = [ val ] + } else { + inputFiles = [] + } + if (!workflow.stubRun) { + // throw error when an input file doesn't exist + inputFiles.each{ file -> + assert file.exists() : + "Error in module '${key}' id '${id}' argument '${par.plainName}'.\n" + + " Required input file does not exist.\n" + + " Path: '$file'.\n" + + " Expected input file to exist" + } + } + inputFiles + } + + // remove input files + def argsExclInputFiles = meta.config.allArguments + .findAll { (it.type != "file" || it.direction != "input") && data_.containsKey(it.plainName) } + .collectEntries { par -> + def parName = par.plainName + def val = data_[parName] + if (par.multiple && val instanceof Collection) { + val = val.join(par.multiple_sep) + } + if (par.direction == "output" && par.type == "file") { + val = val + .replaceAll('\\$id', id) + .replaceAll('\\$\\{id\\}', id) + .replaceAll('\\$key', key) + .replaceAll('\\$\\{key\\}', key) + } + [parName, val] + } + + [ id ] + inputPaths + [ argsExclInputFiles, meta.resources_dir ] + } + | processObj + | map { output -> + def outputFiles = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .indexed() + .collectEntries{ index, par -> + def out = output[index + 1] + // strip dummy '.exitcode' file from output (see nextflow-io/nextflow#2678) + if (!(out instanceof List) || out.size() <= 1) { // parenthesised: '!out instanceof List' negates 'out' first and is never true + if (par.multiple) { + out = [] + } else { + assert !par.required : + "Error in module '${key}' id '${output[0]}' argument '${par.plainName}'.\n" + + " Required output file is missing" + out = null + } + } else if (out.size() == 2 && !par.multiple) { + out = out[1] + } else { + out = out.drop(1) + } + [ par.plainName, out ] + } + + // drop null outputs + 
outputFiles.removeAll{it.value == null} + + [ output[0], outputFiles ] + } + emit: output_ + 
} + + return processWf +} + +// depends on: session? +def _vdsl3ProcessFactory(Map workflowArgs, Map meta, String rawScript) { + // autodetect process key + def wfKey = workflowArgs["key"] + def procKeyPrefix = "${wfKey}_process" + def scriptMeta = nextflow.script.ScriptMeta.current() + def existing = scriptMeta.getProcessNames().findAll{it.startsWith(procKeyPrefix)} + def numbers = existing.collect{it.replace(procKeyPrefix, "0").toInteger()} + def newNumber = (numbers + [-1]).max() + 1 + + def procKey = newNumber == 0 ? procKeyPrefix : "$procKeyPrefix$newNumber" + + if (newNumber > 0) { // a process with this key prefix already exists: warn with one concatenated message + log.warn "Key for module '${wfKey}' is duplicated.\n" + + "If you run a component multiple times in the same workflow,\n" + + "it's recommended you set a unique key for every call,\n" + + "for example: ${wfKey}.run(key: \"foo\")." + } + + // subset directives and convert to list of tuples + def drctv = workflowArgs.directives + + // TODO: unit test the two commands below + // convert publish array into tags + def valueToStr = { val -> + // ignore closures + if (val instanceof CharSequence) { + if (!val.matches('^[{].*[}]$')) { + '"' + val + '"' + } else { + val + } + } else if (val instanceof List) { + "[" + val.collect{valueToStr(it)}.join(", ") + "]" + } else if (val instanceof Map) { + "[" + val.collect{k, v -> k + ": " + valueToStr(v)}.join(", ") + "]" + } else { + val.inspect() + } + } + + // multiple entries allowed: label, publishdir + def drctvStrs = drctv.collect { key, value -> + if (key in ["label", "publishDir"]) { + value.collect{ val -> + if (val instanceof Map) { + "\n$key " + val.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else if (val == null) { + "" + } else { + "\n$key " + valueToStr(val) + } + }.join() + } else if (value instanceof Map) { + "\n$key " + value.collect{ k, v -> k + ": " + valueToStr(v) }.join(", ") + } else { + "\n$key " + valueToStr(value) + } + }.join() + + 
def inputPaths = meta.config.allArguments + .findAll { it.type == "file" 
&& it.direction == "input" } + .collect { ', path(viash_par_' + it.plainName + ', stageAs: "_viash_par/' + it.plainName + '_?/*")' } + .join() + + def outputPaths = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + // insert dummy into every output (see nextflow-io/nextflow#2678) + if (!par.multiple) { + ', path{[".exitcode", args.' + par.plainName + ']}' + } else { + ', path{[".exitcode"] + args.' + par.plainName + '}' + } + } + .join() + + // TODO: move this functionality somewhere else? + if (workflowArgs.auto.transcript) { + outputPaths = outputPaths + ', path{[".exitcode", ".command*"]}' + } else { + outputPaths = outputPaths + ', path{[".exitcode"]}' + } + + // create dirs for output files (based on BashWrapper.createParentFiles) + def createParentStr = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" && it.create_parent } + .collect { par -> + def contents = "args[\"${par.plainName}\"] instanceof List ? args[\"${par.plainName}\"].join('\" \"') : args[\"${par.plainName}\"]" + "\${ args.containsKey(\"${par.plainName}\") ? \"mkdir_parent '\" + escapeText(${contents}) + \"'\" : \"\" }" + } + .join("\n") + + // construct inputFileExports + def inputFileExports = meta.config.allArguments + .findAll { it.type == "file" && it.direction.toLowerCase() == "input" } + .collect { par -> + def contents = "viash_par_${par.plainName} instanceof List ? viash_par_${par.plainName}.join(\"${par.multiple_sep}\") : viash_par_${par.plainName}" + "\n\${viash_par_${par.plainName}.empty ? \"\" : \"export VIASH_PAR_${par.plainName.toUpperCase()}='\" + escapeText(${contents}) + \"'\"}" + } + + // NOTE: if using docker, use /tmp instead of tmpDir! 
+ def tmpDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('VIASH_TMPDIR') ?: + System.getenv('VIASH_TEMPDIR') ?: + System.getenv('VIASH_TMP') ?: + System.getenv('TEMP') ?: + System.getenv('TMPDIR') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMP') ?: + '/tmp' + ).toAbsolutePath() + + // construct stub + def stub = meta.config.allArguments + .findAll { it.type == "file" && it.direction == "output" } + .collect { par -> + "\${ args.containsKey(\"${par.plainName}\") ? \"touch2 \\\"\" + (args[\"${par.plainName}\"] instanceof String ? args[\"${par.plainName}\"].replace(\"_*\", \"_0\") : args[\"${par.plainName}\"].join('\" \"')) + \"\\\"\" : \"\" }" + } + .join("\n") + + // escape script + def escapedScript = rawScript.replace('\\', '\\\\').replace('$', '\\$').replace('"""', '\\"\\"\\"') + + // publishdir assert + def assertStr = (workflowArgs.auto.publish == true) || workflowArgs.auto.transcript ? + """\nassert task.publishDir.size() > 0: "if auto.publish is true, params.publish_dir needs to be defined.\\n Example: --publish_dir './output/'" """ : + "" + + // generate process string + def procStr = + """nextflow.enable.dsl=2 + | + |def escapeText = { s -> s.toString().replaceAll("'", "'\\\"'\\\"'") } + |process $procKey {$drctvStrs + |input: + | tuple val(id)$inputPaths, val(args), path(resourcesDir, stageAs: ".viash_meta_resources") + |output: + | tuple val("\$id")$outputPaths, optional: true + |stub: + |\"\"\" + |touch2() { mkdir -p "\\\$(dirname "\\\$1")" && touch "\\\$1" ; } + |$stub + |\"\"\" + |script:$assertStr + |def parInject = args + | .findAll{key, value -> value != null} + | .collect{key, value -> "export VIASH_PAR_\${key.toUpperCase()}='\${escapeText(value)}'"} + | .join("\\n") + |\"\"\" + |# meta exports + |export VIASH_META_RESOURCES_DIR="\${resourcesDir}" + |export VIASH_META_TEMP_DIR="${['docker', 'podman', 'charliecloud'].any{ it == workflow.containerEngine } ? 
'/tmp' : tmpDir}" + |export VIASH_META_NAME="${meta.config.name}" + |# export VIASH_META_EXECUTABLE="\\\$VIASH_META_RESOURCES_DIR/\\\$VIASH_META_NAME" + |export VIASH_META_CONFIG="\\\$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" + |\${task.cpus ? "export VIASH_META_CPUS=\$task.cpus" : "" } + |\${task.memory?.bytes != null ? "export VIASH_META_MEMORY_B=\$task.memory.bytes" : "" } + |if [ ! -z \\\${VIASH_META_MEMORY_B+x} ]; then + | export VIASH_META_MEMORY_KB=\\\$(( (\\\$VIASH_META_MEMORY_B+999) / 1000 )) + | export VIASH_META_MEMORY_MB=\\\$(( (\\\$VIASH_META_MEMORY_KB+999) / 1000 )) + | export VIASH_META_MEMORY_GB=\\\$(( (\\\$VIASH_META_MEMORY_MB+999) / 1000 )) + | export VIASH_META_MEMORY_TB=\\\$(( (\\\$VIASH_META_MEMORY_GB+999) / 1000 )) + | export VIASH_META_MEMORY_PB=\\\$(( (\\\$VIASH_META_MEMORY_TB+999) / 1000 )) + | export VIASH_META_MEMORY_KIB=\\\$(( (\\\$VIASH_META_MEMORY_B+1023) / 1024 )) + | export VIASH_META_MEMORY_MIB=\\\$(( (\\\$VIASH_META_MEMORY_KIB+1023) / 1024 )) + | export VIASH_META_MEMORY_GIB=\\\$(( (\\\$VIASH_META_MEMORY_MIB+1023) / 1024 )) + | export VIASH_META_MEMORY_TIB=\\\$(( (\\\$VIASH_META_MEMORY_GIB+1023) / 1024 )) + | export VIASH_META_MEMORY_PIB=\\\$(( (\\\$VIASH_META_MEMORY_TIB+1023) / 1024 )) + |fi + | + |# meta synonyms + |export VIASH_TEMP="\\\$VIASH_META_TEMP_DIR" + |export TEMP_DIR="\\\$VIASH_META_TEMP_DIR" + | + |# create output dirs if need be + |function mkdir_parent { + | for file in "\\\$@"; do + | mkdir -p "\\\$(dirname "\\\$file")" + | done + |} + |$createParentStr + | + |# argument exports${inputFileExports.join()} + |\$parInject + | + |# process script + |${escapedScript} + |\"\"\" + |} + |""".stripMargin() + + // TODO: print on debug + // if (workflowArgs.debug == true) { + // println("######################\n$procStr\n######################") + // } + + // write process to temp file + def tempFile = java.nio.file.Files.createTempFile("viash-process-${procKey}-", ".nf") + addShutdownHook { 
java.nio.file.Files.deleteIfExists(tempFile) } + tempFile.text = procStr + + // create process from temp file + def binding = new nextflow.script.ScriptBinding([:]) + def session = nextflow.Nextflow.getSession() + def parser = _getScriptLoader(session) + .setModule(true) + .setBinding(binding) + def moduleScript = parser.runScript(tempFile) + .getScript() + + // register module in meta + def module = new nextflow.script.IncludeDef.Module(name: procKey) + scriptMeta.addModule(moduleScript, module.name, module.alias) + + // retrieve and return process from meta + return scriptMeta.getProcess(procKey) +} + +// use Reflection to get a ScriptParser / ScriptLoader +// <25.02.0-edge: new nextflow.script.ScriptParser(session) +// >=25.02.0-edge: nextflow.script.ScriptLoaderFactory.create(session) +def _getScriptLoader(nextflow.Session session) { + // try using the old method + try { + Class scriptParserClass = Class.forName('nextflow.script.ScriptParser') + return scriptParserClass.getDeclaredConstructor(nextflow.Session).newInstance(session) + } catch (ClassNotFoundException e) { + // else try with the new method + try { + Class scriptLoaderFactoryClass = Class.forName('nextflow.script.ScriptLoaderFactory') + def createMethod = scriptLoaderFactoryClass.getDeclaredMethod('create', nextflow.Session) + return createMethod.invoke(null, session) // null because create is static + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException | java.lang.reflect.InvocationTargetException e2) { + // Handle the case where neither class is found + throw new Exception("Neither nextflow.script.ScriptParser nor nextflow.script.ScriptLoaderFactory could be found. 
Is this a compatible Nextflow version?", e2) + } + } +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "container" : { + "registry" : "images.viash-hub.com", + "image" : "vsh/openpipeline_spatial/nichecompass/gene_program_mask", + "tag" : "niche-compass" + }, + "label" : [ + "lowcpu", + "lowmem", + "lowdisk" + ], + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. 
+ // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. + // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. 
+ // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/nichecompass/gene_program_mask/nextflow.config b/target/nextflow/nichecompass/gene_program_mask/nextflow.config new file mode 100644 index 0000000..b7d2b96 --- /dev/null +++ b/target/nextflow/nichecompass/gene_program_mask/nextflow.config @@ -0,0 +1,126 @@ +manifest { + name = 'nichecompass/gene_program_mask' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'niche-compass' + description = 'Generation of a prior knowledge gene program mask for NicheCompass.' 
+ author = 'Dorien Roosen' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory 
= 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git a/target/nextflow/nichecompass/gene_program_mask/nextflow_labels.config b/target/nextflow/nichecompass/gene_program_mask/nextflow_labels.config new file mode 100644 index 0000000..541aaad --- /dev/null +++ b/target/nextflow/nichecompass/gene_program_mask/nextflow_labels.config @@ -0,0 +1,68 @@ +process { + // Default resources for components that hardly do any 
processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // CPU resources + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 10 } + withLabel: highcpu { cpus = 20 } + + // Memory resources + withLabel: lowmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } } + + // Disk space + // Nextflow apparently can't handle empty directives, i.e. + // withLabel: lowdisk {} + // so for that reason we have to add a dummy directive + withLabel: lowdisk { + dummyDirective = "dummyValue" + } + withLabel: middisk { + dummyDirective = "dummyValue" + } + withLabel: highdisk { + dummyDirective = "dummyValue" + } + withLabel: veryhighdisk { + dummyDirective = "dummyValue" + } + // NOTE: The above labels intentionally do not have an effect by default. 
+ // The user should set the disk space requirements by adding the following + // to the compute environment: + // + // withLabel: lowdisk { disk = { 20.GB * task.attempt } } + // withLabel: middisk { disk = { 100.GB * task.attempt } } + // withLabel: highdisk { disk = { 200.GB * task.attempt } } + // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } } +} + +def get_memory(to_compare) { // returns to_compare capped at process.maxMemory; a no-op when maxMemory is unset + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { // last allowed attempt: request the maximum + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) { // request exceeds the cap + return process.maxMemory as nextflow.util.MemoryUnit // was the undefined name 'max_memory', which always fell into the catch block below + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" 
+ System.exit(1) + } +} diff --git a/target/nextflow/nichecompass/gene_program_mask/nextflow_schema.json b/target/nextflow/nichecompass/gene_program_mask/nextflow_schema.json new file mode 100644 index 0000000..597cc7f --- /dev/null +++ b/target/nextflow/nichecompass/gene_program_mask/nextflow_schema.json @@ -0,0 +1,224 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "gene_program_mask", + "description": "Generation of a prior knowledge gene program mask for NicheCompass.", + "type": "object", + "$defs": { + "inputs": { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + "input_gene_orthologs_mapping_file": { + "type": "string", + "format": "path", + "description": "Path to a CSV file mapping human genes to mouse orthologs.\nRequired for the OmniPath and NicheNet masks if `--species mouse`.\n", + "help_text": "Type: `file`, multiple: `False`, direction: `input`. 
" + }, + "input_metabolite_enzymes": { + "type": "string", + "format": "path", + "description": "Path to the MeBocost metabolite-enzymes TSV file.\nRequired for generating the MeBocost gene program mask.\n", + "help_text": "Type: `file`, multiple: `False`, direction: `input`. " + }, + "input_metabolite_sensors": { + "type": "string", + "format": "path", + "description": "Path to the MeBocost metabolite-sensors TSV file.\nRequired for generating the MeBocost gene program mask.\n", + "help_text": "Type: `file`, multiple: `False`, direction: `input`. " + } + } + }, + "outputs": { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + "output": { + "type": "string", + "format": "path", + "description": "Path to the output gene program mask JSON file.", + "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output.json\"`, direction: `output`, example: `\"gp_mask.json\"`. ", + "default": "$id.$key.output.json" + }, + "output_omnipath_lr_network": { + "type": "string", + "format": "path", + "description": "Path to the output OmniPath ligand-receptor network CSV file.", + "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_omnipath_lr_network.csv\"`, direction: `output`, example: `\"omnipath_lr_network.csv\"`. ", + "default": "$id.$key.output_omnipath_lr_network.csv" + }, + "output_nichenet_lr_network": { + "type": "string", + "format": "path", + "description": "Path to the output NicheNet ligand-receptor network CSV file.", + "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_nichenet_lr_network.csv\"`, direction: `output`, example: `\"nichenet_lr_network.csv\"`. 
", + "default": "$id.$key.output_nichenet_lr_network.csv" + }, + "output_nichenet_ligand_target_matrix": { + "type": "string", + "format": "path", + "description": "Path to the output NicheNet ligand-target gene regulatory potential matrix file.", + "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_nichenet_ligand_target_matrix.csv\"`, direction: `output`, example: `\"nichenet_ligand_target_matrix.csv\"`. ", + "default": "$id.$key.output_nichenet_ligand_target_matrix.csv" + }, + "output_collectri_tf_network": { + "type": "string", + "format": "path", + "description": "Path to the output CollecTRI TF-target gene regulatory potential network CSV file.", + "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_collectri_tf_network.csv\"`, direction: `output`, example: `\"collectri_tf_network.csv\"`. ", + "default": "$id.$key.output_collectri_tf_network.csv" + }, + "output_omnipath_gp_gene_count_distributions": { + "type": "string", + "format": "path", + "description": "Path to save the OmniPath gene program gene count distributions plot.", + "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_omnipath_gp_gene_count_distributions.svg\"`, direction: `output`, example: `\"omnipath_gp_gene_count_distributions.svg\"`. ", + "default": "$id.$key.output_omnipath_gp_gene_count_distributions.svg" + }, + "output_nichenet_gp_gene_count_distributions": { + "type": "string", + "format": "path", + "description": "Path to save the NicheNet gene program gene count distributions plot.", + "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_nichenet_gp_gene_count_distributions.svg\"`, direction: `output`, example: `\"nichenet_gp_gene_count_distributions.svg\"`. 
", + "default": "$id.$key.output_nichenet_gp_gene_count_distributions.svg" + }, + "output_mebocost_gp_gene_count_distributions": { + "type": "string", + "format": "path", + "description": "Path to save the MeBocost gene program gene count distributions plot.", + "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_mebocost_gp_gene_count_distributions.svg\"`, direction: `output`, example: `\"mebocost_gp_gene_count_distributions.svg\"`. ", + "default": "$id.$key.output_mebocost_gp_gene_count_distributions.svg" + }, + "output_collectri_tf_gp_gene_count_distributions": { + "type": "string", + "format": "path", + "description": "Path to save the CollecTRI TF gene program gene count distributions plot.", + "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_collectri_tf_gp_gene_count_distributions.svg\"`, direction: `output`, example: `\"collectri_tf_gp_gene_count_distributions.svg\"`. ", + "default": "$id.$key.output_collectri_tf_gp_gene_count_distributions.svg" + } + } + }, + "parameters": { + "title": "Parameters", + "type": "object", + "description": "No description", + "properties": { + "species": { + "type": "string", + "description": "Species of the organism (human or mouse).", + "help_text": "Type: `string`, multiple: `False`, default: `\"human\"`, choices: ``human`, `mouse``. ", + "enum": [ + "human", + "mouse" + ], + "default": "human" + }, + "create_omnipath_gene_program_mask": { + "type": "boolean", + "description": "Whether to create the OmniPath gene program mask.", + "help_text": "Type: `boolean`, multiple: `False`, default: `true`. ", + "default": true + }, + "create_nichenet_gene_program_mask": { + "type": "boolean", + "description": "Whether to create the NicheNet gene program mask.", + "help_text": "Type: `boolean`, multiple: `False`, default: `true`. 
", + "default": true + }, + "create_mebocost_gene_program_mask": { + "type": "boolean", + "description": "Whether to create the MeBocost gene program mask.", + "help_text": "Type: `boolean`, multiple: `False`, default: `true`. ", + "default": true + }, + "create_collectri_tf_gene_program_mask": { + "type": "boolean", + "description": "Whether to create the CollecTRI TF gene program mask.", + "help_text": "Type: `boolean`, multiple: `False`, default: `true`. ", + "default": true + }, + "overlap_thresh_target_genes": { + "type": "number", + "description": "The minimum ratio of target genes that need to overlap between a GP without source genes and another GP for the GP to be dropped.\nGene programs with different source genes are never combined or dropped.\n", + "help_text": "Type: `double`, multiple: `False`, default: `1.0`. ", + "default": 1.0 + } + } + }, + "omnipath parameters": { + "title": "Omnipath Parameters", + "type": "object", + "description": "No description", + "properties": { + "omnipath_min_curation_effort": { + "type": "integer", + "description": "Minimum number of times an interaction has to be described in a paper and mentioned in a database to be included in the OmniPath gene programs.", + "help_text": "Type: `integer`, multiple: `False`, default: `2`. ", + "default": 2 + } + } + }, + "nichenet parameters": { + "title": "NicheNet Parameters", + "type": "object", + "description": "No description", + "properties": { + "nichenet_version": { + "type": "string", + "description": "Version of the NicheNet ligand receptor network and ligand target gene regulatory potential matrix.\n´v2´ is an improved version of ´v1´, and has separate files for mouse and human.\n", + "help_text": "Type: `string`, multiple: `False`, default: `\"v2\"`, choices: ``v1`, `v2``. 
", + "enum": [ + "v1", + "v2" + ], + "default": "v2" + }, + "nichenet_keep_target_genes_ratio": { + "type": "number", + "description": "Ratio of target genes that are kept compared to total target genes.\nThis ratio is applied over the entire matrix (not on gene program level), and determines the ´all_gps_score_keep_threshold´, which will be used to filter target genes according to their regulatory potential scores.\n", + "help_text": "Type: `double`, multiple: `False`, default: `1.0`. ", + "default": 1.0 + }, + "nichenet_max_n_target_genes_per_gp": { + "type": "integer", + "description": "Maximum number of target genes per gene program", + "help_text": "Type: `integer`, multiple: `False`, default: `250`. ", + "default": 250 + } + } + }, + "nextflow input-output arguments": { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + "publish_dir": { + "type": "string", + "description": "Path to an output directory.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. 
def setup_logger():
    """Configure the root logger to log at INFO level to standard output.

    The root logger's level is set to ``logging.INFO`` and a
    ``StreamHandler`` writing to ``sys.stdout`` is attached, using the format
    ``"%(asctime)s %(levelname)-8s %(message)s"``.

    The function is safe to call more than once: if the root logger already
    has a stream handler bound to the current ``sys.stdout``, no second
    handler is added, so messages are not emitted twice.

    Returns:
        logging.Logger: the configured root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Adding a new handler on every call would duplicate each log line, so
    # only attach one when no handler already targets this stdout stream.
    has_stdout_handler = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not has_stdout_handler:
        console_handler = logging.StreamHandler(stdout)
        log_formatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(log_formatter)
        logger.addHandler(console_handler)

    return logger
Valid options are:\n`grid` - grid coordinates.\n\ - `generic` - generic coordinates.\nIf not provided, `grid` is used if `--input_obsm_spatial_coords`\ - \ is in --input .uns with `--n_neighs` = 6 (Visium), otherwise `generic` is\ - \ used.\n" - info: null - required: false - choices: - - "generic" - - "grid" - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--n_spatial_neighbors" - description: "Depending on `--coord_type`:\n`grid` - number of neighboring tiles.\n\ - `generic` - number of neighborhoods for non-grid data. Only used when `--delaunay\ - \ False`.\n" - info: null - default: - - 6 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "boolean" - name: "--delaunay" - description: "Whether to use Delaunay triangulation to determine spatial neighborhood\ - \ graph.\nOnly used when `--coord_type generic`.\n" - info: null - default: - - false - required: false - direction: "input" - multiple: false - multiple_sep: ";" - name: "Gene Program Mask" arguments: - type: "integer" @@ -807,12 +768,12 @@ argument_groups: multiple: false multiple_sep: ";" - type: "string" - name: "--output_uns_gene_index" + name: "--output_uns_genes_index" description: "Key of the uns field where the index of a concatenated vector of\ \ target and source genes that are in the gene program masks will be stored.\n" info: null default: - - "nichecompass_gene_idx" + - "nichecompass_genes_idx" required: false direction: "input" multiple: false @@ -870,6 +831,20 @@ argument_groups: direction: "input" multiple: false multiple_sep: ";" + - type: "string" + name: "--output_compression" + description: "Compression format to use for the output AnnData and/or Mudata objects.\n\ + By default no compression is applied.\n" + info: null + example: + - "gzip" + required: false + choices: + - "gzip" + - "lzf" + direction: "input" + multiple: false + multiple_sep: ";" resources: - type: "python_script" path: "script.py" @@ -1028,7 
+1003,7 @@ build_info: output: "target/nextflow/nichecompass/nichecompass" executable: "target/nextflow/nichecompass/nichecompass/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" package_config: name: "openpipeline_spatial" diff --git a/target/nextflow/nichecompass/nichecompass/main.nf b/target/nextflow/nichecompass/nichecompass/main.nf index 885a621..a86e235 100644 --- a/target/nextflow/nichecompass/nichecompass/main.nf +++ b/target/nextflow/nichecompass/nichecompass/main.nf @@ -3132,48 +3132,6 @@ meta = [ } ] }, - { - "name" : "Spatial Neighbors Calculation", - "arguments" : [ - { - "type" : "string", - "name" : "--coord_type", - "description" : "Type of coordinate system. Valid options are:\n`grid` - grid coordinates.\n`generic` - generic coordinates.\nIf not provided, `grid` is used if `--input_obsm_spatial_coords` is in --input .uns with `--n_neighs` = 6 (Visium), otherwise `generic` is used.\n", - "required" : false, - "choices" : [ - "generic", - "grid" - ], - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "integer", - "name" : "--n_spatial_neighbors", - "description" : "Depending on `--coord_type`:\n`grid` - number of neighboring tiles.\n`generic` - number of neighborhoods for non-grid data. 
Only used when `--delaunay False`.\n", - "default" : [ - 6 - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - }, - { - "type" : "boolean", - "name" : "--delaunay", - "description" : "Whether to use Delaunay triangulation to determine spatial neighborhood graph.\nOnly used when `--coord_type generic`.\n", - "default" : [ - false - ], - "required" : false, - "direction" : "input", - "multiple" : false, - "multiple_sep" : ";" - } - ] - }, { "name" : "Gene Program Mask", "arguments" : [ @@ -3889,10 +3847,10 @@ meta = [ }, { "type" : "string", - "name" : "--output_uns_gene_index", + "name" : "--output_uns_genes_index", "description" : "Key of the uns field where the index of a concatenated vector of target and source genes that are in the gene program masks will be stored.\n", "default" : [ - "nichecompass_gene_idx" + "nichecompass_genes_idx" ], "required" : false, "direction" : "input", @@ -3955,6 +3913,22 @@ meta = [ "direction" : "input", "multiple" : false, "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--output_compression", + "description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n", + "example" : [ + "gzip" + ], + "required" : false, + "choices" : [ + "gzip", + "lzf" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" } ] } @@ -4164,7 +4138,7 @@ meta = [ "engine" : "docker|native", "output" : "/workdir/root/repo/target/nextflow/nichecompass/nichecompass", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { @@ -4231,9 +4205,6 @@ par = { 'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'input_obsm_spatial_connectivities': $( if [ ! 
-z ${VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'input_obs_covariates': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_COVARIATES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_COVARIATES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), - 'coord_type': $( if [ ! -z ${VIASH_PAR_COORD_TYPE+x} ]; then echo "r'${VIASH_PAR_COORD_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'n_spatial_neighbors': $( if [ ! -z ${VIASH_PAR_N_SPATIAL_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_SPATIAL_NEIGHBORS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), - 'delaunay': $( if [ ! -z ${VIASH_PAR_DELAUNAY+x} ]; then echo "r'${VIASH_PAR_DELAUNAY//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ), 'min_genes_per_gp': $( if [ ! -z ${VIASH_PAR_MIN_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES_PER_GP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), 'min_source_genes_per_gp': $( if [ ! -z ${VIASH_PAR_MIN_SOURCE_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_SOURCE_GENES_PER_GP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), 'min_target_genes_per_gp': $( if [ ! -z ${VIASH_PAR_MIN_TARGET_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_TARGET_GENES_PER_GP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ), @@ -4290,12 +4261,13 @@ par = { 'output_varm_gp_sources_mask': $( if [ ! -z ${VIASH_PAR_OUTPUT_VARM_GP_SOURCES_MASK+x} ]; then echo "r'${VIASH_PAR_OUTPUT_VARM_GP_SOURCES_MASK//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'output_uns_gp_names': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_GP_NAMES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GP_NAMES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'output_uns_active_gp_names': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_uns_gene_index': $( if [ ! 
-z ${VIASH_PAR_OUTPUT_UNS_GENE_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GENE_INDEX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_uns_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GENES_INDEX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'output_uns_target_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'output_uns_source_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_SOURCE_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_SOURCE_GENES_INDEX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), 'output_uns_covariate_embeddings': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_COVARIATE_EMBEDDINGS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_COVARIATE_EMBEDDINGS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ), 'output_obsp_reconstructed_adj_edge_proba': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBSP_RECONSTRUCTED_ADJ_EDGE_PROBA+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_RECONSTRUCTED_ADJ_EDGE_PROBA//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), - 'output_obsp_agg_weights': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) + 'output_obsp_agg_weights': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), + 'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ) } meta = { 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ), @@ -4334,21 +4306,19 @@ logger.info("GPU enabled? 
%s", use_gpu) ## Read in data adata = mu.read_h5ad(par["input"], mod=par["modality"]) -# ## Compute spatial neighbor graph -# logger.info("Computing spatial neighbor graph...") -# # Compute connectivities and distances -# sq.gr.spatial_neighbors( -# adata, -# coord_type=par["coord_type"], -# spatial_key=par["input_obsm_spatial_coords"], -# n_neighs=par["n_spatial_neighbors"], -# delaunay=par["delaunay"], -# ) - -# # Making the connectivity matrix symmetric -# adata.obsp["spatial_connectivities"] = adata.obsp["spatial_connectivities"].maximum( -# adata.obsp["spatial_connectivities"].T -# ) +# Counts need to be float32 to be processed by nichecompass model +# See https://discuss.pytorch.org/t/runtimeerror-mat1-and-mat2-must-have-the-same-dtype/166759 +counts_dtype = ( + adata.layers[par["layer"]].dtype if par["layer"] is not None else adata.X.dtype +) +if counts_dtype != "float32": + logger.info( + f"Converting count data to float32 from {counts_dtype} for model compatibility..." + ) + if par["layer"] is not None: + adata.layers[par["layer"]] = adata.layers[par["layer"]].astype("float32") + else: + adata.X = adata.X.astype("float32") ## Add GP mask to data logger.info("Adding prior knowledge gene program mask to data...") @@ -4361,7 +4331,7 @@ add_gps_from_gp_dict_to_adata( gp_targets_mask_key=par["output_varm_gp_targets_mask"], gp_sources_mask_key=par["output_varm_gp_sources_mask"], gp_names_key=par["output_uns_gp_names"], - genes_idx_key=par["output_uns_gene_index"], + genes_idx_key=par["output_uns_genes_index"], target_genes_idx_key=par["output_uns_target_genes_index"], source_genes_idx_key=par["output_uns_source_genes_index"], min_genes_per_gp=par["min_genes_per_gp"], @@ -4384,12 +4354,12 @@ model = NicheCompass( gp_sources_mask_key=par["output_varm_gp_sources_mask"], latent_key=par["output_obsm_embedding"], cat_covariates_keys=par["input_obs_covariates"], - cat_covariates_no_edges=par["covariates_edges"], + cat_covariates_no_edges=par["covariate_edges"], 
cat_covariates_embeds_keys=par["output_uns_covariate_embeddings"], - cat_covariates_embeds_injection_layers=par["covariate_embedding_injection_layers"], - gene_idx_key=par["output_uns_gene_index"], - target_gene_idx_key=par["output_uns_target_genes_index"], - source_gene_idx_key=par["output_uns_source_genes_index"], + cat_covariates_embeds_injection=par["covariate_embedding_injection_layers"], + genes_idx_key=par["output_uns_genes_index"], + target_genes_idx_key=par["output_uns_target_genes_index"], + source_genes_idx_key=par["output_uns_source_genes_index"], recon_adj_key=par["output_obsp_reconstructed_adj_edge_proba"], agg_weights_key=par["output_obsp_agg_weights"], include_edge_recon_loss=par["include_edge_recon_loss"], @@ -4410,7 +4380,6 @@ model = NicheCompass( encoder_use_bn=par["encoder_use_bn"], dropout_rate_encoder=par["dropout_rate_encoder"], dropout_rate_graph_decoder=par["dropout_rate_graph_decoder"], - cat_covariates_cats=par["cat_covariates_cats"], n_addon_gp=par["n_addon_gp"], cat_covariates_embeds_nums=par["cat_covariates_embeds_nums"], seed=par["random_state"], @@ -4448,7 +4417,7 @@ model.train( ## Save model and data logger.info("Saving NicheCompass model and data...") mdata = mu.MuData({par["modality"]: adata}) -mdata.write_h5mu(par["output"]) +mdata.write_h5mu(par["output"], compression=par["output_compression"]) model.save(par["output_model"], save_adata=False) VIASHMAIN diff --git a/target/nextflow/workflows/multiomics/spatial_process_samples/.config.vsh.yaml b/target/nextflow/workflows/multiomics/spatial_process_samples/.config.vsh.yaml index e87c6af..4ed7883 100644 --- a/target/nextflow/workflows/multiomics/spatial_process_samples/.config.vsh.yaml +++ b/target/nextflow/workflows/multiomics/spatial_process_samples/.config.vsh.yaml @@ -640,7 +640,7 @@ build_info: output: "target/nextflow/workflows/multiomics/spatial_process_samples" executable: "target/nextflow/workflows/multiomics/spatial_process_samples/main.nf" viash_version: "0.9.4" - 
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" dependencies: - "target/dependencies/vsh/vsh/openpipeline/v3.0.0/nextflow/workflows/multiomics/process_samples" diff --git a/target/nextflow/workflows/multiomics/spatial_process_samples/main.nf b/target/nextflow/workflows/multiomics/spatial_process_samples/main.nf index acccda5..a466115 100644 --- a/target/nextflow/workflows/multiomics/spatial_process_samples/main.nf +++ b/target/nextflow/workflows/multiomics/spatial_process_samples/main.nf @@ -3807,7 +3807,7 @@ meta = [ "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/multiomics/spatial_process_samples", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : { diff --git a/target/nextflow/workflows/niche/nichecompass_leiden/.config.vsh.yaml b/target/nextflow/workflows/niche/nichecompass_leiden/.config.vsh.yaml new file mode 100644 index 0000000..6f8641d --- /dev/null +++ b/target/nextflow/workflows/niche/nichecompass_leiden/.config.vsh.yaml @@ -0,0 +1,816 @@ +name: "nichecompass_leiden" +namespace: "workflows/niche" +version: "niche-compass" +authors: +- name: "Dorien Roosen" + roles: + - "author" + - "maintainer" + info: + role: "Core Team Member" + links: + email: "dorien@data-intuitive.com" + github: "dorien-er" + linkedin: "dorien-roosen" + organizations: + - name: "Data Intuitive" + href: "https://www.data-intuitive.com" + role: "Data Scientist" +- name: "Weiwei Schultz" + roles: + - "contributor" + info: + role: "Contributor" + organizations: + - name: "Janssen R&D US" + role: "Associate Director Data Sciences" +argument_groups: +- name: "Inputs" + arguments: + - type: "string" + name: "--id" + description: "ID 
of the sample." + info: null + example: + - "foo" + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input" + alternatives: + - "-i" + description: "Path to the sample." + info: null + example: + - "input.h5mu" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--input_gp_mask" + description: "JSON file containing a nested dictionary containing the gene programs,\n\ + with keys being gene program names and values being dictionaries with keys `targets`\ + \ and `sources`,\nwhere `targets` contains a list of the names of genes in the\ + \ gene program for the reconstruction of the gene expression of the node itself\ + \ (receiving node)\nand `sources` contains a list of the names of genes in the\ + \ gene program for the reconstruction of the gene expression of the node's neighbors\ + \ (transmitting nodes).\n" + info: null + example: + - "prior_knowledge_gp_mask.json" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--modality" + description: "Which modality to process." + info: null + default: + - "rna" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--layer" + description: "Use specified layer for calculation of qc metrics. If not specified,\ + \ adata.X is used." + info: null + example: + - "raw_counts" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--input_obs_covariates" + description: "Keys of the adata.obs fields to use as covariates." 
+ info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--input_obsm_spatial_coords" + description: "Key in adata.obsm where spatial coordinates are stored" + info: null + default: + - "spatial" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Sample ID options" + description: "Options for adding the id to .obs on the MuData object. Having a sample\ + \ \nid present is a requirement of several components for this pipeline.\n" + arguments: + - type: "boolean" + name: "--include_sample_as_covariate" + description: "Whether to include the sample information as a categorical covariate\ + \ for the \nNicheCompass model.\n" + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--add_id_to_obs" + description: "Add the value passed with --id to .obs." + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--add_id_obs_output" + description: ".obs column to add the sample IDs to. Required and only used when\ + \ \n--add_id_to_obs is set to 'true'\n" + info: null + default: + - "sample_id" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--add_id_make_observation_keys_unique" + description: "Join the id to the .obs index (.obs_names). 
\nOnly used when --add_id_to_obs\ + \ is set to 'true'.\n" + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Spatial Neighbors Calculation" + description: "Options for the calculation of the spatial neighborhood graph.\n" + arguments: + - type: "string" + name: "--coord_type" + description: "Type of coordinate system provided by `--input_obsm_spatial_coords`.\ + \ Valid options are:\n`grid` - grid coordinates.\n`generic` - generic coordinates.\n\ + If not provided, `grid` is used if `--input_obsm_spatial_coords` is in --input\ + \ .uns with `--n_spatial_neighbors` = 6 (Visium), otherwise `generic` is used.\n" + info: null + required: false + choices: + - "generic" + - "grid" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--n_spatial_neighbors" + description: "Depending on `--coord_type`:\n`grid` - number of neighboring tiles.\n\ + `generic` - number of neighborhoods for non-grid data. Only used when `--delaunay\ + \ False`.\n" + info: null + default: + - 6 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--delaunay" + description: "Whether to use Delaunay triangulation to determine spatial neighborhood\ + \ graph.\nOnly used when `--coord_type generic`.\n" + info: null + default: + - false + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Gene Program Mask" + description: "Options for filtering gene programs based on the number of genes available\ + \ in the data." 
+ arguments: + - type: "integer" + name: "--min_genes_per_gp" + description: "Minimum number of genes in a gene program including both target and\ + \ source genes that need to be available in the input data (gene expression\ + \ has been probed) for a gene program not to be discarded.\n" + info: null + default: + - 1 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_source_genes_per_gp" + description: "Minimum number of source genes in a gene program that need to be\ + \ available in the input data (gene expression has been probed) for a gene program\ + \ not to be discarded.\n" + info: null + default: + - 0 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_target_genes_per_gp" + description: "Minimum number of target genes in a gene program that need to be\ + \ available in the input data (gene expression has been probed) for a gene program\ + \ not to be discarded.\n" + info: null + default: + - 0 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_genes_per_gp" + description: "Maximum number of genes in a gene program including both target and\ + \ source genes that can be available in the input data (gene expression has\ + \ been probed) for a gene program not to be discarded.\n" + info: null + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_source_genes_per_gp" + description: "Maximum number of source genes in a gene program that can be available\ + \ in the input data (gene expression has been probed) for a gene program not\ + \ to be discarded.\n" + info: null + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_target_genes_per_gp" + description: "Maximum number of target genes in a gene program that can be 
available\ + \ in the input data (gene expression has been probed) for a gene program not\ + \ to be discarded.\n" + info: null + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean_true" + name: "--filter_genes_not_in_masks" + description: "Whether to remove the genes that are not in the gp masks from the\ + \ input data.\n" + info: null + direction: "input" +- name: "NicheCompass Model Architecture" + description: "Options for the NicheCompass model architecture." + arguments: + - type: "boolean" + name: "--covariate_edges" + description: "List of booleans that indicate whether there can be edges between\ + \ different categories of the categorical covariates.\nIf this is `True` for\ + \ a specific categorical covariate, this covariate will be excluded from the\ + \ edge reconstruction loss.\nNeeds to match the length and order of `--input_obs_covariates`.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "string" + name: "--gene_expr_recon_dist" + description: "The distribution used for gene expression reconstruction. \nIf `nb`,\ + \ uses a negative binomial distribution. 
\nIf `zinb`, uses a zero-inflated negative\ + \ binomial distribution.\n" + info: null + default: + - "nb" + required: false + choices: + - "nb" + - "zinb" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "boolean" + name: "--log_variational" + description: "Whether to transform x by log(x+1) prior to encoding for numerical\ + \ stability (not for normalization).\n" + info: null + default: + - true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--node_label_method" + description: "Node label method that will be used for omics reconstruction.\n\ + If `one-hop-sum`, uses a concatenation of the node's input features with the\ + \ sum of the input features of all nodes in the node's one-hop neighborhood.\n\ + If `one-hop-norm`, uses a concatenation of the node's input features with the\ + \ node's one-hop neighbors input features normalized as per Kipf, T. N. & Welling,\ + \ M. Semi-Supervised Classification with Graph Convolutional Networks. arXiv\ + \ [cs.LG] (2016).\nIf `one-hop-attention`, uses a concatenation of the node's\ + \ input features with the node's one-hop neighbors input features weighted by\ + \ an attention mechanism.\n" + info: null + default: + - "one-hop-norm" + required: false + choices: + - "one-hop-norm" + - "two-hop-norm" + - "one-hop-attention" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--active_gp_thresh_ratio" + description: "Ratio that determines which gene programs are considered active\ + \ and are used in the latent representation after model training.\nAll inactive\ + \ gene programs will be dropped during model training after a determined number\ + \ of epochs.\nAggregations of the absolute values of the gene weights of the\ + \ gene expression decoder per gene program are calculated.\nThe maximum value,\ + \ i.e. 
the value of the gene program with the highest aggregated value will\ + \ be used as a benchmark and all gene programs whose aggregated value is smaller\ + \ than `--active_gp_thresh_ratio` times this maximum value will be set to inactive.\n\ + If set to 0, all gene programs will be considered active.\n" + info: null + default: + - 0.1 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--active_gp_type" + description: "Type to determine active gene programs. \nCan be `mixed`, in which\ + \ case active gene programs are determined across prior and add-on gene programs\ + \ jointly,\nor `separate` in which case they are determined separately for prior\ + \ and add-on gene programs.\n" + info: null + default: + - "separate" + required: false + choices: + - "mixed" + - "separate" + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--n_addon_gp" + description: "Number of addon gene programs (i.e. gene programs that are not included\ + \ in masks but can be learned de novo).\n" + info: null + default: + - 100 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--cat_covariates_embeds_nums" + description: "Number of embedding nodes for all categorical covariates.\nMust\ + \ be the same length as `--input_obs_covariates`.\n" + info: null + required: false + direction: "input" + multiple: true + multiple_sep: ";" + - type: "integer" + name: "--random_state" + description: "Random seed for reproducibility.\n" + info: null + default: + - 0 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "NicheCompass Training Parameters" + description: "Options for training the NicheCompass model." 
+ arguments: + - type: "integer" + name: "--n_epochs" + description: "Number of training epochs" + info: null + default: + - 100 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--n_epochs_all_gps" + description: "Number of epochs during which all gene programs are used for model\ + \ training.\nAfter that only active gene programs are retained.\n" + info: null + default: + - 25 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--n_epochs_no_edge_recon" + description: "Number of epochs during which the edge reconstruction loss is excluded\ + \ from backpropagation for pretraining using the other loss components.\n" + info: null + default: + - 0 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--n_epochs_no_cat_covariates_contrastive" + description: "Number of epochs during which the categorical covariates contrastive\ + \ loss is excluded from backpropagation for pretraining using the other loss\ + \ components.\n" + info: null + default: + - 5 + required: false + min: 0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--lr" + description: "Learning rate" + info: null + default: + - 0.001 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--weight_decay" + description: "Weight decay (L2 penalty)." 
+ info: null + default: + - 0.001 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--edge_val_ratio" + description: "Fraction of the data that is used as validation set on edge-level.\ + \ The rest of the data will be used as training set on edge-level.\n" + info: null + default: + - 0.1 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--node_val_ratio" + description: "Fraction of the data that is used as validation set on node-level.\ + \ The rest of the data will be used as training set on node-level.\n" + info: null + default: + - 0.1 + required: false + min: 0.0 + max: 1.0 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--edge_batch_size" + description: "Batch size for the edge-level dataloaders.\n" + info: null + default: + - 256 + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--node_batch_size" + description: "Batch size for the node-level dataloaders.\nIf not provided, is\ + \ automatically determined based on `--edge_batch_size`.\n" + info: null + required: false + min: 1 + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--n_sampled_neighbors" + description: "Number of neighbors that are sampled during model training from\ + \ the spatial neighborhood graph.\nIf set to -1, all direct neighbors are included.\n" + info: null + default: + - -1 + required: false + min: -1 + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Clustering options" + arguments: + - type: "string" + name: "--obs_cluster" + description: "Prefix for the .obs keys under which to add the cluster labels.\ + \ Newly created columns in .obs will \nbe created from the specified value for\ + \ '--obs_cluster' suffixed with an underscore and one of the\nresolutions\ + \ specified in 
'--leiden_resolution'.\n" + info: null + default: + - "nichecompass_leiden" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "double" + name: "--leiden_resolution" + description: "Control the coarseness of the clustering. Higher values lead to\ + \ more clusters." + info: null + default: + - 1.0 + required: false + direction: "input" + multiple: true + multiple_sep: ";" +- name: "Umap options" + arguments: + - type: "string" + name: "--obsm_umap" + description: "In which .obsm slot to store the resulting UMAP embedding." + info: null + default: + - "X_leiden_nichecompass_umap" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Neighbour calculation" + arguments: + - type: "string" + name: "--uns_neighbors" + description: "In which .uns slot to store various neighbor output objects." + info: null + default: + - "nichecompass_neighbors" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--obsp_neighbor_distances" + description: "In which .obsp slot to store the distance matrix between the resulting\ + \ neighbors." + info: null + default: + - "nichecompass_distances" + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--obsp_neighbor_connectivities" + description: "In which .obsp slot to store the connectivities matrix between the\ + \ resulting neighbors." + info: null + default: + - "nichecompass_connectivities" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +- name: "Outputs" + arguments: + - type: "file" + name: "--output" + description: "Destination path to the output." + info: null + example: + - "output.h5mu" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output_model" + description: "Directory to save the trained NicheCompass model." 
+ info: null + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "string" + name: "--output_obsm_embedding" + description: "Key of the obsm field where the latent / gene program representation\ + \ of active gene programs will be stored after NicheCompass model training.\n" + info: null + default: + - "nichecompass_latent" + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "nextflow_script" + path: "main.nf" + is_executable: true + entrypoint: "run_wf" +- type: "file" + path: "nextflow_labels.config" + dest: "nextflow_labels.config" +description: "A pipeline to compute the spatial neighborhood graph, perform nichecompass\ + \ embedding followed by Leiden clustering." +test_resources: +- type: "nextflow_script" + path: "test.nf" + is_executable: true + entrypoint: "test_wf" +- type: "file" + path: "xenium_tiny.h5mu" +- type: "file" + path: "Lung5_Rep2_tiny.h5mu" +- type: "file" + path: "prior_knowledge_gp_mask.json" +info: + test_dependencies: + - name: "nichecompass_leiden_test" + namespace: "test_workflows/niche" +status: "enabled" +scope: + image: "public" + target: "public" +dependencies: +- name: "dataflow/obsp_block_concatenation" + repository: + type: "local" +- name: "neighbors/spatial_neighborhood_graph" + repository: + type: "local" +- name: "nichecompass/nichecompass" + repository: + type: "local" +- name: "metadata/add_id" + repository: + type: "vsh" + repo: "openpipeline" + tag: "v3.0.0" +- name: "workflows/multiomics/neighbors_leiden_umap" + repository: + type: "vsh" + repo: "openpipeline" + tag: "v3.0.0" +repositories: +- type: "vsh" + name: "openpipeline" + repo: "openpipeline" + tag: "v3.0.0" +links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" +runners: +- type: "nextflow" + id: "nextflow" + directives: + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + 
transcript: false + publish: false + config: + labels: + mem1gb: "memory = 1000000000.B" + mem2gb: "memory = 2000000000.B" + mem5gb: "memory = 5000000000.B" + mem10gb: "memory = 10000000000.B" + mem20gb: "memory = 20000000000.B" + mem50gb: "memory = 50000000000.B" + mem100gb: "memory = 100000000000.B" + mem200gb: "memory = 200000000000.B" + mem500gb: "memory = 500000000000.B" + mem1tb: "memory = 1000000000000.B" + mem2tb: "memory = 2000000000000.B" + mem5tb: "memory = 5000000000000.B" + mem10tb: "memory = 10000000000000.B" + mem20tb: "memory = 20000000000000.B" + mem50tb: "memory = 50000000000000.B" + mem100tb: "memory = 100000000000000.B" + mem200tb: "memory = 200000000000000.B" + mem500tb: "memory = 500000000000000.B" + mem1gib: "memory = 1073741824.B" + mem2gib: "memory = 2147483648.B" + mem4gib: "memory = 4294967296.B" + mem8gib: "memory = 8589934592.B" + mem16gib: "memory = 17179869184.B" + mem32gib: "memory = 34359738368.B" + mem64gib: "memory = 68719476736.B" + mem128gib: "memory = 137438953472.B" + mem256gib: "memory = 274877906944.B" + mem512gib: "memory = 549755813888.B" + mem1tib: "memory = 1099511627776.B" + mem2tib: "memory = 2199023255552.B" + mem4tib: "memory = 4398046511104.B" + mem8tib: "memory = 8796093022208.B" + mem16tib: "memory = 17592186044416.B" + mem32tib: "memory = 35184372088832.B" + mem64tib: "memory = 70368744177664.B" + mem128tib: "memory = 140737488355328.B" + mem256tib: "memory = 281474976710656.B" + mem512tib: "memory = 562949953421312.B" + cpu1: "cpus = 1" + cpu2: "cpus = 2" + cpu5: "cpus = 5" + cpu10: "cpus = 10" + cpu20: "cpus = 20" + cpu50: "cpus = 50" + cpu100: "cpus = 100" + cpu200: "cpus = 200" + cpu500: "cpus = 500" + cpu1000: "cpus = 1000" + script: + - "includeConfig(\"nextflow_labels.config\")" + debug: false + container: "docker" +engines: +- type: "native" + id: "native" +build_info: + config: "src/workflows/niche/nichecompass_leiden/config.vsh.yaml" + runner: "nextflow" + engine: "native" + output: 
"target/nextflow/workflows/niche/nichecompass_leiden" + executable: "target/nextflow/workflows/niche/nichecompass_leiden/main.nf" + viash_version: "0.9.4" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" + git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" + dependencies: + - "target/nextflow/dataflow/obsp_block_concatenation" + - "target/nextflow/neighbors/spatial_neighborhood_graph" + - "target/nextflow/nichecompass/nichecompass" + - "target/dependencies/vsh/vsh/openpipeline/v3.0.0/nextflow/metadata/add_id" + - "target/dependencies/vsh/vsh/openpipeline/v3.0.0/nextflow/workflows/multiomics/neighbors_leiden_umap" +package_config: + name: "openpipeline_spatial" + version: "niche-compass" + info: + test_resources: + - type: "s3" + path: "s3://openpipelines-bio/openpipeline_spatial/resources_test" + dest: "resources_test" + repositories: + - type: "vsh" + name: "openpipeline" + repo: "openpipeline" + tag: "v3.0.0" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\ + .runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\ + )'" + - ".engines += { type: \"native\" }" + - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'" + - ".engines[.type == 'docker'].target_tag := 'niche-compass'" + organization: "vsh" + links: + repository: "https://github.com/openpipelines-bio/openpipeline_spatial" + docker_registry: "ghcr.io" diff --git a/target/nextflow/workflows/niche/nichecompass_leiden/main.nf b/target/nextflow/workflows/niche/nichecompass_leiden/main.nf new file mode 100644 index 0000000..b958c2c --- /dev/null +++ b/target/nextflow/workflows/niche/nichecompass_leiden/main.nf @@ -0,0 +1,4282 @@ +// nichecompass_leiden niche-compass +// +// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +// work thereof. 
This software comes with ABSOLUTELY NO WARRANTY from Data +// Intuitive. +// +// The component may contain files which fall under a different license. The +// authors of this component should specify the license in the header of such +// files, or include a separate license file detailing the licenses of all included +// files. +// +// Component authors: +// * Dorien Roosen (author, maintainer) +// * Weiwei Schultz (contributor) + +//////////////////////////// +// VDSL3 helper functions // +//////////////////////////// + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_checkArgumentType.nf' +class UnexpectedArgumentTypeException extends Exception { + String errorIdentifier + String stage + String plainName + String expectedClass + String foundClass + + // ${key ? " in module '$key'" : ""}${id ? " id '$id'" : ""} + UnexpectedArgumentTypeException(String errorIdentifier, String stage, String plainName, String expectedClass, String foundClass) { + super("Error${errorIdentifier ? " $errorIdentifier" : ""}:${stage ? " $stage" : "" } argument '${plainName}' has the wrong type. " + + "Expected type: ${expectedClass}. Found type: ${foundClass}") + this.errorIdentifier = errorIdentifier + this.stage = stage + this.plainName = plainName + this.expectedClass = expectedClass + this.foundClass = foundClass + } +} + +/** + * Checks if the given value is of the expected type. If not, an exception is thrown. 
+ * + * @param stage The stage of the argument (input or output) + * @param par The parameter definition + * @param value The value to check + * @param errorIdentifier The identifier to use in the error message + * @return The value, if it is of the expected type + * @throws UnexpectedArgumentTypeException If the value is not of the expected type +*/ +def _checkArgumentType(String stage, Map par, Object value, String errorIdentifier) { + // expectedClass will only be != null if value is not of the expected type + def expectedClass = null + def foundClass = null + + // todo: split if need be + + if (!par.required && value == null) { + expectedClass = null + } else if (par.multiple) { + if (value !instanceof Collection) { + value = [value] + } + + // split strings + value = value.collectMany{ val -> + if (val instanceof String) { + // collect() to ensure that the result is a List and not simply an array + val.split(par.multiple_sep).collect() + } else { + [val] + } + } + + // process globs + if (par.type == "file" && par.direction == "input") { + value = value.collect{ it instanceof String ? file(it, hidden: true) : it }.flatten() + } + + // check types of elements in list + try { + value = value.collect { listVal -> + _checkArgumentType(stage, par + [multiple: false], listVal, errorIdentifier) + } + } catch (UnexpectedArgumentTypeException e) { + expectedClass = "List[${e.expectedClass}]" + foundClass = "List[${e.foundClass}]" + } + } else if (par.type == "string") { + // cast to string if need be. only cast if the value is a GString + if (value instanceof GString) { + value = value as String + } + expectedClass = value instanceof String ? 
null : "String" + } else if (par.type == "integer") { + // cast to integer if need be + if (value !instanceof Integer) { + try { + value = value as Integer + } catch (NumberFormatException e) { + expectedClass = "Integer" + } + } + } else if (par.type == "long") { + // cast to long if need be + if (value !instanceof Long) { + try { + value = value as Long + } catch (NumberFormatException e) { + expectedClass = "Long" + } + } + } else if (par.type == "double") { + // cast to double if need be + if (value !instanceof Double) { + try { + value = value as Double + } catch (NumberFormatException e) { + expectedClass = "Double" + } + } + } else if (par.type == "float") { + // cast to float if need be + if (value !instanceof Float) { + try { + value = value as Float + } catch (NumberFormatException e) { + expectedClass = "Float" + } + } + } else if (par.type == "boolean" | par.type == "boolean_true" | par.type == "boolean_false") { + // cast to boolean if need be + if (value !instanceof Boolean) { + try { + value = value as Boolean + } catch (Exception e) { + expectedClass = "Boolean" + } + } + } else if (par.type == "file" && (par.direction == "input" || stage == "output")) { + // cast to path if need be + if (value instanceof String) { + value = file(value, hidden: true) + } + if (value instanceof File) { + value = value.toPath() + } + expectedClass = value instanceof Path ? 
null : "Path" + } else if (par.type == "file" && stage == "input" && par.direction == "output") { + // cast to string if need be + if (value !instanceof String) { + try { + value = value as String + } catch (Exception e) { + expectedClass = "String" + } + } + } else { + // didn't find a match for par.type + expectedClass = par.type + } + + if (expectedClass != null) { + if (foundClass == null) { + foundClass = value.getClass().getName() + } + throw new UnexpectedArgumentTypeException(errorIdentifier, stage, par.plainName, expectedClass, foundClass) + } + + return value +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processInputValues.nf' +Map _processInputValues(Map inputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.required && arg.direction == "input") { + assert inputs.containsKey(arg.plainName) && inputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required input argument '${arg.plainName}' is missing" + } + } + + inputs = inputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid input argument" + + value = _checkArgumentType("input", par, value, "in module '$key' id '$id'") + + [ name, value ] + } + } + return inputs +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/arguments/_processOutputValues.nf' +Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + outputs = outputs.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && it.direction == "output" } + assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument" + + value = _checkArgumentType("output", par, value, "in module '$key' id '$id'") + + [ name, value ] + 
} + } + return outputs +} + +void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) { + if (!workflow.stubRun) { + config.allArguments.each { arg -> + if (arg.direction == "output" && arg.required) { + assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null : + "Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing" + } + } + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/IDChecker.nf' +class IDChecker { + final def items = [] as Set + + @groovy.transform.WithWriteLock + boolean observe(String item) { + if (items.contains(item)) { + return false + } else { + items << item + return true + } + } + + @groovy.transform.WithReadLock + boolean contains(String item) { + return items.contains(item) + } + + @groovy.transform.WithReadLock + Set getItems() { + return items.clone() + } +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_checkUniqueIds.nf' + +/** + * Check if the ids are unique across parameter sets; fails with an assertion error listing all ids otherwise. + * + * @param parameterSets a list of parameter sets, each a tuple whose first element is the id. + */ +private void _checkUniqueIds(List<Tuple2<String, Map<String, Object>>> parameterSets) { + def ppIds = parameterSets.collect{it[0]} + assert ppIds.size() == ppIds.unique(false).size() : "All argument sets should have unique ids. Detected ids: $ppIds" // unique(false) leaves ppIds intact so the message still shows the duplicated ids +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_getChild.nf' + +// helper functions for reading params from file // +def _getChild(parent, child) { + if (child.contains("://") || java.nio.file.Paths.get(child).isAbsolute()) { + child + } else { + def parentAbsolute = java.nio.file.Paths.get(parent).toAbsolutePath().toString() + parentAbsolute.replaceAll('/[^/]*$', "/") + child + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_parseParamList.nf' +/** + * Figure out the param list format based on the file extension + * + * @param param_list A String containing the path to the parameter list file. 
+ * + * @return A String containing the format of the parameter list file. + */ +def _paramListGuessFormat(param_list) { + if (param_list !instanceof String) { + "asis" + } else if (param_list.endsWith(".csv")) { + "csv" + } else if (param_list.endsWith(".json") || param_list.endsWith(".jsn")) { + "json" + } else if (param_list.endsWith(".yaml") || param_list.endsWith(".yml")) { + "yaml" + } else { + "yaml_blob" + } +} + + +/** + * Read the param list + * + * @param param_list One of the following: + * - A String containing the path to the parameter list file (csv, json or yaml), + * - A yaml blob of a list of maps (yaml_blob), + * - Or a groovy list of maps (asis). + * @param config A Map of the Viash configuration. + * + * @return A List of Maps containing the parameters. + */ +def _parseParamList(param_list, Map config) { + // first determine format by extension + def paramListFormat = _paramListGuessFormat(param_list) + + def paramListPath = (paramListFormat != "asis" && paramListFormat != "yaml_blob") ? + file(param_list, hidden: true) : + null + + // get the correct parser function for the detected params_list format + def paramSets = [] + if (paramListFormat == "asis") { + paramSets = param_list + } else if (paramListFormat == "yaml_blob") { + paramSets = readYamlBlob(param_list) + } else if (paramListFormat == "yaml") { + paramSets = readYaml(paramListPath) + } else if (paramListFormat == "json") { + paramSets = readJson(paramListPath) + } else if (paramListFormat == "csv") { + paramSets = readCsv(paramListPath) + } else { + error "Format of provided --param_list not recognised.\n" + + "Found: '$paramListFormat'.\n" + + "Expected: a csv file, a json file, a yaml file,\n" + + "a yaml blob or a groovy list of maps." 
+ } + + // data checks + assert paramSets instanceof List: "--param_list should contain a list of maps" + for (value in paramSets) { + assert value instanceof Map: "--param_list should contain a list of maps" + } + + // id is argument + def idIsArgument = config.allArguments.any{it.plainName == "id"} + + // Reformat from List<Map> to List<Tuple2<String, Map>> by adding the ID as first element of a Tuple2 + paramSets = paramSets.collect({ data -> + def id = data.id + if (!idIsArgument) { + data = data.findAll{k, v -> k != "id"} + } + [id, data] + }) + + // Split parameters with 'multiple: true' + paramSets = paramSets.collect({ id, data -> + data = _splitParams(data, config) + [id, data] + }) + + // The paths of input files inside a param_list file may have been specified relatively to the + // location of the param_list file. These paths must be made absolute. + if (paramListPath) { + paramSets = paramSets.collect({ id, data -> + def new_data = data.collectEntries{ parName, parValue -> + def par = config.allArguments.find{it.plainName == parName} + if (par && par.type == "file" && par.direction == "input") { + if (parValue instanceof Collection) { + parValue = parValue.collectMany{path -> + def x = _resolveSiblingIfNotAbsolute(path, paramListPath) + x instanceof Collection ? x : [x] + } + } else { + parValue = _resolveSiblingIfNotAbsolute(parValue, paramListPath) + } + } + [parName, parValue] + } + [id, new_data] + }) + } + + return paramSets +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/_splitParams.nf' +/** + * Split parameters for arguments that accept multiple values using their separator + * + * @param parValues A Map containing parameters to split. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A Map of parameters where the parameter values have been split into a list using + * their separator. 
+ */ +Map _splitParams(Map parValues, Map config){ + def parsedParamValues = parValues.collectEntries { parName, parValue -> + def parameterSettings = config.allArguments.find({it.plainName == parName}) + + if (!parameterSettings) { + // if argument is not found, do not alter + return [parName, parValue] + } + if (parameterSettings.multiple) { // Check if parameter can accept multiple values + if (parValue instanceof Collection) { + parValue = parValue.collect{it instanceof String ? it.split(parameterSettings.multiple_sep) : it } + } else if (parValue instanceof String) { + parValue = parValue.split(parameterSettings.multiple_sep) + } else if (parValue == null) { + parValue = [] + } else { + parValue = [ parValue ] + } + parValue = parValue.flatten() + } + // For all parameters check if multiple values are only passed for + // arguments that allow it. Quietly simplify lists of length 1. + if (!parameterSettings.multiple && parValue instanceof Collection) { + assert parValue.size() == 1 : + "Error: argument ${parName} has too many values.\n" + + " Expected amount: 1. Found: ${parValue.size()}" + parValue = parValue[0] + } + [parName, parValue] + } + return parsedParamValues +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/channelFromParams.nf' +/** + * Parse nextflow parameters based on settings defined in a viash config. + * Return a list of parameter sets, each parameter set corresponding to + * an event in a nextflow channel. The output from this function can be used + * with Channel.fromList to create a nextflow channel with Vdsl3 formatted + * events. + * + * This function performs: + * - A filtering of the params which can be found in the config file. + * - Process the params_list argument which allows a user to to initialise + * a Vsdl3 channel with multiple parameter sets. Possible formats are + * csv, json, yaml, or simply a yaml_blob. A csv should have column names + * which correspond to the different arguments of this pipeline. 
A json or a yaml + * file should be a list of maps, each of which has keys corresponding to the + * arguments of the pipeline. A yaml blob can also be passed directly as a parameter. + * When passing a csv, json or yaml, relative path names are relativized to the + * location of the parameter file. + * - Combine the parameter sets into a vdsl3 Channel. + * + * @param params Input parameters. Can optionally contain a 'param_list' key that + * provides a list of arguments that can be split up into multiple events + * in the output channel. Possible formats of param_lists are: a csv file, + * json file, a yaml file or a yaml blob. Each parameters set (event) must + * have a unique ID. + * @param config A Map of the Viash configuration. This Map can be generated from the config file + * using the readConfig() function. + * + * @return A list of parameters with the first element of the event being + * the event ID and the second element containing a map of the parsed parameters. + */ + +private List<Tuple2<String, Map<String, Object>>> _paramsToParamSets(Map params, Map config){ + // todo: fetch key from run args + def key_ = config.name + + /* parse regular parameters (not in param_list) */ + /*************************************************/ + def globalParams = config.allArguments + .findAll { params.containsKey(it.plainName) } + .collectEntries { [ it.plainName, params[it.plainName] ] } + def globalID = params.get("id", null) + + /* process params_list arguments */ + /*********************************/ + def paramList = params.containsKey("param_list") && params.param_list != null ? 
+ params.param_list : [] + // if (paramList instanceof String) { + // paramList = [paramList] + // } + // def paramSets = paramList.collectMany{ _parseParamList(it, config) } + // TODO: be able to process param_list when it is a list of strings + def paramSets = _parseParamList(paramList, config) + if (paramSets.isEmpty()) { + paramSets = [[null, [:]]] + } + + /* combine arguments into channel */ + /**********************************/ + def processedParams = paramSets.indexed().collect{ index, tup -> + // Process ID + def id = tup[0] ?: globalID + + if (workflow.stubRun && !id) { + // if stub run, explicitly add an id if missing + id = "stub${index}" + } + assert id != null: "Each parameter set should have at least an 'id'" + + // Process params + def parValues = globalParams + tup[1] + // // Remove parameters which are null, if the default is also null + // parValues = parValues.collectEntries{paramName, paramValue -> + // parameterSettings = config.functionality.allArguments.find({it.plainName == paramName}) + // if ( paramValue != null || parameterSettings.get("default", null) != null ) { + // [paramName, paramValue] + // } + // } + parValues = parValues.collectEntries { name, value -> + def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") } + assert par != null : "Error in module '${key_}' id '${id}': '${name}' is not a valid input argument" + + if (par == null) { + return [:] + } + value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'") + + [ name, value ] + } + + [id, parValues] + } + + // Check if ids (first element of each list) is unique + _checkUniqueIds(processedParams) + return processedParams +} + +/** + * Parse nextflow parameters based on settings defined in a viash config + * and return a nextflow channel. + * + * @param params Input parameters. 
Can optionally contain a 'param_list' key that
+ *    provides a list of arguments that can be split up into multiple events
+ *    in the output channel. Possible formats of param_list are: a csv file,
+ *    json file, a yaml file or a yaml blob. Each parameter set (event) must
+ *    have a unique ID.
+ * @param config A Map of the Viash configuration. This Map can be generated from the config file
+ *    using the readConfig() function.
+ *
+ * @return A nextflow Channel with events. Events are formatted as a tuple whose
+ *    first element is the ID of the event and whose second element is a parameter map.
+ *
+ *
+ */
+def channelFromParams(Map params, Map config) {
+  def processedParams = _paramsToParamSets(params, config)
+  return Channel.fromList(processedParams)
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/checkUniqueIds.nf'
+/**
+ * Channel filter that checks that the id (first element) of every event is unique.
+ *
+ * @param args A map of options. 'stopOnError' (default: true) decides what happens
+ *    when a duplicate id is observed: when true an error is raised, when false
+ *    a warning is logged and the duplicate event is dropped.
+ *
+ * @return The result of applying `filter` with the duplicate check.
+ */
+def checkUniqueIds(Map args) {
+  // default to true only when the caller did not specify a value;
+  // an explicit 'stopOnError: false' must be respected
+  def stopOnError = args.stopOnError == null ? true : args.stopOnError
+
+  def idChecker = new IDChecker()
+
+  return filter { tup ->
+    if (!idChecker.observe(tup[0])) {
+      if (stopOnError) {
+        error "Duplicate id: ${tup[0]}"
+      } else {
+        log.warn "Duplicate id: ${tup[0]}, removing duplicate entry"
+        return false
+      }
+    }
+    return true
+  }
+}
+// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/preprocessInputs.nf'
+// This helper file will be deprecated soon
+// script-level flag so that the deprecation warning is printed only once
+preprocessInputsDeprecationWarningPrinted = false
+
+// Print the preprocessInputs() deprecation warning to stderr, at most once.
+def preprocessInputsDeprecationWarning() {
+  if (!preprocessInputsDeprecationWarningPrinted) {
+    preprocessInputsDeprecationWarningPrinted = true
+    System.err.println("Warning: preprocessInputs() is deprecated and will be removed in Viash 0.9.0.")
+  }
+}
+
+/**
+ * Generate a nextflow Workflow that allows processing a channel of
+ * Vdsl3 formatted events and apply a Viash config to them:
+ *   - Gather default parameters from the Viash config and make
+ *     sure that they are correctly formatted (see applyConfig method).
+ *   - Format the input parameters (also using the applyConfig method).
+ *   - Apply the default parameter to the input parameters.
+ *   - Do some assertions:
+ *     ~ Check if the event IDs in the channel are unique.
+ *
+ * The events in the channel are formatted as tuples, with the
+ * first element of the tuples being a unique id of the parameter set,
+ * and the second element containing the parameters themselves.
+ * Optional extra elements of the tuples will be passed to the output as is.
+ *
+ * @param args A map that must contain a 'config' key that points
+ *   to a parsed config (see readConfig()). Optionally, a
+ *   'key' key can be provided which can be used to create a unique
+ *   name for the workflow process.
+ *
+ * @return A workflow that allows processing a channel of Vdsl3 formatted events
+ * and apply a Viash config to them.
+ */
+def preprocessInputs(Map args) {
+  preprocessInputsDeprecationWarning()
+
+  def config = args.config
+  assert config instanceof Map :
+    "Error in preprocessInputs: config must be a map. " +
+    "Expected class: Map. Found: config.getClass() is ${config.getClass()}"
+  // the key is only used in the messages passed to _checkArgumentType
+  def key_ = args.key ?: config.name
+
+  // Get different parameter types (used throughout this function)
+  def defaultArgs = config.allArguments
+    .findAll { it.containsKey("default") }
+    .collectEntries { [ it.plainName, it.default ] }
+
+  map { tup ->
+    def id = tup[0]
+    def data = tup[1]
+    // everything after the first two elements is carried along untouched
+    def passthrough = tup.drop(2)
+
+    // values in the event override the defaults from the config
+    def new_data = (defaultArgs + data).collectEntries { name, value ->
+      def par = config.allArguments.find { it.plainName == name && (it.direction == "input" || it.type == "file") }
+
+      // entries without a matching argument are kept as they are
+      if (par != null) {
+        value = _checkArgumentType("input", par, value, "in module '$key_' id '$id'")
+      }
+
+      [ name, value ]
+    }
+
+    [ id, new_data ] + passthrough
+  }
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runComponents.nf'
+/**
+ * Run a list of components on a stream of data.
+ *
+ * @param components: list of Viash VDSL3 modules to run
+ * @param fromState: a closure, a map or a list of keys to extract from the input data.
+ *   If a closure, it will be called with the id, the data and the component config.
+ * @param toState: a closure, a map or a list of keys to extract from the output data
+ *   If a closure, it will be called with the id, the output data, the old state and the component config.
+ * @param filter: filter function to apply to the input.
+ *   It will be called with the id, the data and the component config.
+ * @param id: id to use for the output data
+ *   If a closure, it will be called with the id, the data and the component config.
+ * @param auto: auto options to pass to the components
+ *
+ * @return: a workflow that runs the components
+ **/
+def runComponents(Map args) {
+  log.warn("runComponents is deprecated, use runEach instead")
+  assert args.components: "runComponents should be passed a list of components to run"
+
+  // accept a single component as well as a list of components
+  def components_ = args.components
+  if (components_ !instanceof List) {
+    components_ = [ components_ ]
+  }
+  assert components_.size() > 0: "pass at least one component to runComponents"
+
+  def fromState_ = args.fromState
+  def toState_ = args.toState
+  def filter_ = args.filter
+  def id_ = args.id
+
+  workflow runComponentsWf {
+    take: input_ch
+    main:
+
+    // generate one channel per method
+    out_chs = components_.collect{ comp_ ->
+      def comp_config = comp_.config
+
+      // step 1: optionally drop events for this component
+      def filter_ch = filter_
+        ? input_ch | filter{tup ->
+          filter_(tup[0], tup[1], comp_config)
+        }
+        : input_ch
+      // step 2: optionally replace the event id (fixed String or computed by a Closure)
+      def id_ch = id_
+        ? filter_ch | map{tup ->
+          // def new_id = id_(tup[0], tup[1], comp_config)
+          def new_id = tup[0]
+          if (id_ instanceof String) {
+            new_id = id_
+          } else if (id_ instanceof Closure) {
+            new_id = id_(new_id, tup[1], comp_config)
+          }
+          [new_id] + tup.drop(1)
+        }
+        : filter_ch
+      // step 3: derive the component input from the state; the original state
+      // stays in the tuple, directly after the new data
+      def data_ch = id_ch | map{tup ->
+        def new_data = tup[1]
+        if (fromState_ instanceof Map) {
+          new_data = fromState_.collectEntries{ key0, key1 ->
+            [key0, new_data[key1]]
+          }
+        } else if (fromState_ instanceof List) {
+          new_data = fromState_.collectEntries{ key ->
+            [key, new_data[key]]
+          }
+        } else if (fromState_ instanceof Closure) {
+          new_data = fromState_(tup[0], new_data, comp_config)
+        }
+        tup.take(1) + [new_data] + tup.drop(1)
+      }
+      // step 4: run the component
+      def out_ch = data_ch
+        | comp_.run(
+          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
+        )
+      // step 5: optionally merge the component output (tup[1]) into the old state (tup[2])
+      def post_ch = toState_
+        ? out_ch | map{tup ->
+          def output = tup[1]
+          def old_state = tup[2]
+          def new_state = null
+          if (toState_ instanceof Map) {
+            new_state = old_state + toState_.collectEntries{ key0, key1 ->
+              [key0, output[key1]]
+            }
+          } else if (toState_ instanceof List) {
+            new_state = old_state + toState_.collectEntries{ key ->
+              [key, output[key]]
+            }
+          } else if (toState_ instanceof Closure) {
+            new_state = toState_(tup[0], output, old_state, comp_config)
+          }
+          [tup[0], new_state] + tup.drop(3)
+        }
+        : out_ch
+
+      post_ch
+    }
+
+    // mix all results
+    output_ch =
+      (out_chs.size == 1)
+        ? out_chs[0]
+        : out_chs[0].mix(*out_chs.drop(1))
+
+    emit: output_ch
+  }
+
+  return runComponentsWf
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/runEach.nf'
+/**
+ * Run a list of components on a stream of data.
+ *
+ * @param components: list of Viash VDSL3 modules to run
+ * @param fromState: a closure, a map or a list of keys to extract from the input data.
+ *   If a closure, it will be called with the id, the data and the component itself.
+ * @param toState: a closure, a map or a list of keys to extract from the output data
+ *   If a closure, it will be called with the id, the output data, the old state and the component itself.
+ * @param filter: filter function to apply to the input.
+ *   It will be called with the id, the data and the component itself.
+ * @param id: id to use for the output data
+ *   If a closure, it will be called with the id, the data and the component itself.
+ * @param auto: auto options to pass to the components
+ *
+ * @return: a workflow that runs the components
+ **/
+def runEach(Map args) {
+  assert args.components: "runEach should be passed a list of components to run"
+
+  // accept a single component as well as a list of components
+  def components_ = args.components
+  if (components_ !instanceof List) {
+    components_ = [ components_ ]
+  }
+  assert components_.size() > 0: "pass at least one component to runEach"
+
+  def fromState_ = args.fromState
+  def toState_ = args.toState
+  def filter_ = args.filter
+  def runIf_ = args.runIf
+  def id_ = args.id
+
+  assert !runIf_ || runIf_ instanceof Closure: "runEach: must pass a Closure to runIf."
+
+  workflow runEachWf {
+    take: input_ch
+    main:
+
+    // generate one channel per method
+    out_chs = components_.collect{ comp_ ->
+      // step 1: optionally drop events for this component
+      def filter_ch = filter_
+        ? input_ch | filter{tup ->
+          filter_(tup[0], tup[1], comp_)
+        }
+        : input_ch
+      // step 2: optionally replace the event id; the result must be a String
+      def id_ch = id_
+        ? filter_ch | map{tup ->
+          def new_id = id_
+          if (new_id instanceof Closure) {
+            new_id = new_id(tup[0], tup[1], comp_)
+          }
+          assert new_id instanceof String : "Error in runEach: id should be a String or a Closure that returns a String. Expected: id instanceof String. Found: ${new_id.getClass()}"
+          [new_id] + tup.drop(1)
+        }
+        : filter_ch
+      // step 3: events for which runIf returns false skip the component and are
+      // appended unchanged to the output further down
+      def chPassthrough = null
+      def chRun = null
+      if (runIf_) {
+        def idRunIfBranch = id_ch.branch{ tup ->
+          run: runIf_(tup[0], tup[1], comp_)
+          passthrough: true
+        }
+        chPassthrough = idRunIfBranch.passthrough
+        chRun = idRunIfBranch.run
+      } else {
+        chRun = id_ch
+        chPassthrough = Channel.empty()
+      }
+      // step 4: derive the component input from the state; the original state
+      // stays in the tuple, directly after the new data
+      def data_ch = chRun | map{tup ->
+        def new_data = tup[1]
+        if (fromState_ instanceof Map) {
+          new_data = fromState_.collectEntries{ key0, key1 ->
+            [key0, new_data[key1]]
+          }
+        } else if (fromState_ instanceof List) {
+          new_data = fromState_.collectEntries{ key ->
+            [key, new_data[key]]
+          }
+        } else if (fromState_ instanceof Closure) {
+          new_data = fromState_(tup[0], new_data, comp_)
+        }
+        tup.take(1) + [new_data] + tup.drop(1)
+      }
+      // step 5: run the component
+      def out_ch = data_ch
+        | comp_.run(
+          auto: (args.auto ?: [:]) + [simplifyInput: false, simplifyOutput: false]
+        )
+      // step 6: optionally merge the component output (tup[1]) into the old state (tup[2])
+      def post_ch = toState_
+        ? out_ch | map{tup ->
+          def output = tup[1]
+          def old_state = tup[2]
+          def new_state = null
+          if (toState_ instanceof Map) {
+            new_state = old_state + toState_.collectEntries{ key0, key1 ->
+              [key0, output[key1]]
+            }
+          } else if (toState_ instanceof List) {
+            new_state = old_state + toState_.collectEntries{ key ->
+              [key, output[key]]
+            }
+          } else if (toState_ instanceof Closure) {
+            new_state = toState_(tup[0], output, old_state, comp_)
+          }
+          [tup[0], new_state] + tup.drop(3)
+        }
+        : out_ch
+
+      def return_ch = post_ch
+        | concat(chPassthrough)
+
+      return_ch
+    }
+
+    // mix all results
+    output_ch =
+      (out_chs.size == 1)
+        ? out_chs[0]
+        : out_chs[0].mix(*out_chs.drop(1))
+
+    emit: output_ch
+  }
+
+  return runEachWf
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/channel/safeJoin.nf'
+/**
+ * Join sourceChannel to targetChannel
+ *
+ * This function joins the sourceChannel to the targetChannel.
+ * However, each id in the targetChannel must be present in the
+ * sourceChannel.
If _meta.join_id exists in the targetChannel, that is
+ * used as an id instead. If the id doesn't match any id in the sourceChannel,
+ * an error is thrown.
+ */
+
+def safeJoin(targetChannel, sourceChannel, key) {
+  def sourceIDs = new IDChecker()
+
+  // record every id that passes through the source channel
+  def sourceCheck = sourceChannel
+    | map { tup ->
+      sourceIDs.observe(tup[0])
+      tup
+    }
+  // NOTE(review): targetCheck is defined but never consumed; the join below uses
+  // targetChannel directly, so this id check does not appear to be part of the
+  // returned channel. Confirm whether that is intentional before wiring it in.
+  def targetCheck = targetChannel
+    | map { tup ->
+      def id = tup[0]
+
+      if (!sourceIDs.contains(id)) {
+        error (
+          "Error in module '${key}' when merging output with original state.\n" +
+          " Reason: output with id '${id}' could not be joined with source channel.\n" +
+          " If the IDs in the output channel differ from the input channel,\n" +
+          " please set `tup[1]._meta.join_id to the original ID.\n" +
+          " Original IDs in input channel: ['${sourceIDs.getItems().join("', '")}'].\n" +
+          " Unexpected ID in the output channel: '${id}'.\n" +
+          " Example input event: [\"id\", [input: file(...)]],\n" +
+          " Example output event: [\"newid\", [output: file(...), _meta: [join_id: \"id\"]]]"
+        )
+      }
+      // TODO: add link to our documentation on how to fix this
+
+      tup
+    }
+
+  // append the extra elements of the source event to the matching target event
+  sourceCheck.cross(targetChannel)
+    | map{ left, right ->
+      right + left.drop(1)
+    }
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/config/_processArgument.nf'
+/**
+ * Fill in the default settings of a single argument definition.
+ *
+ * The argument map is modified in place and returned.
+ *
+ * @param arg A map describing one argument of the Viash config.
+ *
+ * @return The same map, with defaults filled in and a 'plainName' key added.
+ */
+def _processArgument(arg) {
+  arg.multiple = arg.multiple != null ? arg.multiple : false
+  arg.required = arg.required != null ? arg.required : false
+  arg.direction = arg.direction != null ? arg.direction : "input"
+  arg.multiple_sep = arg.multiple_sep != null ? arg.multiple_sep : ";"
+  // name without the leading dashes
+  arg.plainName = arg.name.replaceAll("^-*", "")
+
+  if (arg.type == "file") {
+    arg.must_exist = arg.must_exist != null ? arg.must_exist : true
+    arg.create_parent = arg.create_parent != null ? arg.create_parent : true
+  }
+
+  // add default values to output files which haven't already got a default
+  if (arg.type == "file" && arg.direction == "output" && arg.default == null) {
+    def mult = arg.multiple ? "_*" : ""
+    // arg.default is null in this branch, so only the example can provide a file extension
+    def extSearch = arg.example != null ? arg.example : ""
+    if (extSearch instanceof List) {
+      // an empty example list has no first element to take the extension from
+      extSearch = extSearch ? extSearch[0] : ""
+    }
+    def extSearchResult = (extSearch ?: "").toString().find("\\.[^\\.]+\$")
+    def ext = extSearchResult != null ? extSearchResult : ""
+    arg.default = "\$id.\$key.${arg.plainName}${mult}${ext}"
+    if (arg.multiple) {
+      arg.default = [arg.default]
+    }
+  }
+
+  // single-valued arguments should not carry list-valued defaults or examples
+  if (!arg.multiple) {
+    if (arg.default != null && arg.default instanceof List) {
+      arg.default = arg.default[0]
+    }
+    if (arg.example != null && arg.example instanceof List) {
+      arg.example = arg.example[0]
+    }
+  }
+
+  if (arg.type == "boolean_true") {
+    arg.default = false
+  }
+  if (arg.type == "boolean_false") {
+    arg.default = true
+  }
+
+  arg
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/config/addGlobalParams.nf'
+def addGlobalArguments(config) {
+  def localConfig = [
+    "argument_groups": [
+      [
+        "name": "Nextflow input-output arguments",
+        "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
+        "arguments" : [
+          [
+            'name': '--publish_dir',
+            'required': true,
+            'type': 'string',
+            'description': 'Path to an output directory.',
+            'example': 'output/',
+            'multiple': false
+          ],
+          [
+            'name': '--param_list',
+            'required': false,
+            'type': 'string',
+            'description': '''Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.
+              |
+              |* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ ['id': 'foo', 'input': 'foo.txt'], ['id': 'bar', 'input': 'bar.txt'] ]`.
+              |* A csv file should have column names which correspond to the different arguments of this pipeline.
Example: `--param_list data.csv` with columns `id,input`.
+              |* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]`.
+              |* A yaml blob can also be passed directly as a string. Example: `--param_list "[ {'id': 'foo', 'input': 'foo.txt'}, {'id': 'bar', 'input': 'bar.txt'} ]"`.
+              |
+              |When passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.'''.stripMargin(),
+            'example': 'my_params.yaml',
+            'multiple': false,
+            'hidden': true
+          ]
+          // TODO: allow multiple: true in param_list?
+          // TODO: allow to specify a --param_list_regex to filter the param_list?
+          // TODO: allow to specify a --param_list_from_state to remap entries in the param_list?
+        ]
+      ]
+    ]
+  ]
+
+  // merge the Nextflow-specific arguments into the config and recompute the derived fields
+  return processConfig(_mergeMap(config, localConfig))
+}
+
+/**
+ * Merge rhs into a (shallow) clone of lhs.
+ *
+ * Nested maps are merged recursively, collections are concatenated and
+ * any other value from rhs replaces the value in lhs.
+ */
+def _mergeMap(Map lhs, Map rhs) {
+  return rhs.inject(lhs.clone()) { map, entry ->
+    if (map[entry.key] instanceof Map && entry.value instanceof Map) {
+      map[entry.key] = _mergeMap(map[entry.key], entry.value)
+    } else if (map[entry.key] instanceof Collection && entry.value instanceof Collection) {
+      map[entry.key] += entry.value
+    } else {
+      map[entry.key] = entry.value
+    }
+    return map
+  }
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/config/generateHelp.nf'
+// Build the help text for a single argument.
+def _generateArgumentHelp(param) {
+  // alternatives are not supported
+  // def names = param.alternatives ::: List(param.name)
+
+  // labels of the properties that hold for this argument
+  def unnamedProps = [
+    ["required parameter", param.required],
+    ["multiple values allowed", param.multiple],
+    ["output", param.direction.toLowerCase() == "output"],
+    ["file must exist", param.type == "file" && param.must_exist]
+  ].findAll{it[1]}.collect{it[0]}
+
+  def dflt = null
+  if (param.default != null) {
+    if
(param.default instanceof List) {
+      dflt = param.default.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      dflt = param.default.toString()
+    }
+  }
+  def example = null
+  if (param.example != null) {
+    if (param.example instanceof List) {
+      example = param.example.join(param.multiple_sep != null ? param.multiple_sep : ", ")
+    } else {
+      example = param.example.toString()
+    }
+  }
+  def min = param.min?.toString()
+  def max = param.max?.toString()
+
+  // escape newlines and double quotes; quote the choice when it was changed or contains a comma
+  def escapeChoice = { choice ->
+    def s1 = choice.replaceAll("\\n", "\\\\n")
+    def s2 = s1.replaceAll("\"", """\\\"""")
+    s2.contains(",") || s2 != choice ? "\"" + s2 + "\"" : s2
+  }
+  def choices = param.choices == null ?
+    null :
+    "[ " + param.choices.collect{escapeChoice(it.toString())}.join(", ") + " ]"
+
+  // only properties with a (truthy) value are printed
+  def namedPropsStr = [
+    ["type", ([param.type] + unnamedProps).join(", ")],
+    ["default", dflt],
+    ["example", example],
+    ["choices", choices],
+    ["min", min],
+    ["max", max]
+  ]
+    .findAll{it[1]}
+    .collect{"\n " + it[0] + ": " + it[1].replaceAll("\n", "\\n")}
+    .join("")
+
+  def descStr = param.description == null ?
+    "" :
+    _paragraphWrap("\n" + param.description.trim(), 80 - 8).join("\n ")
+
+  "\n --" + param.plainName +
+    namedPropsStr +
+    descStr
+}
+
+// Based on Helper.generateHelp() in Helper.scala
+def _generateHelp(config) {
+  def fun = config
+
+  // PART 1: NAME AND VERSION
+  def nameStr = fun.name +
+    (fun.version == null ? "" : " " + fun.version)
+
+  // PART 2: DESCRIPTION
+  def descrStr = fun.description == null ?
+    "" :
+    "\n\n" + _paragraphWrap(fun.description.trim(), 80).join("\n")
+
+  // PART 3: Usage
+  def usageStr = fun.usage == null ?
+    "" :
+    "\n\nUsage:\n" + fun.usage.trim()
+
+  // PART 4: Options
+  def argGroupStrs = fun.allArgumentGroups.collect{argGroup ->
+    def name = argGroup.name
+    def descriptionStr = argGroup.description == null ?
+      "" :
+      "\n " + _paragraphWrap(argGroup.description.trim(), 80-4).join("\n ") + "\n"
+    // a group may refer to an argument by name; resolve those and drop unknown names
+    def arguments = argGroup.arguments.collect{arg ->
+      arg instanceof String ? fun.allArguments.find{it.plainName == arg} : arg
+    }.findAll{it != null}
+    def argumentStrs = arguments.collect{param -> _generateArgumentHelp(param)}
+
+    "\n\n$name:" +
+      descriptionStr +
+      argumentStrs.join("\n")
+  }
+
+  // FINAL: combine
+  def out = nameStr +
+    descrStr +
+    usageStr +
+    argGroupStrs.join("")
+
+  return out
+}
+
+// based on Format._paragraphWrap
+/**
+ * Wrap a text to a maximum line length.
+ *
+ * Every "\n"-separated paragraph is wrapped separately; words are split on
+ * single whitespace characters and a word is never broken up.
+ *
+ * @param str The text to wrap.
+ * @param maxLength The maximum length of a line.
+ *
+ * @return A list of lines.
+ */
+def _paragraphWrap(str, maxLength) {
+  def outLines = []
+  str.split("\n").each{par ->
+    def words = par.split("\\s").toList()
+
+    // a whitespace-only paragraph yields no words at all; keep it as an empty line
+    if (words.isEmpty()) {
+      outLines.add("")
+      return
+    }
+
+    // removeAt(0) always takes the first word, whereas the end of the list that
+    // List.pop() removes from differs between Groovy versions
+    def line = words.removeAt(0)
+    while(!words.isEmpty()) {
+      def word = words.removeAt(0)
+      if (line.length() + word.length() + 1 <= maxLength) {
+        line = line + " " + word
+      } else {
+        outLines.add(line)
+        line = word
+      }
+    }
+    // flush the last (possibly only) line of the paragraph
+    outLines.add(line)
+  }
+  return outLines
+}
+
+// Print the help message and exit when the 'help' parameter is set.
+def helpMessage(config) {
+  if (params.containsKey("help") && params.help) {
+    def mergedConfig = addGlobalArguments(config)
+    def helpStr = _generateHelp(mergedConfig)
+    println(helpStr)
+    exit 0
+  }
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/config/processConfig.nf'
+/**
+ * Fill in defaults and derived fields of a parsed Viash config.
+ *
+ * The config map is modified in place: the 'arguments' and 'argument_groups'
+ * are processed with _processArgument and the derived keys 'allArguments' and
+ * 'allArgumentGroups' are added.
+ *
+ * @param config The parsed Viash config.
+ *
+ * @return The same config map.
+ */
+def processConfig(config) {
+  // set defaults for arguments
+  config.arguments =
+    (config.arguments ?: []).collect{_processArgument(it)}
+
+  // set defaults for argument_group arguments
+  config.argument_groups =
+    (config.argument_groups ?: []).collect{grp ->
+      grp.arguments = (grp.arguments ?: []).collect{_processArgument(it)}
+      grp
+    }
+
+  // create combined arguments list
+  config.allArguments =
+    config.arguments +
+    config.argument_groups.collectMany{it.arguments}
+
+  // add missing argument groups (based on Functionality::allArgumentGroups())
+  def argGroups = config.argument_groups
+  if (argGroups.any{it.name.toLowerCase() == "arguments"}) {
+    // an "arguments" group already exists: append the ungrouped arguments to it
+    argGroups = argGroups.collect{ grp ->
+      if (grp.name.toLowerCase() == "arguments") {
+        grp = grp + [
+          arguments: grp.arguments + config.arguments
+        ]
+      }
+      grp
+    }
+  } else {
+    // otherwise the ungrouped arguments get a group of their own
+    argGroups = argGroups + [
+      name: "Arguments",
+      arguments: config.arguments
+    ]
+  }
+  config.allArgumentGroups = argGroups
+
+  config
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/config/readConfig.nf'
+
+// Read a Viash config (by default 'config.vsh.yaml' in the module directory) and process it.
+def readConfig(file) {
+  def config = readYaml(file ?: moduleDir.resolve("config.vsh.yaml"))
+  processConfig(config)
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_resolveSiblingIfNotAbsolute.nf'
+/**
+ * Resolve a path relative to the current file.
+ *
+ * @param str The path to resolve, as a String.
+ * @param parentPath The path to resolve relative to, as a Path.
+ *
+ * @return The path that may have been resolved, as a Path.
+ *   Values that are not a String are returned unchanged.
+ */
+def _resolveSiblingIfNotAbsolute(str, parentPath) {
+  if (str !instanceof String) {
+    return str
+  }
+  if (!_stringIsAbsolutePath(str)) {
+    return parentPath.resolveSibling(str)
+  } else {
+    return file(str, hidden: true)
+  }
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/_stringIsAbsolutePath.nf'
+/**
+ * Check whether a path as a string is absolute.
+ *
+ * In the past, we tried using `file(., relative: true).isAbsolute()`,
+ * but the 'relative' option was added in 22.10.0.
+ *
+ * @param path The path to check, as a String.
+ *
+ * @return Whether the path is absolute, as a boolean.
+ */
+def _stringIsAbsolutePath(path) {
+  // an optional '<protocol>:' prefix followed by a slash and at least one more character
+  def _resolve_URL_PROTOCOL = ~/^([a-zA-Z][a-zA-Z0-9]*:)?\\/.+/
+
+  assert path instanceof String
+  return _resolve_URL_PROTOCOL.matcher(path).matches()
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/collectTraces.nf'
+// Trace observer that appends a copy of the trace record of every completed
+// or cached process to the list passed to the constructor.
+class CustomTraceObserver implements nextflow.trace.TraceObserver {
+  // list (owned by the caller) that receives the trace records
+  List traces
+
+  CustomTraceObserver(List traces) {
+    this.traces = traces
+  }
+
+  @Override
+  void onProcessComplete(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) {
+    def trace2 = trace.store.clone()
+    // the script is blanked in the copy that is stored
+    trace2.script = null
+    traces.add(trace2)
+  }
+
+  @Override
+  void onProcessCached(nextflow.processor.TaskHandler handler, nextflow.trace.TraceRecord trace) {
+    def trace2 = trace.store.clone()
+    // the script is blanked in the copy that is stored
+    trace2.script = null
+    traces.add(trace2)
+  }
+}
+
+// Register a CustomTraceObserver on the session and return the (synchronized)
+// list to which it adds the trace records.
+def collectTraces() {
+  def traces = Collections.synchronizedList([])
+
+  // add custom trace observer which stores traces in the traces object
+  session.observers.add(new CustomTraceObserver(traces))
+
+  traces
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/deepClone.nf'
+/**
+  * Performs a deep clone of the given object.
+  * Lists and maps are rebuilt by iterateMap; leaves are cloned when they are Cloneable.
+  * @param x an object
+  */
+def deepClone(x) {
+  iterateMap(x, {it instanceof Cloneable ? it.clone() : it})
+}
+// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getPublishDir.nf'
+// Return params.publish_dir if set, else params.publishDir, else null.
+def getPublishDir() {
+  return params.containsKey("publish_dir") ? params.publish_dir :
+    params.containsKey("publishDir") ?
params.publishDir :
+    null
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/getRootDir.nf'
+
+// Recurse upwards until we find a '.build.yaml' file
+// Returns the path of that file, or null when the filesystem root is reached without finding one.
+def _findBuildYamlFile(pathPossiblySymlink) {
+  def path = pathPossiblySymlink.toRealPath()
+  def child = path.resolve(".build.yaml")
+  if (java.nio.file.Files.isDirectory(path) && java.nio.file.Files.exists(child)) {
+    return child
+  } else {
+    def parent = path.getParent()
+    if (parent == null) {
+      return null
+    } else {
+      return _findBuildYamlFile(parent)
+    }
+  }
+}
+
+// get the root of the target folder
+def getRootDir() {
+  // 'dir' is the path of the '.build.yaml' file itself; its parent is the root directory
+  def dir = _findBuildYamlFile(meta.resources_dir)
+  assert dir != null: "Could not find .build.yaml in the folder structure"
+  dir.getParent()
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/iterateMap.nf'
+/**
+  * Recursively apply a function over the leaves of an object.
+  * Map keys are converted to Strings in the result.
+  * @param obj The object to iterate over.
+  * @param fun The function to apply to each value.
+  * @return The object with the function applied to each value.
+  */
+def iterateMap(obj, fun) {
+  if (obj instanceof List && obj !instanceof String) {
+    return obj.collect{item ->
+      iterateMap(item, fun)
+    }
+  } else if (obj instanceof Map) {
+    return obj.collectEntries{key, item ->
+      [key.toString(), iterateMap(item, fun)]
+    }
+  } else {
+    return fun(obj)
+  }
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/functions/niceView.nf'
+/**
+  * A view for printing the event of each channel as a YAML blob.
+  * This is useful for debugging.
+  */
+def niceView() {
+  workflow niceViewWf {
+    take: input
+    main:
+      output = input
+        | view{toYamlBlob(it)}
+    emit: output
+  }
+  return niceViewWf
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readCsv.nf'
+
+/**
+ * Read a csv file into a list of maps.
+ *
+ * Lines starting with '#' are skipped. The first remaining line is the header;
+ * every following line becomes a map from column name to value. Empty fields
+ * are left out of the map and surrounding double quotes are removed.
+ *
+ * @param file_path The csv file, as a Path or as something `file()` can resolve.
+ *
+ * @return A list with one map per data row.
+ */
+def readCsv(file_path) {
+  def output = []
+  def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path
+
+  // todo: allow escaped quotes in string
+  // todo: allow single quotes?
+  // split on commas that are followed by an even number of double quotes,
+  // i.e. commas that are not inside a quoted field
+  def splitRegex = java.util.regex.Pattern.compile(''',(?=(?:[^"]*"[^"]*")*[^"]*$)''')
+  def removeQuote = java.util.regex.Pattern.compile('''"(.*)"''')
+
+  def br = java.nio.file.Files.newBufferedReader(inputFile)
+
+  // close the reader on every exit path, including a failing assert
+  try {
+    def row = -1
+    def header = null
+    while (br.ready() && header == null) {
+      def line = br.readLine()
+      row++
+      if (!line.startsWith("#")) {
+        header = splitRegex.split(line, -1).collect{field ->
+          // declared locally so that it does not end up in the script binding
+          def m = removeQuote.matcher(field)
+          m.find() ? m.replaceFirst('$1') : field
+        }
+      }
+    }
+    assert header != null: "CSV file should contain a header"
+
+    while (br.ready()) {
+      def line = br.readLine()
+      row++
+      if (line == null) {
+        break
+      }
+
+      if (!line.startsWith("#")) {
+        def predata = splitRegex.split(line, -1)
+        def data = predata.collect{field ->
+          if (field == "") {
+            return null
+          }
+          def m = removeQuote.matcher(field)
+          if (m.find()) {
+            return m.replaceFirst('$1')
+          } else {
+            return field
+          }
+        }
+        assert header.size() == data.size(): "Row $row should contain the same number as fields as the header"
+
+        // empty fields (null) are dropped from the row
+        def dataMap = [header, data].transpose().collectEntries().findAll{it.value != null}
+        output.add(dataMap)
+      }
+    }
+  } finally {
+    br.close()
+  }
+
+  output
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJson.nf'
+def readJson(file_path) {
+  def inputFile = file_path !instanceof Path ?
file(file_path, hidden: true) : file_path
+  def jsonSlurper = new groovy.json.JsonSlurper()
+  jsonSlurper.parse(inputFile)
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readJsonBlob.nf'
+// Parse a JSON string.
+def readJsonBlob(str) {
+  def jsonSlurper = new groovy.json.JsonSlurper()
+  jsonSlurper.parseText(str)
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readTaggedYaml.nf'
+// Custom constructor to modify how certain objects are parsed from YAML
+class CustomConstructor extends org.yaml.snakeyaml.constructor.Constructor {
+  // directory against which '!file' values are resolved; may be null
+  Path root
+
+  // Turns a '!file' scalar into a Path, resolved against 'root' when it is set.
+  class ConstructPath extends org.yaml.snakeyaml.constructor.AbstractConstruct {
+    public Object construct(org.yaml.snakeyaml.nodes.Node node) {
+      String filename = (String) constructScalar(node);
+      if (root != null) {
+        return root.resolve(filename);
+      }
+      return java.nio.file.Paths.get(filename);
+    }
+  }
+
+  CustomConstructor(org.yaml.snakeyaml.LoaderOptions options, Path root) {
+    super(options)
+    this.root = root
+    // Handling !file tag and parse it back to a File type
+    this.yamlConstructors.put(new org.yaml.snakeyaml.nodes.Tag("!file"), new ConstructPath())
+  }
+}
+
+// Read a YAML file in which '!file' values are resolved against the directory of that file.
+def readTaggedYaml(Path path) {
+  def options = new org.yaml.snakeyaml.LoaderOptions()
+  def constructor = new CustomConstructor(options, path.getParent())
+  def yaml = new org.yaml.snakeyaml.Yaml(constructor)
+  return yaml.load(path.text)
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYaml.nf'
+def readYaml(file_path) {
+  def inputFile = file_path !instanceof Path ? file(file_path, hidden: true) : file_path
+  def yamlSlurper = new org.yaml.snakeyaml.Yaml()
+  yamlSlurper.load(inputFile)
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/readYamlBlob.nf'
+// Parse a YAML string.
+def readYamlBlob(str) {
+  def yamlSlurper = new org.yaml.snakeyaml.Yaml()
+  yamlSlurper.load(str)
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toJsonBlob.nf'
+// Serialise data to a JSON string.
+String toJsonBlob(data) {
+  return groovy.json.JsonOutput.toJson(data)
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toTaggedYamlBlob.nf'
+// Custom representer to modify how certain objects are represented in YAML
+class CustomRepresenter extends org.yaml.snakeyaml.representer.Representer {
+  // when set, paths are written relative to this path
+  Path relativizer
+
+  // Writes a Path or File as a scalar tagged with '!file'.
+  class RepresentPath implements org.yaml.snakeyaml.representer.Represent {
+    public String getFileName(Object obj) {
+      if (obj instanceof File) {
+        obj = ((File) obj).toPath();
+      }
+      if (obj !instanceof Path) {
+        throw new IllegalArgumentException("Object: " + obj + " is not a Path or File");
+      }
+      def path = (Path) obj;
+
+      if (relativizer != null) {
+        return relativizer.relativize(path).toString()
+      } else {
+        return path.toString()
+      }
+    }
+
+    public org.yaml.snakeyaml.nodes.Node representData(Object data) {
+      String filename = getFileName(data);
+      def tag = new org.yaml.snakeyaml.nodes.Tag("!file");
+      return representScalar(tag, filename);
+    }
+  }
+  CustomRepresenter(org.yaml.snakeyaml.DumperOptions options, Path relativizer) {
+    super(options)
+    this.relativizer = relativizer
+    // NOTE(review): sun.nio.fs.UnixPath is a JDK-internal class; presumably it is
+    // registered because the representer lookup is by concrete class - confirm
+    this.representers.put(sun.nio.fs.UnixPath, new RepresentPath())
+    this.representers.put(Path, new RepresentPath())
+    this.representers.put(File, new RepresentPath())
+  }
+}
+
+// Serialise data to YAML, writing paths as '!file' scalars without relativizing them.
+String toTaggedYamlBlob(data) {
+  return toRelativeTaggedYamlBlob(data, null)
+}
+// Serialise data to YAML, writing paths as '!file' scalars relative to 'relativizer' (when not null).
+String toRelativeTaggedYamlBlob(data, Path relativizer) {
+  def options = new org.yaml.snakeyaml.DumperOptions()
+
options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK)
+  def representer = new CustomRepresenter(options, relativizer)
+  def yaml = new org.yaml.snakeyaml.Yaml(representer, options)
+  return yaml.dump(data)
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/toYamlBlob.nf'
+// Serialise data to a block-style YAML string; Path values are written as plain strings.
+String toYamlBlob(data) {
+  def options = new org.yaml.snakeyaml.DumperOptions()
+  options.setDefaultFlowStyle(org.yaml.snakeyaml.DumperOptions.FlowStyle.BLOCK)
+  options.setPrettyFlow(true)
+  def yaml = new org.yaml.snakeyaml.Yaml(options)
+  def cleanData = iterateMap(data, { it instanceof Path ? it.toString() : it })
+  return yaml.dump(cleanData)
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeJson.nf'
+/**
+ * Write data to a file as JSON.
+ *
+ * @param data The data to write. Must not be null; empty maps and lists are allowed.
+ * @param file The file to write to. Must not be null.
+ */
+void writeJson(data, file) {
+  // compare with null explicitly: Groovy truth would also reject empty collections, 0 and ""
+  assert data != null: "writeJson: data should not be null"
+  assert file != null: "writeJson: file should not be null"
+  file.write(toJsonBlob(data))
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/readwrite/writeYaml.nf'
+/**
+ * Write data to a file as YAML.
+ *
+ * @param data The data to write. Must not be null; empty maps and lists are allowed.
+ * @param file The file to write to. Must not be null.
+ */
+void writeYaml(data, file) {
+  // compare with null explicitly: Groovy truth would also reject empty collections, 0 and ""
+  assert data != null: "writeYaml: data should not be null"
+  assert file != null: "writeYaml: file should not be null"
+  file.write(toYamlBlob(data))
+}
+
+// helper file: 'src/main/resources/io/viash/runners/nextflow/states/findStates.nf'
+def findStates(Map params, Map config) {
+  def auto_config = deepClone(config)
+  def auto_params = deepClone(params)
+
+  auto_config = auto_config.clone()
+  // override arguments
+  auto_config.argument_groups = []
+  auto_config.arguments = [
+    [
+      type: "string",
+      name: "--id",
+      description: "A dummy identifier",
+      required: false
+    ],
+    [
+      type: "file",
+      name: "--input_states",
+      example: "/path/to/input/directory/**/state.yaml",
+      description: "Path to input directory containing the datasets to be integrated.",
+      required: true,
+      multiple: true,
+      multiple_sep: ";"
+    ],
+    [
+      type: "string",
+      name: "--filter",
+      example: "foo/.*/state.yaml",
+      description: "Regex to filter state files by path.",
+
required: false + ], + // to do: make this a yaml blob? + [ + type: "string", + name: "--rename_keys", + example: ["newKey1:oldKey1", "newKey2:oldKey2"], + description: "Rename keys in the detected input files. This is useful if the input files do not match the set of input arguments of the workflow.", + required: false, + multiple: true, + multiple_sep: ";" + ], + [ + type: "string", + name: "--settings", + example: '{"output_dataset": "dataset.h5ad", "k": 10}', + description: "Global arguments as a JSON glob to be passed to all components.", + required: false + ] + ] + if (!(auto_params.containsKey("id"))) { + auto_params["id"] = "auto" + } + + // run auto config through processConfig once more + auto_config = processConfig(auto_config) + + workflow findStatesWf { + helpMessage(auto_config) + + output_ch = + channelFromParams(auto_params, auto_config) + | flatMap { autoId, args -> + + def globalSettings = args.settings ? readYamlBlob(args.settings) : [:] + + // look for state files in input dir + def stateFiles = args.input_states + + // filter state files by regex + if (args.filter) { + stateFiles = stateFiles.findAll{ stateFile -> + def stateFileStr = stateFile.toString() + def matcher = stateFileStr =~ args.filter + matcher.matches()} + } + + // read in states + def states = stateFiles.collect { stateFile -> + def state_ = readTaggedYaml(stateFile) + [state_.id, state_] + } + + // construct renameMap + if (args.rename_keys) { + def renameMap = args.rename_keys.collectEntries{renameString -> + def split = renameString.split(":") + assert split.size() == 2: "Argument 'rename_keys' should be of the form 'newKey:oldKey', or 'newKey:oldKey;newKey:oldKey' in case of multiple values" + split + } + + // rename keys in state, only let states through which have all keys + // also add global settings + states = states.collectMany{id, state -> + def newState = [:] + + for (key in renameMap.keySet()) { + def origKey = renameMap[key] + if (!(state.containsKey(origKey))) { + 
return [] + } + newState[key] = state[origKey] + } + + [[id, globalSettings + newState]] + } + } + + states + } + emit: + output_ch + } + + return findStatesWf +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/joinStates.nf' +def joinStates(Closure apply_) { + workflow joinStatesWf { + take: input_ch + main: + output_ch = input_ch + | toSortedList + | filter{ it.size() > 0 } + | map{ tups -> + def ids = tups.collect{it[0]} + def states = tups.collect{it[1]} + apply_(ids, states) + } + + emit: output_ch + } + return joinStatesWf +} +// helper file: 'src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf' +def publishFiles(Map args) { + def key_ = args.get("key") + + assert key_ != null : "publishFiles: key must be specified" + + workflow publishFilesWf { + take: input_ch + main: + input_ch + | map { tup -> + def id_ = tup[0] + def state_ = tup[1] + + // the input files and the target output filenames + def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose() + def inputFiles_ = inputoutputFilenames_[0] + def outputFilenames_ = inputoutputFilenames_[1] + + [id_, inputFiles_, outputFilenames_] + } + | publishFilesProc + emit: input_ch + } + return publishFilesWf +} + +process publishFilesProc { + // todo: check publishpath? + publishDir path: "${getPublishDir()}/", mode: "copy" + tag "$id" + input: + tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles) + output: + tuple val(id), path{outputFiles} + script: + def copyCommands = [ + inputFiles instanceof List ? inputFiles : [inputFiles], + outputFiles instanceof List ? 
/**
 * Create a workflow that publishes the output files of a component, using
 * the filename templates the user supplied for each output argument.
 *
 * This assumes that the state contains no other values other than those
 * specified in the config.
 *
 * @param args  a map with
 *              - config: the component config (required)
 *              - key: the module key used in filename templates (defaults to config.name)
 * @return the 'publishFilesSimpleWf' workflow; it emits its input channel unchanged
 */
def publishFilesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishFilesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishFilesByConfig: key must be specified"

  workflow publishFilesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // String.replaceAll interprets '$' and '\' in the replacement
          // string, so the id and key have to be quoted to be inserted literally
          def safeId_ = java.util.regex.Matcher.quoteReplacement(id_.toString())
          def safeKey_ = java.util.regex.Matcher.quoteReplacement(key_.toString())

          // the processed state is a list of maps with entries
          // - inputPath: a List[Path]
          // - outputFilename: a List[String]
          // (inputPath, outputFilename) are the files that will be copied from src to dest
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's an
                // optional argument for which the component did
                // not generate any output OR multiple channels were emitted
                // and the output was just not added to using the channel
                // that is now being parsed
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it should be stored
                // in the state as-is, but is not something that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[inputPath: [], outputFilename: []]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', safeId_)
                  .replaceAll('\\$\\{id\\}', safeId_)
                  .replaceAll('\\$key', safeKey_)
                  .replaceAll('\\$\\{key\\}', safeKey_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def inputPath = val instanceof File ? val.toPath() : val
                    [inputPath: inputPath, outputFilename: filename_ix]
                  }
                  // turn the list of per-file maps into one map of lists
                  def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
                    [key, outputPerFile.collect{dic -> dic[key]}]
                  }
                  return [[key: plainName_] + transposedOutputs]
                } else {
                  def inputPath = value instanceof File ? value.toPath() : value
                  return [[inputPath: [inputPath], outputFilename: [filename]]]
                }
              }

          def inputPaths = processedState.collectMany{it.inputPath}
          def outputFilenames = processedState.collectMany{it.outputFilename}

          [id_, inputPaths, outputFilenames]
        }
        | publishFilesProc
    emit: input_ch
  }
  return publishFilesSimpleWf
}
/**
 * Create a workflow that stores the state of every [id, state] tuple as a
 * tagged yaml file.
 *
 * @param args  a map with
 *              - key: the module key used in the filename template (required)
 *              - output_state / outputState: filename template of the yaml
 *                file, defaults to '$id.$key.state.yaml'
 * @return the 'publishStatesWf' workflow; it emits its input channel unchanged
 */
def publishStates(Map args) {
  def key_ = args.get("key")
  def yamlTemplate_ = args.get("output_state", args.get("outputState", '$id.$key.state.yaml'))

  assert key_ != null : "publishStates: key must be specified"

  workflow publishStatesWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1]

          // String.replaceAll interprets '$' and '\' in the replacement
          // string, so the id and key have to be quoted to be inserted literally
          def safeId_ = java.util.regex.Matcher.quoteReplacement(id_.toString())
          def safeKey_ = java.util.regex.Matcher.quoteReplacement(key_.toString())

          def yamlFilename = yamlTemplate_
            .replaceAll('\\$id', safeId_)
            .replaceAll('\\$\\{id\\}', safeId_)
            .replaceAll('\\$key', safeKey_)
            .replaceAll('\\$\\{key\\}', safeKey_)

          // TODO: do the pathnames in state_ match up with the published output filenames?
          // NOTE(review): the relativizer is the yaml file path itself rather
          // than its parent directory -- confirm this is intended

          // convert state to yaml blob
          def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))

          [id_, yamlBlob_, yamlFilename]
        }
        | publishStatesProc
    emit: input_ch
  }
  return publishStatesWf
}
/**
 * Create a workflow that stores the state of a component as a tagged yaml
 * file, in which every output file is referred to by its published
 * filename (relative to the directory of the yaml file).
 *
 * This assumes that the state contains no other values other than those
 * specified in the config.
 *
 * @param args  a map with
 *              - config: the component config (required)
 *              - key: the module key used in filename templates (defaults to config.name)
 * @return the 'publishStatesSimpleWf' workflow; it emits its input channel unchanged
 */
def publishStatesByConfig(Map args) {
  def config = args.get("config")
  assert config != null : "publishStatesByConfig: config must be specified"

  def key_ = args.get("key", config.name)
  assert key_ != null : "publishStatesByConfig: key must be specified"

  workflow publishStatesSimpleWf {
    take: input_ch
    main:
      input_ch
        | map { tup ->
          def id_ = tup[0]
          def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
          def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']

          // String.replaceAll interprets '$' and '\' in the replacement
          // string, so the id and key have to be quoted to be inserted literally
          def safeId_ = java.util.regex.Matcher.quoteReplacement(id_.toString())
          def safeKey_ = java.util.regex.Matcher.quoteReplacement(key_.toString())

          // TODO: allow overriding the state.yaml template
          // TODO TODO: if auto.publish == "state", add output_state as an argument
          def yamlTemplate = params.containsKey("output_state") ? params.output_state : '$id.$key.state.yaml'
          def yamlFilename = yamlTemplate
            .replaceAll('\\$id', safeId_)
            .replaceAll('\\$\\{id\\}', safeId_)
            .replaceAll('\\$key', safeKey_)
            .replaceAll('\\$\\{key\\}', safeKey_)
          def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()

          // the processed state is a list of [key, value] tuples, where
          //   - key is a String
          //   - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
          //   - (key, value) are the tuples that will be saved to the state.yaml file
          def processedState =
            config.allArguments
              .findAll { it.direction == "output" }
              .collectMany { par ->
                def plainName_ = par.plainName
                // if the state does not contain the key, it's an
                // optional argument for which the component did
                // not generate any output
                if (!state_.containsKey(plainName_)) {
                  return []
                }
                def value = state_[plainName_]
                // if the parameter is not a file, it should be stored
                // in the state as-is, but is not something that needs
                // to be copied from the source path to the dest path
                if (par.type != "file") {
                  return [[key: plainName_, value: value]]
                }
                // if the orig state does not contain this filename,
                // it's an optional argument for which the user specified
                // that it should not be returned as a state
                if (!origState_.containsKey(plainName_)) {
                  return []
                }
                def filenameTemplate = origState_[plainName_]
                // if the parameter is multiple: true, fetch the template
                if (par.multiple && filenameTemplate instanceof List) {
                  filenameTemplate = filenameTemplate[0]
                }
                // instantiate the template
                def filename = filenameTemplate
                  .replaceAll('\\$id', safeId_)
                  .replaceAll('\\$\\{id\\}', safeId_)
                  .replaceAll('\\$key', safeKey_)
                  .replaceAll('\\$\\{key\\}', safeKey_)
                if (par.multiple) {
                  // if the parameter is multiple: true, the filename
                  // should contain a wildcard '*' that is replaced with
                  // the index of the file
                  assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
                  def outputPerFile = value.withIndex().collect{ val, ix ->
                    def filename_ix = filename.replace("*", ix.toString())
                    def value_ = java.nio.file.Paths.get(filename_ix)
                    // if id contains a slash
                    if (yamlDir != null) {
                      value_ = yamlDir.relativize(value_)
                    }
                    return value_
                  }
                  return [["key": plainName_, "value": outputPerFile]]
                } else {
                  def value_ = java.nio.file.Paths.get(filename)
                  // if id contains a slash
                  if (yamlDir != null) {
                    value_ = yamlDir.relativize(value_)
                  }
                  return [["key": plainName_, value: value_]]
                }
              }

          def updatedState_ = processedState.collectEntries{[it.key, it.value]}

          // convert state to yaml blob
          def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)

          [id_, yamlBlob_, yamlFilename]
        }
        | publishStatesProc
    emit: input_ch
  }
  return publishStatesSimpleWf
}
Found: class ${fun.getClass()}" + + // if fun is a List, convert to map + if (fun instanceof List) { + // check whether fun is a list[string] + assert fun.every{it instanceof CharSequence} : "Error in setState: argument is a List, but not all elements are Strings" + fun = fun.collectEntries{[it, it]} + } + + // if fun is a map, convert to closure + if (fun instanceof Map) { + // check whether fun is a map[string, string] + assert fun.values().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all values are Strings" + assert fun.keySet().every{it instanceof CharSequence} : "Error in setState: argument is a Map, but not all keys are Strings" + def funMap = fun.clone() + // turn the map into a closure to be used later on + fun = { id_, state_ -> + assert state_ instanceof Map : "Error in setState: the state is not a Map" + funMap.collectMany{newkey, origkey -> + if (state_.containsKey(origkey)) { + [[newkey, state_[origkey]]] + } else { + [] + } + }.collectEntries() + } + } + + map { tup -> + def id = tup[0] + def state = tup[1] + def unfilteredState = fun(id, state) + def newState = unfilteredState.findAll{key, val -> val != null} + [id, newState] + tup.drop(2) + } +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processAuto.nf' +// TODO: unit test processAuto +def processAuto(Map auto) { + // remove null values + auto = auto.findAll{k, v -> v != null} + + // check for unexpected keys + def expectedKeys = ["simplifyInput", "simplifyOutput", "transcript", "publish"] + def unexpectedKeys = auto.keySet() - expectedKeys + assert unexpectedKeys.isEmpty(), "unexpected keys in auto: '${unexpectedKeys.join("', '")}'" + + // check auto.simplifyInput + assert auto.simplifyInput instanceof Boolean, "auto.simplifyInput must be a boolean" + + // check auto.simplifyOutput + assert auto.simplifyOutput instanceof Boolean, "auto.simplifyOutput must be a boolean" + + // check auto.transcript + assert auto.transcript 
/**
 * Assert that a map only contains expected keys and contains all required keys.
 *
 * @param map           the object to check; must be a Map
 * @param expectedKeys  the keys that are allowed to occur in the map
 * @param requiredKeys  the keys that must occur in the map
 * @param mapName       name of the map, only used in the error messages
 */
def assertMapKeys(map, expectedKeys, requiredKeys, mapName) {
  assert map instanceof Map : "Expected argument '$mapName' to be a Map. Found: class ${map.getClass()}"
  map.forEach { key, val ->
    assert key in expectedKeys : "Unexpected key '$key' in ${mapName ? mapName + " " : ""}map"
  }
  requiredKeys.forEach { requiredKey ->
    // the message has to interpolate the closure parameter: there is no
    // 'key' variable in this scope, so referring to it would raise a
    // MissingPropertyException instead of the intended assertion message
    assert map.containsKey(requiredKey) : "Missing required key '$requiredKey' in ${mapName ? mapName + " " : ""}map"
  }
}
/cluster/bin/setup" + */ + if (drctv.containsKey("beforeScript")) { + assert drctv["beforeScript"] instanceof CharSequence + } + + /* DIRECTIVE cache + accepted examples: + - true + - false + - "deep" + - "lenient" + */ + if (drctv.containsKey("cache")) { + assert drctv["cache"] instanceof CharSequence || drctv["cache"] instanceof Boolean + if (drctv["cache"] instanceof CharSequence) { + assert drctv["cache"] in ["deep", "lenient"] : "Unexpected value for cache" + } + } + + /* DIRECTIVE conda + accepted examples: + - "bwa=0.7.15" + - "bwa=0.7.15 fastqc=0.11.5" + - ["bwa=0.7.15", "fastqc=0.11.5"] + */ + if (drctv.containsKey("conda")) { + if (drctv["conda"] instanceof List) { + drctv["conda"] = drctv["conda"].join(" ") + } + assert drctv["conda"] instanceof CharSequence + } + + /* DIRECTIVE container + accepted examples: + - "foo/bar:tag" + - [ registry: "reg", image: "im", tag: "ta" ] + is transformed to "reg/im:ta" + - [ image: "im" ] + is transformed to "im:latest" + */ + if (drctv.containsKey("container")) { + assert drctv["container"] instanceof Map || drctv["container"] instanceof CharSequence + if (drctv["container"] instanceof Map) { + def m = drctv["container"] + assertMapKeys(m, [ "registry", "image", "tag" ], ["image"], "container") + def part1 = + System.getenv('OVERRIDE_CONTAINER_REGISTRY') ? System.getenv('OVERRIDE_CONTAINER_REGISTRY') + "/" : + params.containsKey("override_container_registry") ? params["override_container_registry"] + "/" : // todo: remove? + m.registry ? m.registry + "/" : + "" + def part2 = m.image + def part3 = m.tag ? 
":" + m.tag : ":latest" + drctv["container"] = part1 + part2 + part3 + } + } + + /* DIRECTIVE containerOptions + accepted examples: + - "--foo bar" + - ["--foo bar", "-f b"] + */ + if (drctv.containsKey("containerOptions")) { + if (drctv["containerOptions"] instanceof List) { + drctv["containerOptions"] = drctv["containerOptions"].join(" ") + } + assert drctv["containerOptions"] instanceof CharSequence + } + + /* DIRECTIVE cpus + accepted examples: + - 1 + - 10 + */ + if (drctv.containsKey("cpus")) { + assert drctv["cpus"] instanceof Integer + } + + /* DIRECTIVE disk + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("disk")) { + assert drctv["disk"] instanceof CharSequence + // assert drctv["disk"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE echo + accepted examples: + - true + - false + */ + if (drctv.containsKey("echo")) { + assert drctv["echo"] instanceof Boolean + } + + /* DIRECTIVE errorStrategy + accepted examples: + - "terminate" + - "finish" + */ + if (drctv.containsKey("errorStrategy")) { + assert drctv["errorStrategy"] instanceof CharSequence + assert drctv["errorStrategy"] in ["terminate", "finish", "ignore", "retry"] : "Unexpected value for errorStrategy" + } + + /* DIRECTIVE executor + accepted examples: + - "local" + - "sge" + */ + if (drctv.containsKey("executor")) { + assert drctv["executor"] instanceof CharSequence + assert drctv["executor"] in ["local", "sge", "uge", "lsf", "slurm", "pbs", "pbspro", "moab", "condor", "nqsii", "ignite", "k8s", "awsbatch", "google-pipelines"] : "Unexpected value for executor" + } + + /* DIRECTIVE machineType + accepted examples: + - "n1-highmem-8" + */ + if (drctv.containsKey("machineType")) { + assert drctv["machineType"] instanceof CharSequence + } + + /* DIRECTIVE maxErrors + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxErrors")) { + assert drctv["maxErrors"] instanceof Integer + } + + /* DIRECTIVE 
maxForks + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxForks")) { + assert drctv["maxForks"] instanceof Integer + } + + /* DIRECTIVE maxRetries + accepted examples: + - 1 + - 3 + */ + if (drctv.containsKey("maxRetries")) { + assert drctv["maxRetries"] instanceof Integer + } + + /* DIRECTIVE memory + accepted examples: + - "1 GB" + - "2TB" + - "3.2KB" + - "10.B" + */ + if (drctv.containsKey("memory")) { + assert drctv["memory"] instanceof CharSequence + // assert drctv["memory"].matches("[0-9]+(\\.[0-9]*)? *[KMGTPEZY]?B") + // ^ does not allow closures + } + + /* DIRECTIVE module + accepted examples: + - "ncbi-blast/2.2.27" + - "ncbi-blast/2.2.27:t_coffee/10.0" + - ["ncbi-blast/2.2.27", "t_coffee/10.0"] + */ + if (drctv.containsKey("module")) { + if (drctv["module"] instanceof List) { + drctv["module"] = drctv["module"].join(":") + } + assert drctv["module"] instanceof CharSequence + } + + /* DIRECTIVE penv + accepted examples: + - "smp" + */ + if (drctv.containsKey("penv")) { + assert drctv["penv"] instanceof CharSequence + } + + /* DIRECTIVE pod + accepted examples: + - [ label: "key", value: "val" ] + - [ annotation: "key", value: "val" ] + - [ env: "key", value: "val" ] + - [ [label: "l", value: "v"], [env: "e", value: "v"]] + */ + if (drctv.containsKey("pod")) { + if (drctv["pod"] instanceof Map) { + drctv["pod"] = [ drctv["pod"] ] + } + assert drctv["pod"] instanceof List + drctv["pod"].forEach { pod -> + assert pod instanceof Map + // TODO: should more checks be added? + // See https://www.nextflow.io/docs/latest/process.html?highlight=directives#pod + // e.g. does it contain 'label' and 'value', or 'annotation' and 'value', or ...? 
+ } + } + + /* DIRECTIVE publishDir + accepted examples: + - [] + - [ [ path: "foo", enabled: true ], [ path: "bar", enabled: false ] ] + - "/path/to/dir" + is transformed to [[ path: "/path/to/dir" ]] + - [ path: "/path/to/dir", mode: "cache" ] + is transformed to [[ path: "/path/to/dir", mode: "cache" ]] + */ + // TODO: should we also look at params["publishDir"]? + if (drctv.containsKey("publishDir")) { + def pblsh = drctv["publishDir"] + + // check different options + assert pblsh instanceof List || pblsh instanceof Map || pblsh instanceof CharSequence + + // turn into list if not already so + // for some reason, 'if (!pblsh instanceof List) pblsh = [ pblsh ]' doesn't work. + pblsh = pblsh instanceof List ? pblsh : [ pblsh ] + + // check elements of publishDir + pblsh = pblsh.collect{ elem -> + // turn into map if not already so + elem = elem instanceof CharSequence ? [ path: elem ] : elem + + // check types and keys + assert elem instanceof Map : "Expected publish argument '$elem' to be a String or a Map. Found: class ${elem.getClass()}" + assertMapKeys(elem, [ "path", "mode", "overwrite", "pattern", "saveAs", "enabled" ], ["path"], "publishDir") + + // check elements in map + assert elem.containsKey("path") + assert elem["path"] instanceof CharSequence + if (elem.containsKey("mode")) { + assert elem["mode"] instanceof CharSequence + assert elem["mode"] in [ "symlink", "rellink", "link", "copy", "copyNoFollow", "move" ] + } + if (elem.containsKey("overwrite")) { + assert elem["overwrite"] instanceof Boolean + } + if (elem.containsKey("pattern")) { + assert elem["pattern"] instanceof CharSequence + } + if (elem.containsKey("saveAs")) { + assert elem["saveAs"] instanceof CharSequence //: "saveAs as a Closure is currently not supported. Surround your closure with single quotes to get the desired effect. 
Example: '\{ foo \}'" + } + if (elem.containsKey("enabled")) { + assert elem["enabled"] instanceof Boolean + } + + // return final result + elem + } + // store final directive + drctv["publishDir"] = pblsh + } + + /* DIRECTIVE queue + accepted examples: + - "long" + - "short,long" + - ["short", "long"] + */ + if (drctv.containsKey("queue")) { + if (drctv["queue"] instanceof List) { + drctv["queue"] = drctv["queue"].join(",") + } + assert drctv["queue"] instanceof CharSequence + } + + /* DIRECTIVE label + accepted examples: + - "big_mem" + - "big_cpu" + - ["big_mem", "big_cpu"] + */ + if (drctv.containsKey("label")) { + if (drctv["label"] instanceof CharSequence) { + drctv["label"] = [ drctv["label"] ] + } + assert drctv["label"] instanceof List + drctv["label"].forEach { label -> + assert label instanceof CharSequence + // assert label.matches("[a-zA-Z0-9]([a-zA-Z0-9_]*[a-zA-Z0-9])?") + // ^ does not allow closures + } + } + + /* DIRECTIVE scratch + accepted examples: + - true + - "/path/to/scratch" + - '$MY_PATH_TO_SCRATCH' + - "ram-disk" + */ + if (drctv.containsKey("scratch")) { + assert drctv["scratch"] == true || drctv["scratch"] instanceof CharSequence + } + + /* DIRECTIVE storeDir + accepted examples: + - "/path/to/storeDir" + */ + if (drctv.containsKey("storeDir")) { + assert drctv["storeDir"] instanceof CharSequence + } + + /* DIRECTIVE stageInMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageInMode")) { + assert drctv["stageInMode"] instanceof CharSequence + assert drctv["stageInMode"] in ["copy", "link", "symlink", "rellink"] + } + + /* DIRECTIVE stageOutMode + accepted examples: + - "copy" + - "link" + */ + if (drctv.containsKey("stageOutMode")) { + assert drctv["stageOutMode"] instanceof CharSequence + assert drctv["stageOutMode"] in ["copy", "move", "rsync"] + } + + /* DIRECTIVE tag + accepted examples: + - "foo" + - '$id' + */ + if (drctv.containsKey("tag")) { + assert drctv["tag"] instanceof CharSequence + } + + /* 
DIRECTIVE time + accepted examples: + - "1h" + - "2days" + - "1day 6hours 3minutes 30seconds" + */ + if (drctv.containsKey("time")) { + assert drctv["time"] instanceof CharSequence + // todo: validation regex? + } + + return drctv +} + +// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/processWorkflowArgs.nf' +def processWorkflowArgs(Map args, Map defaultWfArgs, Map meta) { + // override defaults with args + def workflowArgs = defaultWfArgs + args + + // check whether 'key' exists + assert workflowArgs.containsKey("key") : "Error in module '${meta.config.name}': key is a required argument" + + // if 'key' is a closure, apply it to the original key + if (workflowArgs["key"] instanceof Closure) { + workflowArgs["key"] = workflowArgs["key"](meta.config.name) + } + def key = workflowArgs["key"] + assert key instanceof CharSequence : "Expected process argument 'key' to be a String. Found: class ${key.getClass()}" + assert key ==~ /^[a-zA-Z_]\w*$/ : "Error in module '$key': Expected process argument 'key' to consist of only letters, digits or underscores. Found: ${key}" + + // check for any unexpected keys + def expectedKeys = ["key", "directives", "auto", "map", "mapId", "mapData", "mapPassthrough", "filter", "runIf", "fromState", "toState", "args", "renameKeys", "debug"] + def unexpectedKeys = workflowArgs.keySet() - expectedKeys + assert unexpectedKeys.isEmpty() : "Error in module '$key': unexpected arguments to the '.run()' function: '${unexpectedKeys.join("', '")}'" + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("directives") : "Error in module '$key': directives is a required argument" + assert workflowArgs["directives"] instanceof Map : "Error in module '$key': Expected process argument 'directives' to be a Map. 
Found: class ${workflowArgs['directives'].getClass()}" + workflowArgs["directives"] = processDirectives(defaultWfArgs.directives + workflowArgs["directives"]) + + // check whether directives exists and apply defaults + assert workflowArgs.containsKey("auto") : "Error in module '$key': auto is a required argument" + assert workflowArgs["auto"] instanceof Map : "Error in module '$key': Expected process argument 'auto' to be a Map. Found: class ${workflowArgs['auto'].getClass()}" + workflowArgs["auto"] = processAuto(defaultWfArgs.auto + workflowArgs["auto"]) + + // auto define publish, if so desired + if (workflowArgs.auto.publish == true && (workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : [:]).isEmpty()) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.publish is true, params.publish_dir needs to be defined.\n" + + // " Example: params.publish_dir = \"./output/\"" + def publishDir = getPublishDir() + + if (publishDir != null) { + workflowArgs.directives.publishDir = [[ + path: publishDir, + saveAs: "{ it.startsWith('.') ? null : it }", // don't publish hidden files, by default + mode: "copy" + ]] + } + } + + // auto define transcript, if so desired + if (workflowArgs.auto.transcript == true) { + // can't assert at this level thanks to the no_publish profile + // assert params.containsKey("transcriptsDir") || params.containsKey("transcripts_dir") || params.containsKey("publishDir") || params.containsKey("publish_dir") : + // "Error in module '${workflowArgs['key']}': if auto.transcript is true, either params.transcripts_dir or params.publish_dir needs to be defined.\n" + + // " Example: params.transcripts_dir = \"./transcripts/\"" + def transcriptsDir = + params.containsKey("transcripts_dir") ? params.transcripts_dir : + params.containsKey("transcriptsDir") ? 
params.transcriptsDir : + params.containsKey("publish_dir") ? params.publish_dir + "/_transcripts" : + params.containsKey("publishDir") ? params.publishDir + "/_transcripts" : + null + if (transcriptsDir != null) { + def timestamp = nextflow.Nextflow.getSession().getWorkflowMetadata().start.format('yyyy-MM-dd_HH-mm-ss') + def transcriptsPublishDir = [ + path: "$transcriptsDir/$timestamp/\${task.process.replaceAll(':', '-')}/\${id}/", + saveAs: "{ it.startsWith('.') ? it.replaceAll('^.', '') : null }", + mode: "copy" + ] + def publishDirs = workflowArgs.directives.publishDir != null ? workflowArgs.directives.publishDir : null ? workflowArgs.directives.publishDir : [] + workflowArgs.directives.publishDir = publishDirs + transcriptsPublishDir + } + } + + // if this is a stubrun, remove certain directives? + if (workflow.stubRun) { + workflowArgs.directives.keySet().removeAll(["publishDir", "cpus", "memory", "label"]) + } + + for (nam in ["map", "mapId", "mapData", "mapPassthrough", "filter", "runIf"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam]) { + assert workflowArgs[nam] instanceof Closure : "Error in module '$key': Expected process argument '$nam' to be null or a Closure. Found: class ${workflowArgs[nam].getClass()}" + } + } + + // TODO: should functions like 'map', 'mapId', 'mapData', 'mapPassthrough' be deprecated as well? + for (nam in ["map", "mapData", "mapPassthrough", "renameKeys"]) { + if (workflowArgs.containsKey(nam) && workflowArgs[nam] != null) { + log.warn "module '$key': workflow argument '$nam' is deprecated and will be removed in Viash 0.9.0. Please use 'fromState' and 'toState' instead." 
// Normalise the 'fromState' workflow argument.
//
// Arguments:
//   fromState: null, a Closure, a Map[String, String] or a List[String]
//   key_:      name of the module, only used in error messages
//   config_:   processed component config; 'allArguments' is read from it
//
// Returns:
//   - null when fromState is null
//   - the Closure itself when fromState already is a Closure
//   - otherwise a Closure that takes a tuple [id, state, ...] and returns a Map
//     containing, for every (newkey -> origkey) pair, 'newkey: state[origkey]'
//     for the origkeys that are present in the state.
//     A List ['a', 'b'] is treated as the Map [a: 'a', b: 'b'].
def _processFromState(fromState, key_, config_) {
  assert fromState == null || fromState instanceof Closure || fromState instanceof Map || fromState instanceof List :
    "Error in module '$key_': Expected process argument 'fromState' to be null, a Closure, a Map, or a List. Found: class ${fromState.getClass()}"
  if (fromState == null) {
    return null
  }

  // if fromState is a List, convert to map
  if (fromState instanceof List) {
    // check whether fromstate is a list[string]
    assert fromState.every{it instanceof CharSequence} : "Error in module '$key_': fromState is a List, but not all elements are Strings"
    fromState = fromState.collectEntries{[it, it]}
  }

  // if fromState is a map, convert to closure
  if (fromState instanceof Map) {
    // check whether fromstate is a map[string, string]
    assert fromState.values().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all values are Strings"
    assert fromState.keySet().every{it instanceof CharSequence} : "Error in module '$key_': fromState is a Map, but not all keys are Strings"
    // clone so that later changes to the caller's map do not affect the closure
    def fromStateMap = fromState.clone()
    // Use the config that was passed in rather than the script-global 'meta',
    // mirroring _processToState.
    // NOTE(review): the config blob in this file spells the direction in lower case
    // ("input"), and workflowFactory compares against "input" as well. If nothing
    // re-capitalises it, this list is always empty and the exception below can never
    // be thrown. Left as-is because making it reachable changes behaviour for
    // callers that supply required arguments through 'args' or params -- confirm first.
    def requiredInputNames = config_.allArguments.findAll{it.required && it.direction == "Input"}.collect{it.plainName}
    // turn the map into a closure to be used later on
    fromState = { it ->
      def state = it[1]
      assert state instanceof Map : "Error in module '$key_': the state is not a Map"
      def data = fromStateMap.collectMany{newkey, origkey ->
        if (state.containsKey(origkey)) {
          // key is available in the state: pass it on under its new name
          [[newkey, state[origkey]]]
        } else if (!requiredInputNames.contains(origkey)) {
          // key is missing but not listed as required: silently skip it
          []
        } else {
          throw new Exception("Error in module '$key_': fromState key '$origkey' not found in current state")
        }
      }.collectEntries()
      data
    }
  }

  return fromState
}
// helper file: 'src/main/resources/io/viash/runners/nextflow/workflowFactory/workflowFactory.nf'

// Channel operator used between pipeline stages: when workflowArgs.debug is truthy
// every tuple is printed with the given debugKey, otherwise tuples pass through unchanged.
def _debug(workflowArgs, debugKey) {
  if (workflowArgs.debug) {
    view { "process '${workflowArgs.key}' $debugKey tuple: $it" }
  } else {
    map { it }
  }
}

// depends on: innerWorkflowFactory
//
// Build the Nextflow workflow object for this module.
//
// Arguments:
//   args:          workflow arguments for this instance (passed to processWorkflowArgs)
//   defaultWfArgs: defaults that 'args' is merged with by processWorkflowArgs
//   meta:          module metadata; 'meta.config.allArguments' is read throughout
//
// Returns:
//   a clone of the inner workflow named after workflowArgs.key, with two extra
//   properties attached through its metaClass:
//     - run(runArgs): creates a new workflow via workflowFactory(runArgs, workflowArgs, meta)
//     - config:       meta.config, for later introspection
//
// Channel contract: takes tuples [id, data, ...passthrough] and emits
// [id, new_state, ...passthrough]; tuples rejected by 'runIf' are appended
// unchanged at the end of the output channel.
def workflowFactory(Map args, Map defaultWfArgs, Map meta) {
  def workflowArgs = processWorkflowArgs(args, defaultWfArgs, meta)
  def key_ = workflowArgs["key"]
  // names of the arguments that may hold multiple values; used when merging states
  def multipleArgs = meta.config.allArguments.findAll{ it.multiple }.collect{it.plainName}

  workflow workflowInstance {
    take: input_

    main:
    def chModified = input_
      | checkUniqueIds([:])
      | _debug(workflowArgs, "input")
      | map { tuple ->
        tuple = deepClone(tuple)

        if (workflowArgs.map) {
          tuple = workflowArgs.map(tuple)
        }
        if (workflowArgs.mapId) {
          tuple[0] = workflowArgs.mapId(tuple[0])
        }
        if (workflowArgs.mapData) {
          tuple[1] = workflowArgs.mapData(tuple[1])
        }
        if (workflowArgs.mapPassthrough) {
          tuple = tuple.take(2) + workflowArgs.mapPassthrough(tuple.drop(2))
        }

        // check tuple
        assert tuple instanceof List :
          "Error in module '${key_}': element in channel should be a tuple [id, data, ...otherargs...]\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"
        assert tuple.size() >= 2 :
          "Error in module '${key_}': expected length of tuple in input channel to be two or greater.\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: tuple.size() == ${tuple.size()}"

        // check id field
        if (tuple[0] instanceof GString) {
          tuple[0] = tuple[0].toString()
        }
        assert tuple[0] instanceof CharSequence :
          "Error in module '${key_}': first element of tuple in channel should be a String\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Found: ${tuple[0]}"

        // match file to input file
        if (workflowArgs.auto.simplifyInput && (tuple[1] instanceof Path || tuple[1] instanceof List)) {
          def inputFiles = meta.config.allArguments
            .findAll { it.type == "file" && it.direction == "input" }

          assert inputFiles.size() == 1 :
            "Error in module '${key_}' id '${tuple[0]}'.\n" +
            " Anonymous file inputs are only allowed when the process has exactly one file input.\n" +
            " Expected: inputFiles.size() == 1. Found: inputFiles.size() is ${inputFiles.size()}"

          tuple[1] = [[ inputFiles[0].plainName, tuple[1] ]].collectEntries()
        }

        // check data field
        assert tuple[1] instanceof Map :
          "Error in module '${key_}' id '${tuple[0]}': second element of tuple in channel should be a Map\n" +
          " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
          " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

        // rename keys of data field in tuple
        if (workflowArgs.renameKeys) {
          assert workflowArgs.renameKeys instanceof Map :
            "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
            " Example: renameKeys: ['new_key': 'old_key'].\n" +
            " Expected class: Map. Found: renameKeys.getClass() is ${workflowArgs.renameKeys.getClass()}"
          assert tuple[1] instanceof Map :
            "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
            " Expected class: Map. Found: tuple[1].getClass() is ${tuple[1].getClass()}"

          // TODO: allow renameKeys to be a function?
          workflowArgs.renameKeys.each { newKey, oldKey ->
            assert newKey instanceof CharSequence :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of newKey: String. Found: newKey.getClass() is ${newKey.getClass()}"
            assert oldKey instanceof CharSequence :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Example: renameKeys: ['new_key': 'old_key'].\n" +
              " Expected class of oldKey: String. Found: oldKey.getClass() is ${oldKey.getClass()}"
            // the message must use 'key_': there is no 'key' variable in this scope, so
            // interpolating it would replace the intended error with a missing-property error
            assert tuple[1].containsKey(oldKey) :
              "Error renaming data keys in module '${key_}' id '${tuple[0]}'.\n" +
              " Key '$oldKey' is missing in the data map. tuple[1].keySet() is '${tuple[1].keySet()}'"
            tuple[1].put(newKey, tuple[1][oldKey])
          }
          tuple[1].keySet().removeAll(workflowArgs.renameKeys.collect{ newKey, oldKey -> oldKey })
        }
        tuple
      }


    // split into tuples to run and tuples to pass through untouched
    def chRun = null
    def chPassthrough = null
    if (workflowArgs.runIf) {
      def runIfBranch = chModified.branch{ tup ->
        run: workflowArgs.runIf(tup[0], tup[1])
        passthrough: true
      }
      chRun = runIfBranch.run
      chPassthrough = runIfBranch.passthrough
    } else {
      chRun = chModified
      chPassthrough = Channel.empty()
    }

    def chRunFiltered = workflowArgs.filter ?
      chRun | filter{workflowArgs.filter(it)} :
      chRun

    // derive the module arguments from the state; without fromState the data is used as-is
    def chArgs = workflowArgs.fromState ?
      chRunFiltered | map{
        def new_data = workflowArgs.fromState(it.take(2))
        [it[0], new_data]
      } :
      chRunFiltered | map {tup -> tup.take(2)}

    // fill in defaults
    def chArgsWithDefaults = chArgs
      | map { tuple ->
        def id_ = tuple[0]
        def data_ = tuple[1]

        // TODO: could move fromState to here

        // fetch default params from functionality
        def defaultArgs = meta.config.allArguments
          .findAll { it.containsKey("default") }
          .collectEntries { [ it.plainName, it.default ] }

        // fetch overrides in params
        def paramArgs = meta.config.allArguments
          .findAll { par ->
            def argKey = key_ + "__" + par.plainName
            params.containsKey(argKey)
          }
          .collectEntries { [ it.plainName, params[key_ + "__" + it.plainName] ] }

        // fetch overrides in data
        def dataArgs = meta.config.allArguments
          .findAll { data_.containsKey(it.plainName) }
          .collectEntries { [ it.plainName, data_[it.plainName] ] }

        // combine params; later maps take precedence over earlier ones
        def combinedArgs = defaultArgs + paramArgs + workflowArgs.args + dataArgs

        // remove arguments with explicit null values
        combinedArgs
          .removeAll{_, val -> val == null || val == "viash_no_value" || val == "force_null"}

        combinedArgs = _processInputValues(combinedArgs, meta.config, id_, key_)

        [id_, combinedArgs] + tuple.drop(2)
      }

    // TODO: move some of the _meta.join_id wrangling to the safeJoin() function.
    def chInitialOutputMulti = chArgsWithDefaults
      | _debug(workflowArgs, "processed")
      // run workflow
      | innerWorkflowFactory(workflowArgs)
    def chInitialOutputList = chInitialOutputMulti instanceof List ? chInitialOutputMulti : [chInitialOutputMulti]
    assert chInitialOutputList.size() > 0: "should have emitted at least one output channel"
    // Add a channel ID to the events, which designates the channel the event was emitted from as a running number
    // This number is used to sort the events later when the events are gathered from across the channels.
    def chInitialOutputListWithIndexedEvents = chInitialOutputList.withIndex().collect{channel, channelIndex ->
      def newChannel = channel
        | map {tuple ->
          assert tuple instanceof List :
            "Error in module '${key_}': element in output channel should be a tuple [id, data, ...otherargs...]\n" +
            " Example: [\"id\", [input: file('foo.txt'), arg: 10]].\n" +
            " Expected class: List. Found: tuple.getClass() is ${tuple.getClass()}"

          def newEvent = [channelIndex] + tuple
          return newEvent
        }
      return newChannel
    }
    // Put the events into one channel; also covers the case where only one channel is emitted
    def chInitialOutput = chInitialOutputList.size() > 1 ? \
      chInitialOutputListWithIndexedEvents[0].mix(*chInitialOutputListWithIndexedEvents.tail()) : \
      chInitialOutputListWithIndexedEvents[0]
    def chInitialOutputProcessed = chInitialOutput
      | map { tuple ->
        def channelId = tuple[0]
        def id_ = tuple[1]
        def output_ = tuple[2]

        // see if output map contains metadata
        def meta_ =
          output_ instanceof Map && output_.containsKey("_meta") ?
          output_["_meta"] :
          [:]
        def join_id = meta_.join_id ?: id_

        // remove metadata
        output_ = output_.findAll{k, v -> k != "_meta"}

        // check value types
        output_ = _checkValidOutputArgument(output_, meta.config, id_, key_)

        [join_id, channelId, id_, output_]
      }
      // | view{"chInitialOutput: ${it.take(3)}"}

    // join the output [prev_id, channel_id, new_id, output] with the previous state [prev_id, state, ...]
    def chPublishWithPreviousState = safeJoin(chInitialOutputProcessed, chRunFiltered, key_)
      // input tuple format: [join_id, channel_id, id, output, prev_state, ...]
      // output tuple format: [join_id, channel_id, id, new_state, ...]
      | map{ tup ->
        def new_state = workflowArgs.toState(tup.drop(2).take(3))
        tup.take(3) + [new_state] + tup.drop(5)
      }
    if (workflowArgs.auto.publish == "state") {
      def chPublishFiles = chPublishWithPreviousState
        // input tuple format: [join_id, channel_id, id, new_state, ...]
        // output tuple format: [join_id, channel_id, id, new_state]
        | map{ tup ->
          tup.take(4)
        }

      safeJoin(chPublishFiles, chArgsWithDefaults, key_)
        // input tuple format: [join_id, channel_id, id, new_state, orig_state, ...]
        // output tuple format: [id, new_state, orig_state]
        | map { tup ->
          tup.drop(2).take(3)
        }
        | publishFilesByConfig(key: key_, config: meta.config)
    }
    // Join the state from the events that were emitted from different channels
    def chJoined = chInitialOutputProcessed
      | map {tuple ->
        def join_id = tuple[0]
        def channel_id = tuple[1]
        def id = tuple[2]
        def other = tuple.drop(3)
        // Below, groupTuple is used to join the events. To make sure resuming a workflow
        // keeps working, the output state must be deterministic. This means the state needs
        // to be sorted, for which groupTuple has a 'sort' argument. This argument can be set
        // to 'hash', but hashing the state when it is large can be problematic in terms of
        // performance. Therefore, a custom comparator function is provided. We add the
        // channel ID to the states so that we can use the channel ID to sort the items.
        def stateWithChannelID = [[channel_id] * other.size(), other].transpose()
        // A comparator that is provided to groupTuple's 'sort' argument is applied
        // to all elements of the event tuple (that is not the 'id'). The comparator
        // closure that is used below expects the input to be List. So the join_id and
        // channel_id must also be wrapped in a list.
        [[join_id], [channel_id], id] + stateWithChannelID
      }
      | groupTuple(by: 2, sort: {a, b -> a[0] <=> b[0]}, size: chInitialOutputList.size(), remainder: true)
      | map {join_ids, _, id, statesWithChannelID ->
        // Remove the channel IDs from the states
        def states = statesWithChannelID.collect{it[1]}
        def newJoinId = join_ids.flatten().unique{a, b -> a <=> b}
        assert newJoinId.size() == 1: "Multiple events were emitted for '$id'."
        def newJoinIdUnique = newJoinId[0]

        // Merge the states from the different channels
        def newState = states.inject([:]){ old_state, state_to_add ->
          return old_state + state_to_add.collectEntries{k, v ->
            if (!multipleArgs.contains(k)) {
              // if the key is not a multiple argument, we expect only one value
              if (old_state.containsKey(k)) {
                assert old_state[k] == v : "ID $id: multiple entries for argument $k were emitted."
              }
              [k, v]
            } else {
              // if the key is a multiple argument, append the different values into one list
              def prevValue = old_state.getOrDefault(k, [])
              def prevValueAsList = prevValue instanceof List ? prevValue : [prevValue]
              [k, prevValueAsList + v]
            }
          }
        }

        _checkAllRequiredOuputsPresent(newState, meta.config, id, key_)

        // simplify output if need be
        if (workflowArgs.auto.simplifyOutput && newState.size() == 1) {
          newState = newState.values()[0]
        }

        return [newJoinIdUnique, id, newState]
      }

    // join the output [prev_id, new_id, output] with the previous state [prev_id, state, ...]
    def chNewState = safeJoin(chJoined, chRunFiltered, key_)
      // input tuple format: [join_id, id, output, prev_state, ...]
      // output tuple format: [join_id, id, new_state, ...]
      | map{ tup ->
        def new_state = workflowArgs.toState(tup.drop(1).take(3))
        tup.take(2) + [new_state] + tup.drop(4)
      }

    if (workflowArgs.auto.publish == "state") {
      def chPublishStates = chNewState
        // input tuple format: [join_id, id, new_state, ...]
        // output tuple format: [join_id, id, new_state]
        | map{ tup ->
          tup.take(3)
        }

      safeJoin(chPublishStates, chArgsWithDefaults, key_)
        // input tuple format: [join_id, id, new_state, orig_state, ...]
        // output tuple format: [id, new_state, orig_state]
        | map { tup ->
          tup.drop(1).take(3)
        }
        | publishStatesByConfig(key: key_, config: meta.config)
    }
    chReturn = chNewState
      | map { tup ->
        // input tuple format: [join_id, id, new_state, ...]
        // output tuple format: [id, new_state, ...]
        tup.drop(1)
      }
      | _debug(workflowArgs, "output")
      | concat(chPassthrough)

    emit: chReturn
  }

  def wf = workflowInstance.cloneWithName(key_)

  // add factory function
  wf.metaClass.run = { runArgs ->
    workflowFactory(runArgs, workflowArgs, meta)
  }
  // add config to module for later introspection
  wf.metaClass.config = meta.config

  return wf
}
true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input", + "alternatives" : [ + "-i" + ], + "description" : "Path to the sample.", + "example" : [ + "input.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--input_gp_mask", + "description" : "JSON file containing a nested dictionary containing the gene programs,\nwith keys being gene program names and values being dictionaries with keys `targets` and `sources`,\nwhere `targets` contains a list of the names of genes in the gene program for the reconstruction of the gene expression of the node itself (receiving node)\nand `sources` contains a list of the names of genes in the gene program for the reconstruction of the gene expression of the node's neighbors (transmitting nodes).\n", + "example" : [ + "prior_knowledge_gp_mask.json" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--modality", + "description" : "Which modality to process.", + "default" : [ + "rna" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--layer", + "description" : "Use specified layer for calculation of qc metrics. 
If not specified, adata.X is used.", + "example" : [ + "raw_counts" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--input_obs_covariates", + "description" : "Keys of the adata.obs fields to use as covariates.", + "default" : [ + "sample_id" + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--input_obsm_spatial_coords", + "description" : "Key in adata.obsm where spatial coordinates are stored", + "default" : [ + "spatial" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Sample ID options", + "description" : "Options for adding the id to .obs on the MuData object. Having a sample \nid present in a requirement of several components for this pipeline.\n", + "arguments" : [ + { + "type" : "boolean", + "name" : "--include_sample_as_covariate", + "description" : "Whether to include the sample information as a categorical covariate for the \nNicheCompass model.\n", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--add_id_to_obs", + "description" : "Add the value passed with --id to .obs.", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--add_id_obs_output", + "description" : ".Obs column to add the sample IDs to. Required and only used when \n--add_id_to_obs is set to 'true'\n", + "default" : [ + "sample_id" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--add_id_make_observation_keys_unique", + "description" : "Join the id to the .obs index (.obs_names). 
\nOnly used when --add_id_to_obs is set to 'true'.\n", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Spatial Neighbors Calculation", + "description" : "Options for the calculation of the spatial neighborhood graph.\n", + "arguments" : [ + { + "type" : "string", + "name" : "--coord_type", + "description" : "Type of coordinate system provided by `--input_obsm_spatial_coords`. Valid options are:\n`grid` - grid coordinates.\n`generic` - generic coordinates.\nIf not provided, `grid` is used if `--input_obsm_spatial_coords` is in --input .uns with `--n_neighs` = 6 (Visium), otherwise `generic` is used.\n", + "required" : false, + "choices" : [ + "generic", + "grid" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--n_spatial_neighbors", + "description" : "Depending on `--coord_type`:\n`grid` - number of neighboring tiles.\n`generic` - number of neighborhoods for non-grid data. 
Only used when `--delaunay False`.\n", + "default" : [ + 6 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--delaunay", + "description" : "Whether to use Delaunay triangulation to determine spatial neighborhood graph.\nOnly used when `--coord_type generic`.\n", + "default" : [ + false + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Gene Program Mask", + "description" : "Options for filtering gene programs based on the number of genes available in the data.", + "arguments" : [ + { + "type" : "integer", + "name" : "--min_genes_per_gp", + "description" : "Minimum number of genes in a gene program inluding both target and source genes that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "default" : [ + 1 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_source_genes_per_gp", + "description" : "Minimum number of source genes in a gene program that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "default" : [ + 0 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--min_target_genes_per_gp", + "description" : "Minimum number of target genes in a gene program that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "default" : [ + 0 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_genes_per_gp", + "description" : "Maximum number of genes in a gene program inluding both target and source 
genes that can be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_source_genes_per_gp", + "description" : "Maximum number of source genes in a gene program that can be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--max_target_genes_per_gp", + "description" : "Maximum number of target genes in a gene program that can be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean_true", + "name" : "--filter_genes_not_in_masks", + "description" : "Whether to remove the genes that are not in the gp masks from the input data.\n", + "direction" : "input" + } + ] + }, + { + "name" : "NicheCompass Model Architecture", + "description" : "Options for the NicheCompass model architecture.", + "arguments" : [ + { + "type" : "boolean", + "name" : "--covariate_edges", + "description" : "List of booleans that indicate whether there can be edges between different categories of the categorical covariates.\nIf this is `True` for a specific categorical covariate, this covariate will be excluded from the edge reconstruction loss.\nNeeds to match the length and order of `--input_obs_covariates`.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--gene_expr_recon_dist", + "description" : "The distribution used for gene expression reconstruction. \nIf `nb`, uses a negative binomial distribution. 
\nIf `zinb`, uses a zero-inflated negative binomial distribution.\n", + "default" : [ + "nb" + ], + "required" : false, + "choices" : [ + "nb", + "zinb" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "boolean", + "name" : "--log_variational", + "description" : "Whether to transform x by log(x+1) prior to encoding for numerical stability (not for normalization).\n", + "default" : [ + true + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--node_label_method", + "description" : "Node label method that will be used for omics reconstruction.\nIf `one-hop-sum`, uses a concatenation of the node's input features with the sum of the input features of all nodes in the node's one-hop neighborhood.\nIf `one-hop-norm`, uses a concatenation of the node's input features with the node's one-hop neighbors input features normalized as per Kipf, T. N. & Welling, M. Semi-Supervised Classification with Graph Convolutional Networks. arXiv [cs.LG] (2016).\nIf `one-hop-attention`, uses a concatenation of the node's input features with the node's one-hop neighbors input features weighted by an attention mechanism.\n", + "default" : [ + "one-hop-norm" + ], + "required" : false, + "choices" : [ + "one-hop-norm", + "two-hop-norm", + "one-hop-attention" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--active_gp_thresh_ratio", + "description" : "Ratio that determines which gene programs are considered active and are used in the latent representation after model training.\nAll inactive gene programs will be dropped during model training after a determined number of epochs.\nAggregations of the absolute values of the gene weights of the gene expression decoder per gene program are calculated.\nThe maximum value, i.e. 
the value of the gene program with the highest aggregated value will be used as a benchmark and all gene programs whose aggregated value is smaller than `--active_gp_thresh_ratio` times this maximum value will be set to inactive.\nIf set to 0, all gene programs will be considered active.\n", + "default" : [ + 0.1 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--active_gp_type", + "description" : "Type to determine active gene programs. \nCan be `mixed`, in which case active gene programs are determined across prior and add-on gene programs jointly,\nor `separate` in which case they are determined separately for prior and add-on gene programs.\n", + "default" : [ + "separate" + ], + "required" : false, + "choices" : [ + "mixed", + "separate" + ], + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--n_addon_gp", + "description" : "Number of addon gene programs (i.e. 
gene programs that are not included in masks but can be learned de novo).\n", + "default" : [ + 100 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--cat_covariates_embeds_nums", + "description" : "Number of embedding nodes for all categorical covariates.\nMust be the same length as `--input_obs_covariates`.\n", + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--random_state", + "description" : "Random seed for reproducibility.\n", + "default" : [ + 0 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "NicheCompass Training Parameters", + "description" : "Options for training the NicheCompass model.", + "arguments" : [ + { + "type" : "integer", + "name" : "--n_epochs", + "description" : "Number of training epochs", + "default" : [ + 100 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--n_epochs_all_gps", + "description" : "Number of epochs during which all gene programs are used for model training.\nAfter that only active gene programs are retained.\n", + "default" : [ + 25 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--n_epochs_no_edge_recon", + "description" : "Number of epochs during which the edge reconstruction loss is excluded from backpropagation for pretraining using the other loss components.\n", + "default" : [ + 0 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--n_epochs_no_cat_covariates_contrastive", + "description" : "Number of epochs during which the categorical 
covariates contrastive loss is excluded from backpropagation for pretraining using the other loss components.\n", + "default" : [ + 5 + ], + "required" : false, + "min" : 0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--lr", + "description" : "Learning rate", + "default" : [ + 0.001 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--weight_decay", + "description" : "Weight decay (L2 penalty).", + "default" : [ + 0.001 + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--edge_val_ratio", + "description" : "Fraction of the data that is used as validation set on edge-level. The rest of the data will be used as training set on edge-level.\n", + "default" : [ + 0.1 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--node_val_ratio", + "description" : "Fraction of the data that is used as validation set on node-level. 
The rest of the data will be used as training set on node-level.\n", + "default" : [ + 0.1 + ], + "required" : false, + "min" : 0.0, + "max" : 1.0, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--edge_batch_size", + "description" : "Batch size for the edge-level dataloaders.\n", + "default" : [ + 256 + ], + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--node_batch_size", + "description" : "Batch size for the node-level dataloaders.\nIf not provided, is automatically determined based on `--edge_batch_size`.\n", + "required" : false, + "min" : 1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "integer", + "name" : "--n_sampled_neighbors", + "description" : "Number of neighbors that are sampled during model training from the spatial neighborhood graph.\nIf set to -1, all direct neighbors are included.\n", + "default" : [ + -1 + ], + "required" : false, + "min" : -1, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Clustering options", + "arguments" : [ + { + "type" : "string", + "name" : "--obs_cluster", + "description" : "Prefix for the .obs keys under which to add the cluster labels. Newly created columns in .obs will \nbe created from the specified value for '--obs_cluster' suffixed with an underscore and one of the resolutions\nresolutions specified in '--leiden_resolution'.\n", + "default" : [ + "nichecompass_leiden" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "double", + "name" : "--leiden_resolution", + "description" : "Control the coarseness of the clustering. 
Higher values lead to more clusters.", + "default" : [ + 1.0 + ], + "required" : false, + "direction" : "input", + "multiple" : true, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Umap options", + "arguments" : [ + { + "type" : "string", + "name" : "--obsm_umap", + "description" : "In which .obsm slot to store the resulting UMAP embedding.", + "default" : [ + "X_leiden_nichecompass_umap" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Neighbour calculation", + "arguments" : [ + { + "type" : "string", + "name" : "--uns_neighbors", + "description" : "In which .uns slot to store various neighbor output objects.", + "default" : [ + "nichecompass_neighbors" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--obsp_neighbor_distances", + "description" : "In which .obsp slot to store the distance matrix between the resulting neighbors.", + "default" : [ + "nichecompass_distances" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--obsp_neighbor_connectivities", + "description" : "In which .obsp slot to store the connectivities matrix between the resulting neighbors.", + "default" : [ + "nichecompass_connectivities" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + }, + { + "name" : "Outputs", + "arguments" : [ + { + "type" : "file", + "name" : "--output", + "description" : "Destination path to the output.", + "example" : [ + "output.h5mu" + ], + "must_exist" : true, + "create_parent" : true, + "required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "file", + "name" : "--output_model", + "description" : "Directory to save the trained NicheCompass model.", + "must_exist" : true, + "create_parent" : true, + 
"required" : true, + "direction" : "output", + "multiple" : false, + "multiple_sep" : ";" + }, + { + "type" : "string", + "name" : "--output_obsm_embedding", + "description" : "Key of the obsm field where the latent / gene program representation of active gene programs will be stored after NicheCompass model training.\n", + "default" : [ + "nichecompass_latent" + ], + "required" : false, + "direction" : "input", + "multiple" : false, + "multiple_sep" : ";" + } + ] + } + ], + "resources" : [ + { + "type" : "nextflow_script", + "path" : "main.nf", + "is_executable" : true, + "entrypoint" : "run_wf" + }, + { + "type" : "file", + "path" : "/src/workflows/utils/labels.config", + "dest" : "nextflow_labels.config" + } + ], + "description" : "A pipeline to compute the spatial neighborhood graph, perform nichecompass embedding followed by Leiden clustering.", + "test_resources" : [ + { + "type" : "nextflow_script", + "path" : "test.nf", + "is_executable" : true, + "entrypoint" : "test_wf" + }, + { + "type" : "file", + "path" : "/resources_test/xenium/xenium_tiny.h5mu" + }, + { + "type" : "file", + "path" : "/resources_test/cosmx/Lung5_Rep2_tiny.h5mu" + }, + { + "type" : "file", + "path" : "/resources_test/niche/prior_knowledge_gp_mask.json" + } + ], + "info" : { + "test_dependencies" : [ + { + "name" : "nichecompass_leiden_test", + "namespace" : "test_workflows/niche" + } + ] + }, + "status" : "enabled", + "scope" : { + "image" : "public", + "target" : "public" + }, + "dependencies" : [ + { + "name" : "dataflow/obsp_block_concatenation", + "repository" : { + "type" : "local" + } + }, + { + "name" : "neighbors/spatial_neighborhood_graph", + "repository" : { + "type" : "local" + } + }, + { + "name" : "nichecompass/nichecompass", + "repository" : { + "type" : "local" + } + }, + { + "name" : "metadata/add_id", + "repository" : { + "type" : "vsh", + "repo" : "openpipeline", + "tag" : "v3.0.0" + } + }, + { + "name" : "workflows/multiomics/neighbors_leiden_umap", + "repository" : 
{ + "type" : "vsh", + "repo" : "openpipeline", + "tag" : "v3.0.0" + } + } + ], + "repositories" : [ + { + "type" : "vsh", + "name" : "openpipeline", + "repo" : "openpipeline", + "tag" : "v3.0.0" + } + ], + "links" : { + "repository" : "https://github.com/openpipelines-bio/openpipeline_spatial", + "docker_registry" : "ghcr.io" + }, + "runners" : [ + { + "type" : "nextflow", + "id" : "nextflow", + "directives" : { + "tag" : "$id" + }, + "auto" : { + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false + }, + "config" : { + "labels" : { + "mem1gb" : "memory = 1000000000.B", + "mem2gb" : "memory = 2000000000.B", + "mem5gb" : "memory = 5000000000.B", + "mem10gb" : "memory = 10000000000.B", + "mem20gb" : "memory = 20000000000.B", + "mem50gb" : "memory = 50000000000.B", + "mem100gb" : "memory = 100000000000.B", + "mem200gb" : "memory = 200000000000.B", + "mem500gb" : "memory = 500000000000.B", + "mem1tb" : "memory = 1000000000000.B", + "mem2tb" : "memory = 2000000000000.B", + "mem5tb" : "memory = 5000000000000.B", + "mem10tb" : "memory = 10000000000000.B", + "mem20tb" : "memory = 20000000000000.B", + "mem50tb" : "memory = 50000000000000.B", + "mem100tb" : "memory = 100000000000000.B", + "mem200tb" : "memory = 200000000000000.B", + "mem500tb" : "memory = 500000000000000.B", + "mem1gib" : "memory = 1073741824.B", + "mem2gib" : "memory = 2147483648.B", + "mem4gib" : "memory = 4294967296.B", + "mem8gib" : "memory = 8589934592.B", + "mem16gib" : "memory = 17179869184.B", + "mem32gib" : "memory = 34359738368.B", + "mem64gib" : "memory = 68719476736.B", + "mem128gib" : "memory = 137438953472.B", + "mem256gib" : "memory = 274877906944.B", + "mem512gib" : "memory = 549755813888.B", + "mem1tib" : "memory = 1099511627776.B", + "mem2tib" : "memory = 2199023255552.B", + "mem4tib" : "memory = 4398046511104.B", + "mem8tib" : "memory = 8796093022208.B", + "mem16tib" : "memory = 17592186044416.B", + "mem32tib" : "memory = 35184372088832.B", + 
"mem64tib" : "memory = 70368744177664.B", + "mem128tib" : "memory = 140737488355328.B", + "mem256tib" : "memory = 281474976710656.B", + "mem512tib" : "memory = 562949953421312.B", + "cpu1" : "cpus = 1", + "cpu2" : "cpus = 2", + "cpu5" : "cpus = 5", + "cpu10" : "cpus = 10", + "cpu20" : "cpus = 20", + "cpu50" : "cpus = 50", + "cpu100" : "cpus = 100", + "cpu200" : "cpus = 200", + "cpu500" : "cpus = 500", + "cpu1000" : "cpus = 1000" + }, + "script" : [ + "includeConfig(\\"nextflow_labels.config\\")" + ] + }, + "debug" : false, + "container" : "docker" + } + ], + "engines" : [ + { + "type" : "native", + "id" : "native" + } + ], + "build_info" : { + "config" : "/workdir/root/repo/src/workflows/niche/nichecompass_leiden/config.vsh.yaml", + "runner" : "nextflow", + "engine" : "native", + "output" : "/workdir/root/repo/target/nextflow/workflows/niche/nichecompass_leiden", + "viash_version" : "0.9.4", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", + "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" + }, + "package_config" : { + "name" : "openpipeline_spatial", + "version" : "niche-compass", + "info" : { + "test_resources" : [ + { + "type" : "s3", + "path" : "s3://openpipelines-bio/openpipeline_spatial/resources_test", + "dest" : "resources_test" + } + ] + }, + "repositories" : [ + { + "type" : "vsh", + "name" : "openpipeline", + "repo" : "openpipeline", + "tag" : "v3.0.0" + } + ], + "viash_version" : "0.9.4", + "source" : "/workdir/root/repo/src", + "target" : "/workdir/root/repo/target", + "config_mods" : [ + ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n.runners[.type == 'nextflow'].config.script := 'includeConfig(\\"nextflow_labels.config\\")'", + ".engines += { type: \\"native\\" }", + ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'", + ".engines[.type == 'docker'].target_tag := 'niche-compass'" + ], + "organization" : "vsh", + "links" : { + "repository" : 
"https://github.com/openpipelines-bio/openpipeline_spatial", + "docker_registry" : "ghcr.io" + } + } +}''')) +] + +// resolve dependencies dependencies (if any) +meta["root_dir"] = getRootDir() +include { obsp_block_concatenation } from "${meta.resources_dir}/../../../../nextflow/dataflow/obsp_block_concatenation/main.nf" +include { spatial_neighborhood_graph } from "${meta.resources_dir}/../../../../nextflow/neighbors/spatial_neighborhood_graph/main.nf" +include { nichecompass } from "${meta.resources_dir}/../../../../nextflow/nichecompass/nichecompass/main.nf" +include { add_id } from "${meta.root_dir}/dependencies/vsh/vsh/openpipeline/v3.0.0/nextflow/metadata/add_id/main.nf" +include { neighbors_leiden_umap } from "${meta.root_dir}/dependencies/vsh/vsh/openpipeline/v3.0.0/nextflow/workflows/multiomics/neighbors_leiden_umap/main.nf" + +// inner workflow +// user-provided Nextflow code +workflow run_wf { + take: + input_ch + + main: + output_ch = input_ch + + | map { id, state -> + [id, state + [ + workflow_output: state.output, + _meta: [join_id: id] + ]] + } + // If requested, add the id of the events (samples) to a column in .obs. + // Also allows to make .obs_names (the .obs index) unique, by prefixing the values with an unique id per .h5mu file. + // The latter is usefull to avoid duplicate observations during concatenation. 
+ | add_id.run( + // Use runIf (not filter): filter would drop the sample from the channel when the id is not requested, + // whereas runIf passes the sample through unchanged. + runIf: {id, state -> state.add_id_to_obs }, + fromState: {id, state -> + def newState = [ + "input": state.input, + "input_id": id, + "make_observation_keys_unique": state.add_id_make_observation_keys_unique, + "obs_output": state.add_id_obs_output, + "add_id_to_obs": state.add_id_to_obs + ] + newState + }, + toState: {id, output, state -> + def keysToRemove = ["add_id_to_obs", "add_id_obs_output", "add_id_make_observation_keys_unique"] + def newState = state.findAll{it.key !in keysToRemove} + newState + ["input": output.output] + } + ) + + | spatial_neighborhood_graph.run( + fromState: {id, state -> [ + "input": state.input, + "modality": state.modality, + "layer": state.layer, + "input_obsm_spatial_coords": state.input_obsm_spatial_coords, + "coord_type": state.coord_type, + "n_spatial_neighbors": state.n_spatial_neighbors, + "delaunay": state.delaunay + ]}, + toState: {id, output, state -> + def keysToRemove = ["input_obsm_spatial_coords", "coord_type", "n_spatial_neighbors", "delaunay"] + def newState = state.findAll{it.key !in keysToRemove} + newState + ["input": output.output] + } + ) + + | joinStates { ids, states -> + def newId = "merged" + // gather keys with unique values across states that should be combined + def new_state_non_unique_values = [ + input: states.collect{it.input}, + input_id: ids, + _meta: [join_id: ids[0]] + ] + // gather keys from different states + def all_state_keys = states.inject([].toSet()){ current_keys, state -> + def new_keys = current_keys + state.keySet() + return new_keys + }.minus(["output", "id", "input", "_meta"]) + // Create the new state from the keys, values should be the same across samples + def new_state = all_state_keys.inject([:]){ old_state, argument_name -> + // declare with def so the variable is local to this closure instead of a script-level binding + def argument_values = states.collect{it.get(argument_name)}.unique() + assert argument_values.size() == 1, "Arguments should be the same across samples. 
Argument name: $argument_name, \ + argument value: $argument_values" + // take the unique value from the set (there is only one) + def argument_value + argument_values.each { argument_value = it } + def current_state = old_state + [(argument_name): argument_value] + return current_state + } + def data_state = new_state_non_unique_values + new_state + [ newId, data_state ] + } + + | obsp_block_concatenation.run( + fromState: { id, state -> [ + "input": state.input, + "modality": state.modality, + "input_id": state.input_id + ]}, + toState: {id, output, state -> + def keysToRemove = ["input_id"] + def newState = state.findAll{it.key !in keysToRemove} + newState + ["input": output.output] + } + ) + + | nichecompass.run( + fromState: {id, state -> [ + "input": state.input, + "input_gp_mask": state.input_gp_mask, + "input_obs_covariates": state.input_obs_covariates, + "modality": state.modality, + "layer": state.layer, + "min_genes_per_gp": state.min_genes_per_gp, + "min_source_genes_per_gp": state.min_source_genes_per_gp, + "min_target_genes_per_gp": state.min_target_genes_per_gp, + "max_genes_per_gp": state.max_genes_per_gp, + "max_source_genes_per_gp": state.max_source_genes_per_gp, + "max_target_genes_per_gp": state.max_target_genes_per_gp, + "filter_genes_not_in_masks": state.filter_genes_not_in_masks, + "covariate_edges": state.covariate_edges, + "gene_expr_recon_distribution": state.gene_expr_recon_dist, + "log_variational": state.log_variational, + "node_label_method": state.node_label_method, + "active_gp_thresh_ratio": state.active_gp_thresh_ratio, + "active_gp_type": state.active_gp_type, + "n_addon_gp": state.n_addon_gp, + "cat_covariates_embeds_nums": state.cat_covariates_embeds_nums, + "random_state": state.random_state, + "n_epochs": state.n_epochs, + "n_epochs_all_gps": state.n_epochs_all_gps, + "n_epochs_no_edge_recon": state.n_epochs_no_edge_recon, + // the workflow argument is --n_epochs_no_cat_covariates_contrastive (no "_loss" suffix), so read that state key + "n_epochs_no_cat_covariates_contrastive_loss": state.n_epochs_no_cat_covariates_contrastive, + 
"lr": state.lr, + "weight_decay": state.weight_decay, + "edge_val_ratio": state.edge_val_ratio, + "node_val_ratio": state.node_val_ratio, + "edge_batch_size": state.edge_batch_size, + "node_batch_size": state.node_batch_size, + "n_sampled_neighbors": state.n_sampled_neighbors, + "output_obsm_embedding": state.output_obsm_embedding, + "output_model": state.output_model + ]}, + args: [ + "input_obsm_spatial_connectivities": "spatial_connectivities" + ], + toState: [ + "input": "output", + "output_model": "output_model" + ] + ) + + | neighbors_leiden_umap.run( + fromState: { id, state -> [ + "input": state.input, + "modality": state.modality, + "obsm_input": state.output_obsm_embedding, + "output": state.workflow_output, + "uns_neighbors": state.uns_neighbors, + "obsp_neighbor_distances": state.obsp_neighbor_distances, + "obsp_neighbor_connectivities": state.obsp_neighbor_connectivities, + "leiden_resolution": state.leiden_resolution, + "obs_cluster": state.obs_cluster, + "obsm_umap": state.obsm_umap, + ]}, + toState: ["output": "output"] + ) + + | setState(["output": "output", "output_model": "output_model", "_meta": "_meta"]) + + | view() + + emit: + output_ch +} + +// inner workflow hook +def innerWorkflowFactory(args) { + return run_wf +} + +// defaults +meta["defaults"] = [ + // key to be used to trace the process and determine output names + key: null, + + // fixed arguments to be passed to script + args: [:], + + // default directives + directives: readJsonBlob('''{ + "tag" : "$id" +}'''), + + // auto settings + auto: readJsonBlob('''{ + "simplifyInput" : true, + "simplifyOutput" : false, + "transcript" : false, + "publish" : false +}'''), + + // Apply a map over the incoming tuple + // Example: `{ tup -> [ tup[0], [input: tup[1].output] ] + tup.drop(2) }` + map: null, + + // Apply a map over the ID element of a tuple (i.e. the first element) + // Example: `{ id -> id + "_foo" }` + mapId: null, + + // Apply a map over the data element of a tuple (i.e. 
the second element) + // Example: `{ data -> [ input: data.output ] }` + mapData: null, + + // Apply a map over the passthrough elements of a tuple (i.e. the tuple excl. the first two elements) + // Example: `{ pt -> pt.drop(1) }` + mapPassthrough: null, + + // Filter the channel + // Example: `{ tup -> tup[0] == "foo" }` + filter: null, + + // Choose whether or not to run the component on the tuple if the condition is true. + // Otherwise, the tuple will be passed through. + // Example: `{ tup -> tup[0] != "skip_this" }` + runIf: null, + + // Rename keys in the data field of the tuple (i.e. the second element) + // Will likely be deprecated in favour of `fromState`. + // Example: `[ "new_key": "old_key" ]` + renameKeys: null, + + // Fetch data from the state and pass it to the module without altering the current state. + // + // `fromState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be passed to the module as is. + // - If it is a `List[String]`, the data will be the values of the state at the given keys. + // - If it is a `Map[String, String]`, the data will be the values of the state at the given keys, with the keys renamed according to the map. + // - If it is a function, the tuple (`[id, state]`) in the channel will be passed to the function, and the result will be used as the data. + // + // Example: `{ id, state -> [input: state.fastq_file] }` + // Default: `null` + fromState: null, + + // Determine how the state should be updated after the module has been run. + // + // `toState` should be `null`, `List[String]`, `Map[String, String]` or a function. + // + // - If it is `null`, the state will be replaced with the output of the module. + // - If it is a `List[String]`, the state will be updated with the values of the data at the given keys. 
+ // - If it is a `Map[String, String]`, the state will be updated with the values of the data at the given keys, with the keys renamed according to the map. + // - If it is a function, a tuple (`[id, output, state]`) will be passed to the function, and the result will be used as the new state. + // + // Example: `{ id, output, state -> state + [counts: state.output] }` + // Default: `{ id, output, state -> output }` + toState: null, + + // Whether or not to print debug messages + // Default: `false` + debug: false +] + +// initialise default workflow +meta["workflow"] = workflowFactory([key: meta.config.name], meta.defaults, meta) + +// add workflow to environment +nextflow.script.ScriptMeta.current().addDefinition(meta.workflow) + +// anonymous workflow for running this module as a standalone +workflow { + // add id argument if it's not already in the config + // TODO: deep copy + def newConfig = deepClone(meta.config) + def newParams = deepClone(params) + + def argsContainsId = newConfig.allArguments.any{it.plainName == "id"} + if (!argsContainsId) { + def idArg = [ + 'name': '--id', + 'required': false, + 'type': 'string', + 'description': 'A unique id for every entry.', + 'multiple': false + ] + newConfig.arguments.add(0, idArg) + newConfig = processConfig(newConfig) + } + if (!newParams.containsKey("id")) { + newParams.id = "run" + } + + helpMessage(newConfig) + + channelFromParams(newParams, newConfig) + // make sure id is not in the state if id is not in the args + | map {id, state -> + if (!argsContainsId) { + [id, state.findAll{k, v -> k != "id"}] + } else { + [id, state] + } + } + | meta.workflow.run( + auto: [ publish: "state" ] + ) +} + +// END COMPONENT-SPECIFIC CODE diff --git a/target/nextflow/workflows/niche/nichecompass_leiden/nextflow.config b/target/nextflow/workflows/niche/nichecompass_leiden/nextflow.config new file mode 100644 index 0000000..6e43e25 --- /dev/null +++ b/target/nextflow/workflows/niche/nichecompass_leiden/nextflow.config @@ 
-0,0 +1,126 @@ +manifest { + name = 'workflows/niche/nichecompass_leiden' + mainScript = 'main.nf' + nextflowVersion = '!>=20.12.1-edge' + version = 'niche-compass' + description = 'A pipeline to compute the spatial neighborhood graph, perform nichecompass embedding followed by Leiden clustering.' + author = 'Dorien Roosen, Weiwei Schultz' +} + +process.container = 'nextflow/bash:latest' + +// detect tempdir +tempDir = java.nio.file.Paths.get( + System.getenv('NXF_TEMP') ?: + System.getenv('VIASH_TEMP') ?: + System.getenv('TEMPDIR') ?: + System.getenv('TMPDIR') ?: + '/tmp' +).toAbsolutePath() + +profiles { + no_publish { + process { + withName: '.*' { + publishDir = [ + enabled: false + ] + } + } + } + mount_temp { + docker.temp = tempDir + podman.temp = tempDir + charliecloud.temp = tempDir + } + docker { + docker.enabled = true + // docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } +} + +process{ + withLabel: mem1gb { memory = 1000000000.B } + withLabel: mem2gb { memory = 2000000000.B } + withLabel: mem5gb { memory = 5000000000.B } + withLabel: mem10gb { memory = 10000000000.B } + withLabel: mem20gb { memory = 20000000000.B } + withLabel: mem50gb { memory = 50000000000.B } + withLabel: mem100gb { memory = 100000000000.B } + withLabel: 
mem200gb { memory = 200000000000.B } + withLabel: mem500gb { memory = 500000000000.B } + withLabel: mem1tb { memory = 1000000000000.B } + withLabel: mem2tb { memory = 2000000000000.B } + withLabel: mem5tb { memory = 5000000000000.B } + withLabel: mem10tb { memory = 10000000000000.B } + withLabel: mem20tb { memory = 20000000000000.B } + withLabel: mem50tb { memory = 50000000000000.B } + withLabel: mem100tb { memory = 100000000000000.B } + withLabel: mem200tb { memory = 200000000000000.B } + withLabel: mem500tb { memory = 500000000000000.B } + withLabel: mem1gib { memory = 1073741824.B } + withLabel: mem2gib { memory = 2147483648.B } + withLabel: mem4gib { memory = 4294967296.B } + withLabel: mem8gib { memory = 8589934592.B } + withLabel: mem16gib { memory = 17179869184.B } + withLabel: mem32gib { memory = 34359738368.B } + withLabel: mem64gib { memory = 68719476736.B } + withLabel: mem128gib { memory = 137438953472.B } + withLabel: mem256gib { memory = 274877906944.B } + withLabel: mem512gib { memory = 549755813888.B } + withLabel: mem1tib { memory = 1099511627776.B } + withLabel: mem2tib { memory = 2199023255552.B } + withLabel: mem4tib { memory = 4398046511104.B } + withLabel: mem8tib { memory = 8796093022208.B } + withLabel: mem16tib { memory = 17592186044416.B } + withLabel: mem32tib { memory = 35184372088832.B } + withLabel: mem64tib { memory = 70368744177664.B } + withLabel: mem128tib { memory = 140737488355328.B } + withLabel: mem256tib { memory = 281474976710656.B } + withLabel: mem512tib { memory = 562949953421312.B } + withLabel: cpu1 { cpus = 1 } + withLabel: cpu2 { cpus = 2 } + withLabel: cpu5 { cpus = 5 } + withLabel: cpu10 { cpus = 10 } + withLabel: cpu20 { cpus = 20 } + withLabel: cpu50 { cpus = 50 } + withLabel: cpu100 { cpus = 100 } + withLabel: cpu200 { cpus = 200 } + withLabel: cpu500 { cpus = 500 } + withLabel: cpu1000 { cpus = 1000 } +} + +includeConfig("nextflow_labels.config") diff --git 
a/target/nextflow/workflows/niche/nichecompass_leiden/nextflow_labels.config b/target/nextflow/workflows/niche/nichecompass_leiden/nextflow_labels.config new file mode 100644 index 0000000..541aaad --- /dev/null +++ b/target/nextflow/workflows/niche/nichecompass_leiden/nextflow_labels.config @@ -0,0 +1,68 @@ +process { + // Default resources for components that hardly do any processing + memory = { 2.GB * task.attempt } + cpus = 1 + + // Retry for exit codes that have something to do with memory issues + errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' } + maxRetries = 3 + maxMemory = null + + // CPU resources + withLabel: singlecpu { cpus = 1 } + withLabel: lowcpu { cpus = 4 } + withLabel: midcpu { cpus = 10 } + withLabel: highcpu { cpus = 20 } + + // Memory resources + withLabel: lowmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: midmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } } + withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } } + + // Disk space + // Nextflow apparently can't handle empty directives, i.e. + // withLabel: lowdisk {} + // so for that reason we have to add a dummy directive + withLabel: lowdisk { + dummyDirective = "dummyValue" + } + withLabel: middisk { + dummyDirective = "dummyValue" + } + withLabel: highdisk { + dummyDirective = "dummyValue" + } + withLabel: veryhighdisk { + dummyDirective = "dummyValue" + } + // NOTE: The above labels intentionally do not have an effect by default. 
+ // The user should set the disk space requirements by adding the following + // to the compute environment: + // + // withLabel: lowdisk { disk = { 20.GB * task.attempt } } + // withLabel: middisk { disk = { 100.GB * task.attempt } } + // withLabel: highdisk { disk = { 200.GB * task.attempt } } + // withLabel: veryhighdisk { disk = { 500.GB * task.attempt } } +} + +// Return the memory to request for a task, capped at process.maxMemory when that is set. +// - without a maxMemory setting the requested amount is returned unchanged +// - on the last retry attempt process.maxMemory is returned +// - a request larger than process.maxMemory is reduced to process.maxMemory +def get_memory(to_compare) { + if (!process.containsKey("maxMemory") || !process.maxMemory) { + return to_compare + } + + try { + if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) { + return process.maxMemory + } + else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) { + // cap at the configured maximum (the previous code referenced an undefined 'max_memory' variable) + return process.maxMemory as nextflow.util.MemoryUnit + } + else { + return to_compare + } + } catch (all) { + println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!" + System.exit(1) + } +} diff --git a/target/nextflow/workflows/niche/nichecompass_leiden/nextflow_schema.json b/target/nextflow/workflows/niche/nichecompass_leiden/nextflow_schema.json new file mode 100644 index 0000000..a1a49f1 --- /dev/null +++ b/target/nextflow/workflows/niche/nichecompass_leiden/nextflow_schema.json @@ -0,0 +1,453 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "nichecompass_leiden", + "description": "A pipeline to compute the spatial neighborhood graph, perform nichecompass embedding followed by Leiden clustering.", + "type": "object", + "$defs": { + "inputs": { + "title": "Inputs", + "type": "object", + "description": "No description", + "properties": { + "id": { + "type": "string", + "description": "ID of the sample.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"foo\"`. 
" + }, + "input": { + "type": "string", + "format": "path", + "exists": true, + "description": "Path to the sample.", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"input.h5mu\"`. " + }, + "input_gp_mask": { + "type": "string", + "format": "path", + "exists": true, + "description": "JSON file containing a nested dictionary containing the gene programs,\nwith keys being gene program names and values being dictionaries with keys `targets` and `sources`,\nwhere `targets` contains a list of the names of genes in the gene program for the reconstruction of the gene expression of the node itself (receiving node)\nand `sources` contains a list of the names of genes in the gene program for the reconstruction of the gene expression of the node's neighbors (transmitting nodes).\n", + "help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"prior_knowledge_gp_mask.json\"`. " + }, + "modality": { + "type": "string", + "description": "Which modality to process.", + "help_text": "Type: `string`, multiple: `False`, default: `\"rna\"`. ", + "default": "rna" + }, + "layer": { + "type": "string", + "description": "Use specified layer for calculation of qc metrics", + "help_text": "Type: `string`, multiple: `False`, example: `\"raw_counts\"`. " + }, + "input_obs_covariates": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Keys of the adata.obs fields to use as covariates.", + "help_text": "Type: `string`, multiple: `True`, default: `[\"sample_id\"]`. ", + "default": [ + "sample_id" + ] + }, + "input_obsm_spatial_coords": { + "type": "string", + "description": "Key in adata.obsm where spatial coordinates are stored", + "help_text": "Type: `string`, multiple: `False`, default: `\"spatial\"`. 
", + "default": "spatial" + } + } + }, + "outputs": { + "title": "Outputs", + "type": "object", + "description": "No description", + "properties": { + "output": { + "type": "string", + "format": "path", + "description": "Destination path to the output.", + "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output.h5mu\"`, direction: `output`, example: `\"output.h5mu\"`. ", + "default": "$id.$key.output.h5mu" + }, + "output_model": { + "type": "string", + "format": "path", + "description": "Directory to save the trained NicheCompass model.", + "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output_model\"`, direction: `output`. ", + "default": "$id.$key.output_model" + }, + "output_obsm_embedding": { + "type": "string", + "description": "Key of the obsm field where the latent / gene program representation of active gene programs will be stored after NicheCompass model training.\n", + "help_text": "Type: `string`, multiple: `False`, default: `\"nichecompass_latent\"`. ", + "default": "nichecompass_latent" + } + } + }, + "sample id options": { + "title": "Sample ID options", + "type": "object", + "description": "Options for adding the id to .obs on the MuData object. Having a sample \nid present in a requirement of several components for this pipeline.\n", + "properties": { + "include_sample_as_covariate": { + "type": "boolean", + "description": "Whether to include the sample information as a categorical covariate for the \nNicheCompass model.\n", + "help_text": "Type: `boolean`, multiple: `False`, default: `true`. ", + "default": true + }, + "add_id_to_obs": { + "type": "boolean", + "description": "Add the value passed with --id to .obs.", + "help_text": "Type: `boolean`, multiple: `False`, default: `true`. ", + "default": true + }, + "add_id_obs_output": { + "type": "string", + "description": ".Obs column to add the sample IDs to", + "help_text": "Type: `string`, multiple: `False`, default: `\"sample_id\"`. 
", + "default": "sample_id" + }, + "add_id_make_observation_keys_unique": { + "type": "boolean", + "description": "Join the id to the .obs index (.obs_names)", + "help_text": "Type: `boolean`, multiple: `False`, default: `true`. ", + "default": true + } + } + }, + "spatial neighbors calculation": { + "title": "Spatial Neighbors Calculation", + "type": "object", + "description": "Options for the calculation of the spatial neighborhood graph.\n", + "properties": { + "coord_type": { + "type": "string", + "description": "Type of coordinate system provided by `--input_obsm_spatial_coords`", + "help_text": "Type: `string`, multiple: `False`, choices: ``generic`, `grid``. ", + "enum": [ + "generic", + "grid" + ] + }, + "n_spatial_neighbors": { + "type": "integer", + "description": "Depending on `--coord_type`:\n`grid` - number of neighboring tiles.\n`generic` - number of neighborhoods for non-grid data", + "help_text": "Type: `integer`, multiple: `False`, default: `6`. ", + "default": 6 + }, + "delaunay": { + "type": "boolean", + "description": "Whether to use Delaunay triangulation to determine spatial neighborhood graph.\nOnly used when `--coord_type generic`.\n", + "help_text": "Type: `boolean`, multiple: `False`, default: `false`. ", + "default": false + } + } + }, + "gene program mask": { + "title": "Gene Program Mask", + "type": "object", + "description": "Options for filtering gene programs based on the number of genes available in the data.", + "properties": { + "min_genes_per_gp": { + "type": "integer", + "description": "Minimum number of genes in a gene program including both target and source genes that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "help_text": "Type: `integer`, multiple: `False`, default: `1`. 
", + "default": 1 + }, + "min_source_genes_per_gp": { + "type": "integer", + "description": "Minimum number of source genes in a gene program that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "help_text": "Type: `integer`, multiple: `False`, default: `0`. ", + "default": 0 + }, + "min_target_genes_per_gp": { + "type": "integer", + "description": "Minimum number of target genes in a gene program that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "help_text": "Type: `integer`, multiple: `False`, default: `0`. ", + "default": 0 + }, + "max_genes_per_gp": { + "type": "integer", + "description": "Maximum number of genes in a gene program including both target and source genes that can be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "help_text": "Type: `integer`, multiple: `False`. " + }, + "max_source_genes_per_gp": { + "type": "integer", + "description": "Maximum number of source genes in a gene program that can be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "help_text": "Type: `integer`, multiple: `False`. " + }, + "max_target_genes_per_gp": { + "type": "integer", + "description": "Maximum number of target genes in a gene program that can be available in the input data (gene expression has been probed) for a gene program not to be discarded.\n", + "help_text": "Type: `integer`, multiple: `False`. " + }, + "filter_genes_not_in_masks": { + "type": "boolean", + "description": "Whether to remove the genes that are not in the gp masks from the input data.\n", + "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. 
", + "default": false + } + } + }, + "nichecompass model architecture": { + "title": "NicheCompass Model Architecture", + "type": "object", + "description": "Options for the NicheCompass model architecture.", + "properties": { + "covariate_edges": { + "type": "array", + "items": { + "type": "boolean" + }, + "description": "List of booleans that indicate whether there can be edges between different categories of the categorical covariates.\nIf this is `True` for a specific categorical covariate, this covariate will be excluded from the edge reconstruction loss.\nNeeds to match the length and order of `--input_obs_covariates`.\n", + "help_text": "Type: `boolean`, multiple: `True`. " + }, + "gene_expr_recon_dist": { + "type": "string", + "description": "The distribution used for gene expression reconstruction", + "help_text": "Type: `string`, multiple: `False`, default: `\"nb\"`, choices: ``nb`, `zinb``. ", + "enum": [ + "nb", + "zinb" + ], + "default": "nb" + }, + "log_variational": { + "type": "boolean", + "description": "Whether to transform x by log(x+1) prior to encoding for numerical stability (not for normalization).\n", + "help_text": "Type: `boolean`, multiple: `False`, default: `true`. ", + "default": true + }, + "node_label_method": { + "type": "string", + "description": "Node label method that will be used for omics reconstruction.\nIf `one-hop-sum`, uses a concatenation of the node's input features with the sum of the input features of all nodes in the node's one-hop neighborhood.\nIf `one-hop-norm`, uses a concatenation of the node's input features with the node's one-hop neighbors input features normalized as per Kipf, T", + "help_text": "Type: `string`, multiple: `False`, default: `\"one-hop-norm\"`, choices: ``one-hop-norm`, `two-hop-norm`, `one-hop-attention``. 
", + "enum": [ + "one-hop-norm", + "two-hop-norm", + "one-hop-attention" + ], + "default": "one-hop-norm" + }, + "active_gp_thresh_ratio": { + "type": "number", + "description": "Ratio that determines which gene programs are considered active and are used in the latent representation after model training.\nAll inactive gene programs will be dropped during model training after a determined number of epochs.\nAggregations of the absolute values of the gene weights of the gene expression decoder per gene program are calculated.\nThe maximum value, i.e", + "help_text": "Type: `double`, multiple: `False`, default: `0.1`. ", + "default": 0.1 + }, + "active_gp_type": { + "type": "string", + "description": "Type to determine active gene programs", + "help_text": "Type: `string`, multiple: `False`, default: `\"separate\"`, choices: ``mixed`, `separate``. ", + "enum": [ + "mixed", + "separate" + ], + "default": "separate" + }, + "n_addon_gp": { + "type": "integer", + "description": "Number of addon gene programs (i.e", + "help_text": "Type: `integer`, multiple: `False`, default: `100`. ", + "default": 100 + }, + "cat_covariates_embeds_nums": { + "type": "array", + "items": { + "type": "integer" + }, + "description": "Number of embedding nodes for all categorical covariates.\nMust be the same length as `--input_obs_covariates`.\n", + "help_text": "Type: `integer`, multiple: `True`. " + }, + "random_state": { + "type": "integer", + "description": "Random seed for reproducibility.\n", + "help_text": "Type: `integer`, multiple: `False`, default: `0`. ", + "default": 0 + } + } + }, + "nichecompass training parameters": { + "title": "NicheCompass Training Parameters", + "type": "object", + "description": "Options for training the NicheCompass model.", + "properties": { + "n_epochs": { + "type": "integer", + "description": "Number of training epochs", + "help_text": "Type: `integer`, multiple: `False`, default: `100`. 
", + "default": 100 + }, + "n_epochs_all_gps": { + "type": "integer", + "description": "Number of epochs during which all gene programs are used for model training.\nAfter that only active gene programs are retained.\n", + "help_text": "Type: `integer`, multiple: `False`, default: `25`. ", + "default": 25 + }, + "n_epochs_no_edge_recon": { + "type": "integer", + "description": "Number of epochs during which the edge reconstruction loss is excluded from backpropagation for pretraining using the other loss components.\n", + "help_text": "Type: `integer`, multiple: `False`, default: `0`. ", + "default": 0 + }, + "n_epochs_no_cat_covariates_contrastive": { + "type": "integer", + "description": "Number of epochs during which the categorical covariates contrastive loss is excluded from backpropagation for pretraining using the other loss components.\n", + "help_text": "Type: `integer`, multiple: `False`, default: `5`. ", + "default": 5 + }, + "lr": { + "type": "number", + "description": "Learning rate", + "help_text": "Type: `double`, multiple: `False`, default: `0.001`. ", + "default": 0.001 + }, + "weight_decay": { + "type": "number", + "description": "Weight decay (L2 penalty).", + "help_text": "Type: `double`, multiple: `False`, default: `0.001`. ", + "default": 0.001 + }, + "edge_val_ratio": { + "type": "number", + "description": "Fraction of the data that is used as validation set on edge-level", + "help_text": "Type: `double`, multiple: `False`, default: `0.1`. ", + "default": 0.1 + }, + "node_val_ratio": { + "type": "number", + "description": "Fraction of the data that is used as validation set on node-level", + "help_text": "Type: `double`, multiple: `False`, default: `0.1`. ", + "default": 0.1 + }, + "edge_batch_size": { + "type": "integer", + "description": "Batch size for the edge-level dataloaders.\n", + "help_text": "Type: `integer`, multiple: `False`, default: `256`. 
", + "default": 256 + }, + "node_batch_size": { + "type": "integer", + "description": "Batch size for the node-level dataloaders.\nIf not provided, is automatically determined based on `--edge_batch_size`.\n", + "help_text": "Type: `integer`, multiple: `False`. " + }, + "n_sampled_neighbors": { + "type": "integer", + "description": "Number of neighbors that are sampled during model training from the spatial neighborhood graph.\nIf set to -1, all direct neighbors are included.\n", + "help_text": "Type: `integer`, multiple: `False`, default: `-1`. ", + "default": -1 + } + } + }, + "clustering options": { + "title": "Clustering options", + "type": "object", + "description": "No description", + "properties": { + "obs_cluster": { + "type": "string", + "description": "Prefix for the .obs keys under which to add the cluster labels", + "help_text": "Type: `string`, multiple: `False`, default: `\"nichecompass_leiden\"`. ", + "default": "nichecompass_leiden" + }, + "leiden_resolution": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Control the coarseness of the clustering", + "help_text": "Type: `double`, multiple: `True`, default: `[1.0]`. ", + "default": [ + 1.0 + ] + } + } + }, + "umap options": { + "title": "Umap options", + "type": "object", + "description": "No description", + "properties": { + "obsm_umap": { + "type": "string", + "description": "In which .obsm slot to store the resulting UMAP embedding.", + "help_text": "Type: `string`, multiple: `False`, default: `\"X_leiden_nichecompass_umap\"`. ", + "default": "X_leiden_nichecompass_umap" + } + } + }, + "neighbour calculation": { + "title": "Neighbour calculation", + "type": "object", + "description": "No description", + "properties": { + "uns_neighbors": { + "type": "string", + "description": "In which .uns slot to store various neighbor output objects.", + "help_text": "Type: `string`, multiple: `False`, default: `\"nichecompass_neighbors\"`. 
", + "default": "nichecompass_neighbors" + }, + "obsp_neighbor_distances": { + "type": "string", + "description": "In which .obsp slot to store the distance matrix between the resulting neighbors.", + "help_text": "Type: `string`, multiple: `False`, default: `\"nichecompass_distances\"`. ", + "default": "nichecompass_distances" + }, + "obsp_neighbor_connectivities": { + "type": "string", + "description": "In which .obsp slot to store the connectivities matrix between the resulting neighbors.", + "help_text": "Type: `string`, multiple: `False`, default: `\"nichecompass_connectivities\"`. ", + "default": "nichecompass_connectivities" + } + } + }, + "nextflow input-output arguments": { + "title": "Nextflow input-output arguments", + "type": "object", + "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.", + "properties": { + "publish_dir": { + "type": "string", + "description": "Path to an output directory.", + "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. 
" + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/inputs" + }, + { + "$ref": "#/$defs/outputs" + }, + { + "$ref": "#/$defs/sample id options" + }, + { + "$ref": "#/$defs/spatial neighbors calculation" + }, + { + "$ref": "#/$defs/gene program mask" + }, + { + "$ref": "#/$defs/nichecompass model architecture" + }, + { + "$ref": "#/$defs/nichecompass training parameters" + }, + { + "$ref": "#/$defs/clustering options" + }, + { + "$ref": "#/$defs/umap options" + }, + { + "$ref": "#/$defs/neighbour calculation" + }, + { + "$ref": "#/$defs/nextflow input-output arguments" + } + ] +} diff --git a/target/nextflow/workflows/qc/spatial_qc/.config.vsh.yaml b/target/nextflow/workflows/qc/spatial_qc/.config.vsh.yaml index 609d200..15d6871 100644 --- a/target/nextflow/workflows/qc/spatial_qc/.config.vsh.yaml +++ b/target/nextflow/workflows/qc/spatial_qc/.config.vsh.yaml @@ -387,7 +387,7 @@ build_info: output: "target/nextflow/workflows/qc/spatial_qc" executable: "target/nextflow/workflows/qc/spatial_qc/main.nf" viash_version: "0.9.4" - git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831" + git_commit: "9151204629228da14d7c82f49f24c607efb9251e" git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial" dependencies: - "target/dependencies/vsh/vsh/openpipeline/v3.0.0/nextflow/workflows/qc/qc" diff --git a/target/nextflow/workflows/qc/spatial_qc/main.nf b/target/nextflow/workflows/qc/spatial_qc/main.nf index 6489913..db00807 100644 --- a/target/nextflow/workflows/qc/spatial_qc/main.nf +++ b/target/nextflow/workflows/qc/spatial_qc/main.nf @@ -3505,7 +3505,7 @@ meta = [ "engine" : "native", "output" : "/workdir/root/repo/target/nextflow/workflows/qc/spatial_qc", "viash_version" : "0.9.4", - "git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831", + "git_commit" : "9151204629228da14d7c82f49f24c607efb9251e", "git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial" }, "package_config" : {