Build branch openpipeline_spatial/niche-compass with version niche-compass to openpipeline_spatial on branch niche-compass (9151204)

Build pipeline: openpipelines-bio.openpipeline-spatial.niche-compass-29shs

Source commit: 9151204629

Source message: poc
This commit is contained in:
CI
2025-12-13 14:07:35 +00:00
parent 560dea5ec5
commit db66152ef1
105 changed files with 21553 additions and 1280 deletions

View File

@@ -22,6 +22,8 @@
* `convert/from_cosmx_to_h5mu`: Updated component to handle CosMx output bundles generated with AtoMx SIP versions < v1.3.2 (PR #25).
* `nichecompass/gene_program_mask`: Added a component to create a prior knowledge gene program mask for NicheCompass analysis (PR #27).
# openpipeline_spatial 0.1.0
## NEW FUNCTIONALITY

View File

@@ -0,0 +1,51 @@
#!/bin/bash
#
# Builds the NicheCompass test resources under resources_test/niche:
#   1. downloads the gene ortholog mapping and the mouse metabolite
#      enzyme/sensor tables from the nichecompass 0.3.2 tag,
#   2. runs the nichecompass/gene_program_mask component on those files to
#      create the prior knowledge gene program mask,
#   3. syncs the directory to S3 (as a dry run, see below).

set -eo pipefail

# get the root of the directory
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

DIR="resources_test/niche"
ID="nichecompass"

# File names are defined unconditionally: the viash run below needs them even
# when "$DIR" already exists and nothing has to be downloaded.
orthologue_file="human_mouse_gene_orthologs.csv"
enzymes_file="mouse_metabolite_enzymes.tsv"
sensors_file="mouse_metabolite_sensors.tsv"
gp_mask="prior_knowledge_gp_mask.json"

orthologue_url="https://raw.githubusercontent.com/Lotfollahi-lab/nichecompass/refs/tags/0.3.2/data/gene_annotations/human_mouse_gene_orthologs.csv"
enzymes_url="https://raw.githubusercontent.com/Lotfollahi-lab/nichecompass/refs/tags/0.3.2/data/gene_programs/metabolite_enzyme_sensor_gps/mouse_metabolite_enzymes.tsv"
sensors_url="https://raw.githubusercontent.com/Lotfollahi-lab/nichecompass/refs/tags/0.3.2/data/gene_programs/metabolite_enzyme_sensor_gps/mouse_metabolite_sensors.tsv"

# create tempdir
MY_TEMP="${VIASH_TEMP:-/tmp}"
TMPDIR=$(mktemp -d "$MY_TEMP/$ID-XXXXXX")
function clean_up {
  [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR"
}
trap clean_up EXIT

# Download URL $1 to path $2 unless $2 already exists and is non-empty.
# The file is first written to the temporary directory and only moved into
# place after wget succeeded, so an interrupted download never leaves a
# truncated file behind that a later run would mistake for a complete one.
function download_if_missing {
  local url="$1"
  local target="$2"
  local staged
  if [ -s "$target" ]; then
    return 0
  fi
  staged="$TMPDIR/$(basename "$target")"
  wget "$url" -O "$staged"
  mv "$staged" "$target"
}

mkdir -p "$DIR"
download_if_missing "$orthologue_url" "$DIR/$orthologue_file"
download_if_missing "$enzymes_url" "$DIR/$enzymes_file"
download_if_missing "$sensors_url" "$DIR/$sensors_file"

# NOTE(review): --species is not passed, so the component default applies
# while the metabolite tables are the mouse versions - confirm this is intended.
viash run src/nichecompass/gene_program_mask/config.vsh.yaml -- \
  --input_gene_orthologs_mapping_file "$DIR/$orthologue_file" \
  --input_metabolite_enzymes "$DIR/$enzymes_file" \
  --input_metabolite_sensors "$DIR/$sensors_file" \
  --output "${DIR}/${gp_mask}"

# Sync to S3
# --dryrun only prints what would be transferred/deleted; drop it to upload.
aws s3 sync \
  --profile di \
  "$DIR" \
  s3://openpipelines-bio/openpipeline_spatial/resources_test/niche \
  --delete \
  --dryrun

View File

@@ -0,0 +1,10 @@
# Environment setup: system packages first, then the Python requirements
# merged in from the shared requirement files.
setup:
  - type: apt
    packages: [procps, git]
  - type: python
    __merge__:
      - /src/base/requirements/anndata_mudata.yaml
      - /src/base/requirements/openpipeline_testutils.yaml
      - /src/base/requirements/viashpy.yaml

View File

@@ -75,9 +75,8 @@ spe <- paste0(meta[["resources_dir"]], "/Lung5_Rep2_tiny")
out_rds <- "output.rds"
create_folder_archive <- function(
folder_path,
archive = "Lung5_Rep2_tiny.zip"
) {
folder_path,
archive = "Lung5_Rep2_tiny.zip") {
old_wd <- getwd()
on.exit(setwd(old_wd))
setwd(meta$resources_dir)

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
name: neighbors
namespace: spatial_neighborhood_graph
name: spatial_neighborhood_graph
namespace: neighbors
scope: public
description: Calculates a spatial neighborhood graph.

View File

@@ -0,0 +1,186 @@
# Component metadata and command line interface of the NicheCompass prior
# knowledge gene program mask generator.
name: gene_program_mask
namespace: nichecompass
scope: public
description: Generation of a prior knowledge gene program mask for NicheCompass.
authors:
  - __merge__: /src/authors/dorien_roosen.yaml
    roles: [ maintainer ]
argument_groups:
  # All input files are optional at the interface level; each description
  # states for which mask/species combination the file is needed.
  - name: Inputs
    arguments:
      - name: "--input_gene_orthologs_mapping_file"
        type: file
        required: false
        description: |
          Path to a CSV file mapping human genes to mouse orthologs.
          Required for the OmniPath and NicheNet masks if `--species mouse`.
      - name: "--input_metabolite_enzymes"
        type: file
        required: false
        description: |
          Path to the MeBocost metabolite-enzymes TSV file.
          Required for generating the MeBocost gene program mask.
      - name: "--input_metabolite_sensors"
        type: file
        required: false
        description: |
          Path to the MeBocost metabolite-sensors TSV file.
          Required for generating the MeBocost gene program mask.
  - name: Parameters
    arguments:
      - name: "--species"
        type: string
        choices: ["human", "mouse"]
        default: "human"
        description: Species of the organism (human or mouse).
      # One switch per prior knowledge source; all are enabled by default.
      - name: "--create_omnipath_gene_program_mask"
        type: boolean
        default: true
        description: Whether to create the OmniPath gene program mask.
      - name: "--create_nichenet_gene_program_mask"
        type: boolean
        default: true
        description: Whether to create the NicheNet gene program mask.
      - name: "--create_mebocost_gene_program_mask"
        type: boolean
        default: true
        description: Whether to create the MeBocost gene program mask.
      - name: "--create_collectri_tf_gene_program_mask"
        type: boolean
        default: true
        description: Whether to create the CollecTRI TF gene program mask.
      - name: "--overlap_thresh_target_genes"
        type: double
        default: 1.0
        min: 0.0
        max: 1.0
        description: |
          The minimum ratio of target genes that need to overlap between a GP without source genes and another GP for the GP to be dropped.
          Gene programs with different source genes are never combined or dropped.
  - name: Omnipath Parameters
    arguments:
      - name: "--omnipath_min_curation_effort"
        type: integer
        default: 2
        description: Minimum number of times an interaction has to be described in a paper and mentioned in a database to be included in the OmniPath gene programs.
  - name: NicheNet Parameters
    arguments:
      - name: "--nichenet_version"
        type: string
        choices: ["v1", "v2"]
        default: "v2"
        description: |
          Version of the NicheNet ligand receptor network and ligand target gene regulatory potential matrix.
          `v2` is an improved version of `v1`, and has separate files for mouse and human.
      - name: "--nichenet_keep_target_genes_ratio"
        type: double
        default: 1.0
        # A ratio: bounded like --overlap_thresh_target_genes.
        min: 0.0
        max: 1.0
        description: |
          Ratio of target genes that are kept compared to total target genes.
          This ratio is applied over the entire matrix (not on gene program level), and determines the `all_gps_score_keep_threshold`, which will be used to filter target genes according to their regulatory potential scores.
      - name: "--nichenet_max_n_target_genes_per_gp"
        type: integer
        default: 250
        description: |
          Maximum number of target genes per gene program. If a gene program has more target genes than `max_n_target_genes_per_gp`, only the `max_n_target_genes_per_gp` target genes with the highest regulatory potential scores will be kept.
          Default value is chosen based on MultiNicheNet specification (see Browaeys, R. et al. MultiNicheNet: a flexible framework for differential cell-cell communication analysis from multi-sample multi-condition single-cell transcriptomics data. bioRxiv (2023) doi:10.1101/2023.06.13.544751).
  # Only the combined mask is mandatory; every other output is written on request.
  - name: Outputs
    arguments:
      - name: "--output"
        type: file
        direction: output
        required: true
        description: Path to the output gene program mask JSON file.
        example: gp_mask.json
      - name: "--output_omnipath_lr_network"
        type: file
        direction: output
        required: false
        description: Path to the output OmniPath ligand-receptor network CSV file.
        example: omnipath_lr_network.csv
      - name: "--output_nichenet_lr_network"
        type: file
        direction: output
        required: false
        description: Path to the output NicheNet ligand-receptor network CSV file.
        example: nichenet_lr_network.csv
      - name: "--output_nichenet_ligand_target_matrix"
        type: file
        direction: output
        required: false
        description: Path to the output NicheNet ligand-target gene regulatory potential matrix file.
        example: nichenet_ligand_target_matrix.csv
      - name: "--output_collectri_tf_network"
        type: file
        direction: output
        required: false
        description: Path to the output CollecTRI TF-target gene regulatory potential network CSV file.
        example: collectri_tf_network.csv
      - name: "--output_omnipath_gp_gene_count_distributions"
        type: file
        direction: output
        required: false
        description: Path to save the OmniPath gene program gene count distributions plot.
        example: omnipath_gp_gene_count_distributions.svg
      - name: "--output_nichenet_gp_gene_count_distributions"
        type: file
        direction: output
        required: false
        description: Path to save the NicheNet gene program gene count distributions plot.
        example: nichenet_gp_gene_count_distributions.svg
      - name: "--output_mebocost_gp_gene_count_distributions"
        type: file
        direction: output
        required: false
        description: Path to save the MeBocost gene program gene count distributions plot.
        example: mebocost_gp_gene_count_distributions.svg
      - name: "--output_collectri_tf_gp_gene_count_distributions"
        type: file
        direction: output
        required: false
        description: Path to save the CollecTRI TF gene program gene count distributions plot.
        example: collectri_tf_gp_gene_count_distributions.svg
# Files shipped with the component: the script itself plus the shared logger helper.
resources:
  - type: python_script
    path: script.py
  - path: /src/utils/setup_logger.py
# Files available to the component test.
test_resources:
  - type: python_script
    path: test.py
  - path: /resources_test/niche/
engines:
  - type: docker
    image: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
    setup:
      - type: apt
        packages: [libhdf5-dev, python3-pip, python3-dev, python-is-python3]
      # torch and the PyG extension wheels are installed from their own
      # package indexes, before the regular python requirements.
      - type: docker
        run: |
          pip install torch --index-url https://download.pytorch.org/whl/cu124 \
          && pip install pyg_lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.6.0+cu124.html
      - type: python
        packages: ["numpy<2", nichecompass]
    test_setup:
      - type: python
        __merge__: [ /src/base/requirements/viashpy.yaml, .]
runners:
  - type: executable
  - type: nextflow
    directives:
      label:
        - lowcpu
        - lowmem
        - lowdisk

View File

@@ -0,0 +1,207 @@
import os
import sys
import shutil
import json
from nichecompass.utils import (
extract_gp_dict_from_mebocost_ms_interactions,
extract_gp_dict_from_nichenet_lrt_interactions,
extract_gp_dict_from_omnipath_lr_interactions,
filter_and_combine_gp_dict_gps_v2,
extract_gp_dict_from_collectri_tf_network,
)
# Development placeholders for `par` (component arguments) and `meta`.
# NOTE(review): presumably viash replaces everything between the VIASH START
# and VIASH END markers with the real values at run time - confirm.
## VIASH START
par = {
"species": "mouse",
# switches selecting which prior knowledge sources are turned into gene programs
"create_omnipath_gene_program_mask": True,
"create_nichenet_gene_program_mask": True,
"create_mebocost_gene_program_mask": True,
"create_collectri_tf_gene_program_mask": False,
# omnipath params
"input_gene_orthologs_mapping_file": "resources_test/niche/human_mouse_gene_orthologs.csv",
"omnipath_min_curation_effort": 2,
# nichenet params
"nichenet_version": "v2",
"nichenet_keep_target_genes_ratio": 1.0,
"nichenet_max_n_target_genes_per_gp": 250,
# mebocost_gene_program_mask
"input_metabolite_enzymes": "resources_test/niche/mouse_metabolite_enzymes.tsv",
"input_metabolite_sensors": "resources_test/niche/mouse_metabolite_sensors.tsv",
# filter and combine programs
"overlap_thresh_target_genes": 1.0,
# output paths
"output": "prior_knowledge_gene_program_mask.json",
"output_omnipath_lr_network": "omnipath_lr_network.csv",
"output_nichenet_lr_network": "nichenet_lr_network.csv",
"output_nichenet_ligand_target_matrix": "nichenet_ligand_target_matrix_v2_mouse.csv",
"output_collectri_tf_network": "collectri_tf_network.csv",
"output_omnipath_gp_gene_count_distributions": "omnipath_gp_gene_count_distributions.svg",
"output_nichenet_gp_gene_count_distributions": "nichenet_gp_gene_count_distributions.svg",
"output_mebocost_gp_gene_count_distributions": "mebocost_gp_gene_count_distributions.svg",
"output_collectri_tf_gp_gene_count_distributions": "collectri_tf_gp_gene_count_distributions.svg",
}
meta = {"temp_dir": "tmp/", "resources_dir": "src/utils/"}
## VIASH END
# setup_logger is imported from the resources directory, so that directory
# has to be on the module search path before the import below.
sys.path.append(meta["resources_dir"])
from setup_logger import setup_logger
logger = setup_logger()
# Argument validation: fail early, before any network resource is fetched.
requested_masks = (
    par["create_omnipath_gene_program_mask"],
    par["create_nichenet_gene_program_mask"],
    par["create_mebocost_gene_program_mask"],
    par["create_collectri_tf_gene_program_mask"],
)
if not any(requested_masks):
    raise ValueError("At least one gene program mask must be set to True")

# The ortholog mapping is demanded for mouse data by both the OmniPath and
# the NicheNet mask; OmniPath is reported first when both are requested.
if par["species"] == "mouse" and not par["input_gene_orthologs_mapping_file"]:
    if par["create_omnipath_gene_program_mask"]:
        raise ValueError(
            "For mouse species, a --input_gene_orthologs_mapping_file file must be provided for generating the omnipath mask."
        )
    if par["create_nichenet_gene_program_mask"]:
        raise ValueError(
            "For mouse species, a --input_gene_orthologs_mapping_file file must be provided for generating the nichenet mask."
        )

# The MeBocost mask needs the enzymes table and the sensors table together.
if par["create_mebocost_gene_program_mask"] and not (
    par["input_metabolite_enzymes"] and par["input_metabolite_sensors"]
):
    raise ValueError(
        "For mebocost gene program mask, both --input_metabolite_enzymes and --input_metabolite_sensors files must be provided."
    )
# Assemble gene program dictionaries
# Each enabled prior knowledge source appends one gene program dictionary.
gp_dicts = []

if par["create_omnipath_gene_program_mask"]:
    logger.info("Generating Omnipath gene program mask...")
    # Optional outputs: plot / save only when the matching path was provided.
    plot_gp_gene_count_distributions = bool(
        par["output_omnipath_gp_gene_count_distributions"]
    )
    save_to_disk = bool(par["output_omnipath_lr_network"])
    omnipath_gp_dict = extract_gp_dict_from_omnipath_lr_interactions(
        species=par["species"],
        min_curation_effort=par["omnipath_min_curation_effort"],
        load_from_disk=False,
        # Was hard-coded to True, which requested saving even when
        # --output_omnipath_lr_network was not set.
        save_to_disk=save_to_disk,
        lr_network_file_path=par["output_omnipath_lr_network"],
        gene_orthologs_mapping_file_path=par["input_gene_orthologs_mapping_file"],
        plot_gp_gene_count_distributions=plot_gp_gene_count_distributions,
        gp_gene_count_distributions_save_path=par[
            "output_omnipath_gp_gene_count_distributions"
        ],
    )
    gp_dicts.append(omnipath_gp_dict)
if par["create_nichenet_gene_program_mask"]:
    logger.info("Generating NicheNet gene program mask...")
    # Saving is switched on as soon as either of the two NicheNet table
    # outputs (ligand-receptor network, ligand-target matrix) was requested.
    nichenet_tables_requested = bool(
        par["output_nichenet_lr_network"]
        or par["output_nichenet_ligand_target_matrix"]
    )
    nichenet_plot_path = par["output_nichenet_gp_gene_count_distributions"]
    gp_dicts.append(
        extract_gp_dict_from_nichenet_lrt_interactions(
            species=par["species"],
            version=par["nichenet_version"],
            keep_target_genes_ratio=par["nichenet_keep_target_genes_ratio"],
            max_n_target_genes_per_gp=par["nichenet_max_n_target_genes_per_gp"],
            load_from_disk=False,
            save_to_disk=nichenet_tables_requested,
            lr_network_file_path=par["output_nichenet_lr_network"],
            ligand_target_matrix_file_path=par[
                "output_nichenet_ligand_target_matrix"
            ],
            gene_orthologs_mapping_file_path=par[
                "input_gene_orthologs_mapping_file"
            ],
            plot_gp_gene_count_distributions=bool(nichenet_plot_path),
            gp_gene_count_distributions_save_path=nichenet_plot_path,
        )
    )
if par["create_mebocost_gene_program_mask"]:
    logger.info("Generating MeBocost gene program mask...")
    # The extraction function is handed a directory and a species rather than
    # file paths, so both tables are staged in the temp dir under
    # species-prefixed names.
    # NOTE(review): presumably these are the file names the function looks up - confirm.
    staged_enzymes = os.path.join(
        meta["temp_dir"], f"{par['species']}_metabolite_enzymes.tsv"
    )
    staged_sensors = os.path.join(
        meta["temp_dir"], f"{par['species']}_metabolite_sensors.tsv"
    )
    shutil.copy2(par["input_metabolite_enzymes"], staged_enzymes)
    shutil.copy2(par["input_metabolite_sensors"], staged_sensors)

    mebocost_plot_path = par["output_mebocost_gp_gene_count_distributions"]
    gp_dicts.append(
        extract_gp_dict_from_mebocost_ms_interactions(
            dir_path=meta["temp_dir"],
            species=par["species"],
            plot_gp_gene_count_distributions=bool(mebocost_plot_path),
            gp_gene_count_distributions_save_path=mebocost_plot_path,
        )
    )
if par["create_collectri_tf_gene_program_mask"]:
    logger.info("Generating CollecTRI TF gene program mask...")
    # Both optional outputs are driven by whether their path argument is set.
    collectri_network_path = par["output_collectri_tf_network"]
    collectri_plot_path = par["output_collectri_tf_gp_gene_count_distributions"]
    gp_dicts.append(
        extract_gp_dict_from_collectri_tf_network(
            species=par["species"],
            save_to_disk=bool(collectri_network_path),
            tf_network_file_path=collectri_network_path,
            plot_gp_gene_count_distributions=bool(collectri_plot_path),
            gp_gene_count_distributions_save_path=collectri_plot_path,
        )
    )
# Merge the per-source dictionaries into one mask and write it out as JSON.
assert len(gp_dicts) > 0, "No gene program dictionaries were created."

combined_gp_dict = filter_and_combine_gp_dict_gps_v2(
    gp_dicts,
    overlap_thresh_target_genes=par["overlap_thresh_target_genes"],
    verbose=True,
)

logger.info("Gene program mask generation completed.")
logger.info(
    f"Number of gene programs after filtering and combining: {len(combined_gp_dict)}."
)

logger.info(f"Saving combined gene program mask to: {par['output']}")
with open(par["output"], "w") as mask_handle:
    mask_handle.write(json.dumps(combined_gp_dict))

View File

@@ -0,0 +1,132 @@
import pytest
import json
# Development placeholders for `meta`.
# NOTE(review): presumably overwritten by viash when the test is run through
# `viash test` - confirm.
## VIASH START
meta = {
    "executable": "./target/executable/nichecompass/gene_program_mask/gene_program_mask",
    # Parent of the `niche` folder: the paths below append `/niche/...`
    # themselves, so pointing at `resources_test/niche/` would resolve to
    # `.../niche//niche/...` when the test is run locally.
    "resources_dir": "./resources_test",
}
## VIASH END

import sys

sys.path.append(meta["resources_dir"])

# Input files shared by all tests in this module.
ortholog_file = f"{meta['resources_dir']}/niche/human_mouse_gene_orthologs.csv"
enzymes_file = f"{meta['resources_dir']}/niche/mouse_metabolite_enzymes.tsv"
sensors_file = f"{meta['resources_dir']}/niche/mouse_metabolite_sensors.tsv"
def test_simple_execution(run_component, tmp_path):
    """Run the component with default switches on the mouse inputs.

    Checks that the mask JSON is written, that it holds at least one gene
    program of every expected kind, and that one representative program per
    kind carries the source/target entries.
    """
    output = tmp_path / "output.json"
    args = [
        "--input_gene_orthologs_mapping_file",
        ortholog_file,
        "--input_metabolite_enzymes",
        enzymes_file,
        "--input_metabolite_sensors",
        sensors_file,
        "--species",
        "mouse",
        "--output",
        str(output),
    ]
    run_component(args)

    # check files
    assert output.is_file(), "Output file does not exist"

    # Read gene program mask
    with open(output, "r") as f:
        gp_mask = json.load(f)

    # Name fragments identifying the kind of a gene program; matched as
    # substrings of the gene program names.
    expected_gp_keys = [
        "ligand_receptor_GP",
        "ligand_receptor_target_gene_GP",
        "metabolite_enzyme_sensor_GP",
        "TF_target_genes_GP",
        "combined_GP",
    ]
    matching_gp = []
    for key in expected_gp_keys:
        # First gene program of this kind, or None when the kind is absent.
        gp = next((name for name in gp_mask if key in name), None)
        assert gp is not None, f"No gene programs containing '{key}' found"
        matching_gp.append(gp)

    expected_keys = [
        "sources",
        "targets",
        "sources_categories",
        "targets_categories",
    ]
    for gp in matching_gp:
        missing = [key for key in expected_keys if key not in gp_mask[gp]]
        assert not missing, (
            f"Gene program {gp} is missing expected keys: {missing}"
        )
def test_outputs(run_component, tmp_path):
    """Run the component with every optional output requested.

    Checks that the mask itself and each optional network table and gene
    count distribution plot are written.
    """
    output = tmp_path / "output.json"
    omnipath_lr = tmp_path / "omnipath_lr_network.tsv"
    nichenet_lr = tmp_path / "nichenet_lr_network.tsv"
    nichenet_lt = tmp_path / "nichenet_ligand_target_matrix.csv"
    collectri_tf = tmp_path / "output_collectri_tf_network.csv"
    omnipath_distr = tmp_path / "omnipath_distr.svg"
    nichenet_distr = tmp_path / "nichenet_distr.svg"
    mebocost_distr = tmp_path / "mebocost_distr.svg"
    collectri_distr = tmp_path / "collectri_distr.svg"

    args = [
        "--input_gene_orthologs_mapping_file",
        ortholog_file,
        "--input_metabolite_enzymes",
        enzymes_file,
        "--input_metabolite_sensors",
        sensors_file,
        "--species",
        "mouse",
        "--output",
        str(output),
        "--output_omnipath_lr_network",
        str(omnipath_lr),
        "--output_nichenet_lr_network",
        str(nichenet_lr),
        "--output_nichenet_ligand_target_matrix",
        str(nichenet_lt),
        "--output_collectri_tf_network",
        str(collectri_tf),
        "--output_omnipath_gp_gene_count_distributions",
        str(omnipath_distr),
        "--output_nichenet_gp_gene_count_distributions",
        str(nichenet_distr),
        "--output_mebocost_gp_gene_count_distributions",
        str(mebocost_distr),
        "--output_collectri_tf_gp_gene_count_distributions",
        str(collectri_distr),
    ]
    run_component(args)

    # The main mask is checked too; it was previously left unverified here.
    expected_outputs = [
        output,
        omnipath_lr,
        nichenet_lr,
        nichenet_lt,
        collectri_tf,
        omnipath_distr,
        nichenet_distr,
        mebocost_distr,
        collectri_distr,
    ]
    # Distinct loop name so the `output` path above is not shadowed.
    for expected_file in expected_outputs:
        assert expected_file.is_file(), (
            f"Expected output file {expected_file} does not exist"
        )
# Executed as a script: run pytest on this file and use its return value
# as the process exit status.
if __name__ == "__main__":
    exit_status = pytest.main([__file__])
    sys.exit(exit_status)

View File

@@ -42,30 +42,6 @@ argument_groups:
multiple: true
description: "Keys of the adata.obs fields to use as covariates."
- name: "Spatial Neighbors Calculation"
arguments:
- name: "--coord_type"
type: string
choices: ["generic", "grid"]
description: |
Type of coordinate system. Valid options are:
`grid` - grid coordinates.
`generic` - generic coordinates.
If not provided, `grid` is used if `--input_obsm_spatial_coords` is in --input .uns with `--n_neighs` = 6 (Visium), otherwise `generic` is used.
- name: "--n_spatial_neighbors"
type: integer
default: 6
description: |
Depending on `--coord_type`:
`grid` - number of neighboring tiles.
`generic` - number of neighborhoods for non-grid data. Only used when `--delaunay False`.
- name: "--delaunay"
type: boolean
default: false
description: |
Whether to use Delaunay triangulation to determine spatial neighborhood graph.
Only used when `--coord_type generic`.
- name: Gene Program Mask
arguments:
- name: "--min_genes_per_gp"
@@ -416,9 +392,9 @@ argument_groups:
default: nichecompass_active_gp_names
description: |
Key of the uns field where the active gene program names will be stored.
- name: "--output_uns_gene_index"
- name: "--output_uns_genes_index"
type: string
default: nichecompass_gene_idx
default: nichecompass_genes_idx
description: |
Key of the uns field where the index of a concatenated vector of target and source genes that are in the gene program masks will be stored.
- name: "--output_uns_target_genes_index"
@@ -447,6 +423,7 @@ argument_groups:
default: nichecompass_agg_weights
description: |
Key of the obsp field where the aggregation weights of the node label aggregator will be stored.
__merge__: [., /src/base/h5_compression_argument.yaml]
resources:
- type: python_script

View File

@@ -9,7 +9,7 @@ from torch.cuda import is_available as cuda_is_available
## VIASH START
par = {
# Inputs
"input": "resources_test/cosmx/Lung5_Rep2_tiny.h5mu",
"input": "work/8c/d3f8f50ac967abac81eabcda42798e/_viash_par/input_1/merged.obsp_block_concatenation.output.h5mu",
"modality": "rna",
"layer": None,
"input_gp_mask": "resources_test/niche/prior_knowledge_gp_mask.json",
@@ -98,21 +98,19 @@ logger.info("GPU enabled? %s", use_gpu)
## Read in data
adata = mu.read_h5ad(par["input"], mod=par["modality"])
# ## Compute spatial neighbor graph
# logger.info("Computing spatial neighbor graph...")
# # Compute connectivities and distances
# sq.gr.spatial_neighbors(
# adata,
# coord_type=par["coord_type"],
# spatial_key=par["input_obsm_spatial_coords"],
# n_neighs=par["n_spatial_neighbors"],
# delaunay=par["delaunay"],
# )
# # Making the connectivity matrix symmetric
# adata.obsp["spatial_connectivities"] = adata.obsp["spatial_connectivities"].maximum(
# adata.obsp["spatial_connectivities"].T
# )
# Counts need to be float32 to be processed by nichecompass model
# See https://discuss.pytorch.org/t/runtimeerror-mat1-and-mat2-must-have-the-same-dtype/166759
counts_dtype = (
adata.layers[par["layer"]].dtype if par["layer"] is not None else adata.X.dtype
)
if counts_dtype != "float32":
logger.info(
f"Converting count data to float32 from {counts_dtype} for model compatibility..."
)
if par["layer"] is not None:
adata.layers[par["layer"]] = adata.layers[par["layer"]].astype("float32")
else:
adata.X = adata.X.astype("float32")
## Add GP mask to data
logger.info("Adding prior knowledge gene program mask to data...")
@@ -125,7 +123,7 @@ add_gps_from_gp_dict_to_adata(
gp_targets_mask_key=par["output_varm_gp_targets_mask"],
gp_sources_mask_key=par["output_varm_gp_sources_mask"],
gp_names_key=par["output_uns_gp_names"],
genes_idx_key=par["output_uns_gene_index"],
genes_idx_key=par["output_uns_genes_index"],
target_genes_idx_key=par["output_uns_target_genes_index"],
source_genes_idx_key=par["output_uns_source_genes_index"],
min_genes_per_gp=par["min_genes_per_gp"],
@@ -148,12 +146,12 @@ model = NicheCompass(
gp_sources_mask_key=par["output_varm_gp_sources_mask"],
latent_key=par["output_obsm_embedding"],
cat_covariates_keys=par["input_obs_covariates"],
cat_covariates_no_edges=par["covariates_edges"],
cat_covariates_no_edges=par["covariate_edges"],
cat_covariates_embeds_keys=par["output_uns_covariate_embeddings"],
cat_covariates_embeds_injection_layers=par["covariate_embedding_injection_layers"],
gene_idx_key=par["output_uns_gene_index"],
target_gene_idx_key=par["output_uns_target_genes_index"],
source_gene_idx_key=par["output_uns_source_genes_index"],
cat_covariates_embeds_injection=par["covariate_embedding_injection_layers"],
genes_idx_key=par["output_uns_genes_index"],
target_genes_idx_key=par["output_uns_target_genes_index"],
source_genes_idx_key=par["output_uns_source_genes_index"],
recon_adj_key=par["output_obsp_reconstructed_adj_edge_proba"],
agg_weights_key=par["output_obsp_agg_weights"],
include_edge_recon_loss=par["include_edge_recon_loss"],
@@ -174,7 +172,6 @@ model = NicheCompass(
encoder_use_bn=par["encoder_use_bn"],
dropout_rate_encoder=par["dropout_rate_encoder"],
dropout_rate_graph_decoder=par["dropout_rate_graph_decoder"],
cat_covariates_cats=par["cat_covariates_cats"],
n_addon_gp=par["n_addon_gp"],
cat_covariates_embeds_nums=par["cat_covariates_embeds_nums"],
seed=par["random_state"],
@@ -212,6 +209,6 @@ model.train(
## Save model and data
logger.info("Saving NicheCompass model and data...")
mdata = mu.MuData({par["modality"]: adata})
mdata.write_h5mu(par["output"])
mdata.write_h5mu(par["output"], compression=par["output_compression"])
model.save(par["output_model"], save_adata=False)

View File

@@ -1,5 +1,6 @@
import pytest
import mudata as mu
import sys
## VIASH START
meta = {
@@ -24,13 +25,16 @@ def test_simple_execution_xenium(run_component, tmp_path):
gp_mask,
"--n_epochs",
"1",
"n_epochs_all_gps",
"--n_epochs_all_gps",
"0",
"n_epochs_no_edge_recon",
"--n_epochs_no_edge_recon",
"0",
"n_epochs_no_cat_covariates_contrastive",
"0--output",
"--n_epochs_no_cat_covariates_contrastive",
"0",
"--output",
str(output),
"--output_model",
"test_model",
"--output_compression",
"gzip",
]
@@ -50,17 +54,15 @@ def test_simple_execution_xenium(run_component, tmp_path):
"nichecompass_gp_names",
"nichecompass_active_gp_names",
]
assert all([uns in expected_uns_keys for uns in adata.uns.keys()])
assert all([uns in adata.uns.keys() for uns in expected_uns_keys]), (
f"Expected uns keys: {expected_uns_keys}, found: {list(adata.uns.keys())}"
)
assert len(adata.uns["nichecompass_gp_names"]) > len(
adata.uns["nichecompass_active_gp_names"]
), "Expected less active GP names than total GP names"
assert adata.uns["nichecompass_genes_idx"] == (
adata.uns["nichecompass_source_genes_idx"]
+ adata.uns["nichecompass_target_genes_idx"]
), "Expected genes idx to be union of source and target genes idx"
expected_obsm_keys = ["nichecompass_latent"]
assert all([obsm in expected_obsm_keys for obsm in adata.obsm.keys()]), (
assert all([obsm in adata.obsm.keys() for obsm in expected_obsm_keys]), (
"Not all expected obsm keys found"
)
assert all(adata.obsm[obsm].dtype.kind == "f" for obsm in expected_obsm_keys), (
@@ -73,7 +75,7 @@ def test_simple_execution_xenium(run_component, tmp_path):
"nichecompass_gp_sources_categories",
"nichecompass_gp_targets_categories",
]
assert all([varm in expected_varm_keys for varm in adata.varm.keys()]), (
assert all([varm in adata.varm.keys() for varm in expected_varm_keys]), (
"Not all expected varm keys found"
)
assert (
@@ -83,4 +85,4 @@ def test_simple_execution_xenium(run_component, tmp_path):
if __name__ == "__main__":
pytest.main([__file__])
sys.exit(pytest.main([__file__]))

View File

@@ -0,0 +1,371 @@
name: "nichecompass_leiden"
namespace: "workflows/niche"
scope: "public"
description: "A pipeline to compute the spatial neighborhood graph, perform nichecompass embedding followed by Leiden clustering."
authors:
- __merge__: /src/authors/dorien_roosen.yaml
roles: [ author, maintainer ]
- __merge__: /src/authors/weiwei_schultz.yaml
roles: [ contributor ]
info:
test_dependencies:
- name: nichecompass_leiden_test
namespace: test_workflows/niche
argument_groups:
- name: Inputs
arguments:
- name: "--id"
required: true
type: string
description: ID of the sample.
example: foo
- name: "--input"
alternatives: [-i]
description: Path to the sample.
required: true
example: input.h5mu
type: file
- name: "--input_gp_mask"
type: file
required: true
description: |
JSON file containing a nested dictionary containing the gene programs,
with keys being gene program names and values being dictionaries with keys `targets` and `sources`,
where `targets` contains a list of the names of genes in the gene program for the reconstruction of the gene expression of the node itself (receiving node)
and `sources` contains a list of the names of genes in the gene program for the reconstruction of the gene expression of the node's neighbors (transmitting nodes).
example: prior_knowledge_gp_mask.json
- name: "--modality"
description: Which modality to process.
type: string
default: "rna"
required: false
- name: "--layer"
description: "Layer to use as input for the spatial neighborhood graph and the NicheCompass model. If not specified, adata.X is used."
type: string
example: "raw_counts"
required: false
- name: "--input_obs_covariates"
type: string
multiple: true
default: ["sample_id"]
description: "Keys of the adata.obs fields to use as covariates."
- name: "--input_obsm_spatial_coords"
type: string
default: "spatial"
description: "Key in adata.obsm where spatial coordinates are stored"
- name: "Sample ID options"
description: |
Options for adding the id to .obs on the MuData object. Having a sample
id present is a requirement of several components in this pipeline.
arguments:
- name: "--include_sample_as_covariate"
description: |
Whether to include the sample information as a categorical covariate for the
NicheCompass model.
type: boolean
default: true
- name: "--add_id_to_obs"
description: "Add the value passed with --id to .obs."
type: boolean
default: true
- name: --add_id_obs_output
description: |
.Obs column to add the sample IDs to. Required and only used when
--add_id_to_obs is set to 'true'
type: string
default: "sample_id"
- name: "--add_id_make_observation_keys_unique"
type: boolean
description: |
Join the id to the .obs index (.obs_names).
Only used when --add_id_to_obs is set to 'true'.
default: true
- name: "Spatial Neighbors Calculation"
description: |
Options for the calculation of the spatial neighborhood graph.
arguments:
- name: "--coord_type"
type: string
choices: ["generic", "grid"]
description: |
Type of coordinate system provided by `--input_obsm_spatial_coords`. Valid options are:
`grid` - grid coordinates.
`generic` - generic coordinates.
If not provided, `grid` is used if `--input_obsm_spatial_coords` is in --input .uns with `--n_spatial_neighbors` = 6 (Visium), otherwise `generic` is used.
- name: "--n_spatial_neighbors"
type: integer
default: 6
description: |
Depending on `--coord_type`:
`grid` - number of neighboring tiles.
`generic` - number of neighborhoods for non-grid data. Only used when `--delaunay False`.
- name: "--delaunay"
type: boolean
default: false
description: |
Whether to use Delaunay triangulation to determine spatial neighborhood graph.
Only used when `--coord_type generic`.
- name: Gene Program Mask
description: Options for filtering gene programs based on the number of genes available in the data.
arguments:
- name: "--min_genes_per_gp"
type: integer
default: 1
min: 0
description: |
Minimum number of genes in a gene program including both target and source genes that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded.
- name: "--min_source_genes_per_gp"
type: integer
default: 0
min: 0
description: |
Minimum number of source genes in a gene program that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded.
- name: "--min_target_genes_per_gp"
type: integer
default: 0
min: 0
description: |
Minimum number of target genes in a gene program that need to be available in the input data (gene expression has been probed) for a gene program not to be discarded.
- name: "--max_genes_per_gp"
type: integer
min: 1
description: |
Maximum number of genes in a gene program including both target and source genes that can be available in the input data (gene expression has been probed) for a gene program not to be discarded.
- name: "--max_source_genes_per_gp"
type: integer
min: 1
description: |
Maximum number of source genes in a gene program that can be available in the input data (gene expression has been probed) for a gene program not to be discarded.
- name: "--max_target_genes_per_gp"
type: integer
min: 1
description: |
Maximum number of target genes in a gene program that can be available in the input data (gene expression has been probed) for a gene program not to be discarded.
- name: "--filter_genes_not_in_masks"
type: boolean_true
description: |
Whether to remove the genes that are not in the gp masks from the input data.
- name: NicheCompass Model Architecture
description: Options for the NicheCompass model architecture.
arguments:
- name: "--covariate_edges"
type: boolean
multiple: true
description: |
List of booleans that indicate whether there can be edges between different categories of the categorical covariates.
If this is `True` for a specific categorical covariate, this covariate will be excluded from the edge reconstruction loss.
Needs to match the length and order of `--input_obs_covariates`.
- name: "--gene_expr_recon_dist"
type: string
choices: ["nb", "zinb"]
default: "nb"
description: |
The distribution used for gene expression reconstruction.
If `nb`, uses a negative binomial distribution.
If `zinb`, uses a zero-inflated negative binomial distribution.
- name: "--log_variational"
type: boolean
default: true
description: |
Whether to transform x by log(x+1) prior to encoding for numerical stability (not for normalization).
- name: "--node_label_method"
  type: string
  # Allowed values are exactly the three methods documented in the description below.
  choices: ["one-hop-sum", "one-hop-norm", "one-hop-attention"]
  default: "one-hop-norm"
  description: |
    Node label method that will be used for omics reconstruction.
    If `one-hop-sum`, uses a concatenation of the node's input features with the sum of the input features of all nodes in the node's one-hop neighborhood.
    If `one-hop-norm`, uses a concatenation of the node's input features with the node's one-hop neighbors input features normalized as per Kipf, T. N. & Welling, M. Semi-Supervised Classification with Graph Convolutional Networks. arXiv [cs.LG] (2016).
    If `one-hop-attention`, uses a concatenation of the node's input features with the node's one-hop neighbors input features weighted by an attention mechanism.
- name: "--active_gp_thresh_ratio"
type: double
default: 0.1
min: 0.0
max: 1.0
description: |
Ratio that determines which gene programs are considered active and are used in the latent representation after model training.
All inactive gene programs will be dropped during model training after a determined number of epochs.
Aggregations of the absolute values of the gene weights of the gene expression decoder per gene program are calculated.
The maximum value, i.e. the value of the gene program with the highest aggregated value will be used as a benchmark and all gene programs whose aggregated value is smaller than `--active_gp_thresh_ratio` times this maximum value will be set to inactive.
If set to 0, all gene programs will be considered active.
- name: "--active_gp_type"
type: string
choices: ["mixed", "separate"]
default: "separate"
description: |
Type to determine active gene programs.
Can be `mixed`, in which case active gene programs are determined across prior and add-on gene programs jointly,
or `separate` in which case they are determined separately for prior and add-on gene programs.
- name: "--n_addon_gp"
type: integer
default: 100
min: 0
description: |
Number of addon gene programs (i.e. gene programs that are not included in masks but can be learned de novo).
- name: "--cat_covariates_embeds_nums"
type: integer
multiple: true
description: |
Number of embedding nodes for all categorical covariates.
Must be the same length as `--input_obs_covariates`.
- name: "--random_state"
default: 0
type: integer
min: 0
description: |
Random seed for reproducibility.
- name: NicheCompass Training Parameters
description: Options for training the NicheCompass model.
arguments:
- name: "--n_epochs"
type: integer
min: 1
default: 100
description: Number of training epochs
- name: "--n_epochs_all_gps"
type: integer
min: 0
default: 25
description: |
Number of epochs during which all gene programs are used for model training.
After that only active gene programs are retained.
- name: "--n_epochs_no_edge_recon"
type: integer
default: 0
min: 0
description: |
Number of epochs during which the edge reconstruction loss is excluded from backpropagation for pretraining using the other loss components.
- name: "--n_epochs_no_cat_covariates_contrastive"
type: integer
default: 5
min: 0
description: |
Number of epochs during which the categorical covariates contrastive loss is excluded from backpropagation for pretraining using the other loss components.
- name: "--lr"
type: double
default: 0.001
min: 0.0
max: 1.0
description: Learning rate
- name: "--weight_decay"
type: double
default: 0.001
description: Weight decay (L2 penalty).
- name: "--edge_val_ratio"
type: double
default: 0.1
min: 0.0
max: 1.0
description: |
Fraction of the data that is used as validation set on edge-level. The rest of the data will be used as training set on edge-level.
- name: "--node_val_ratio"
type: double
default: 0.1
min: 0.0
max: 1.0
description: |
Fraction of the data that is used as validation set on node-level. The rest of the data will be used as training set on node-level.
- name: "--edge_batch_size"
type: integer
min: 1
default: 256
description: |
Batch size for the edge-level dataloaders.
- name: "--node_batch_size"
type: integer
min: 1
description: |
Batch size for the node-level dataloaders.
If not provided, is automatically determined based on `--edge_batch_size`.
- name: "--n_sampled_neighbors"
type: integer
default: -1
min: -1
description: |
Number of neighbors that are sampled during model training from the spatial neighborhood graph.
If set to -1, all direct neighbors are included.
- name: Clustering options
arguments:
- name: "--obs_cluster"
type: string
description: |
Prefix for the .obs keys under which to add the cluster labels. Newly created columns in .obs will
be created from the specified value for '--obs_cluster' suffixed with an underscore and one of the
resolutions specified in '--leiden_resolution'.
default: "nichecompass_leiden"
- name: "--leiden_resolution"
type: double
description: Control the coarseness of the clustering. Higher values lead to more clusters.
default: [1]
multiple: true
- name: Umap options
arguments:
- name: "--obsm_umap"
type: string
default: "X_leiden_nichecompass_umap"
required: false
description: "In which .obsm slot to store the resulting UMAP embedding."
- name: Neighbour calculation
arguments:
- name: "--uns_neighbors"
type: string
default: nichecompass_neighbors
description: In which .uns slot to store various neighbor output objects.
- name: "--obsp_neighbor_distances"
type: string
default: "nichecompass_distances"
description: "In which .obsp slot to store the distance matrix between the resulting neighbors."
- name: "--obsp_neighbor_connectivities"
type: string
default: "nichecompass_connectivities"
description: "In which .obsp slot to store the connectivities matrix between the resulting neighbors."
- name: "Outputs"
arguments:
- name: "--output"
type: file
required: true
direction: output
description: Destination path to the output.
example: output.h5mu
- name: "--output_model"
type: file
required: true
direction: output
description: Directory to save the trained NicheCompass model.
- name: "--output_obsm_embedding"
type: string
default: nichecompass_latent
description: |
Key of the obsm field where the latent / gene program representation of active gene programs will be stored after NicheCompass model training.
dependencies:
- name: dataflow/obsp_block_concatenation
- name: neighbors/spatial_neighborhood_graph
- name: nichecompass/nichecompass
- name: metadata/add_id
repository: openpipeline
- name: workflows/multiomics/neighbors_leiden_umap
repository: openpipeline
resources:
- type: nextflow_script
path: main.nf
entrypoint: run_wf
test_resources:
- type: nextflow_script
path: test.nf
entrypoint: test_wf
- path: /resources_test/xenium/xenium_tiny.h5mu
- path: /resources_test/cosmx/Lung5_Rep2_tiny.h5mu
- path: /resources_test/niche/prior_knowledge_gp_mask.json
runners:
- type: nextflow

View File

@@ -0,0 +1,16 @@
#!/bin/bash

# Runs the nichecompass_leiden workflow test (entry point `test_wf`) with Nextflow.

# Stop at the first failing command, so that a failed `git rev-parse` or `cd`
# cannot lead to nextflow being started from the wrong directory.
set -eo pipefail

# get the root of the directory
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

nextflow \
  run . \
  -main-script src/workflows/niche/nichecompass_leiden/test.nf \
  -entry test_wf \
  -resume \
  -profile docker,no_publish \
  -c src/workflows/utils/labels_ci.config \
  -c src/workflows/utils/integration_tests.config

View File

@@ -0,0 +1,162 @@
workflow run_wf {
take:
input_ch
main:
output_ch = input_ch
| map { id, state ->
[id, state + [
workflow_output: state.output,
_meta: [join_id: id]
]]
}
// If requested, add the id of the events (samples) to a column in .obs.
// Also allows making .obs_names (the .obs index) unique, by prefixing the values with a unique id per .h5mu file.
// The latter is useful to avoid duplicate observations during concatenation.
| add_id.run(
filter: {id, state -> state.add_id_to_obs },
fromState: {id, state ->
def newState = [
"input": state.input,
"input_id": id,
"make_observation_keys_unique": state.add_id_make_observation_keys_unique,
"obs_output": state.add_id_obs_output,
"add_id_to_obs": state.add_id_to_obs
]
newState
},
toState: {id, output, state ->
def keysToRemove = ["add_id_to_obs", "add_id_obs_output", "add_id_make_observation_keys_unique"]
def newState = state.findAll{it.key !in keysToRemove}
newState + ["input": output.output]
}
)
| spatial_neighborhood_graph.run(
fromState: {id, state -> [
"input": state.input,
"modality": state.modality,
"layer": state.layer,
"input_obsm_spatial_coords": state.input_obsm_spatial_coords,
"coord_type": state.coord_type,
"n_spatial_neighbors": state.n_spatial_neighbors,
"delaunay": state.delaunay
]},
toState: {id, output, state ->
def keysToRemove = ["input_obsm_spatial_coords", "coord_type", "n_spatial_neighbors", "delaunay"]
def newState = state.findAll{it.key !in keysToRemove}
newState + ["input": output.output]
}
)
| joinStates { ids, states ->
  // Merge the per-sample states into a single event with id "merged".
  def newId = "merged"
  // Keys whose values are collected across samples instead of being required to match.
  def new_state_non_unique_values = [
    input: states.collect{it.input},
    input_id: ids,
    _meta: [join_id: ids[0]]
  ]
  // Union of the keys found in any state, minus the keys that are handled separately.
  def all_state_keys = states.inject([].toSet()){ current_keys, state ->
    def new_keys = current_keys + state.keySet()
    return new_keys
  }.minus(["output", "id", "input", "_meta"])
  // Build the shared part of the new state; every remaining argument must have
  // one and the same value in all samples.
  def new_state = all_state_keys.inject([:]){ old_state, argument_name ->
    // Declared with `def` so the variable stays local to this closure.
    def argument_values = states.collect{it.get(argument_name)}.unique()
    assert argument_values.size() == 1, "Arguments should be the same across samples. Argument name: $argument_name, argument value: $argument_values"
    // Exactly one distinct value remains after the assertion above.
    return old_state + [(argument_name): argument_values[0]]
  }
  def data_state = new_state_non_unique_values + new_state
  [ newId, data_state ]
}
| obsp_block_concatenation.run(
fromState: { id, state -> [
"input": state.input,
"modality": state.modality,
"input_id": state.input_id
]},
toState: {id, output, state ->
def keysToRemove = ["input_id"]
def newState = state.findAll{it.key !in keysToRemove}
newState + ["input": output.output]
}
)
| nichecompass.run(
  // Forward the gene program mask, model architecture and training settings
  // from the workflow state to the nichecompass component.
  fromState: {id, state -> [
    "input": state.input,
    "input_gp_mask": state.input_gp_mask,
    "input_obs_covariates": state.input_obs_covariates,
    "modality": state.modality,
    "layer": state.layer,
    "min_genes_per_gp": state.min_genes_per_gp,
    "min_source_genes_per_gp": state.min_source_genes_per_gp,
    "min_target_genes_per_gp": state.min_target_genes_per_gp,
    "max_genes_per_gp": state.max_genes_per_gp,
    "max_source_genes_per_gp": state.max_source_genes_per_gp,
    "max_target_genes_per_gp": state.max_target_genes_per_gp,
    "filter_genes_not_in_masks": state.filter_genes_not_in_masks,
    "covariate_edges": state.covariate_edges,
    // NOTE(review): this key differs from the workflow argument name
    // (`gene_expr_recon_dist`); confirm it matches the component's argument.
    "gene_expr_recon_distribution": state.gene_expr_recon_dist,
    "log_variational": state.log_variational,
    "node_label_method": state.node_label_method,
    "active_gp_thresh_ratio": state.active_gp_thresh_ratio,
    "active_gp_type": state.active_gp_type,
    "n_addon_gp": state.n_addon_gp,
    "cat_covariates_embeds_nums": state.cat_covariates_embeds_nums,
    "random_state": state.random_state,
    "n_epochs": state.n_epochs,
    "n_epochs_all_gps": state.n_epochs_all_gps,
    "n_epochs_no_edge_recon": state.n_epochs_no_edge_recon,
    // The workflow argument is `--n_epochs_no_cat_covariates_contrastive` (no `_loss`
    // suffix), so the state has to be read and forwarded under that exact name.
    "n_epochs_no_cat_covariates_contrastive": state.n_epochs_no_cat_covariates_contrastive,
    "lr": state.lr,
    "weight_decay": state.weight_decay,
    "edge_val_ratio": state.edge_val_ratio,
    "node_val_ratio": state.node_val_ratio,
    "edge_batch_size": state.edge_batch_size,
    "node_batch_size": state.node_batch_size,
    "n_sampled_neighbors": state.n_sampled_neighbors,
    "output_obsm_embedding": state.output_obsm_embedding,
    "output_model": state.output_model
  ]},
  // Fixed key under which the spatial connectivities are looked up.
  args: [
    "input_obsm_spatial_connectivities": "spatial_connectivities"
  ],
  // The component output becomes the input of the next step; the model path is kept.
  toState: [
    "input": "output",
    "output_model": "output_model"
  ]
)
| neighbors_leiden_umap.run(
fromState: { id, state -> [
"input": state.input,
"modality": state.modality,
"obsm_input": state.output_obsm_embedding,
"output": state.workflow_output,
"uns_neighbors": state.uns_neighbors,
"obsp_neighbor_distances": state.obsp_neighbor_distances,
"obsp_neighbor_connectivities": state.obsp_neighbor_connectivities,
"leiden_resolution": state.leiden_resolution,
"obs_cluster": state.obs_cluster,
"obsm_umap": state.obsm_umap,
]},
toState: ["output": "output"]
)
| setState(["output": "output", "output_model": "output_model", "_meta": "_meta"])
| view()
emit:
output_ch
}

View File

@@ -0,0 +1,10 @@
manifest {
nextflowVersion = '!>=20.12.1-edge'
}
params {
rootDir = java.nio.file.Paths.get("$projectDir/../../../../").toAbsolutePath().normalize().toString()
}
// include common settings
includeConfig("${params.rootDir}/src/workflows/utils/labels.config")

View File

@@ -0,0 +1,70 @@
nextflow.enable.dsl=2
include { nichecompass_leiden } from params.rootDir + "/target/nextflow/workflows/niche/nichecompass_leiden/main.nf"
include { nichecompass_leiden_test } from params.rootDir + "/target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/main.nf"
params.resources_test = params.rootDir + "/resources_test"
// Runs the nichecompass_leiden workflow on two tiny samples (xenium and cosmx),
// checks the shape of the single merged output event and hands the resulting
// h5mu file to the nichecompass_leiden_test component.
workflow test_wf {
  resources_test = file(params.resources_test)

  output_ch =
    Channel.fromList([
      [
        id: "xenium",
        input: resources_test.resolve("xenium/xenium_tiny.h5mu"),
        input_gp_mask: resources_test.resolve("niche/prior_knowledge_gp_mask.json"),
        n_epochs: 1,
        n_epochs_all_gps: 0,
        n_epochs_no_edge_recon: 0,
        // Must match the workflow argument `--n_epochs_no_cat_covariates_contrastive`.
        n_epochs_no_cat_covariates_contrastive: 0,
        output_model: "simple_execution_test_model"
      ],
      [
        id: "cosmx",
        input: resources_test.resolve("cosmx/Lung5_Rep2_tiny.h5mu"),
        input_gp_mask: resources_test.resolve("niche/prior_knowledge_gp_mask.json"),
        n_epochs: 1,
        n_epochs_all_gps: 0,
        n_epochs_no_edge_recon: 0,
        // Must match the workflow argument `--n_epochs_no_cat_covariates_contrastive`.
        n_epochs_no_cat_covariates_contrastive: 0,
        output_model: "simple_execution_test_model"
      ]
    ])
    | map { state -> [state.id, state] }
    | nichecompass_leiden.run(
      toState: { id, output, state -> output + [og_input: state.input] }
    )
    | view { output ->
      assert output.size() == 2 : "Outputs should contain two elements; [id, state]"

      // check id
      def id = output[0]
      assert id == "merged"

      // check output
      def state = output[1]
      assert state instanceof Map : "State should be a map. Found: ${state}"
      assert state.containsKey("output") : "Output should contain key 'output'."
      assert state.output.isFile() : "'output' should be a file."
      assert state.output.toString().endsWith(".h5mu") : "Output file should end with '.h5mu'. Found: ${state.output}"

      // check model_output
      assert state.containsKey("output_model") : "Output should contain key 'output_model'."
      assert state.output_model.isDirectory() : "'output_model' should be a directory."
      assert state.output_model.toString().endsWith("_model") : "Model output directory should end with '_model'. Found: ${state.output_model}"

      "Output: $output"
    }
    | nichecompass_leiden_test.run(
      fromState: [
        "input": "output"
      ]
    )
    // Both samples are merged by the workflow, so a single event is expected.
    | toSortedList({a, b -> a[0] <=> b[0]})
    | map { output_list ->
      assert output_list.size() == 1 : "output channel should contain 1 events"
    }
}

View File

@@ -0,0 +1,25 @@
name: "nichecompass_leiden_test"
namespace: "test_workflows/niche"
scope: "test"
description: "This component tests the output of nichecompass leiden workflow."
authors:
- __merge__: /src/authors/dorien_roosen.yaml
argument_groups:
- name: Inputs
arguments:
- name: "--input"
type: file
required: true
description: Path to h5mu output.
example: foo.h5mu
resources:
- type: python_script
path: script.py
- path: /src/utils/setup_logger.py
engines:
- type: docker
image: python:3.12-slim
__merge__: /src/base/requirements/testworkflows_setup.yaml
runners:
- type: executable
- type: nextflow

View File

@@ -0,0 +1,64 @@
from mudata import read_h5mu
import sys
import pytest
##VIASH START
par = {"input": "nichecompass_leiden/output.h5mu"}
meta = {"resources_dir": "resources_test"}
##VIASH END
def test_run():
    """Check that the workflow output contains the expected modality and slot keys.

    Reads the h5mu file given by ``par["input"]`` and asserts, slot by slot, that
    every expected key is present. Additional keys in the file are allowed.
    """
    input_mudata = read_h5mu(par["input"])

    expected_mod = ["rna"]
    assert set(expected_mod).issubset(input_mudata.mod), (
        f"Input modalities should be: {expected_mod}, found: {input_mudata.mod.keys()}."
    )

    # Only looked up after the modality check, so a missing modality is reported
    # by the assertion above.
    rna = input_mudata.mod["rna"]

    expected_obsm = ["X_leiden_nichecompass_umap", "nichecompass_latent"]
    assert set(expected_obsm).issubset(rna.obsm), (
        f"Input mod['rna'] obsm columns should be: {expected_obsm}, found: {input_mudata.mod['rna'].obsm.keys()}."
    )

    expected_obs = ["sample_id", "nichecompass_leiden_1.0"]
    assert set(expected_obs).issubset(rna.obs), (
        f"Input mod['rna'] obs columns should be: {expected_obs}, found: {input_mudata.mod['rna'].obs.keys()}."
    )

    expected_obsp = [
        "spatial_distances",
        "spatial_connectivities",
        "nichecompass_connectivities",
        "nichecompass_distances"
    ]
    assert set(expected_obsp).issubset(rna.obsp), (
        f"Input mod['rna'] obsp columns should be: {expected_obsp}, found: {input_mudata.mod['rna'].obsp.keys()}."
    )

    expected_uns = [
        "nichecompass_sources_categories_label_encoder",
        "nichecompass_targets_categories_label_encoder",
        "nichecompass_source_genes_idx",
        "nichecompass_target_genes_idx",
        "nichecompass_genes_idx",
        "nichecompass_gp_names",
        "nichecompass_active_gp_names",
        "nichecompass_neighbors",
        "spatial",
        "xenium_spatial_neighbors"
    ]
    assert set(expected_uns).issubset(rna.uns), (
        f"Input mod['rna'] uns columns should be: {expected_uns}, found: {input_mudata.mod['rna'].uns.keys()}."
    )

    expected_varm = [
        "nichecompass_gp_sources",
        "nichecompass_gp_targets",
        "nichecompass_gp_sources_categories",
        "nichecompass_gp_targets_categories"
    ]
    assert set(expected_varm).issubset(rna.varm), (
        f"Input mod['rna'] varm columns should be: {expected_varm}, found: {input_mudata.mod['rna'].varm.keys()}."
    )
if __name__ == "__main__":
sys.exit(pytest.main([__file__, "--import-mode=importlib"]))

View File

@@ -228,7 +228,7 @@ build_info:
output: "target/_private/executable/filter/subset_cosmx"
executable: "target/_private/executable/filter/subset_cosmx/subset_cosmx"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -458,9 +458,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz"
LABEL org.opencontainers.image.description="Companion container for running component filter subset_cosmx"
LABEL org.opencontainers.image.created="2025-12-08T20:39:05Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:07Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER

View File

@@ -228,7 +228,7 @@ build_info:
output: "target/_private/nextflow/filter/subset_cosmx"
executable: "target/_private/nextflow/filter/subset_cosmx/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3334,7 +3334,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/_private/nextflow/filter/subset_cosmx",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -0,0 +1,185 @@
name: "nichecompass_leiden_test"
namespace: "test_workflows/niche"
version: "niche-compass"
authors:
- name: "Dorien Roosen"
info:
role: "Core Team Member"
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "Path to h5mu output."
info: null
example:
- "foo.h5mu"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "setup_logger.py"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "This component tests the output of nichecompass leiden workflow."
info: null
status: "enabled"
scope:
image: "test"
target: "test"
repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v3.0.0"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "niche-compass"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "procps"
- "git"
interactive: false
- type: "python"
user: false
packages:
- "anndata~=0.11.1"
- "mudata~=0.3.1"
- "viashpy==0.9.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
script:
- "exec(\"try:\\n import awkward\\nexcept ModuleNotFoundError:\\n exit(0)\\\
nelse: exit(1)\")"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/workflows/test_workflows/niche/nichecompass_leiden/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/_test/executable/test_workflows/niche/nichecompass_leiden_test"
executable: "target/_test/executable/test_workflows/niche/nichecompass_leiden_test/nichecompass_leiden_test"
viash_version: "0.9.4"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"
version: "niche-compass"
info:
test_resources:
- type: "s3"
path: "s3://openpipelines-bio/openpipeline_spatial/resources_test"
dest: "resources_test"
repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v3.0.0"
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'niche-compass'"
organization: "vsh"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"

View File

@@ -0,0 +1,185 @@
# Viash component config for the test component that checks the output of the
# nichecompass leiden workflow (see `description` further down).
# `namespace` + `name` also form the build output path recorded in `build_info`.
name: "nichecompass_leiden_test"
namespace: "test_workflows/niche"
version: "niche-compass"
authors:
- name: "Dorien Roosen"
info:
role: "Core Team Member"
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
# The component takes a single, required input file: the h5mu to be checked.
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input"
description: "Path to h5mu output."
info: null
example:
- "foo.h5mu"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
# Files shipped with the component: the Python test script, a logging helper,
# and the labels config that the nextflow runner's `config.script` includes.
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "setup_logger.py"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "This component tests the output of nichecompass leiden workflow."
info: null
status: "enabled"
scope:
image: "test"
target: "test"
# External viash repository this package depends on, pinned to tag v3.0.0.
repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v3.0.0"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"
# Two runners are declared: a standalone executable and a Nextflow module.
runners:
- type: "executable"
id: "executable"
# NOTE(review): strategy name reads as "pull if needed, else cached build" -
# confirm exact semantics against the viash documentation.
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
# Process labels mapping a label name to a Nextflow directive assignment.
# `mem<N>gb` / `mem<N>tb` use decimal byte counts (1gb = 10^9 B);
# `mem<N>gib` / `mem<N>tib` use binary byte counts (1gib = 2^30 B).
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
# CPU-count labels.
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
# Extra config lines; this one pulls in the `nextflow_labels.config` resource.
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
# Execution engines: a Docker image built on python:3.12-slim, plus native.
engines:
- type: "docker"
id: "docker"
image: "python:3.12-slim"
target_registry: "images.viash-hub.com"
target_tag: "niche-compass"
namespace_separator: "/"
# Image setup steps: apt packages first, then Python packages.
setup:
- type: "apt"
packages:
- "procps"
- "git"
interactive: false
- type: "python"
user: false
packages:
- "anndata~=0.11.1"
- "mudata~=0.3.1"
- "viashpy==0.9.0"
github:
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
# Build-time check: exits 0 only when `awkward` is NOT importable, and exits 1
# when the import succeeds.
script:
- "exec(\"try:\\n import awkward\\nexcept ModuleNotFoundError:\\n exit(0)\\\
nelse: exit(1)\")"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance: source config, runner/engine used, output location, the
# viash version, and the git commit/remote the artifact was built from.
build_info:
config: "src/workflows/test_workflows/niche/nichecompass_leiden/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test"
executable: "target/_test/nextflow/test_workflows/niche/nichecompass_leiden_test/main.nf"
viash_version: "0.9.4"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
# Package-level settings that were in effect for this build.
package_config:
name: "openpipeline_spatial"
version: "niche-compass"
info:
# Test data location (S3) and the local directory it is placed in.
test_resources:
- type: "s3"
path: "s3://openpipelines-bio/openpipeline_spatial/resources_test"
dest: "resources_test"
repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v3.0.0"
viash_version: "0.9.4"
source: "src"
target: "target"
# Config mods applied to every component, in order:
#  1. add labels.config as the `nextflow_labels.config` resource and make the
#     nextflow runner's config script include it;
#  2. add a native engine;
#  3. set the docker engine's target registry;
#  4. set the docker engine's target tag.
config_mods:
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'niche-compass'"
organization: "vsh"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,126 @@
// Manifest metadata for the generated nichecompass_leiden_test workflow:
// its name, entry script, required Nextflow version, version and author.
manifest {
name = 'test_workflows/niche/nichecompass_leiden_test'
mainScript = 'main.nf'
// NOTE(review): the leading '!' presumably makes the version requirement
// strict (abort instead of warn) - confirm against the Nextflow manifest docs.
nextflowVersion = '!>=20.12.1-edge'
version = 'niche-compass'
description = 'This component tests the output of nichecompass leiden workflow.'
author = 'Dorien Roosen'
}
// Container assigned to processes through the top-level process scope.
process.container = 'nextflow/bash:latest'
// detect tempdir
// The first of NXF_TEMP, VIASH_TEMP, TEMPDIR, TMPDIR that is set (and non-empty)
// wins; '/tmp' is the fallback. The result is stored as an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Selectable configuration profiles.
profiles {
// Disable publishDir for every process (the '.*' selector matches all names).
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Point the docker, podman and charliecloud temp settings at the tempDir
// value computed elsewhere in this config.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Each profile below enables exactly one container engine and explicitly
// disables the other four, so the engines are mutually exclusive.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Resource labels: a process carrying one of these labels receives the
// corresponding memory or cpus directive.
process{
// Decimal memory sizes (1gb = 10^9 bytes, 1tb = 10^12 bytes).
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
// Binary memory sizes (1gib = 2^30 bytes, 1tib = 2^40 bytes).
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
// CPU counts.
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}
// Pull in the additional label definitions shipped as nextflow_labels.config.
includeConfig("nextflow_labels.config")

View File

@@ -301,7 +301,7 @@ build_info:
output: "target/executable/convert/from_cells2stats_to_h5mu"
executable: "target/executable/convert/from_cells2stats_to_h5mu/from_cells2stats_to_h5mu"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component convert from_cells2stats_to_h5mu"
LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:08Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER

View File

@@ -238,7 +238,7 @@ build_info:
output: "target/executable/convert/from_cosmx_to_h5mu"
executable: "target/executable/convert/from_cosmx_to_h5mu/from_cosmx_to_h5mu"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -460,9 +460,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz"
LABEL org.opencontainers.image.description="Companion container for running component convert from_cosmx_to_h5mu"
LABEL org.opencontainers.image.created="2025-12-08T20:39:07Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER

View File

@@ -234,7 +234,7 @@ build_info:
output: "target/executable/convert/from_cosmx_to_spatialexperiment"
executable: "target/executable/convert/from_cosmx_to_spatialexperiment/from_cosmx_to_spatialexperiment"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -457,9 +457,9 @@ RUN Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component convert from_cosmx_to_spatialexperiment"
LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:08Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER

View File

@@ -224,7 +224,7 @@ build_info:
output: "target/executable/convert/from_h5mu_to_spatialexperiment"
executable: "target/executable/convert/from_h5mu_to_spatialexperiment/from_h5mu_to_spatialexperiment"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -458,9 +458,9 @@ RUN Rscript -e 'options(warn = 2); if (!requireNamespace("remotes", quietly = TR
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component convert from_h5mu_to_spatialexperiment"
LABEL org.opencontainers.image.created="2025-12-08T20:39:07Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER

View File

@@ -221,7 +221,7 @@ build_info:
output: "target/executable/convert/from_spatialdata_to_h5mu"
executable: "target/executable/convert/from_spatialdata_to_h5mu/from_spatialdata_to_h5mu"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz"
LABEL org.opencontainers.image.description="Companion container for running component convert from_spatialdata_to_h5mu"
LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:07Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER

View File

@@ -244,7 +244,7 @@ build_info:
output: "target/executable/convert/from_xenium_to_h5mu"
executable: "target/executable/convert/from_xenium_to_h5mu/from_xenium_to_h5mu"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component convert from_xenium_to_h5mu"
LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:08Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER

View File

@@ -326,7 +326,7 @@ build_info:
output: "target/executable/convert/from_xenium_to_spatialdata"
executable: "target/executable/convert/from_xenium_to_spatialdata/from_xenium_to_spatialdata"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dorien Roosen, Weiwei Schultz"
LABEL org.opencontainers.image.description="Companion container for running component convert from_xenium_to_spatialdata"
LABEL org.opencontainers.image.created="2025-12-08T20:39:05Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER

View File

@@ -224,7 +224,7 @@ build_info:
output: "target/executable/convert/from_xenium_to_spatialexperiment"
executable: "target/executable/convert/from_xenium_to_spatialexperiment/from_xenium_to_spatialexperiment"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -457,9 +457,9 @@ RUN Rscript -e 'options(warn = 2); if (!requireNamespace("BiocManager", quietly
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component convert from_xenium_to_spatialexperiment"
LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:08Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER

View File

@@ -296,7 +296,7 @@ build_info:
output: "target/executable/dataflow/obsp_block_concatenation"
executable: "target/executable/dataflow/obsp_block_concatenation/obsp_block_concatenation"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -459,9 +459,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dries Schaumont, Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component dataflow obsp_block_concatenation"
LABEL org.opencontainers.image.created="2025-12-08T20:39:07Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER

View File

@@ -426,7 +426,7 @@ build_info:
output: "target/executable/mapping/spaceranger_count"
executable: "target/executable/mapping/spaceranger_count/spaceranger_count"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -453,9 +453,9 @@ apt upgrade -y && apt install -y procps && rm -rf /var/lib/apt/lists/*
LABEL org.opencontainers.image.authors="Jakub Majercik"
LABEL org.opencontainers.image.description="Companion container for running component mapping spaceranger_count"
LABEL org.opencontainers.image.created="2025-12-08T20:39:07Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER

View File

@@ -1,5 +1,5 @@
name: "neighbors"
namespace: "spatial_neighborhood_graph"
name: "spatial_neighborhood_graph"
namespace: "neighbors"
version: "niche-compass"
authors:
- name: "Dorien Roosen"
@@ -269,10 +269,10 @@ build_info:
config: "src/neighbors/spatial_neighborhood_graph/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/spatial_neighborhood_graph/neighbors"
executable: "target/executable/spatial_neighborhood_graph/neighbors/neighbors"
output: "target/executable/neighbors/spatial_neighborhood_graph"
executable: "target/executable/neighbors/spatial_neighborhood_graph/spatial_neighborhood_graph"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -0,0 +1,68 @@
// Default resources and coarse-grained resource labels (cpu / memory / disk).
process {
// Default resources for components that hardly do any processing
// (memory grows linearly with the attempt number: 2 GB, 4 GB, ...)
memory = { 2.GB * task.attempt }
cpus = 1
// Retry for exit codes that have something to do with memory issues
// (exit statuses 137 through 140 are retried; anything else terminates)
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// Optional memory ceiling consumed by get_memory(); while it is null,
// get_memory() returns the requested amount unchanged.
maxMemory = null
// CPU resources
withLabel: singlecpu { cpus = 1 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 10 }
withLabel: highcpu { cpus = 20 }
// Memory resources
// NOTE(review): lowmem, midmem and highmem all request 50 GB per attempt -
// confirm that these tiers are meant to be identical.
withLabel: lowmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
// Disk space
// Nextflow apparently can't handle empty directives, i.e.
// withLabel: lowdisk {}
// so for that reason we have to add a dummy directive
withLabel: lowdisk {
dummyDirective = "dummyValue"
}
withLabel: middisk {
dummyDirective = "dummyValue"
}
withLabel: highdisk {
dummyDirective = "dummyValue"
}
withLabel: veryhighdisk {
dummyDirective = "dummyValue"
}
// NOTE: The above labels intentionally do not have an effect by default.
// The user should set the disk space requirements by adding the following
// to the compute environment:
//
// withLabel: lowdisk { disk = { 20.GB * task.attempt } }
// withLabel: middisk { disk = { 100.GB * task.attempt } }
// withLabel: highdisk { disk = { 200.GB * task.attempt } }
// withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
}
/**
 * Cap a requested memory amount at the optional `process.maxMemory` ceiling.
 *
 * Behaviour:
 *  - no ceiling configured (`process.maxMemory` absent or falsy): the request
 *    is returned unchanged;
 *  - current attempt equals `process.maxRetries`: the ceiling itself is
 *    returned, so the last attempt gets everything that is allowed;
 *  - request larger than the ceiling: the ceiling is returned;
 *  - otherwise: the request is returned unchanged.
 *
 * Any failure while evaluating the settings prints a diagnostic and exits
 * with status 1.
 *
 * @param to_compare requested memory; it is compared against
 *                   `process.maxMemory` coerced to nextflow.util.MemoryUnit
 * @return the request, or the configured ceiling when the request is capped
 */
def get_memory(to_compare) {
  // Without a configured ceiling there is nothing to clamp against.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    // NOTE(review): `task` is not a parameter of this function; it has to be
    // resolvable from the calling context - confirm this works when a ceiling
    // is actually configured.
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      return process.maxMemory
    }
    // compareTo() only guarantees the sign of its result, so test `> 0`
    // rather than equality with 1.
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Request exceeds the ceiling: clamp to it. This branch used to return
      // the undefined name `max_memory`, which threw and sent every oversized
      // request into the catch block below.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,12 @@
def setup_logger():
    """Configure the root logger for console output and return it.

    The root logger's level is set to INFO and a handler writing to
    ``sys.stdout`` (as bound at call time) is attached, formatted as
    ``"%(asctime)s %(levelname)-8s %(message)s"``.

    The function is idempotent: the handler it installs is tagged with a
    fixed name, and a later call does not attach a second copy. Without this,
    each additional call would add another handler and every log record would
    be printed once per call.

    Returns:
        logging.Logger: the root logger.
    """
    import logging
    from sys import stdout

    # Name used to recognise the handler installed by a previous call.
    handler_name = "setup_logger_console"

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    already_installed = any(
        existing.get_name() == handler_name for existing in logger.handlers
    )
    if not already_installed:
        console_handler = logging.StreamHandler(stdout)
        console_handler.set_name(handler_name)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)
    return logger

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env bash
# neighbors niche-compass
# spatial_neighborhood_graph niche-compass
#
# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -165,8 +165,8 @@ VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}`
VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR`
# define meta fields
VIASH_META_NAME="neighbors"
VIASH_META_FUNCTIONALITY_NAME="neighbors"
VIASH_META_NAME="spatial_neighborhood_graph"
VIASH_META_FUNCTIONALITY_NAME="spatial_neighborhood_graph"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"
@@ -457,10 +457,10 @@ RUN pip install --upgrade pip && \
python -c 'exec("try:\n import awkward\nexcept ModuleNotFoundError:\n exit(0)\nelse: exit(1)")'
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component spatial_neighborhood_graph neighbors"
LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z"
LABEL org.opencontainers.image.description="Companion container for running component neighbors spatial_neighborhood_graph"
LABEL org.opencontainers.image.created="2025-12-13T13:27:08Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER
@@ -578,7 +578,7 @@ VIASH_DOCKER_RUN_ARGS=(-i --rm)
# ViashHelp: Display helpful explanation about this executable
function ViashHelp {
echo "neighbors niche-compass"
echo "spatial_neighborhood_graph niche-compass"
echo ""
echo "Calculates a spatial neighborhood graph."
echo ""
@@ -687,7 +687,7 @@ while [[ $# -gt 0 ]]; do
shift 1
;;
--version)
echo "neighbors niche-compass"
echo "spatial_neighborhood_graph niche-compass"
exit
;;
--input)
@@ -877,7 +877,7 @@ if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
# determine docker image id
if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then
VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/openpipeline_spatial/spatial_neighborhood_graph/neighbors:niche-compass'
VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/openpipeline_spatial/neighbors/spatial_neighborhood_graph:niche-compass'
fi
# print dockerfile
@@ -1207,7 +1207,7 @@ fi
ViashDebug "Running command: $(echo $VIASH_CMD)"
cat << VIASHEOF | eval $VIASH_CMD
set -e
tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-neighbors-XXXXXX").py
tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-spatial_neighborhood_graph-XXXXXX").py
function clean_up {
rm "\$tempscript"
}

View File

@@ -0,0 +1,469 @@
name: "gene_program_mask"
namespace: "nichecompass"
version: "niche-compass"
authors:
- name: "Dorien Roosen"
roles:
- "maintainer"
info:
role: "Core Team Member"
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
argument_groups:
- name: "Inputs"
arguments:
- type: "file"
name: "--input_gene_orthologs_mapping_file"
description: "Path to a CSV file mapping human genes to mouse orthologs.\nRequired\
\ for the OmniPath and NicheNet masks if `--species mouse`.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_metabolite_enzymes"
description: "Path to the MeBocost metabolite-enzymes TSV file.\nRequired for\
\ generating the MeBocost gene program mask.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_metabolite_sensors"
description: "Path to the MeBocost metabolite-sensors TSV file.\nRequired for\
\ generating the MeBocost gene program mask.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Parameters"
arguments:
- type: "string"
name: "--species"
description: "Species of the organism (human or mouse)."
info: null
default:
- "human"
required: false
choices:
- "human"
- "mouse"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--create_omnipath_gene_program_mask"
description: "Whether to create the OmniPath gene program mask."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--create_nichenet_gene_program_mask"
description: "Whether to create the NicheNet gene program mask."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--create_mebocost_gene_program_mask"
description: "Whether to create the MeBocost gene program mask."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--create_collectri_tf_gene_program_mask"
description: "Whether to create the CollecTRI TF gene program mask."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--overlap_thresh_target_genes"
description: "The minimum ratio of target genes that need to overlap between a\
\ GP without source genes and another GP for the GP to be dropped.\nGene programs\
\ with different source genes are never combined or dropped.\n"
info: null
default:
- 1.0
required: false
min: 0.0
max: 1.0
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Omnipath Parameters"
arguments:
- type: "integer"
name: "--omnipath_min_curation_effort"
description: "Minimum number of times an interaction has to be described in a\
\ paper and mentioned in a database to be included in the OmniPath gene programs."
info: null
default:
- 2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "NicheNet Parameters"
arguments:
- type: "string"
name: "--nichenet_version"
description: "Version of the NicheNet ligand receptor network and ligand target\
\ gene regulatory potential matrix.\n´v2´ is an improved version of ´v1´, and\
\ has separate files for mouse and human.\n"
info: null
default:
- "v2"
required: false
choices:
- "v1"
- "v2"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--nichenet_keep_target_genes_ratio"
description: "Ratio of target genes that are kept compared to total target genes.\n\
This ratio is applied over the entire matrix (not on gene program level), and\
\ determines the ´all_gps_score_keep_threshold´, which will be used to filter\
\ target genes according to their regulatory potential scores.\n"
info: null
default:
- 1.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--nichenet_max_n_target_genes_per_gp"
description: "Maximum number of target genes per gene program. If a gene program\
\ has more target genes than ´max_n_target_genes_per_gp´, only the ´max_n_target_genes_per_gp´\
\ gene programs with the highest regulatory potential scores will be kept.\n\
Default value is chosen based on MultiNicheNet specification (s. Browaeys, R.\
\ et al. MultiNicheNet: a flexible framework for differential cell-cell communication\
\ analysis from multi-sample multi-condition single-cell transcriptomics data.\
\ bioRxiv (2023) doi:10.1101/2023.06.13.544751).\n"
info: null
default:
- 250
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "Path to the output gene program mask JSON file."
info: null
example:
- "gp_mask.json"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_omnipath_lr_network"
description: "Path to the output OmniPath ligand-receptor network CSV file."
info: null
example:
- "omnipath_lr_network.csv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_nichenet_lr_network"
description: "Path to the output NicheNet ligand-receptor network CSV file."
info: null
example:
- "nichenet_lr_network.csv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_nichenet_ligand_target_matrix"
description: "Path to the output NicheNet ligand-target gene regulatory potential\
\ matrix file."
info: null
example:
- "nichenet_ligand_target_matrix.csv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_collectri_tf_network"
description: "Path to the output CollecTRI TF-target gene regulatory potential\
\ network CSV file."
info: null
example:
- "collectri_tf_network.csv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_omnipath_gp_gene_count_distributions"
description: "Path to save the OmniPath gene program gene count distributions\
\ plot."
info: null
example:
- "omnipath_gp_gene_count_distributions.svg"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_nichenet_gp_gene_count_distributions"
description: "Path to save the NicheNet gene program gene count distributions\
\ plot."
info: null
example:
- "nichenet_gp_gene_count_distributions.svg"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_mebocost_gp_gene_count_distributions"
description: "Path to save the MeBocost gene program gene count distributions\
\ plot."
info: null
example:
- "mebocost_gp_gene_count_distributions.svg"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_collectri_tf_gp_gene_count_distributions"
description: "Path to save the CollecTRI TF gene program gene count distributions\
\ plot."
info: null
example:
- "collectri_tf_gp_gene_count_distributions.svg"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "setup_logger.py"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Generation of a prior knowledge gene program mask for NicheCompass."
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
- type: "file"
path: "niche"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v3.0.0"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
label:
- "lowcpu"
- "lowmem"
- "lowdisk"
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04"
target_registry: "images.viash-hub.com"
target_tag: "niche-compass"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "libhdf5-dev"
- "python3-pip"
- "python3-dev"
- "python-is-python3"
interactive: false
- type: "docker"
run:
- "pip install torch --index-url https://download.pytorch.org/whl/cu124 \\\n&&\
\ pip install pyg_lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.6.0+cu124.html\
\ \n"
- type: "python"
user: false
packages:
- "numpy<2"
- "nichecompass"
upgrade: true
test_setup:
- type: "python"
user: false
packages:
- "viashpy==0.9.0"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
build_info:
config: "src/nichecompass/gene_program_mask/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/nichecompass/gene_program_mask"
executable: "target/executable/nichecompass/gene_program_mask/gene_program_mask"
viash_version: "0.9.4"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"
version: "niche-compass"
info:
test_resources:
- type: "s3"
path: "s3://openpipelines-bio/openpipeline_spatial/resources_test"
dest: "resources_test"
repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v3.0.0"
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'niche-compass'"
organization: "vsh"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,68 @@
// Nextflow `process` scope: default resources, retry policy and the
// label-based overrides (`withLabel`) that set CPU, memory and disk per label.
process {
// Default resources for components that hardly do any processing
// (the memory request is multiplied by the attempt number on each retry)
memory = { 2.GB * task.attempt }
cpus = 1
// Retry for exit codes that have something to do with memory issues
// (exit statuses 137 through 140 are retried; anything else terminates the run)
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
maxRetries = 3
// No memory cap by default; get_memory() returns the requested amount
// unchanged while this is unset.
maxMemory = null
// CPU resources
withLabel: singlecpu { cpus = 1 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 10 }
withLabel: highcpu { cpus = 20 }
// Memory resources
// Each request is multiplied by the attempt number and passed through get_memory().
// NOTE(review): lowmem, midmem and highmem all use the same 50.GB base, so these
// three labels currently request identical memory -- confirm this is intended.
withLabel: lowmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
// Disk space
// Nextflow apparently can't handle empty directives, i.e.
// withLabel: lowdisk {}
// so for that reason we have to add a dummy directive
withLabel: lowdisk {
dummyDirective = "dummyValue"
}
withLabel: middisk {
dummyDirective = "dummyValue"
}
withLabel: highdisk {
dummyDirective = "dummyValue"
}
withLabel: veryhighdisk {
dummyDirective = "dummyValue"
}
// NOTE: The above labels intentionally do not have an effect by default.
// The user should set the disk space requirements by adding the following
// to the compute environment:
//
// withLabel: lowdisk { disk = { 20.GB * task.attempt } }
// withLabel: middisk { disk = { 100.GB * task.attempt } }
// withLabel: highdisk { disk = { 200.GB * task.attempt } }
// withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
}
// Cap a requested memory amount at `process.maxMemory`, when one is configured.
//
// to_compare: the requested amount of memory (the callers in this file pass
//             values such as `50.GB * task.attempt`).
//
// Returns:
//   - `to_compare` unchanged when `process.maxMemory` is unset or falsy;
//   - `process.maxMemory` on the final attempt (task.attempt == process.maxRetries),
//     so the last retry always gets the full allowance;
//   - `process.maxMemory` when the request exceeds it;
//   - `to_compare` otherwise.
//
// Any error raised while evaluating the cap prints a diagnostic and stops the
// run with exit status 1.
//
// NOTE(review): `task` is not a parameter of this function; it is only reached
// once `process.maxMemory` is set and is assumed to be resolvable from the
// calling context -- confirm.
def get_memory(to_compare) {
  // No cap configured: hand the request back untouched.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      // Final attempt: use everything that is allowed.
      return process.maxMemory
    }
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Request exceeds the cap. The previous code returned the undefined name
      // `max_memory` here, which threw and sent every over-cap request into the
      // catch block below (aborting the run) instead of capping it.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,12 @@
def setup_logger():
    """Configure the root logger to emit INFO-level records to stdout.

    Sets the root logger's level to ``logging.INFO`` and attaches a stream
    handler writing to ``sys.stdout`` with the format
    ``%(asctime)s %(levelname)-8s %(message)s``.

    The function is idempotent: if the root logger already has a stream
    handler bound to ``sys.stdout``, no further handler is attached, so calling
    it more than once does not make every record print multiple times.

    Returns:
        logging.Logger: The root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Handlers on the root logger persist for the life of the process, so
    # only add a stdout handler when one is not already attached.
    already_attached = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not already_attached:
        console_handler = logging.StreamHandler(stdout)
        console_handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        )
        logger.addHandler(console_handler)

    return logger

View File

@@ -83,45 +83,6 @@ argument_groups:
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Spatial Neighbors Calculation"
arguments:
- type: "string"
name: "--coord_type"
description: "Type of coordinate system. Valid options are:\n`grid` - grid coordinates.\n\
`generic` - generic coordinates.\nIf not provided, `grid` is used if `--input_obsm_spatial_coords`\
\ is in --input .uns with `--n_neighs` = 6 (Visium), otherwise `generic` is\
\ used.\n"
info: null
required: false
choices:
- "generic"
- "grid"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--n_spatial_neighbors"
description: "Depending on `--coord_type`:\n`grid` - number of neighboring tiles.\n\
`generic` - number of neighborhoods for non-grid data. Only used when `--delaunay\
\ False`.\n"
info: null
default:
- 6
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--delaunay"
description: "Whether to use Delaunay triangulation to determine spatial neighborhood\
\ graph.\nOnly used when `--coord_type generic`.\n"
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Gene Program Mask"
arguments:
- type: "integer"
@@ -807,12 +768,12 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_uns_gene_index"
name: "--output_uns_genes_index"
description: "Key of the uns field where the index of a concatenated vector of\
\ target and source genes that are in the gene program masks will be stored.\n"
info: null
default:
- "nichecompass_gene_idx"
- "nichecompass_genes_idx"
required: false
direction: "input"
multiple: false
@@ -870,6 +831,20 @@ argument_groups:
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or MuData objects.\n\
By default no compression is applied.\n"
info: null
example:
- "gzip"
required: false
choices:
- "gzip"
- "lzf"
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
@@ -1028,7 +1003,7 @@ build_info:
output: "target/executable/nichecompass/nichecompass"
executable: "target/executable/nichecompass/nichecompass/nichecompass"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -464,9 +464,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dorien Roosen"
LABEL org.opencontainers.image.description="Companion container for running component nichecompass nichecompass"
LABEL org.opencontainers.image.created="2025-12-08T20:39:06Z"
LABEL org.opencontainers.image.created="2025-12-13T13:27:09Z"
LABEL org.opencontainers.image.source="https://github.com/openpipelines-bio/openpipeline_spatial"
LABEL org.opencontainers.image.revision="0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
LABEL org.opencontainers.image.revision="9151204629228da14d7c82f49f24c607efb9251e"
LABEL org.opencontainers.image.version="niche-compass"
VIASHDOCKER
@@ -626,32 +626,6 @@ function ViashHelp {
echo " type: string, multiple values allowed"
echo " Keys of the adata.obs fields to use as covariates."
echo ""
echo "Spatial Neighbors Calculation:"
echo " --coord_type"
echo " type: string"
echo " choices: [ generic, grid ]"
echo " Type of coordinate system. Valid options are:"
echo " \`grid\` - grid coordinates."
echo " \`generic\` - generic coordinates."
echo " If not provided, \`grid\` is used if \`--input_obsm_spatial_coords\` is in"
echo " --input .uns with \`--n_neighs\` = 6 (Visium), otherwise \`generic\` is"
echo " used."
echo ""
echo " --n_spatial_neighbors"
echo " type: integer"
echo " default: 6"
echo " Depending on \`--coord_type\`:"
echo " \`grid\` - number of neighboring tiles."
echo " \`generic\` - number of neighborhoods for non-grid data. Only used when"
echo " \`--delaunay False\`."
echo ""
echo " --delaunay"
echo " type: boolean"
echo " default: false"
echo " Whether to use Delaunay triangulation to determine spatial neighborhood"
echo " graph."
echo " Only used when \`--coord_type generic\`."
echo ""
echo "Gene Program Mask:"
echo " --min_genes_per_gp"
echo " type: integer"
@@ -1063,9 +1037,9 @@ function ViashHelp {
echo " default: nichecompass_active_gp_names"
echo " Key of the uns field where the active gene program names will be stored."
echo ""
echo " --output_uns_gene_index"
echo " --output_uns_genes_index"
echo " type: string"
echo " default: nichecompass_gene_idx"
echo " default: nichecompass_genes_idx"
echo " Key of the uns field where the index of a concatenated vector of target"
echo " and source genes that are in the gene program masks will be stored."
echo ""
@@ -1098,6 +1072,13 @@ function ViashHelp {
echo " Key of the obsp field where the aggregation weights of the node label"
echo " aggregator will be stored."
echo ""
echo " --output_compression"
echo " type: string"
echo " example: gzip"
echo " choices: [ gzip, lzf ]"
echo " Compression format to use for the output AnnData and/or Mudata objects."
echo " By default no compression is applied."
echo ""
echo "Viash built in Computational Requirements:"
echo " ---cpus=INT"
echo " Number of CPUs to use"
@@ -1220,39 +1201,6 @@ while [[ $# -gt 0 ]]; do
fi
shift 1
;;
--coord_type)
[ -n "$VIASH_PAR_COORD_TYPE" ] && ViashError Bad arguments for option \'--coord_type\': \'$VIASH_PAR_COORD_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_COORD_TYPE="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --coord_type. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--coord_type=*)
[ -n "$VIASH_PAR_COORD_TYPE" ] && ViashError Bad arguments for option \'--coord_type=*\': \'$VIASH_PAR_COORD_TYPE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_COORD_TYPE=$(ViashRemoveFlags "$1")
shift 1
;;
--n_spatial_neighbors)
[ -n "$VIASH_PAR_N_SPATIAL_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_spatial_neighbors\': \'$VIASH_PAR_N_SPATIAL_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_N_SPATIAL_NEIGHBORS="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --n_spatial_neighbors. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--n_spatial_neighbors=*)
[ -n "$VIASH_PAR_N_SPATIAL_NEIGHBORS" ] && ViashError Bad arguments for option \'--n_spatial_neighbors=*\': \'$VIASH_PAR_N_SPATIAL_NEIGHBORS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_N_SPATIAL_NEIGHBORS=$(ViashRemoveFlags "$1")
shift 1
;;
--delaunay)
[ -n "$VIASH_PAR_DELAUNAY" ] && ViashError Bad arguments for option \'--delaunay\': \'$VIASH_PAR_DELAUNAY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_DELAUNAY="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --delaunay. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--delaunay=*)
[ -n "$VIASH_PAR_DELAUNAY" ] && ViashError Bad arguments for option \'--delaunay=*\': \'$VIASH_PAR_DELAUNAY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_DELAUNAY=$(ViashRemoveFlags "$1")
shift 1
;;
--min_genes_per_gp)
[ -n "$VIASH_PAR_MIN_GENES_PER_GP" ] && ViashError Bad arguments for option \'--min_genes_per_gp\': \'$VIASH_PAR_MIN_GENES_PER_GP\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_MIN_GENES_PER_GP="$2"
@@ -1893,15 +1841,15 @@ while [[ $# -gt 0 ]]; do
VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES=$(ViashRemoveFlags "$1")
shift 1
;;
--output_uns_gene_index)
[ -n "$VIASH_PAR_OUTPUT_UNS_GENE_INDEX" ] && ViashError Bad arguments for option \'--output_uns_gene_index\': \'$VIASH_PAR_OUTPUT_UNS_GENE_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_OUTPUT_UNS_GENE_INDEX="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_gene_index. Use "--help" to get more information on the parameters. && exit 1
--output_uns_genes_index)
[ -n "$VIASH_PAR_OUTPUT_UNS_GENES_INDEX" ] && ViashError Bad arguments for option \'--output_uns_genes_index\': \'$VIASH_PAR_OUTPUT_UNS_GENES_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_OUTPUT_UNS_GENES_INDEX="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --output_uns_genes_index. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--output_uns_gene_index=*)
[ -n "$VIASH_PAR_OUTPUT_UNS_GENE_INDEX" ] && ViashError Bad arguments for option \'--output_uns_gene_index=*\': \'$VIASH_PAR_OUTPUT_UNS_GENE_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_OUTPUT_UNS_GENE_INDEX=$(ViashRemoveFlags "$1")
--output_uns_genes_index=*)
[ -n "$VIASH_PAR_OUTPUT_UNS_GENES_INDEX" ] && ViashError Bad arguments for option \'--output_uns_genes_index=*\': \'$VIASH_PAR_OUTPUT_UNS_GENES_INDEX\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_OUTPUT_UNS_GENES_INDEX=$(ViashRemoveFlags "$1")
shift 1
;;
--output_uns_target_genes_index)
@@ -1965,6 +1913,17 @@ while [[ $# -gt 0 ]]; do
VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS=$(ViashRemoveFlags "$1")
shift 1
;;
--output_compression)
[ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_OUTPUT_COMPRESSION="$2"
[ $# -lt 2 ] && ViashError Not enough arguments passed to --output_compression. Use "--help" to get more information on the parameters. && exit 1
shift 2
;;
--output_compression=*)
[ -n "$VIASH_PAR_OUTPUT_COMPRESSION" ] && ViashError Bad arguments for option \'--output_compression=*\': \'$VIASH_PAR_OUTPUT_COMPRESSION\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1
VIASH_PAR_OUTPUT_COMPRESSION=$(ViashRemoveFlags "$1")
shift 1
;;
---engine)
VIASH_ENGINE_ID="$2"
shift 2
@@ -2185,12 +2144,6 @@ fi
if [ -z ${VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES+x} ]; then
VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES="spatial_connectivities"
fi
if [ -z ${VIASH_PAR_N_SPATIAL_NEIGHBORS+x} ]; then
VIASH_PAR_N_SPATIAL_NEIGHBORS="6"
fi
if [ -z ${VIASH_PAR_DELAUNAY+x} ]; then
VIASH_PAR_DELAUNAY="false"
fi
if [ -z ${VIASH_PAR_MIN_GENES_PER_GP+x} ]; then
VIASH_PAR_MIN_GENES_PER_GP="1"
fi
@@ -2326,8 +2279,8 @@ fi
if [ -z ${VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES+x} ]; then
VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES="nichecompass_active_gp_names"
fi
if [ -z ${VIASH_PAR_OUTPUT_UNS_GENE_INDEX+x} ]; then
VIASH_PAR_OUTPUT_UNS_GENE_INDEX="nichecompass_gene_idx"
if [ -z ${VIASH_PAR_OUTPUT_UNS_GENES_INDEX+x} ]; then
VIASH_PAR_OUTPUT_UNS_GENES_INDEX="nichecompass_genes_idx"
fi
if [ -z ${VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX+x} ]; then
VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX="nichecompass_target_genes_idx"
@@ -2353,18 +2306,6 @@ if [ ! -z "$VIASH_PAR_INPUT_GP_MASK" ] && [ ! -e "$VIASH_PAR_INPUT_GP_MASK" ]; t
fi
# check whether parameters values are of the right type
if [[ -n "$VIASH_PAR_N_SPATIAL_NEIGHBORS" ]]; then
if ! [[ "$VIASH_PAR_N_SPATIAL_NEIGHBORS" =~ ^[-+]?[0-9]+$ ]]; then
ViashError '--n_spatial_neighbors' has to be an integer. Use "--help" to get more information on the parameters.
exit 1
fi
fi
if [[ -n "$VIASH_PAR_DELAUNAY" ]]; then
if ! [[ "$VIASH_PAR_DELAUNAY" =~ ^(true|True|TRUE|false|False|FALSE|yes|Yes|YES|no|No|NO)$ ]]; then
ViashError '--delaunay' has to be a boolean. Use "--help" to get more information on the parameters.
exit 1
fi
fi
if [[ -n "$VIASH_PAR_MIN_GENES_PER_GP" ]]; then
if ! [[ "$VIASH_PAR_MIN_GENES_PER_GP" =~ ^[-+]?[0-9]+$ ]]; then
ViashError '--min_genes_per_gp' has to be an integer. Use "--help" to get more information on the parameters.
@@ -2989,18 +2930,6 @@ if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then
fi
# check whether value belongs to a set of choices
if [ ! -z "$VIASH_PAR_COORD_TYPE" ]; then
VIASH_PAR_COORD_TYPE_CHOICES=("generic;grid")
IFS=';'
set -f
if ! [[ ";${VIASH_PAR_COORD_TYPE_CHOICES[*]};" =~ ";$VIASH_PAR_COORD_TYPE;" ]]; then
ViashError '--coord_type' specified value of \'$VIASH_PAR_COORD_TYPE\' is not in the list of allowed values. Use "--help" to get more information on the parameters.
exit 1
fi
set +f
unset IFS
fi
if [ ! -z "$VIASH_PAR_COVARIATE_EMBEDDING_INJECTION_LAYERS" ]; then
VIASH_PAR_COVARIATE_EMBEDDING_INJECTION_LAYERS_CHOICES=("encoder;gene_expr_decoder;chrom_access_decoder")
IFS=';'
@@ -3063,6 +2992,18 @@ if [ ! -z "$VIASH_PAR_CONV_LAYER_ENCODER" ]; then
unset IFS
fi
if [ ! -z "$VIASH_PAR_OUTPUT_COMPRESSION" ]; then
VIASH_PAR_OUTPUT_COMPRESSION_CHOICES=("gzip;lzf")
IFS=';'
set -f
if ! [[ ";${VIASH_PAR_OUTPUT_COMPRESSION_CHOICES[*]};" =~ ";$VIASH_PAR_OUTPUT_COMPRESSION;" ]]; then
ViashError '--output_compression' specified value of \'$VIASH_PAR_OUTPUT_COMPRESSION\' is not in the list of allowed values. Use "--help" to get more information on the parameters.
exit 1
fi
set +f
unset IFS
fi
# create parent directories of output files, if so desired
if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then
mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")"
@@ -3185,9 +3126,6 @@ par = {
'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'input_obsm_spatial_connectivities': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'input_obs_covariates': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_COVARIATES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_COVARIATES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ),
'coord_type': $( if [ ! -z ${VIASH_PAR_COORD_TYPE+x} ]; then echo "r'${VIASH_PAR_COORD_TYPE//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'n_spatial_neighbors': $( if [ ! -z ${VIASH_PAR_N_SPATIAL_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_SPATIAL_NEIGHBORS//\'/\'\"\'\"r\'}')"; else echo None; fi ),
'delaunay': $( if [ ! -z ${VIASH_PAR_DELAUNAY+x} ]; then echo "r'${VIASH_PAR_DELAUNAY//\'/\'\"\'\"r\'}'.lower() == 'true'"; else echo None; fi ),
'min_genes_per_gp': $( if [ ! -z ${VIASH_PAR_MIN_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES_PER_GP//\'/\'\"\'\"r\'}')"; else echo None; fi ),
'min_source_genes_per_gp': $( if [ ! -z ${VIASH_PAR_MIN_SOURCE_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_SOURCE_GENES_PER_GP//\'/\'\"\'\"r\'}')"; else echo None; fi ),
'min_target_genes_per_gp': $( if [ ! -z ${VIASH_PAR_MIN_TARGET_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_TARGET_GENES_PER_GP//\'/\'\"\'\"r\'}')"; else echo None; fi ),
@@ -3244,12 +3182,13 @@ par = {
'output_varm_gp_sources_mask': $( if [ ! -z ${VIASH_PAR_OUTPUT_VARM_GP_SOURCES_MASK+x} ]; then echo "r'${VIASH_PAR_OUTPUT_VARM_GP_SOURCES_MASK//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'output_uns_gp_names': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_GP_NAMES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GP_NAMES//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'output_uns_active_gp_names': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'output_uns_gene_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_GENE_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GENE_INDEX//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'output_uns_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GENES_INDEX//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'output_uns_target_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'output_uns_source_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_SOURCE_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_SOURCE_GENES_INDEX//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'output_uns_covariate_embeddings': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_COVARIATE_EMBEDDINGS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_COVARIATE_EMBEDDINGS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ),
'output_obsp_reconstructed_adj_edge_proba': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBSP_RECONSTRUCTED_ADJ_EDGE_PROBA+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_RECONSTRUCTED_ADJ_EDGE_PROBA//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'output_obsp_agg_weights': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS//\'/\'\"\'\"r\'}'"; else echo None; fi )
'output_obsp_agg_weights': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS//\'/\'\"\'\"r\'}'"; else echo None; fi ),
'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\'/\'\"\'\"r\'}'"; else echo None; fi )
}
meta = {
'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ),
@@ -3288,21 +3227,19 @@ logger.info("GPU enabled? %s", use_gpu)
## Read in data
adata = mu.read_h5ad(par["input"], mod=par["modality"])
# ## Compute spatial neighbor graph
# logger.info("Computing spatial neighbor graph...")
# # Compute connectivities and distances
# sq.gr.spatial_neighbors(
# adata,
# coord_type=par["coord_type"],
# spatial_key=par["input_obsm_spatial_coords"],
# n_neighs=par["n_spatial_neighbors"],
# delaunay=par["delaunay"],
# )
# # Making the connectivity matrix symmetric
# adata.obsp["spatial_connectivities"] = adata.obsp["spatial_connectivities"].maximum(
# adata.obsp["spatial_connectivities"].T
# )
# Counts need to be float32 to be processed by nichecompass model
# See https://discuss.pytorch.org/t/runtimeerror-mat1-and-mat2-must-have-the-same-dtype/166759
counts_dtype = (
adata.layers[par["layer"]].dtype if par["layer"] is not None else adata.X.dtype
)
if counts_dtype != "float32":
logger.info(
f"Converting count data to float32 from {counts_dtype} for model compatibility..."
)
if par["layer"] is not None:
adata.layers[par["layer"]] = adata.layers[par["layer"]].astype("float32")
else:
adata.X = adata.X.astype("float32")
## Add GP mask to data
logger.info("Adding prior knowledge gene program mask to data...")
@@ -3315,7 +3252,7 @@ add_gps_from_gp_dict_to_adata(
gp_targets_mask_key=par["output_varm_gp_targets_mask"],
gp_sources_mask_key=par["output_varm_gp_sources_mask"],
gp_names_key=par["output_uns_gp_names"],
genes_idx_key=par["output_uns_gene_index"],
genes_idx_key=par["output_uns_genes_index"],
target_genes_idx_key=par["output_uns_target_genes_index"],
source_genes_idx_key=par["output_uns_source_genes_index"],
min_genes_per_gp=par["min_genes_per_gp"],
@@ -3338,12 +3275,12 @@ model = NicheCompass(
gp_sources_mask_key=par["output_varm_gp_sources_mask"],
latent_key=par["output_obsm_embedding"],
cat_covariates_keys=par["input_obs_covariates"],
cat_covariates_no_edges=par["covariates_edges"],
cat_covariates_no_edges=par["covariate_edges"],
cat_covariates_embeds_keys=par["output_uns_covariate_embeddings"],
cat_covariates_embeds_injection_layers=par["covariate_embedding_injection_layers"],
gene_idx_key=par["output_uns_gene_index"],
target_gene_idx_key=par["output_uns_target_genes_index"],
source_gene_idx_key=par["output_uns_source_genes_index"],
cat_covariates_embeds_injection=par["covariate_embedding_injection_layers"],
genes_idx_key=par["output_uns_genes_index"],
target_genes_idx_key=par["output_uns_target_genes_index"],
source_genes_idx_key=par["output_uns_source_genes_index"],
recon_adj_key=par["output_obsp_reconstructed_adj_edge_proba"],
agg_weights_key=par["output_obsp_agg_weights"],
include_edge_recon_loss=par["include_edge_recon_loss"],
@@ -3364,7 +3301,6 @@ model = NicheCompass(
encoder_use_bn=par["encoder_use_bn"],
dropout_rate_encoder=par["dropout_rate_encoder"],
dropout_rate_graph_decoder=par["dropout_rate_graph_decoder"],
cat_covariates_cats=par["cat_covariates_cats"],
n_addon_gp=par["n_addon_gp"],
cat_covariates_embeds_nums=par["cat_covariates_embeds_nums"],
seed=par["random_state"],
@@ -3402,7 +3338,7 @@ model.train(
## Save model and data
logger.info("Saving NicheCompass model and data...")
mdata = mu.MuData({par["modality"]: adata})
mdata.write_h5mu(par["output"])
mdata.write_h5mu(par["output"], compression=par["output_compression"])
model.save(par["output_model"], save_adata=False)
VIASHMAIN

View File

@@ -301,7 +301,7 @@ build_info:
output: "target/nextflow/convert/from_cells2stats_to_h5mu"
executable: "target/nextflow/convert/from_cells2stats_to_h5mu/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3399,7 +3399,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/convert/from_cells2stats_to_h5mu",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -238,7 +238,7 @@ build_info:
output: "target/nextflow/convert/from_cosmx_to_h5mu"
executable: "target/nextflow/convert/from_cosmx_to_h5mu/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3350,7 +3350,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/convert/from_cosmx_to_h5mu",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -234,7 +234,7 @@ build_info:
output: "target/nextflow/convert/from_cosmx_to_spatialexperiment"
executable: "target/nextflow/convert/from_cosmx_to_spatialexperiment/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3326,7 +3326,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/convert/from_cosmx_to_spatialexperiment",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -224,7 +224,7 @@ build_info:
output: "target/nextflow/convert/from_h5mu_to_spatialexperiment"
executable: "target/nextflow/convert/from_h5mu_to_spatialexperiment/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3332,7 +3332,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/convert/from_h5mu_to_spatialexperiment",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -221,7 +221,7 @@ build_info:
output: "target/nextflow/convert/from_spatialdata_to_h5mu"
executable: "target/nextflow/convert/from_spatialdata_to_h5mu/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3331,7 +3331,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/convert/from_spatialdata_to_h5mu",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -244,7 +244,7 @@ build_info:
output: "target/nextflow/convert/from_xenium_to_h5mu"
executable: "target/nextflow/convert/from_xenium_to_h5mu/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3348,7 +3348,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/convert/from_xenium_to_h5mu",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -326,7 +326,7 @@ build_info:
output: "target/nextflow/convert/from_xenium_to_spatialdata"
executable: "target/nextflow/convert/from_xenium_to_spatialdata/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3443,7 +3443,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/convert/from_xenium_to_spatialdata",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -224,7 +224,7 @@ build_info:
output: "target/nextflow/convert/from_xenium_to_spatialexperiment"
executable: "target/nextflow/convert/from_xenium_to_spatialexperiment/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3315,7 +3315,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/convert/from_xenium_to_spatialexperiment",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -296,7 +296,7 @@ build_info:
output: "target/nextflow/dataflow/obsp_block_concatenation"
executable: "target/nextflow/dataflow/obsp_block_concatenation/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3400,7 +3400,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/dataflow/obsp_block_concatenation",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -426,7 +426,7 @@ build_info:
output: "target/nextflow/mapping/spaceranger_count"
executable: "target/nextflow/mapping/spaceranger_count/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3548,7 +3548,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/mapping/spaceranger_count",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -1,5 +1,5 @@
name: "neighbors"
namespace: "spatial_neighborhood_graph"
name: "spatial_neighborhood_graph"
namespace: "neighbors"
version: "niche-compass"
authors:
- name: "Dorien Roosen"
@@ -269,10 +269,10 @@ build_info:
config: "src/neighbors/spatial_neighborhood_graph/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/spatial_neighborhood_graph/neighbors"
executable: "target/nextflow/spatial_neighborhood_graph/neighbors/main.nf"
output: "target/nextflow/neighbors/spatial_neighborhood_graph"
executable: "target/nextflow/neighbors/spatial_neighborhood_graph/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -1,4 +1,4 @@
// neighbors niche-compass
// spatial_neighborhood_graph niche-compass
//
// This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative
// work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
@@ -3033,8 +3033,8 @@ nextflow.enable.dsl=2
meta = [
"resources_dir": moduleDir.toRealPath().normalize(),
"config": processConfig(readJsonBlob('''{
"name" : "neighbors",
"namespace" : "spatial_neighborhood_graph",
"name" : "spatial_neighborhood_graph",
"namespace" : "neighbors",
"version" : "niche-compass",
"authors" : [
{
@@ -3380,9 +3380,9 @@ meta = [
"config" : "/workdir/root/repo/src/neighbors/spatial_neighborhood_graph/config.vsh.yaml",
"runner" : "nextflow",
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/spatial_neighborhood_graph/neighbors",
"output" : "/workdir/root/repo/target/nextflow/neighbors/spatial_neighborhood_graph",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {
@@ -3884,7 +3884,7 @@ meta["defaults"] = [
directives: readJsonBlob('''{
"container" : {
"registry" : "images.viash-hub.com",
"image" : "vsh/openpipeline_spatial/spatial_neighborhood_graph/neighbors",
"image" : "vsh/openpipeline_spatial/neighbors/spatial_neighborhood_graph",
"tag" : "niche-compass"
},
"label" : [

View File

@@ -1,5 +1,5 @@
manifest {
name = 'spatial_neighborhood_graph/neighbors'
name = 'neighbors/spatial_neighborhood_graph'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'niche-compass'

View File

@@ -0,0 +1,68 @@
// Shared resource settings: default process directives plus withLabel selectors
// for CPU, memory and disk.
process {
// Default resources for components that hardly do any processing
memory = { 2.GB * task.attempt }
cpus = 1
// Retry for exit codes that have something to do with memory issues
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
// Also read by get_memory(): when maxMemory is set, the attempt whose number
// equals maxRetries requests the ceiling itself.
maxRetries = 3
// Optional memory ceiling used by get_memory(); while it is null, get_memory()
// returns the requested amount unchanged.
maxMemory = null
// CPU resources
withLabel: singlecpu { cpus = 1 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 10 }
withLabel: highcpu { cpus = 20 }
// Memory resources
// Each request is multiplied by task.attempt and passed through get_memory().
// NOTE(review): lowmem, midmem and highmem all use the same 50.GB base --
// confirm that the three tiers are meant to be identical.
withLabel: lowmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
// Disk space
// Nextflow apparently can't handle empty directives, i.e.
// withLabel: lowdisk {}
// so for that reason we have to add a dummy directive
withLabel: lowdisk {
dummyDirective = "dummyValue"
}
withLabel: middisk {
dummyDirective = "dummyValue"
}
withLabel: highdisk {
dummyDirective = "dummyValue"
}
withLabel: veryhighdisk {
dummyDirective = "dummyValue"
}
// NOTE: The above labels intentionally do not have an effect by default.
// The user should set the disk space requirements by adding the following
// to the compute environment:
//
// withLabel: lowdisk { disk = { 20.GB * task.attempt } }
// withLabel: middisk { disk = { 100.GB * task.attempt } }
// withLabel: highdisk { disk = { 200.GB * task.attempt } }
// withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
}
// Caps a requested memory amount at the optional `process.maxMemory` ceiling.
//
// to_compare: the requested memory (the labels pass e.g. `50.GB * task.attempt`).
// Returns:
//   - `to_compare` unchanged when no ceiling is configured;
//   - `process.maxMemory` on the attempt whose number equals `process.maxRetries`;
//   - `process.maxMemory` when the request is larger than the ceiling;
//   - `to_compare` otherwise.
// Any failure while evaluating the settings prints a diagnostic and exits.
def get_memory(to_compare) {
  // No ceiling configured (key absent, null or otherwise falsy): pass through.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      // Attempt number equals maxRetries: request the full ceiling.
      return process.maxMemory
    }
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Request exceeds the ceiling: clamp to it. This branch used to return the
      // undefined name `max_memory`, which fell into the catch below and aborted
      // the run instead of clamping.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -1,6 +1,6 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "neighbors",
"title": "spatial_neighborhood_graph",
"description": "Calculates a spatial neighborhood graph.",
"type": "object",
"$defs": {

View File

@@ -0,0 +1,12 @@
def setup_logger():
    """Configure the root logger to write INFO-and-above records to stdout.

    The root logger's level is set to ``INFO`` and a ``StreamHandler`` bound to
    the current ``sys.stdout`` is attached, formatted as
    ``<asctime> <levelname padded to 8> <message>``.

    The call is idempotent per stdout stream: if a handler previously attached
    by this function for the same ``sys.stdout`` object is still present, no
    second handler is added. Previously every call stacked another handler, so
    calling the function more than once duplicated each log line.

    Returns:
        logging.Logger: the root logger.
    """
    import logging
    import sys

    handler_name = "setup_logger_stdout"
    log_format = "%(asctime)s %(levelname)-8s %(message)s"

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Only recognise handlers this function created (matched by name) that still
    # point at the current stdout; handlers installed by other code are left
    # alone and do not prevent ours from being attached.
    already_attached = any(
        handler.get_name() == handler_name
        and getattr(handler, "stream", None) is sys.stdout
        for handler in logger.handlers
    )
    if not already_attached:
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.set_name(handler_name)
        console_handler.setFormatter(logging.Formatter(log_format))
        logger.addHandler(console_handler)

    return logger

View File

@@ -0,0 +1,469 @@
# Component identity: `gene_program_mask` in the `nichecompass` namespace, built
# with version `niche-compass`, followed by its author/maintainer metadata.
name: "gene_program_mask"
namespace: "nichecompass"
version: "niche-compass"
authors:
- name: "Dorien Roosen"
roles:
- "maintainer"
info:
role: "Core Team Member"
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
# Command-line interface of the component, grouped by purpose.
argument_groups:
# Input files. All three are declared optional (required: false); the
# descriptions state under which options each one becomes necessary.
- name: "Inputs"
arguments:
- type: "file"
name: "--input_gene_orthologs_mapping_file"
description: "Path to a CSV file mapping human genes to mouse orthologs.\nRequired\
\ for the OmniPath and NicheNet masks if `--species mouse`.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_metabolite_enzymes"
description: "Path to the MeBocost metabolite-enzymes TSV file.\nRequired for\
\ generating the MeBocost gene program mask.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_metabolite_sensors"
description: "Path to the MeBocost metabolite-sensors TSV file.\nRequired for\
\ generating the MeBocost gene program mask.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# General parameters: the species choice, four boolean toggles (each defaulting
# to true) selecting which gene program masks are created, and the overlap
# threshold, which is bounded to the range 0.0-1.0.
- name: "Parameters"
arguments:
- type: "string"
name: "--species"
description: "Species of the organism (human or mouse)."
info: null
default:
- "human"
required: false
choices:
- "human"
- "mouse"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--create_omnipath_gene_program_mask"
description: "Whether to create the OmniPath gene program mask."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--create_nichenet_gene_program_mask"
description: "Whether to create the NicheNet gene program mask."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--create_mebocost_gene_program_mask"
description: "Whether to create the MeBocost gene program mask."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--create_collectri_tf_gene_program_mask"
description: "Whether to create the CollecTRI TF gene program mask."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--overlap_thresh_target_genes"
description: "The minimum ratio of target genes that need to overlap between a\
\ GP without source genes and another GP for the GP to be dropped.\nGene programs\
\ with different source genes are never combined or dropped.\n"
info: null
default:
- 1.0
required: false
min: 0.0
max: 1.0
direction: "input"
multiple: false
multiple_sep: ";"
# OmniPath-specific filter; per its description it only affects the OmniPath
# gene programs.
- name: "Omnipath Parameters"
arguments:
- type: "integer"
name: "--omnipath_min_curation_effort"
description: "Minimum number of times an interaction has to be described in a\
\ paper and mentioned in a database to be included in the OmniPath gene programs."
info: null
default:
- 2
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# NicheNet-specific settings: network/matrix version and the two filters that
# limit how many target genes are kept.
- name: "NicheNet Parameters"
arguments:
- type: "string"
name: "--nichenet_version"
description: "Version of the NicheNet ligand receptor network and ligand target\
\ gene regulatory potential matrix.\n´v2´ is an improved version of ´v1´, and\
\ has separate files for mouse and human.\n"
info: null
default:
- "v2"
required: false
choices:
- "v1"
- "v2"
direction: "input"
multiple: false
multiple_sep: ";"
# NOTE(review): unlike --overlap_thresh_target_genes, this ratio declares no
# min/max bounds -- confirm whether 0.0-1.0 should be enforced here as well.
- type: "double"
name: "--nichenet_keep_target_genes_ratio"
description: "Ratio of target genes that are kept compared to total target genes.\n\
This ratio is applied over the entire matrix (not on gene program level), and\
\ determines the ´all_gps_score_keep_threshold´, which will be used to filter\
\ target genes according to their regulatory potential scores.\n"
info: null
default:
- 1.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# The description below previously said the top N "gene programs" are kept; the
# parameter caps target genes within one gene program, so it now says
# "target genes".
- type: "integer"
name: "--nichenet_max_n_target_genes_per_gp"
description: "Maximum number of target genes per gene program. If a gene program\
\ has more target genes than ´max_n_target_genes_per_gp´, only the ´max_n_target_genes_per_gp´\
\ target genes with the highest regulatory potential scores will be kept.\n\
Default value is chosen based on MultiNicheNet specification (s. Browaeys, R.\
\ et al. MultiNicheNet: a flexible framework for differential cell-cell communication\
\ analysis from multi-sample multi-condition single-cell transcriptomics data.\
\ bioRxiv (2023) doi:10.1101/2023.06.13.544751).\n"
info: null
default:
- 250
required: false
direction: "input"
multiple: false
multiple_sep: ";"
# Output files. Only --output (the gene program mask JSON) is required; the
# network, matrix and plot outputs are all declared with required: false.
- name: "Outputs"
arguments:
- type: "file"
name: "--output"
description: "Path to the output gene program mask JSON file."
info: null
example:
- "gp_mask.json"
must_exist: true
create_parent: true
required: true
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_omnipath_lr_network"
description: "Path to the output OmniPath ligand-receptor network CSV file."
info: null
example:
- "omnipath_lr_network.csv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_nichenet_lr_network"
description: "Path to the output NicheNet ligand-receptor network CSV file."
info: null
example:
- "nichenet_lr_network.csv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_nichenet_ligand_target_matrix"
description: "Path to the output NicheNet ligand-target gene regulatory potential\
\ matrix file."
info: null
example:
- "nichenet_ligand_target_matrix.csv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_collectri_tf_network"
description: "Path to the output CollecTRI TF-target gene regulatory potential\
\ network CSV file."
info: null
example:
- "collectri_tf_network.csv"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# Optional plot outputs; the examples use the .svg extension.
- type: "file"
name: "--output_omnipath_gp_gene_count_distributions"
description: "Path to save the OmniPath gene program gene count distributions\
\ plot."
info: null
example:
- "omnipath_gp_gene_count_distributions.svg"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_nichenet_gp_gene_count_distributions"
description: "Path to save the NicheNet gene program gene count distributions\
\ plot."
info: null
example:
- "nichenet_gp_gene_count_distributions.svg"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_mebocost_gp_gene_count_distributions"
description: "Path to save the MeBocost gene program gene count distributions\
\ plot."
info: null
example:
- "mebocost_gp_gene_count_distributions.svg"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--output_collectri_tf_gp_gene_count_distributions"
description: "Path to save the CollecTRI TF gene program gene count distributions\
\ plot."
info: null
example:
- "collectri_tf_gp_gene_count_distributions.svg"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
# Files bundled with the component: the main Python script, the logging helper,
# and the label config that the Nextflow runner includes via
# includeConfig("nextflow_labels.config").
resources:
- type: "python_script"
path: "script.py"
is_executable: true
- type: "file"
path: "setup_logger.py"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "Generation of a prior knowledge gene program mask for NicheCompass."
# Test script plus the `niche` test-data path it is given.
test_resources:
- type: "python_script"
path: "test.py"
is_executable: true
- type: "file"
path: "niche"
info: null
status: "enabled"
scope:
image: "public"
target: "public"
# External viash repository dependency, pinned to tag v3.0.0.
repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v3.0.0"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"
# Runners: a plain executable runner and a Nextflow runner.
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
# Default labels for this component. lowcpu/lowmem/lowdisk are not among the
# labels listed under config.labels below; presumably they are resolved by
# nextflow_labels.config, pulled in through config.script -- confirm.
label:
- "lowcpu"
- "lowmem"
- "lowdisk"
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
# Generic label -> directive table. mem*b labels are decimal sizes (powers of
# 10), mem*ib labels are binary sizes (powers of 2); all are given in bytes.
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
# Extra config line(s) for the generated Nextflow config; here it includes the
# bundled nextflow_labels.config resource.
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
# Execution engines: a Docker image built on a CUDA 12.4.1 / cuDNN devel /
# Ubuntu 22.04 base (per the image tag), and a native engine.
engines:
- type: "docker"
id: "docker"
image: "nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04"
target_registry: "images.viash-hub.com"
target_tag: "niche-compass"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "libhdf5-dev"
- "python3-pip"
- "python3-dev"
- "python-is-python3"
interactive: false
# NOTE(review): torch is installed without a version pin from the cu124 wheel
# index, while the PyG wheel index in the same command is pinned to
# torch-2.6.0+cu124 -- confirm the resolved torch version stays compatible, or
# pin torch explicitly.
- type: "docker"
run:
- "pip install torch --index-url https://download.pytorch.org/whl/cu124 \\\n&&\
\ pip install pyg_lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.6.0+cu124.html\
\ \n"
# numpy is constrained to <2; nichecompass itself is unpinned and installed
# with upgrade: true.
- type: "python"
user: false
packages:
- "numpy<2"
- "nichecompass"
upgrade: true
# Additional packages installed for the test setup only.
test_setup:
- type: "python"
user: false
packages:
- "viashpy==0.9.0"
upgrade: true
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Build provenance: source config, runner/engine combination, output location,
# viash version and the git commit/remote the build was made from.
build_info:
config: "src/nichecompass/gene_program_mask/config.vsh.yaml"
runner: "nextflow"
engine: "docker|native"
output: "target/nextflow/nichecompass/gene_program_mask"
executable: "target/nextflow/nichecompass/gene_program_mask/main.nf"
viash_version: "0.9.4"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
# Package-level settings for openpipeline_spatial.
package_config:
name: "openpipeline_spatial"
version: "niche-compass"
info:
test_resources:
- type: "s3"
path: "s3://openpipelines-bio/openpipeline_spatial/resources_test"
dest: "resources_test"
repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v3.0.0"
viash_version: "0.9.4"
source: "src"
target: "target"
# Config mods, in order: add the shared labels config as a resource and include
# it from the Nextflow runner config; add a native engine; set the Docker target
# registry; set the Docker target tag.
config_mods:
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'niche-compass'"
organization: "vsh"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,126 @@
// Pipeline metadata for the nichecompass/gene_program_mask module.
manifest {
name = 'nichecompass/gene_program_mask'
mainScript = 'main.nf'
nextflowVersion = '!>=20.12.1-edge'
version = 'niche-compass'
description = 'Generation of a prior knowledge gene program mask for NicheCompass.'
author = 'Dorien Roosen'
}
// Container image set at the process scope.
// NOTE(review): presumably a fallback that the component's own container
// directive in main.nf overrides -- confirm.
process.container = 'nextflow/bash:latest'
// detect tempdir
// Takes the first set, non-empty variable among NXF_TEMP, VIASH_TEMP, TEMPDIR
// and TMPDIR, falling back to '/tmp', and converts it to an absolute path.
tempDir = java.nio.file.Paths.get(
System.getenv('NXF_TEMP') ?:
System.getenv('VIASH_TEMP') ?:
System.getenv('TEMPDIR') ?:
System.getenv('TMPDIR') ?:
'/tmp'
).toAbsolutePath()
// Selectable profiles: publishing control, temp-dir mounting, and one profile
// per container engine.
profiles {
// Disables publishDir for every process (withName matches '.*').
no_publish {
process {
withName: '.*' {
publishDir = [
enabled: false
]
}
}
}
// Points the docker, podman and charliecloud temp setting at the tempDir
// value computed earlier in this file.
mount_temp {
docker.temp = tempDir
podman.temp = tempDir
charliecloud.temp = tempDir
}
// Each engine profile below enables exactly one container engine and
// explicitly disables the other four.
docker {
docker.enabled = true
// docker.userEmulation = true
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
docker.enabled = false
podman.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
podman {
podman.enabled = true
docker.enabled = false
singularity.enabled = false
shifter.enabled = false
charliecloud.enabled = false
}
shifter {
shifter.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
charliecloud.enabled = false
}
charliecloud {
charliecloud.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
shifter.enabled = false
}
}
// Generic label selectors. mem*b labels are decimal sizes (powers of 10),
// mem*ib labels are binary sizes (powers of 2), all expressed in bytes; cpu*
// labels set the CPU count.
process{
withLabel: mem1gb { memory = 1000000000.B }
withLabel: mem2gb { memory = 2000000000.B }
withLabel: mem5gb { memory = 5000000000.B }
withLabel: mem10gb { memory = 10000000000.B }
withLabel: mem20gb { memory = 20000000000.B }
withLabel: mem50gb { memory = 50000000000.B }
withLabel: mem100gb { memory = 100000000000.B }
withLabel: mem200gb { memory = 200000000000.B }
withLabel: mem500gb { memory = 500000000000.B }
withLabel: mem1tb { memory = 1000000000000.B }
withLabel: mem2tb { memory = 2000000000000.B }
withLabel: mem5tb { memory = 5000000000000.B }
withLabel: mem10tb { memory = 10000000000000.B }
withLabel: mem20tb { memory = 20000000000000.B }
withLabel: mem50tb { memory = 50000000000000.B }
withLabel: mem100tb { memory = 100000000000000.B }
withLabel: mem200tb { memory = 200000000000000.B }
withLabel: mem500tb { memory = 500000000000000.B }
withLabel: mem1gib { memory = 1073741824.B }
withLabel: mem2gib { memory = 2147483648.B }
withLabel: mem4gib { memory = 4294967296.B }
withLabel: mem8gib { memory = 8589934592.B }
withLabel: mem16gib { memory = 17179869184.B }
withLabel: mem32gib { memory = 34359738368.B }
withLabel: mem64gib { memory = 68719476736.B }
withLabel: mem128gib { memory = 137438953472.B }
withLabel: mem256gib { memory = 274877906944.B }
withLabel: mem512gib { memory = 549755813888.B }
withLabel: mem1tib { memory = 1099511627776.B }
withLabel: mem2tib { memory = 2199023255552.B }
withLabel: mem4tib { memory = 4398046511104.B }
withLabel: mem8tib { memory = 8796093022208.B }
withLabel: mem16tib { memory = 17592186044416.B }
withLabel: mem32tib { memory = 35184372088832.B }
withLabel: mem64tib { memory = 70368744177664.B }
withLabel: mem128tib { memory = 140737488355328.B }
withLabel: mem256tib { memory = 281474976710656.B }
withLabel: mem512tib { memory = 562949953421312.B }
withLabel: cpu1 { cpus = 1 }
withLabel: cpu2 { cpus = 2 }
withLabel: cpu5 { cpus = 5 }
withLabel: cpu10 { cpus = 10 }
withLabel: cpu20 { cpus = 20 }
withLabel: cpu50 { cpus = 50 }
withLabel: cpu100 { cpus = 100 }
withLabel: cpu200 { cpus = 200 }
withLabel: cpu500 { cpus = 500 }
withLabel: cpu1000 { cpus = 1000 }
}
// Pulls in the bundled label file (nextflow_labels.config) after the generic
// selectors above.
includeConfig("nextflow_labels.config")

View File

@@ -0,0 +1,68 @@
// Shared resource settings: default process directives plus withLabel selectors
// for CPU, memory and disk.
process {
// Default resources for components that hardly do any processing
memory = { 2.GB * task.attempt }
cpus = 1
// Retry for exit codes that have something to do with memory issues
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
// Also read by get_memory(): when maxMemory is set, the attempt whose number
// equals maxRetries requests the ceiling itself.
maxRetries = 3
// Optional memory ceiling used by get_memory(); while it is null, get_memory()
// returns the requested amount unchanged.
maxMemory = null
// CPU resources
withLabel: singlecpu { cpus = 1 }
withLabel: lowcpu { cpus = 4 }
withLabel: midcpu { cpus = 10 }
withLabel: highcpu { cpus = 20 }
// Memory resources
// Each request is multiplied by task.attempt and passed through get_memory().
// NOTE(review): lowmem, midmem and highmem all use the same 50.GB base --
// confirm that the three tiers are meant to be identical.
withLabel: lowmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: midmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: highmem { memory = { get_memory( 50.GB * task.attempt ) } }
withLabel: veryhighmem { memory = { get_memory( 75.GB * task.attempt ) } }
// Disk space
// Nextflow apparently can't handle empty directives, i.e.
// withLabel: lowdisk {}
// so for that reason we have to add a dummy directive
withLabel: lowdisk {
dummyDirective = "dummyValue"
}
withLabel: middisk {
dummyDirective = "dummyValue"
}
withLabel: highdisk {
dummyDirective = "dummyValue"
}
withLabel: veryhighdisk {
dummyDirective = "dummyValue"
}
// NOTE: The above labels intentionally do not have an effect by default.
// The user should set the disk space requirements by adding the following
// to the compute environment:
//
// withLabel: lowdisk { disk = { 20.GB * task.attempt } }
// withLabel: middisk { disk = { 100.GB * task.attempt } }
// withLabel: highdisk { disk = { 200.GB * task.attempt } }
// withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
}
// Caps a requested memory amount at the optional `process.maxMemory` ceiling.
//
// to_compare: the requested memory (the labels pass e.g. `50.GB * task.attempt`).
// Returns:
//   - `to_compare` unchanged when no ceiling is configured;
//   - `process.maxMemory` on the attempt whose number equals `process.maxRetries`;
//   - `process.maxMemory` when the request is larger than the ceiling;
//   - `to_compare` otherwise.
// Any failure while evaluating the settings prints a diagnostic and exits.
def get_memory(to_compare) {
  // No ceiling configured (key absent, null or otherwise falsy): pass through.
  if (!process.containsKey("maxMemory") || !process.maxMemory) {
    return to_compare
  }

  try {
    if (process.containsKey("maxRetries") && process.maxRetries && task.attempt == (process.maxRetries as int)) {
      // Attempt number equals maxRetries: request the full ceiling.
      return process.maxMemory
    }
    else if (to_compare.compareTo(process.maxMemory as nextflow.util.MemoryUnit) > 0) {
      // Request exceeds the ceiling: clamp to it. This branch used to return the
      // undefined name `max_memory`, which fell into the catch below and aborted
      // the run instead of clamping.
      return process.maxMemory as nextflow.util.MemoryUnit
    }
    else {
      return to_compare
    }
  } catch (all) {
    println "Error processing memory resources. Please check that process.maxMemory '${process.maxMemory}' and process.maxRetries '${process.maxRetries}' are valid!"
    System.exit(1)
  }
}

View File

@@ -0,0 +1,224 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "gene_program_mask",
"description": "Generation of a prior knowledge gene program mask for NicheCompass.",
"type": "object",
"$defs": {
"inputs": {
"title": "Inputs",
"type": "object",
"description": "No description",
"properties": {
"input_gene_orthologs_mapping_file": {
"type": "string",
"format": "path",
"description": "Path to a CSV file mapping human genes to mouse orthologs.\nRequired for the OmniPath and NicheNet masks if `--species mouse`.\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
},
"input_metabolite_enzymes": {
"type": "string",
"format": "path",
"description": "Path to the MeBocost metabolite-enzymes TSV file.\nRequired for generating the MeBocost gene program mask.\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
},
"input_metabolite_sensors": {
"type": "string",
"format": "path",
"description": "Path to the MeBocost metabolite-sensors TSV file.\nRequired for generating the MeBocost gene program mask.\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
}
}
},
"outputs": {
"title": "Outputs",
"type": "object",
"description": "No description",
"properties": {
"output": {
"type": "string",
"format": "path",
"description": "Path to the output gene program mask JSON file.",
"help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.output.json\"`, direction: `output`, example: `\"gp_mask.json\"`. ",
"default": "$id.$key.output.json"
},
"output_omnipath_lr_network": {
"type": "string",
"format": "path",
"description": "Path to the output OmniPath ligand-receptor network CSV file.",
"help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_omnipath_lr_network.csv\"`, direction: `output`, example: `\"omnipath_lr_network.csv\"`. ",
"default": "$id.$key.output_omnipath_lr_network.csv"
},
"output_nichenet_lr_network": {
"type": "string",
"format": "path",
"description": "Path to the output NicheNet ligand-receptor network CSV file.",
"help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_nichenet_lr_network.csv\"`, direction: `output`, example: `\"nichenet_lr_network.csv\"`. ",
"default": "$id.$key.output_nichenet_lr_network.csv"
},
"output_nichenet_ligand_target_matrix": {
"type": "string",
"format": "path",
"description": "Path to the output NicheNet ligand-target gene regulatory potential matrix file.",
"help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_nichenet_ligand_target_matrix.csv\"`, direction: `output`, example: `\"nichenet_ligand_target_matrix.csv\"`. ",
"default": "$id.$key.output_nichenet_ligand_target_matrix.csv"
},
"output_collectri_tf_network": {
"type": "string",
"format": "path",
"description": "Path to the output CollecTRI TF-target gene regulatory potential network CSV file.",
"help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_collectri_tf_network.csv\"`, direction: `output`, example: `\"collectri_tf_network.csv\"`. ",
"default": "$id.$key.output_collectri_tf_network.csv"
},
"output_omnipath_gp_gene_count_distributions": {
"type": "string",
"format": "path",
"description": "Path to save the OmniPath gene program gene count distributions plot.",
"help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_omnipath_gp_gene_count_distributions.svg\"`, direction: `output`, example: `\"omnipath_gp_gene_count_distributions.svg\"`. ",
"default": "$id.$key.output_omnipath_gp_gene_count_distributions.svg"
},
"output_nichenet_gp_gene_count_distributions": {
"type": "string",
"format": "path",
"description": "Path to save the NicheNet gene program gene count distributions plot.",
"help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_nichenet_gp_gene_count_distributions.svg\"`, direction: `output`, example: `\"nichenet_gp_gene_count_distributions.svg\"`. ",
"default": "$id.$key.output_nichenet_gp_gene_count_distributions.svg"
},
"output_mebocost_gp_gene_count_distributions": {
"type": "string",
"format": "path",
"description": "Path to save the MeBocost gene program gene count distributions plot.",
"help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_mebocost_gp_gene_count_distributions.svg\"`, direction: `output`, example: `\"mebocost_gp_gene_count_distributions.svg\"`. ",
"default": "$id.$key.output_mebocost_gp_gene_count_distributions.svg"
},
"output_collectri_tf_gp_gene_count_distributions": {
"type": "string",
"format": "path",
"description": "Path to save the CollecTRI TF gene program gene count distributions plot.",
"help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.output_collectri_tf_gp_gene_count_distributions.svg\"`, direction: `output`, example: `\"collectri_tf_gp_gene_count_distributions.svg\"`. ",
"default": "$id.$key.output_collectri_tf_gp_gene_count_distributions.svg"
}
}
},
"parameters": {
"title": "Parameters",
"type": "object",
"description": "No description",
"properties": {
"species": {
"type": "string",
"description": "Species of the organism (human or mouse).",
"help_text": "Type: `string`, multiple: `False`, default: `\"human\"`, choices: ``human`, `mouse``. ",
"enum": [
"human",
"mouse"
],
"default": "human"
},
"create_omnipath_gene_program_mask": {
"type": "boolean",
"description": "Whether to create the OmniPath gene program mask.",
"help_text": "Type: `boolean`, multiple: `False`, default: `true`. ",
"default": true
},
"create_nichenet_gene_program_mask": {
"type": "boolean",
"description": "Whether to create the NicheNet gene program mask.",
"help_text": "Type: `boolean`, multiple: `False`, default: `true`. ",
"default": true
},
"create_mebocost_gene_program_mask": {
"type": "boolean",
"description": "Whether to create the MeBocost gene program mask.",
"help_text": "Type: `boolean`, multiple: `False`, default: `true`. ",
"default": true
},
"create_collectri_tf_gene_program_mask": {
"type": "boolean",
"description": "Whether to create the CollecTRI TF gene program mask.",
"help_text": "Type: `boolean`, multiple: `False`, default: `true`. ",
"default": true
},
"overlap_thresh_target_genes": {
"type": "number",
"description": "The minimum ratio of target genes that need to overlap between a GP without source genes and another GP for the GP to be dropped.\nGene programs with different source genes are never combined or dropped.\n",
"help_text": "Type: `double`, multiple: `False`, default: `1.0`. ",
"default": 1.0
}
}
},
"omnipath parameters": {
"title": "Omnipath Parameters",
"type": "object",
"description": "No description",
"properties": {
"omnipath_min_curation_effort": {
"type": "integer",
"description": "Minimum number of times an interaction has to be described in a paper and mentioned in a database to be included in the OmniPath gene programs.",
"help_text": "Type: `integer`, multiple: `False`, default: `2`. ",
"default": 2
}
}
},
"nichenet parameters": {
"title": "NicheNet Parameters",
"type": "object",
"description": "No description",
"properties": {
"nichenet_version": {
"type": "string",
"description": "Version of the NicheNet ligand receptor network and ligand target gene regulatory potential matrix.\n´v2´ is an improved version of ´v1´, and has separate files for mouse and human.\n",
"help_text": "Type: `string`, multiple: `False`, default: `\"v2\"`, choices: ``v1`, `v2``. ",
"enum": [
"v1",
"v2"
],
"default": "v2"
},
"nichenet_keep_target_genes_ratio": {
"type": "number",
"description": "Ratio of target genes that are kept compared to total target genes.\nThis ratio is applied over the entire matrix (not on gene program level), and determines the ´all_gps_score_keep_threshold´, which will be used to filter target genes according to their regulatory potential scores.\n",
"help_text": "Type: `double`, multiple: `False`, default: `1.0`. ",
"default": 1.0
},
"nichenet_max_n_target_genes_per_gp": {
"type": "integer",
"description": "Maximum number of target genes per gene program",
"help_text": "Type: `integer`, multiple: `False`, default: `250`. ",
"default": 250
}
}
},
"nextflow input-output arguments": {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type": "string",
"description": "Path to an output directory.",
"help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
}
}
}
},
"allOf": [
{
"$ref": "#/$defs/inputs"
},
{
"$ref": "#/$defs/outputs"
},
{
"$ref": "#/$defs/parameters"
},
{
"$ref": "#/$defs/omnipath parameters"
},
{
"$ref": "#/$defs/nichenet parameters"
},
{
"$ref": "#/$defs/nextflow input-output arguments"
}
]
}

View File

@@ -0,0 +1,12 @@
def setup_logger():
    """Configure the root logger to write INFO-level records to stdout.

    The root logger's level is set to ``logging.INFO`` and a stream handler
    bound to ``sys.stdout`` is attached, formatting records as
    ``"%(asctime)s %(levelname)-8s %(message)s"``.

    The helper is safe to call repeatedly: if a stream handler writing to the
    current ``sys.stdout`` is already attached to the root logger, no further
    handler is added, so log lines are not duplicated.

    Returns:
        logging.Logger: the root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Adding a new handler on every call would emit each record once per
    # call, so only attach one when stdout is not already being served.
    already_attached = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not already_attached:
        console_handler = logging.StreamHandler(stdout)
        console_handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        )
        logger.addHandler(console_handler)

    return logger

View File

@@ -83,45 +83,6 @@ argument_groups:
direction: "input"
multiple: true
multiple_sep: ";"
- name: "Spatial Neighbors Calculation"
arguments:
- type: "string"
name: "--coord_type"
description: "Type of coordinate system. Valid options are:\n`grid` - grid coordinates.\n\
`generic` - generic coordinates.\nIf not provided, `grid` is used if `--input_obsm_spatial_coords`\
\ is in --input .uns with `--n_neighs` = 6 (Visium), otherwise `generic` is\
\ used.\n"
info: null
required: false
choices:
- "generic"
- "grid"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--n_spatial_neighbors"
description: "Depending on `--coord_type`:\n`grid` - number of neighboring tiles.\n\
`generic` - number of neighborhoods for non-grid data. Only used when `--delaunay\
\ False`.\n"
info: null
default:
- 6
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--delaunay"
description: "Whether to use Delaunay triangulation to determine spatial neighborhood\
\ graph.\nOnly used when `--coord_type generic`.\n"
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Gene Program Mask"
arguments:
- type: "integer"
@@ -807,12 +768,12 @@ argument_groups:
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_uns_gene_index"
name: "--output_uns_genes_index"
description: "Key of the uns field where the index of a concatenated vector of\
\ target and source genes that are in the gene program masks will be stored.\n"
info: null
default:
- "nichecompass_gene_idx"
- "nichecompass_genes_idx"
required: false
direction: "input"
multiple: false
@@ -870,6 +831,20 @@ argument_groups:
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--output_compression"
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
By default no compression is applied.\n"
info: null
example:
- "gzip"
required: false
choices:
- "gzip"
- "lzf"
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "python_script"
path: "script.py"
@@ -1028,7 +1003,7 @@ build_info:
output: "target/nextflow/nichecompass/nichecompass"
executable: "target/nextflow/nichecompass/nichecompass/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
package_config:
name: "openpipeline_spatial"

View File

@@ -3132,48 +3132,6 @@ meta = [
}
]
},
{
"name" : "Spatial Neighbors Calculation",
"arguments" : [
{
"type" : "string",
"name" : "--coord_type",
"description" : "Type of coordinate system. Valid options are:\n`grid` - grid coordinates.\n`generic` - generic coordinates.\nIf not provided, `grid` is used if `--input_obsm_spatial_coords` is in --input .uns with `--n_neighs` = 6 (Visium), otherwise `generic` is used.\n",
"required" : false,
"choices" : [
"generic",
"grid"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "integer",
"name" : "--n_spatial_neighbors",
"description" : "Depending on `--coord_type`:\n`grid` - number of neighboring tiles.\n`generic` - number of neighborhoods for non-grid data. Only used when `--delaunay False`.\n",
"default" : [
6
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "boolean",
"name" : "--delaunay",
"description" : "Whether to use Delaunay triangulation to determine spatial neighborhood graph.\nOnly used when `--coord_type generic`.\n",
"default" : [
false
],
"required" : false,
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
},
{
"name" : "Gene Program Mask",
"arguments" : [
@@ -3889,10 +3847,10 @@ meta = [
},
{
"type" : "string",
"name" : "--output_uns_gene_index",
"name" : "--output_uns_genes_index",
"description" : "Key of the uns field where the index of a concatenated vector of target and source genes that are in the gene program masks will be stored.\n",
"default" : [
"nichecompass_gene_idx"
"nichecompass_genes_idx"
],
"required" : false,
"direction" : "input",
@@ -3955,6 +3913,22 @@ meta = [
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
},
{
"type" : "string",
"name" : "--output_compression",
"description" : "Compression format to use for the output AnnData and/or Mudata objects.\nBy default no compression is applied.\n",
"example" : [
"gzip"
],
"required" : false,
"choices" : [
"gzip",
"lzf"
],
"direction" : "input",
"multiple" : false,
"multiple_sep" : ";"
}
]
}
@@ -4164,7 +4138,7 @@ meta = [
"engine" : "docker|native",
"output" : "/workdir/root/repo/target/nextflow/nichecompass/nichecompass",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {
@@ -4231,9 +4205,6 @@ par = {
'layer': $( if [ ! -z ${VIASH_PAR_LAYER+x} ]; then echo "r'${VIASH_PAR_LAYER//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'input_obsm_spatial_connectivities': $( if [ ! -z ${VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBSM_SPATIAL_CONNECTIVITIES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'input_obs_covariates': $( if [ ! -z ${VIASH_PAR_INPUT_OBS_COVARIATES+x} ]; then echo "r'${VIASH_PAR_INPUT_OBS_COVARIATES//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ),
'coord_type': $( if [ ! -z ${VIASH_PAR_COORD_TYPE+x} ]; then echo "r'${VIASH_PAR_COORD_TYPE//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'n_spatial_neighbors': $( if [ ! -z ${VIASH_PAR_N_SPATIAL_NEIGHBORS+x} ]; then echo "int(r'${VIASH_PAR_N_SPATIAL_NEIGHBORS//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'delaunay': $( if [ ! -z ${VIASH_PAR_DELAUNAY+x} ]; then echo "r'${VIASH_PAR_DELAUNAY//\\'/\\'\\"\\'\\"r\\'}'.lower() == 'true'"; else echo None; fi ),
'min_genes_per_gp': $( if [ ! -z ${VIASH_PAR_MIN_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_GENES_PER_GP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'min_source_genes_per_gp': $( if [ ! -z ${VIASH_PAR_MIN_SOURCE_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_SOURCE_GENES_PER_GP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
'min_target_genes_per_gp': $( if [ ! -z ${VIASH_PAR_MIN_TARGET_GENES_PER_GP+x} ]; then echo "int(r'${VIASH_PAR_MIN_TARGET_GENES_PER_GP//\\'/\\'\\"\\'\\"r\\'}')"; else echo None; fi ),
@@ -4290,12 +4261,13 @@ par = {
'output_varm_gp_sources_mask': $( if [ ! -z ${VIASH_PAR_OUTPUT_VARM_GP_SOURCES_MASK+x} ]; then echo "r'${VIASH_PAR_OUTPUT_VARM_GP_SOURCES_MASK//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output_uns_gp_names': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_GP_NAMES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GP_NAMES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output_uns_active_gp_names': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_ACTIVE_GP_NAMES//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output_uns_gene_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_GENE_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GENE_INDEX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output_uns_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_GENES_INDEX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output_uns_target_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_TARGET_GENES_INDEX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output_uns_source_genes_index': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_SOURCE_GENES_INDEX+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_SOURCE_GENES_INDEX//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output_uns_covariate_embeddings': $( if [ ! -z ${VIASH_PAR_OUTPUT_UNS_COVARIATE_EMBEDDINGS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_UNS_COVARIATE_EMBEDDINGS//\\'/\\'\\"\\'\\"r\\'}'.split(';')"; else echo None; fi ),
'output_obsp_reconstructed_adj_edge_proba': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBSP_RECONSTRUCTED_ADJ_EDGE_PROBA+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_RECONSTRUCTED_ADJ_EDGE_PROBA//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output_obsp_agg_weights': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi )
'output_obsp_agg_weights': $( if [ ! -z ${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS+x} ]; then echo "r'${VIASH_PAR_OUTPUT_OBSP_AGG_WEIGHTS//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
'output_compression': $( if [ ! -z ${VIASH_PAR_OUTPUT_COMPRESSION+x} ]; then echo "r'${VIASH_PAR_OUTPUT_COMPRESSION//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi )
}
meta = {
'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\\'/\\'\\"\\'\\"r\\'}'"; else echo None; fi ),
@@ -4334,21 +4306,19 @@ logger.info("GPU enabled? %s", use_gpu)
## Read in data
adata = mu.read_h5ad(par["input"], mod=par["modality"])
# ## Compute spatial neighbor graph
# logger.info("Computing spatial neighbor graph...")
# # Compute connectivities and distances
# sq.gr.spatial_neighbors(
# adata,
# coord_type=par["coord_type"],
# spatial_key=par["input_obsm_spatial_coords"],
# n_neighs=par["n_spatial_neighbors"],
# delaunay=par["delaunay"],
# )
# # Making the connectivity matrix symmetric
# adata.obsp["spatial_connectivities"] = adata.obsp["spatial_connectivities"].maximum(
# adata.obsp["spatial_connectivities"].T
# )
# The NicheCompass model can only process float32 counts, so any other dtype
# is cast up front.
# See https://discuss.pytorch.org/t/runtimeerror-mat1-and-mat2-must-have-the-same-dtype/166759
if par["layer"] is None:
    # No layer requested: the counts live in adata.X.
    counts_dtype = adata.X.dtype
else:
    counts_dtype = adata.layers[par["layer"]].dtype

if counts_dtype != "float32":
    logger.info(
        f"Converting count data to float32 from {counts_dtype} for model compatibility..."
    )
    if par["layer"] is None:
        adata.X = adata.X.astype("float32")
    else:
        adata.layers[par["layer"]] = adata.layers[par["layer"]].astype("float32")
## Add GP mask to data
logger.info("Adding prior knowledge gene program mask to data...")
@@ -4361,7 +4331,7 @@ add_gps_from_gp_dict_to_adata(
gp_targets_mask_key=par["output_varm_gp_targets_mask"],
gp_sources_mask_key=par["output_varm_gp_sources_mask"],
gp_names_key=par["output_uns_gp_names"],
genes_idx_key=par["output_uns_gene_index"],
genes_idx_key=par["output_uns_genes_index"],
target_genes_idx_key=par["output_uns_target_genes_index"],
source_genes_idx_key=par["output_uns_source_genes_index"],
min_genes_per_gp=par["min_genes_per_gp"],
@@ -4384,12 +4354,12 @@ model = NicheCompass(
gp_sources_mask_key=par["output_varm_gp_sources_mask"],
latent_key=par["output_obsm_embedding"],
cat_covariates_keys=par["input_obs_covariates"],
cat_covariates_no_edges=par["covariates_edges"],
cat_covariates_no_edges=par["covariate_edges"],
cat_covariates_embeds_keys=par["output_uns_covariate_embeddings"],
cat_covariates_embeds_injection_layers=par["covariate_embedding_injection_layers"],
gene_idx_key=par["output_uns_gene_index"],
target_gene_idx_key=par["output_uns_target_genes_index"],
source_gene_idx_key=par["output_uns_source_genes_index"],
cat_covariates_embeds_injection=par["covariate_embedding_injection_layers"],
genes_idx_key=par["output_uns_genes_index"],
target_genes_idx_key=par["output_uns_target_genes_index"],
source_genes_idx_key=par["output_uns_source_genes_index"],
recon_adj_key=par["output_obsp_reconstructed_adj_edge_proba"],
agg_weights_key=par["output_obsp_agg_weights"],
include_edge_recon_loss=par["include_edge_recon_loss"],
@@ -4410,7 +4380,6 @@ model = NicheCompass(
encoder_use_bn=par["encoder_use_bn"],
dropout_rate_encoder=par["dropout_rate_encoder"],
dropout_rate_graph_decoder=par["dropout_rate_graph_decoder"],
cat_covariates_cats=par["cat_covariates_cats"],
n_addon_gp=par["n_addon_gp"],
cat_covariates_embeds_nums=par["cat_covariates_embeds_nums"],
seed=par["random_state"],
@@ -4448,7 +4417,7 @@ model.train(
## Save model and data
logger.info("Saving NicheCompass model and data...")
mdata = mu.MuData({par["modality"]: adata})
mdata.write_h5mu(par["output"])
mdata.write_h5mu(par["output"], compression=par["output_compression"])
model.save(par["output_model"], save_adata=False)
VIASHMAIN

View File

@@ -640,7 +640,7 @@ build_info:
output: "target/nextflow/workflows/multiomics/spatial_process_samples"
executable: "target/nextflow/workflows/multiomics/spatial_process_samples/main.nf"
viash_version: "0.9.4"
git_commit: "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
dependencies:
- "target/dependencies/vsh/vsh/openpipeline/v3.0.0/nextflow/workflows/multiomics/process_samples"

View File

@@ -3807,7 +3807,7 @@ meta = [
"engine" : "native",
"output" : "/workdir/root/repo/target/nextflow/workflows/multiomics/spatial_process_samples",
"viash_version" : "0.9.4",
"git_commit" : "0c1677bb93680d39ec2fb2f6bc68a2fcfae0e831",
"git_commit" : "9151204629228da14d7c82f49f24c607efb9251e",
"git_remote" : "https://github.com/openpipelines-bio/openpipeline_spatial"
},
"package_config" : {

View File

@@ -0,0 +1,816 @@
name: "nichecompass_leiden"
namespace: "workflows/niche"
version: "niche-compass"
authors:
- name: "Dorien Roosen"
roles:
- "author"
- "maintainer"
info:
role: "Core Team Member"
links:
email: "dorien@data-intuitive.com"
github: "dorien-er"
linkedin: "dorien-roosen"
organizations:
- name: "Data Intuitive"
href: "https://www.data-intuitive.com"
role: "Data Scientist"
- name: "Weiwei Schultz"
roles:
- "contributor"
info:
role: "Contributor"
organizations:
- name: "Janssen R&D US"
role: "Associate Director Data Sciences"
argument_groups:
- name: "Inputs"
arguments:
- type: "string"
name: "--id"
description: "ID of the sample."
info: null
example:
- "foo"
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input"
alternatives:
- "-i"
description: "Path to the sample."
info: null
example:
- "input.h5mu"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--input_gp_mask"
description: "JSON file containing a nested dictionary containing the gene programs,\n\
with keys being gene program names and values being dictionaries with keys `targets`\
\ and `sources`,\nwhere `targets` contains a list of the names of genes in the\
\ gene program for the reconstruction of the gene expression of the node itself\
\ (receiving node)\nand `sources` contains a list of the names of genes in the\
\ gene program for the reconstruction of the gene expression of the node's neighbors\
\ (transmitting nodes).\n"
info: null
example:
- "prior_knowledge_gp_mask.json"
must_exist: true
create_parent: true
required: true
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--modality"
description: "Which modality to process."
info: null
default:
- "rna"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--layer"
description: "Use specified layer for calculation of qc metrics. If not specified,\
\ adata.X is used."
info: null
example:
- "raw_counts"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--input_obs_covariates"
description: "Keys of the adata.obs fields to use as covariates."
info: null
default:
- "sample_id"
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--input_obsm_spatial_coords"
description: "Key in adata.obsm where spatial coordinates are stored"
info: null
default:
- "spatial"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Sample ID options"
description: "Options for adding the id to .obs on the MuData object. Having a sample\
\ \nid present is a requirement of several components for this pipeline.\n"
arguments:
- type: "boolean"
name: "--include_sample_as_covariate"
description: "Whether to include the sample information as a categorical covariate\
\ for the \nNicheCompass model.\n"
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--add_id_to_obs"
description: "Add the value passed with --id to .obs."
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--add_id_obs_output"
description: ".obs column to add the sample IDs to. Required and only used when\
\ \n--add_id_to_obs is set to 'true'\n"
info: null
default:
- "sample_id"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--add_id_make_observation_keys_unique"
description: "Join the id to the .obs index (.obs_names). \nOnly used when --add_id_to_obs\
\ is set to 'true'.\n"
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Spatial Neighbors Calculation"
description: "Options for the calculation of the spatial neighborhood graph.\n"
arguments:
- type: "string"
name: "--coord_type"
description: "Type of coordinate system provided by `--input_obsm_spatial_coords`.\
\ Valid options are:\n`grid` - grid coordinates.\n`generic` - generic coordinates.\n\
If not provided, `grid` is used if `--input_obsm_spatial_coords` is in --input\
\ .uns with `--n_spatial_neighbors` = 6 (Visium), otherwise `generic` is used.\n"
info: null
required: false
choices:
- "generic"
- "grid"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--n_spatial_neighbors"
description: "Depending on `--coord_type`:\n`grid` - number of neighboring tiles.\n\
`generic` - number of neighborhoods for non-grid data. Only used when `--delaunay\
\ False`.\n"
info: null
default:
- 6
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--delaunay"
description: "Whether to use Delaunay triangulation to determine spatial neighborhood\
\ graph.\nOnly used when `--coord_type generic`.\n"
info: null
default:
- false
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Gene Program Mask"
description: "Options for filtering gene programs based on the number of genes available\
\ in the data."
arguments:
- type: "integer"
name: "--min_genes_per_gp"
description: "Minimum number of genes in a gene program including both target and\
\ source genes that need to be available in the input data (gene expression\
\ has been probed) for a gene program not to be discarded.\n"
info: null
default:
- 1
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_source_genes_per_gp"
description: "Minimum number of source genes in a gene program that need to be\
\ available in the input data (gene expression has been probed) for a gene program\
\ not to be discarded.\n"
info: null
default:
- 0
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--min_target_genes_per_gp"
description: "Minimum number of target genes in a gene program that need to be\
\ available in the input data (gene expression has been probed) for a gene program\
\ not to be discarded.\n"
info: null
default:
- 0
required: false
min: 0
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_genes_per_gp"
description: "Maximum number of genes in a gene program including both target and\
\ source genes that can be available in the input data (gene expression has\
\ been probed) for a gene program not to be discarded.\n"
info: null
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_source_genes_per_gp"
description: "Maximum number of source genes in a gene program that can be available\
\ in the input data (gene expression has been probed) for a gene program not\
\ to be discarded.\n"
info: null
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--max_target_genes_per_gp"
description: "Maximum number of target genes in a gene program that can be available\
\ in the input data (gene expression has been probed) for a gene program not\
\ to be discarded.\n"
info: null
required: false
min: 1
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--filter_genes_not_in_masks"
description: "Whether to remove the genes that are not in the gp masks from the\
\ input data.\n"
info: null
direction: "input"
- name: "NicheCompass Model Architecture"
description: "Options for the NicheCompass model architecture."
arguments:
- type: "boolean"
name: "--covariate_edges"
description: "List of booleans that indicate whether there can be edges between\
\ different categories of the categorical covariates.\nIf this is `True` for\
\ a specific categorical covariate, this covariate will be excluded from the\
\ edge reconstruction loss.\nNeeds to match the length and order of `--input_obs_covariates`.\n"
info: null
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "string"
name: "--gene_expr_recon_dist"
description: "The distribution used for gene expression reconstruction. \nIf `nb`,\
\ uses a negative binomial distribution. \nIf `zinb`, uses a zero-inflated negative\
\ binomial distribution.\n"
info: null
default:
- "nb"
required: false
choices:
- "nb"
- "zinb"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean"
name: "--log_variational"
description: "Whether to transform x by log(x+1) prior to encoding for numerical\
\ stability (not for normalization).\n"
info: null
default:
- true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--node_label_method"
description: "Node label method that will be used for omics reconstruction.\n\
If `one-hop-sum`, uses a concatenation of the node's input features with the\
\ sum of the input features of all nodes in the node's one-hop neighborhood.\n\
If `one-hop-norm`, uses a concatenation of the node's input features with the\
\ node's one-hop neighbors input features normalized as per Kipf, T. N. & Welling,\
\ M. Semi-Supervised Classification with Graph Convolutional Networks. arXiv\
\ [cs.LG] (2016).\nIf `one-hop-attention`, uses a concatenation of the node's\
\ input features with the node's one-hop neighbors input features weighted by\
\ an attention mechanism.\n"
info: null
default:
- "one-hop-norm"
required: false
choices:
- "one-hop-norm"
- "two-hop-norm"
- "one-hop-attention"
direction: "input"
multiple: false
multiple_sep: ";"
# Relative threshold (between 0.0 and 1.0, default 0.1) used to decide which
# gene programs count as active.
- type: 'double'
  name: '--active_gp_thresh_ratio'
  description: "Ratio that determines which gene programs are considered active
    and are used in the latent representation after model training.\n\
    All inactive gene programs will be dropped during model training after a
    determined number of epochs.\n\
    Aggregations of the absolute values of the gene weights of the gene
    expression decoder per gene program are calculated.\n\
    The maximum value, i.e. the value of the gene program with the highest
    aggregated value will be used as a benchmark and all gene programs whose
    aggregated value is smaller than `--active_gp_thresh_ratio` times this
    maximum value will be set to inactive.\n\
    If set to 0, all gene programs will be considered active.\n"
  info: null
  default: [0.1]
  min: 0.0
  max: 1.0
  required: false
  direction: 'input'
  multiple: false
  multiple_sep: ';'
# Whether active gene programs are determined jointly (`mixed`) or per
# prior/add-on set (`separate`, the default).
- type: 'string'
  name: '--active_gp_type'
  description: "Type to determine active gene programs. \n\
    Can be `mixed`, in which case active gene programs are determined across
    prior and add-on gene programs jointly,\n\
    or `separate` in which case they are determined separately for prior and
    add-on gene programs.\n"
  info: null
  default: ['separate']
  choices: ['mixed', 'separate']
  required: false
  direction: 'input'
  multiple: false
  multiple_sep: ';'
# Count of add-on (de novo) gene programs; non-negative, default 100.
- type: 'integer'
  name: '--n_addon_gp'
  description: >
    Number of addon gene programs (i.e. gene programs that are not included
    in masks but can be learned de novo).
  info: null
  default: [100]
  min: 0
  required: false
  direction: 'input'
  multiple: false
  multiple_sep: ';'
# Optional, multi-valued (`;`-separated) list of embedding sizes, one per
# categorical covariate.
- type: 'integer'
  name: '--cat_covariates_embeds_nums'
  description: |
    Number of embedding nodes for all categorical covariates.
    Must be the same length as `--input_obs_covariates`.
  info: null
  required: false
  direction: 'input'
  multiple: true
  multiple_sep: ';'
# Non-negative random seed; defaults to 0.
- type: 'integer'
  name: '--random_state'
  description: |
    Random seed for reproducibility.
  info: null
  default: [0]
  min: 0
  required: false
  direction: 'input'
  multiple: false
  multiple_sep: ';'
# Training settings for the NicheCompass model: epoch schedule, learning rate
# and weight decay, validation splits, batch sizes and neighbor sampling.
- name: "NicheCompass Training Parameters"
  description: "Options for training the NicheCompass model."
  arguments:
    # Total number of training epochs (at least 1).
    - type: "integer"
      name: "--n_epochs"
      description: "Number of training epochs"
      info: null
      default:
        - 100
      required: false
      min: 1
      direction: "input"
      multiple: false
      multiple_sep: ";"
    # Epochs trained with every gene program before only active ones are kept.
    - type: "integer"
      name: "--n_epochs_all_gps"
      description: "Number of epochs during which all gene programs are used for
        model training.\n\
        After that only active gene programs are retained.\n"
      info: null
      default:
        - 25
      required: false
      min: 0
      direction: "input"
      multiple: false
      multiple_sep: ";"
    # Pretraining epochs without the edge reconstruction loss (0 disables this).
    - type: "integer"
      name: "--n_epochs_no_edge_recon"
      description: "Number of epochs during which the edge reconstruction loss is
        excluded from backpropagation for pretraining using the other loss
        components.\n"
      info: null
      default:
        - 0
      required: false
      min: 0
      direction: "input"
      multiple: false
      multiple_sep: ";"
    # Pretraining epochs without the categorical covariates contrastive loss.
    - type: "integer"
      name: "--n_epochs_no_cat_covariates_contrastive"
      description: "Number of epochs during which the categorical covariates
        contrastive loss is excluded from backpropagation for pretraining using
        the other loss components.\n"
      info: null
      default:
        - 5
      required: false
      min: 0
      direction: "input"
      multiple: false
      multiple_sep: ";"
    # Learning rate, bounded to [0.0, 1.0].
    - type: "double"
      name: "--lr"
      description: "Learning rate"
      info: null
      default:
        - 0.001
      required: false
      min: 0.0
      max: 1.0
      direction: "input"
      multiple: false
      multiple_sep: ";"
    # L2 penalty. A lower bound of 0.0 is declared because a negative penalty
    # is not meaningful; this matches how `--lr` declares its bounds.
    - type: "double"
      name: "--weight_decay"
      description: "Weight decay (L2 penalty)."
      info: null
      default:
        - 0.001
      required: false
      min: 0.0
      direction: "input"
      multiple: false
      multiple_sep: ";"
    # Share of the data held out for validation at edge level.
    - type: "double"
      name: "--edge_val_ratio"
      description: "Fraction of the data that is used as validation set on
        edge-level. The rest of the data will be used as training set on
        edge-level.\n"
      info: null
      default:
        - 0.1
      required: false
      min: 0.0
      max: 1.0
      direction: "input"
      multiple: false
      multiple_sep: ";"
    # Share of the data held out for validation at node level.
    - type: "double"
      name: "--node_val_ratio"
      description: "Fraction of the data that is used as validation set on
        node-level. The rest of the data will be used as training set on
        node-level.\n"
      info: null
      default:
        - 0.1
      required: false
      min: 0.0
      max: 1.0
      direction: "input"
      multiple: false
      multiple_sep: ";"
    # Batch size of the edge-level dataloaders (at least 1).
    - type: "integer"
      name: "--edge_batch_size"
      description: "Batch size for the edge-level dataloaders.\n"
      info: null
      default:
        - 256
      required: false
      min: 1
      direction: "input"
      multiple: false
      multiple_sep: ";"
    # Batch size of the node-level dataloaders; no default is declared because
    # it is derived from `--edge_batch_size` when omitted.
    - type: "integer"
      name: "--node_batch_size"
      description: "Batch size for the node-level dataloaders.\n\
        If not provided, is automatically determined based on
        `--edge_batch_size`.\n"
      info: null
      required: false
      min: 1
      direction: "input"
      multiple: false
      multiple_sep: ";"
    # Neighbors sampled per node during training; -1 means all direct neighbors.
    - type: "integer"
      name: "--n_sampled_neighbors"
      description: "Number of neighbors that are sampled during model training
        from the spatial neighborhood graph.\n\
        If set to -1, all direct neighbors are included.\n"
      info: null
      default:
        - -1
      required: false
      min: -1
      direction: "input"
      multiple: false
      multiple_sep: ";"
# Settings for the Leiden clustering output: the `.obs` column prefix and the
# resolution(s) to cluster at.
- name: "Clustering options"
  arguments:
    # Prefix of the `.obs` columns; one column is written per resolution.
    - type: "string"
      name: "--obs_cluster"
      description: "Prefix for the .obs keys under which to add the cluster labels.
        Newly created columns in .obs will \n\
        be created from the specified value for '--obs_cluster' suffixed with an
        underscore and one of the resolutions\n\
        specified in '--leiden_resolution'.\n"
      info: null
      default:
        - "nichecompass_leiden"
      required: false
      direction: "input"
      multiple: false
      multiple_sep: ";"
    # One or more resolutions, separated by `;`.
    - type: "double"
      name: "--leiden_resolution"
      description: "Control the coarseness of the clustering. Higher values lead
        to more clusters."
      info: null
      default:
        - 1.0
      required: false
      direction: "input"
      multiple: true
      multiple_sep: ";"
# Where the UMAP embedding is written.
- name: 'Umap options'
  arguments:
    # Name of the `.obsm` slot that receives the UMAP embedding.
    - type: 'string'
      name: '--obsm_umap'
      description: >-
        In which .obsm slot to store the resulting UMAP embedding.
      info: null
      default: ['X_leiden_nichecompass_umap']
      required: false
      direction: 'input'
      multiple: false
      multiple_sep: ';'
# Slot names for the neighbor outputs: one `.uns` slot and two `.obsp` slots.
- name: 'Neighbour calculation'
  arguments:
    # `.uns` slot for the neighbor output objects.
    - type: 'string'
      name: '--uns_neighbors'
      description: >-
        In which .uns slot to store various neighbor output objects.
      info: null
      default: ['nichecompass_neighbors']
      required: false
      direction: 'input'
      multiple: false
      multiple_sep: ';'
    # `.obsp` slot for the neighbor distance matrix.
    - type: 'string'
      name: '--obsp_neighbor_distances'
      description: >-
        In which .obsp slot to store the distance matrix between the resulting
        neighbors.
      info: null
      default: ['nichecompass_distances']
      required: false
      direction: 'input'
      multiple: false
      multiple_sep: ';'
    # `.obsp` slot for the neighbor connectivities matrix.
    - type: 'string'
      name: '--obsp_neighbor_connectivities'
      description: >-
        In which .obsp slot to store the connectivities matrix between the
        resulting neighbors.
      info: null
      default: ['nichecompass_connectivities']
      required: false
      direction: 'input'
      multiple: false
      multiple_sep: ';'
# Output locations: two required output files plus the `.obsm` key that
# receives the embedding.
- name: 'Outputs'
  arguments:
    # Required output file; the example value is `output.h5mu`.
    - type: 'file'
      name: '--output'
      description: 'Destination path to the output.'
      info: null
      example: ['output.h5mu']
      must_exist: true
      create_parent: true
      required: true
      direction: 'output'
      multiple: false
      multiple_sep: ';'
    # Required output path for the trained model.
    - type: 'file'
      name: '--output_model'
      description: 'Directory to save the trained NicheCompass model.'
      info: null
      must_exist: true
      create_parent: true
      required: true
      direction: 'output'
      multiple: false
      multiple_sep: ';'
    # A string key, not a file, so its direction is `input` even in this group.
    - type: 'string'
      name: '--output_obsm_embedding'
      description: >
        Key of the obsm field where the latent / gene program representation
        of active gene programs will be stored after NicheCompass model training.
      info: null
      default: ['nichecompass_latent']
      required: false
      direction: 'input'
      multiple: false
      multiple_sep: ';'
resources:
- type: "nextflow_script"
path: "main.nf"
is_executable: true
entrypoint: "run_wf"
- type: "file"
path: "nextflow_labels.config"
dest: "nextflow_labels.config"
description: "A pipeline to compute the spatial neighborhood graph, perform nichecompass\
\ embedding followed by Leiden clustering."
test_resources:
- type: "nextflow_script"
path: "test.nf"
is_executable: true
entrypoint: "test_wf"
- type: "file"
path: "xenium_tiny.h5mu"
- type: "file"
path: "Lung5_Rep2_tiny.h5mu"
- type: "file"
path: "prior_knowledge_gp_mask.json"
info:
test_dependencies:
- name: "nichecompass_leiden_test"
namespace: "test_workflows/niche"
status: "enabled"
scope:
image: "public"
target: "public"
dependencies:
- name: "dataflow/obsp_block_concatenation"
repository:
type: "local"
- name: "neighbors/spatial_neighborhood_graph"
repository:
type: "local"
- name: "nichecompass/nichecompass"
repository:
type: "local"
- name: "metadata/add_id"
repository:
type: "vsh"
repo: "openpipeline"
tag: "v3.0.0"
- name: "workflows/multiomics/neighbors_leiden_umap"
repository:
type: "vsh"
repo: "openpipeline"
tag: "v3.0.0"
repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v3.0.0"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"
runners:
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
script:
- "includeConfig(\"nextflow_labels.config\")"
debug: false
container: "docker"
engines:
- type: "native"
id: "native"
build_info:
config: "src/workflows/niche/nichecompass_leiden/config.vsh.yaml"
runner: "nextflow"
engine: "native"
output: "target/nextflow/workflows/niche/nichecompass_leiden"
executable: "target/nextflow/workflows/niche/nichecompass_leiden/main.nf"
viash_version: "0.9.4"
git_commit: "9151204629228da14d7c82f49f24c607efb9251e"
git_remote: "https://github.com/openpipelines-bio/openpipeline_spatial"
dependencies:
- "target/nextflow/dataflow/obsp_block_concatenation"
- "target/nextflow/neighbors/spatial_neighborhood_graph"
- "target/nextflow/nichecompass/nichecompass"
- "target/dependencies/vsh/vsh/openpipeline/v3.0.0/nextflow/metadata/add_id"
- "target/dependencies/vsh/vsh/openpipeline/v3.0.0/nextflow/workflows/multiomics/neighbors_leiden_umap"
package_config:
name: "openpipeline_spatial"
version: "niche-compass"
info:
test_resources:
- type: "s3"
path: "s3://openpipelines-bio/openpipeline_spatial/resources_test"
dest: "resources_test"
repositories:
- type: "vsh"
name: "openpipeline"
repo: "openpipeline"
tag: "v3.0.0"
viash_version: "0.9.4"
source: "src"
target: "target"
config_mods:
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
)'"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'niche-compass'"
organization: "vsh"
links:
repository: "https://github.com/openpipelines-bio/openpipeline_spatial"
docker_registry: "ghcr.io"

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More