Build branch main with version main (173327cc)
Build pipeline: vsh-ci-build-template-k4qzr
Source commit: 173327cc56
Source message: Cellranger multi conversion: fix combined AB + CB probe experiments (#1062)
This commit is contained in:
322
target/executable/integrate/harmonypy/.config.vsh.yaml
Normal file
322
target/executable/integrate/harmonypy/.config.vsh.yaml
Normal file
@@ -0,0 +1,322 @@
|
||||
name: "harmonypy"
|
||||
namespace: "integrate"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "maintainer"
|
||||
info:
|
||||
role: "Core Team Member"
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Robrecht Cannoodt"
|
||||
roles:
|
||||
- "contributor"
|
||||
info:
|
||||
role: "Core Team Member"
|
||||
links:
|
||||
email: "robrecht@data-intuitive.com"
|
||||
github: "rcannood"
|
||||
orcid: "0000-0003-3641-729X"
|
||||
linkedin: "robrechtcannoodt"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Science Engineer"
|
||||
- name: "Open Problems"
|
||||
href: "https://openproblems.bio"
|
||||
role: "Core Member"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
alternatives:
|
||||
- "-i"
|
||||
description: "Input h5mu file"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
alternatives:
|
||||
- "-o"
|
||||
description: "Output h5mu file."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--modality"
|
||||
description: "Which modality from the input MuData file to process.\n"
|
||||
info: null
|
||||
default:
|
||||
- "rna"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obsm_input"
|
||||
description: "Which .obsm slot to use as a starting PCA embedding."
|
||||
info: null
|
||||
default:
|
||||
- "X_pca"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obsm_output"
|
||||
description: "In which .obsm slot to store the resulting integrated embedding."
|
||||
info: null
|
||||
default:
|
||||
- "X_pca_integrated"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--theta"
|
||||
description: "Diversity clustering penalty parameter. Can be set as a single value\
|
||||
\ for all batch observations or as multiple values, one for each observation\
|
||||
\ in the batches defined by --obs_covariates. theta=0 does not encourage any\
|
||||
\ diversity. Larger values of theta result in more diverse clusters."
|
||||
info: null
|
||||
default:
|
||||
- 2.0
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_covariates"
|
||||
description: "The .obs field(s) that define the covariate(s) to regress out."
|
||||
info: null
|
||||
example:
|
||||
- "batch"
|
||||
- "sample"
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
|
||||
By default no compression is applied.\n"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
required: false
|
||||
choices:
|
||||
- "gzip"
|
||||
- "lzf"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "setup_logger.py"
|
||||
- type: "file"
|
||||
path: "compress_h5mu.py"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Performs Harmony integration as described in https://github.com/immunogenomics/harmony.\
|
||||
\ Based on an implementation in python from https://github.com/slowkow/harmonypy"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "pbmc_1k_protein_v3_mms.h5mu"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
label:
|
||||
- "highmem"
|
||||
- "highcpu"
|
||||
- "highdisk"
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.12-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.11.1"
|
||||
- "mudata~=0.3.1"
|
||||
- "scanpy~=1.10.4"
|
||||
- "harmonypy~=0.0.6"
|
||||
script:
|
||||
- "exec(\"try:\\n import awkward\\nexcept ModuleNotFoundError:\\n exit(0)\\\
|
||||
nelse: exit(1)\")"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "git"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/integrate/harmonypy/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/integrate/harmonypy"
|
||||
executable: "target/executable/integrate/harmonypy/harmonypy"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
git_tag: "0.2.0-2055-g173327cc"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "main"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
\nIn terms of workflows, the following has been made available, but keep in mind\
|
||||
\ that\nindividual tools and functionality can be executed as standalone components\
|
||||
\ as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
|
||||
\ * Ingestion: Read mapping and generating a count matrix.\n * Single sample\
|
||||
\ processing: cell filtering and doublet detection.\n * Multisample processing:\
|
||||
\ Count transformation, normalization, QC metric calculations.\n * Integration:\
|
||||
\ Clustering, integration and batch correction using single and multimodal methods.\n\
|
||||
\ * Downstream analysis workflows\n"
|
||||
info:
|
||||
test_resources:
|
||||
- type: "s3"
|
||||
path: "s3://openpipelines-data"
|
||||
dest: "resources_test"
|
||||
nextflow_labels_ci:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
|
||||
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
|
||||
)'\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
homepage: "https://openpipelines.bio"
|
||||
documentation: "https://openpipelines.bio/fundamentals"
|
||||
issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
|
||||
87
target/executable/integrate/harmonypy/compress_h5mu.py
Normal file
87
target/executable/integrate/harmonypy/compress_h5mu.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import shutil
|
||||
from anndata import AnnData
|
||||
from mudata import write_h5ad
|
||||
from h5py import File as H5File
|
||||
from h5py import Group, Dataset
|
||||
from pathlib import Path
|
||||
from typing import Union, Literal
|
||||
from functools import partial
|
||||
|
||||
|
||||
def compress_h5mu(
    input_path: Union[str, Path],
    output_path: Union[str, Path],
    compression: Union[Literal["gzip"], Literal["lzf"]],
):
    """Write a copy of an HDF5-based .h5mu file with compressed datasets.

    Every group and dataset found in ``input_path`` is recreated under the
    same name in ``output_path``, together with its attributes. Datasets that
    are not yet compressed and are not scalar are rewritten with the requested
    ``compression``; all other datasets are copied as they are. Finally, the
    textual header stored at the start of the input file is copied into the
    512-byte user block of the output file.

    Args:
        input_path: Path of the existing file to read.
        output_path: Path of the file to create (opened in ``"w"`` mode).
        compression: Compression filter passed to ``create_dataset``.

    Raises:
        NotImplementedError: If a visited object is neither a ``Group`` nor a
            ``Dataset``.
    """
    input_path, output_path = str(input_path), str(output_path)
    # Size of the user block reserved at the start of the output file.
    userblock_size = 512

    def copy_attributes(in_object, out_object):
        """Copy all HDF5 attributes from ``in_object`` to ``out_object``."""
        for key, value in in_object.attrs.items():
            out_object.attrs[key] = value

    def visit_path(
        output_h5: H5File,
        compression: Union[Literal["gzip"], Literal["lzf"]],
        name: str,
        node: Union[Group, Dataset],
    ):
        """Recreate one visited input object in ``output_h5``."""
        if isinstance(node, Group):
            new_group = output_h5.create_group(name)
            copy_attributes(node, new_group)
        elif isinstance(node, Dataset):
            # Compression only works for non-scalar Dataset objects.
            # Scalar objects don't have a shape defined.
            if not node.compression and node.shape not in [None, ()]:
                new_dataset = output_h5.create_dataset(
                    name, data=node, compression=compression
                )
                copy_attributes(node, new_dataset)
            else:
                # Already compressed or scalar: copy the dataset unchanged.
                output_h5.copy(node, name)
        else:
            raise NotImplementedError(
                f"Could not copy element {name}, "
                f"type has not been implemented yet: {type(node)}"
            )

    with (
        H5File(input_path, "r") as input_h5,
        H5File(output_path, "w", userblock_size=userblock_size) as output_h5,
    ):
        copy_attributes(input_h5, output_h5)
        input_h5.visititems(partial(visit_path, output_h5, compression))

    with open(input_path, "rb") as input_bytes:
        # Mudata puts metadata like this in the first 512 bytes:
        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
        # See mudata/_core/io.py, read_h5mu() function
        starting_metadata = input_bytes.read(userblock_size)
    # The metadata is padded with extra null bytes up until 512 bytes.
    # partition() keeps the whole header when no null byte is present,
    # whereas slicing with the -1 returned by find() would drop the last byte.
    starting_metadata = starting_metadata.partition(b"\x00")[0]
    with open(output_path, "br+") as f:
        nbytes = f.write(starting_metadata)
        f.write(b"\0" * (userblock_size - nbytes))
|
||||
|
||||
|
||||
def write_h5ad_to_h5mu_with_compression(
    output_file: Union[str, Path],
    h5mu: Union[str, Path],
    modality_name: str,
    modality_data: AnnData,
    output_compression=None,
):
    """Copy an .h5mu file and overwrite one modality, optionally compressing.

    The file at ``h5mu`` is copied, after which ``modality_data`` is written
    into the copy as modality ``modality_name``. Without compression the copy
    is made directly at ``output_file``. With compression the copy is made at
    a sibling ``<stem>_uncompressed.h5mu`` file, which is then compressed into
    ``output_file`` and removed.

    Args:
        output_file: Path of the final output file.
        h5mu: Path of the existing .h5mu file to start from.
        modality_name: Name of the modality to write.
        modality_data: Data to store for that modality.
        output_compression: Compression passed on to ``compress_h5mu``; a
            falsy value disables compression.
    """
    output_file = Path(output_file)
    h5mu = Path(h5mu)

    if not output_compression:
        shutil.copyfile(h5mu, output_file)
        write_h5ad(filename=output_file, mod=modality_name, data=modality_data)
        return

    output_file_uncompressed = output_file.with_name(
        output_file.stem + "_uncompressed.h5mu"
    )
    try:
        shutil.copyfile(h5mu, output_file_uncompressed)
        write_h5ad(
            filename=output_file_uncompressed, mod=modality_name, data=modality_data
        )
        compress_h5mu(
            output_file_uncompressed, output_file, compression=output_compression
        )
    finally:
        # Remove the intermediate file even when writing or compressing
        # fails; missing_ok avoids masking an error raised before the file
        # was created.
        output_file_uncompressed.unlink(missing_ok=True)
|
||||
1330
target/executable/integrate/harmonypy/harmonypy
Executable file
1330
target/executable/integrate/harmonypy/harmonypy
Executable file
File diff suppressed because it is too large
Load Diff
48
target/executable/integrate/harmonypy/nextflow_labels.config
Normal file
48
target/executable/integrate/harmonypy/nextflow_labels.config
Normal file
@@ -0,0 +1,48 @@
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
|
||||
// The memory a task is assigned increases with each attempt
|
||||
// uncomment the line below and adjust the value to set a global upper limit on the memory.
|
||||
// resourceLimits = [ memory: 240.Gb ]
|
||||
|
||||
// CPU resources
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
|
||||
// Memory resources
|
||||
withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
|
||||
withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
|
||||
withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
|
||||
withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
|
||||
|
||||
// Disk space
|
||||
withLabel: lowdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: middisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: highdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: veryhighdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
|
||||
// NOTE: The above labels intentionally do not have an effect by default.
|
||||
// The user should set the disk space requirements by adding the following
|
||||
// to the compute environment:
|
||||
//
|
||||
// withLabel: lowdisk { disk = { 20.GB * task.attempt } }
|
||||
// withLabel: middisk { disk = { 100.GB * task.attempt } }
|
||||
// withLabel: highdisk { disk = { 200.GB * task.attempt } }
|
||||
// withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
|
||||
}
|
||||
12
target/executable/integrate/harmonypy/setup_logger.py
Normal file
12
target/executable/integrate/harmonypy/setup_logger.py
Normal file
@@ -0,0 +1,12 @@
|
||||
def setup_logger():
    """Configure the root logger to emit INFO and above to stdout.

    The root logger's level is set to INFO and a stream handler writing to
    the current ``sys.stdout`` is attached, using the format
    ``"%(asctime)s %(levelname)-8s %(message)s"``. Calling this function
    again while the handler it added for the same stdout is still attached
    does not add a second handler, so log lines are not duplicated.

    Returns:
        The root logger.
    """
    import logging
    from sys import stdout

    # Name used to recognise the handler added by a previous call.
    handler_name = "setup_logger_console"

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    already_attached = any(
        getattr(handler, "name", None) == handler_name
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not already_attached:
        console_handler = logging.StreamHandler(stdout)
        console_handler.set_name(handler_name)
        console_handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        )
        logger.addHandler(console_handler)

    return logger
|
||||
360
target/executable/integrate/scanorama/.config.vsh.yaml
Normal file
360
target/executable/integrate/scanorama/.config.vsh.yaml
Normal file
@@ -0,0 +1,360 @@
|
||||
name: "scanorama"
|
||||
namespace: "integrate"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Dries De Maeyer"
|
||||
roles:
|
||||
- "author"
|
||||
info:
|
||||
role: "Core Team Member"
|
||||
links:
|
||||
email: "ddemaeyer@gmail.com"
|
||||
github: "ddemaeyer"
|
||||
linkedin: "dries-de-maeyer-b46a814"
|
||||
organizations:
|
||||
- name: "Janssen Pharmaceuticals"
|
||||
href: "https://www.janssen.com"
|
||||
role: "Principal Scientist"
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "maintainer"
|
||||
info:
|
||||
role: "Core Team Member"
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
argument_groups:
|
||||
- name: "Arguments"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
alternatives:
|
||||
- "-i"
|
||||
description: "Input h5mu file"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--modality"
|
||||
description: "Which modality from the input MuData file to process.\n"
|
||||
info: null
|
||||
default:
|
||||
- "rna"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
alternatives:
|
||||
- "-o"
|
||||
description: "Output .h5mu file"
|
||||
info: null
|
||||
default:
|
||||
- "output.h5ad"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_batch"
|
||||
description: "Column name discriminating between your batches."
|
||||
info: null
|
||||
default:
|
||||
- "batch"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obsm_input"
|
||||
description: "Basis obsm slot to run scanorama on."
|
||||
info: null
|
||||
default:
|
||||
- "X_pca"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obsm_output"
|
||||
description: "The name of the field in adata.obsm where the integrated embeddings\
|
||||
\ will be stored after running this function. Defaults to X_scanorama."
|
||||
info: null
|
||||
default:
|
||||
- "X_scanorama"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--knn"
|
||||
description: "Number of nearest neighbors to use for matching."
|
||||
info: null
|
||||
default:
|
||||
- 20
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--batch_size"
|
||||
description: "The batch size used in the alignment vector computation. Useful\
|
||||
\ when integrating very large (>100k samples) datasets. Set to large value that\
|
||||
\ runs within available memory."
|
||||
info: null
|
||||
default:
|
||||
- 5000
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--sigma"
|
||||
description: "Correction smoothing parameter on Gaussian kernel."
|
||||
info: null
|
||||
default:
|
||||
- 15.0
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean"
|
||||
name: "--approx"
|
||||
description: "Use approximate nearest neighbors with Python annoy; greatly speeds\
|
||||
\ up matching runtime."
|
||||
info: null
|
||||
default:
|
||||
- true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--alpha"
|
||||
description: "Alignment score minimum cutoff"
|
||||
info: null
|
||||
default:
|
||||
- 0.1
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
|
||||
By default no compression is applied.\n"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
required: false
|
||||
choices:
|
||||
- "gzip"
|
||||
- "lzf"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "setup_logger.py"
|
||||
- type: "file"
|
||||
path: "compress_h5mu.py"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Use Scanorama to integrate different experiments.\n"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "pbmc_1k_protein_v3_mms.h5mu"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
label:
|
||||
- "midcpu"
|
||||
- "highmem"
|
||||
- "highdisk"
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "python:3.12-slim"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "procps"
|
||||
- "build-essential"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.11.1"
|
||||
- "mudata~=0.3.1"
|
||||
- "scanpy~=1.10.4"
|
||||
- "scanorama"
|
||||
script:
|
||||
- "exec(\"try:\\n import awkward\\nexcept ModuleNotFoundError:\\n exit(0)\\\
|
||||
nelse: exit(1)\")"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "git"
|
||||
interactive: false
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
github:
|
||||
- "openpipelines-bio/core#subdirectory=packages/python/openpipeline_testutils"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/integrate/scanorama/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/integrate/scanorama"
|
||||
executable: "target/executable/integrate/scanorama/scanorama"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
git_tag: "0.2.0-2055-g173327cc"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "main"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
\nIn terms of workflows, the following has been made available, but keep in mind\
|
||||
\ that\nindividual tools and functionality can be executed as standalone components\
|
||||
\ as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
|
||||
\ * Ingestion: Read mapping and generating a count matrix.\n * Single sample\
|
||||
\ processing: cell filtering and doublet detection.\n * Multisample processing:\
|
||||
\ Count transformation, normalization, QC metric calculations.\n * Integration:\
|
||||
\ Clustering, integration and batch correction using single and multimodal methods.\n\
|
||||
\ * Downstream analysis workflows\n"
|
||||
info:
|
||||
test_resources:
|
||||
- type: "s3"
|
||||
path: "s3://openpipelines-data"
|
||||
dest: "resources_test"
|
||||
nextflow_labels_ci:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
|
||||
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
|
||||
)'\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
homepage: "https://openpipelines.bio"
|
||||
documentation: "https://openpipelines.bio/fundamentals"
|
||||
issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
|
||||
87
target/executable/integrate/scanorama/compress_h5mu.py
Normal file
87
target/executable/integrate/scanorama/compress_h5mu.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import shutil
|
||||
from anndata import AnnData
|
||||
from mudata import write_h5ad
|
||||
from h5py import File as H5File
|
||||
from h5py import Group, Dataset
|
||||
from pathlib import Path
|
||||
from typing import Union, Literal
|
||||
from functools import partial
|
||||
|
||||
|
||||
def compress_h5mu(
    input_path: Union[str, Path],
    output_path: Union[str, Path],
    compression: Union[Literal["gzip"], Literal["lzf"]],
):
    """Write a copy of an HDF5-based .h5mu file with compressed datasets.

    Every group and dataset found in ``input_path`` is recreated under the
    same name in ``output_path``, together with its attributes. Datasets that
    are not yet compressed and are not scalar are rewritten with the requested
    ``compression``; all other datasets are copied as they are. Finally, the
    textual header stored at the start of the input file is copied into the
    512-byte user block of the output file.

    Args:
        input_path: Path of the existing file to read.
        output_path: Path of the file to create (opened in ``"w"`` mode).
        compression: Compression filter passed to ``create_dataset``.

    Raises:
        NotImplementedError: If a visited object is neither a ``Group`` nor a
            ``Dataset``.
    """
    input_path, output_path = str(input_path), str(output_path)
    # Size of the user block reserved at the start of the output file.
    userblock_size = 512

    def copy_attributes(in_object, out_object):
        """Copy all HDF5 attributes from ``in_object`` to ``out_object``."""
        for key, value in in_object.attrs.items():
            out_object.attrs[key] = value

    def visit_path(
        output_h5: H5File,
        compression: Union[Literal["gzip"], Literal["lzf"]],
        name: str,
        node: Union[Group, Dataset],
    ):
        """Recreate one visited input object in ``output_h5``."""
        if isinstance(node, Group):
            new_group = output_h5.create_group(name)
            copy_attributes(node, new_group)
        elif isinstance(node, Dataset):
            # Compression only works for non-scalar Dataset objects.
            # Scalar objects don't have a shape defined.
            if not node.compression and node.shape not in [None, ()]:
                new_dataset = output_h5.create_dataset(
                    name, data=node, compression=compression
                )
                copy_attributes(node, new_dataset)
            else:
                # Already compressed or scalar: copy the dataset unchanged.
                output_h5.copy(node, name)
        else:
            raise NotImplementedError(
                f"Could not copy element {name}, "
                f"type has not been implemented yet: {type(node)}"
            )

    with (
        H5File(input_path, "r") as input_h5,
        H5File(output_path, "w", userblock_size=userblock_size) as output_h5,
    ):
        copy_attributes(input_h5, output_h5)
        input_h5.visititems(partial(visit_path, output_h5, compression))

    with open(input_path, "rb") as input_bytes:
        # Mudata puts metadata like this in the first 512 bytes:
        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
        # See mudata/_core/io.py, read_h5mu() function
        starting_metadata = input_bytes.read(userblock_size)
    # The metadata is padded with extra null bytes up until 512 bytes.
    # partition() keeps the whole header when no null byte is present,
    # whereas slicing with the -1 returned by find() would drop the last byte.
    starting_metadata = starting_metadata.partition(b"\x00")[0]
    with open(output_path, "br+") as f:
        nbytes = f.write(starting_metadata)
        f.write(b"\0" * (userblock_size - nbytes))
|
||||
|
||||
|
||||
def write_h5ad_to_h5mu_with_compression(
    output_file: Union[str, Path],
    h5mu: Union[str, Path],
    modality_name: str,
    modality_data: AnnData,
    output_compression=None,
):
    """Copy an h5mu file and overwrite one modality, optionally compressing.

    The input file ``h5mu`` is copied, the modality ``modality_name`` in the
    copy is written from ``modality_data`` with ``write_h5ad``, and, when
    ``output_compression`` is set, the result is passed through
    ``compress_h5mu`` to produce ``output_file``.

    Parameters
    ----------
    output_file
        Path of the file to create.
    h5mu
        Path of the existing h5mu file that serves as the starting point.
    modality_name
        Name of the modality to write into the copy.
    modality_data
        Data to store under ``modality_name``.
    output_compression
        Compression passed on to ``compress_h5mu``. When falsy, no
        compression step is performed and the copy is written directly to
        ``output_file``.
    """
    output_file = Path(output_file)
    h5mu = Path(h5mu)

    if not output_compression:
        # No compression requested: work directly on the final output file.
        shutil.copyfile(h5mu, output_file)
        write_h5ad(filename=output_file, mod=modality_name, data=modality_data)
        return

    # With compression, the modality is first written to an intermediate
    # file located next to the requested output.
    output_file_uncompressed = output_file.with_name(
        output_file.stem + "_uncompressed.h5mu"
    )
    # The copy happens outside of the try block on purpose: if it fails
    # (for instance because source and destination are the same file), the
    # cleanup below must not delete anything.
    shutil.copyfile(h5mu, output_file_uncompressed)
    try:
        write_h5ad(
            filename=output_file_uncompressed, mod=modality_name, data=modality_data
        )
        compress_h5mu(
            output_file_uncompressed, output_file, compression=output_compression
        )
    finally:
        # Always remove the intermediate file, also when writing or
        # compressing raised, so no stray temporary file is left behind.
        output_file_uncompressed.unlink(missing_ok=True)
|
||||
48
target/executable/integrate/scanorama/nextflow_labels.config
Normal file
48
target/executable/integrate/scanorama/nextflow_labels.config
Normal file
@@ -0,0 +1,48 @@
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
|
||||
// The memory a task is assigned increases with each attempt
|
||||
// uncomment the line below and adjust the value to set a global upper limit on the memory.
|
||||
// resourceLimits = [ memory: 240.Gb ]
|
||||
|
||||
// CPU resources
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
|
||||
// Memory resources
|
||||
withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
|
||||
withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
|
||||
withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
|
||||
withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
|
||||
|
||||
// Disk space
|
||||
withLabel: lowdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: middisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: highdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: veryhighdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
|
||||
// NOTE: The above labels intentionally do not have an effect by default.
|
||||
// The user should set the disk space requirements by adding the following
|
||||
// to the compute environment:
|
||||
//
|
||||
// withLabel: lowdisk { disk = { 20.GB * task.attempt } }
|
||||
// withLabel: middisk { disk = { 100.GB * task.attempt } }
|
||||
// withLabel: highdisk { disk = { 200.GB * task.attempt } }
|
||||
// withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
|
||||
}
|
||||
1418
target/executable/integrate/scanorama/scanorama
Executable file
1418
target/executable/integrate/scanorama/scanorama
Executable file
File diff suppressed because it is too large
Load Diff
12
target/executable/integrate/scanorama/setup_logger.py
Normal file
12
target/executable/integrate/scanorama/setup_logger.py
Normal file
@@ -0,0 +1,12 @@
|
||||
def setup_logger():
    """Configure the root logger to log INFO messages to stdout.

    The root logger's level is set to ``INFO`` and a stream handler writing
    to ``sys.stdout`` is attached, using the format
    ``"%(asctime)s %(levelname)-8s %(message)s"``.

    The function is idempotent: when the root logger already has a stream
    handler bound to the current ``sys.stdout``, no additional handler is
    added, so calling it repeatedly does not duplicate log lines.

    Returns
    -------
    logging.Logger
        The root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Only attach a handler when none is already writing to this stdout.
    has_stdout_handler = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not has_stdout_handler:
        console_handler = logging.StreamHandler(stdout)
        log_formatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(log_formatter)
        logger.addHandler(console_handler)

    return logger
|
||||
482
target/executable/integrate/scarches/.config.vsh.yaml
Normal file
482
target/executable/integrate/scarches/.config.vsh.yaml
Normal file
@@ -0,0 +1,482 @@
|
||||
name: "scarches"
|
||||
namespace: "integrate"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Vladimir Shitov"
|
||||
roles:
|
||||
- "author"
|
||||
info:
|
||||
role: "Contributor"
|
||||
links:
|
||||
email: "vladimir.shitov@helmholtz-muenchen.de"
|
||||
github: "vladimirshitov"
|
||||
orcid: "0000-0002-1960-8812"
|
||||
linkedin: "vladimir-shitov-9a659513b"
|
||||
organizations:
|
||||
- name: "Helmholtz Munich"
|
||||
href: "https://www.helmholtz-munich.de"
|
||||
role: "PhD Candidate"
|
||||
- name: "Dorien Roosen"
|
||||
roles:
|
||||
- "maintainer"
|
||||
info:
|
||||
role: "Core Team Member"
|
||||
links:
|
||||
email: "dorien@data-intuitive.com"
|
||||
github: "dorien-er"
|
||||
linkedin: "dorien-roosen"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
argument_groups:
|
||||
- name: "Inputs"
|
||||
description: "Arguments related to the input (query) dataset"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
alternatives:
|
||||
- "-i"
|
||||
description: "Input h5mu file to use as a query"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--layer"
|
||||
description: "Layer to be used for scArches, if .X is not to be used."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--modality"
|
||||
description: "Which modality from the input MuData file to process.\n"
|
||||
info: null
|
||||
default:
|
||||
- "rna"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_obs_batch"
|
||||
description: "Name of the .obs column with batch information."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_obs_label"
|
||||
description: "Name of the .obs column with celltype information."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_var_gene_names"
|
||||
description: "Name of the .var column with gene names, if the var .index is not\
|
||||
\ to be used."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_obs_size_factor"
|
||||
description: "Key in adata.obs for size factor information. Instead of using library\
|
||||
\ size as a size factor,\nthe provided size factor column will be used as offset\
|
||||
\ in the mean of the likelihood.\nAssumed to be on linear scale.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Reference"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--reference"
|
||||
alternatives:
|
||||
- "-r"
|
||||
description: "Path to the directory with reference model or a web link."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--reference_class"
|
||||
description: "For legacy models; the type of model (where the type of model was\
|
||||
\ not saved with it; e.g. when they were generated with scvi-tools versions\
|
||||
\ < 0.15)."
|
||||
info: null
|
||||
example:
|
||||
- "SCANVI"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Outputs"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
alternatives:
|
||||
- "-o"
|
||||
description: "Output h5mu file."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--model_output"
|
||||
description: "Output directory for model"
|
||||
info: null
|
||||
default:
|
||||
- "model"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obsm_output"
|
||||
description: "In which .obsm slot to store the resulting integrated embedding."
|
||||
info: null
|
||||
default:
|
||||
- "X_integrated_scanvi"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_output_predictions"
|
||||
description: "In which .obs slot to store the resulting label predictions. Only\
|
||||
\ relevant if a scANVI model was provided."
|
||||
info: null
|
||||
default:
|
||||
- "scanvi_pred"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_output_probabilities"
|
||||
description: "In which .obs slot to store the probabilities of the label predictions.\
|
||||
\ Only relevant if a scANVI model was provided."
|
||||
info: null
|
||||
default:
|
||||
- "scanvi_proba"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
|
||||
By default no compression is applied.\n"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
required: false
|
||||
choices:
|
||||
- "gzip"
|
||||
- "lzf"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Early stopping arguments"
|
||||
arguments:
|
||||
- type: "boolean"
|
||||
name: "--early_stopping"
|
||||
description: "Whether to perform early stopping with respect to the validation\
|
||||
\ set."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--early_stopping_monitor"
|
||||
description: "Metric logged during validation set epoch."
|
||||
info: null
|
||||
default:
|
||||
- "elbo_validation"
|
||||
required: false
|
||||
choices:
|
||||
- "elbo_validation"
|
||||
- "reconstruction_loss_validation"
|
||||
- "kl_local_validation"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--early_stopping_patience"
|
||||
description: "Number of validation epochs with no improvement after which training\
|
||||
\ will be stopped."
|
||||
info: null
|
||||
default:
|
||||
- 45
|
||||
required: false
|
||||
min: 1
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--early_stopping_min_delta"
|
||||
description: "Minimum change in the monitored quantity to qualify as an improvement,\
|
||||
\ i.e. an absolute change of less than min_delta, will count as no improvement."
|
||||
info: null
|
||||
default:
|
||||
- 0.0
|
||||
required: false
|
||||
min: 0.0
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Learning parameters"
|
||||
arguments:
|
||||
- type: "integer"
|
||||
name: "--max_epochs"
|
||||
description: "Number of passes through the dataset, defaults to (20000 / number\
|
||||
\ of cells) * 400 or 400; whichever is smallest."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean"
|
||||
name: "--reduce_lr_on_plateau"
|
||||
description: "Whether to monitor validation loss and reduce learning rate when\
|
||||
\ validation set `lr_scheduler_metric` plateaus."
|
||||
info: null
|
||||
default:
|
||||
- true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--lr_factor"
|
||||
description: "Factor to reduce learning rate."
|
||||
info: null
|
||||
default:
|
||||
- 0.6
|
||||
required: false
|
||||
min: 0.0
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--lr_patience"
|
||||
description: "Number of epochs with no improvement after which learning rate will\
|
||||
\ be reduced."
|
||||
info: null
|
||||
default:
|
||||
- 30.0
|
||||
required: false
|
||||
min: 0.0
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "setup_logger.py"
|
||||
- type: "file"
|
||||
path: "compress_h5mu.py"
|
||||
- type: "file"
|
||||
path: "set_var_index.py"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Performs reference mapping with scArches"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "scanvi_model"
|
||||
- type: "file"
|
||||
path: "scvi_model"
|
||||
- type: "file"
|
||||
path: "pbmc_1k_protein_v3_mms.h5mu"
|
||||
- type: "file"
|
||||
path: "HLCA_reference_model.zip"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
label:
|
||||
- "highmem"
|
||||
- "highcpu"
|
||||
- "highdisk"
|
||||
- "gpu"
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "nvcr.io/nvidia/pytorch:25.05-py3"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.11.1"
|
||||
- "mudata~=0.3.1"
|
||||
- "jax[cuda]"
|
||||
- "scvi-tools~=1.3.1"
|
||||
script:
|
||||
- "exec(\"try:\\n import awkward\\nexcept ModuleNotFoundError:\\n exit(0)\\\
|
||||
nelse: exit(1)\")"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/integrate/scarches/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/integrate/scarches"
|
||||
executable: "target/executable/integrate/scarches/scarches"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
git_tag: "0.2.0-2055-g173327cc"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "main"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
\nIn terms of workflows, the following has been made available, but keep in mind\
|
||||
\ that\nindividual tools and functionality can be executed as standalone components\
|
||||
\ as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
|
||||
\ * Ingestion: Read mapping and generating a count matrix.\n * Single sample\
|
||||
\ processing: cell filtering and doublet detection.\n * Multisample processing:\
|
||||
\ Count transformation, normalization, QC metric calculations.\n * Integration:\
|
||||
\ Clustering, integration and batch correction using single and multimodal methods.\n\
|
||||
\ * Downstream analysis workflows\n"
|
||||
info:
|
||||
test_resources:
|
||||
- type: "s3"
|
||||
path: "s3://openpipelines-data"
|
||||
dest: "resources_test"
|
||||
nextflow_labels_ci:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
|
||||
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
|
||||
)'\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
homepage: "https://openpipelines.bio"
|
||||
documentation: "https://openpipelines.bio/fundamentals"
|
||||
issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
|
||||
87
target/executable/integrate/scarches/compress_h5mu.py
Normal file
87
target/executable/integrate/scarches/compress_h5mu.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import shutil
|
||||
from anndata import AnnData
|
||||
from mudata import write_h5ad
|
||||
from h5py import File as H5File
|
||||
from h5py import Group, Dataset
|
||||
from pathlib import Path
|
||||
from typing import Union, Literal
|
||||
from functools import partial
|
||||
|
||||
|
||||
def compress_h5mu(
    input_path: Union[str, Path],
    output_path: Union[str, Path],
    compression: Union[Literal["gzip"], Literal["lzf"]],
):
    """Write a copy of an h5mu file in which datasets are compressed.

    Every group and dataset of the input HDF5 file is visited and recreated
    in the output file, together with its attributes. Datasets that are not
    yet compressed and are not scalar are recreated with ``compression``;
    all other datasets are copied unchanged. Finally, the textual metadata
    found at the very start of the input file is copied into the 512-byte
    user block of the output file.

    Parameters
    ----------
    input_path
        Path of the h5mu file to read.
    output_path
        Path of the h5mu file to create (opened in ``"w"`` mode).
    compression
        Compression filter to apply, ``"gzip"`` or ``"lzf"``.

    Raises
    ------
    NotImplementedError
        When an element of the input file is neither a group nor a dataset.
    """
    input_path, output_path = str(input_path), str(output_path)
    # Size in bytes of the user block reserved at the start of the output file.
    userblock_size = 512

    def copy_attributes(in_object, out_object):
        # Copy all HDF5 attributes from one object to another.
        for key, value in in_object.attrs.items():
            out_object.attrs[key] = value

    def visit_path(
        output_h5: H5File,
        compression: Union[Literal["gzip"], Literal["lzf"]],
        name: str,
        node: Union[Group, Dataset],
    ):
        # Callback for ``visititems``: recreate ``node`` under ``name`` in the output.
        if isinstance(node, Group):
            new_group = output_h5.create_group(name)
            copy_attributes(node, new_group)
        elif isinstance(node, Dataset):
            # Compression only works for non-scalar Dataset objects.
            # Scalar objects don't have a shape defined.
            if not node.compression and node.shape not in [None, ()]:
                new_dataset = output_h5.create_dataset(
                    name, data=node, compression=compression
                )
                copy_attributes(node, new_dataset)
            else:
                # Already compressed or scalar: copy as-is.
                output_h5.copy(node, name)
        else:
            raise NotImplementedError(
                f"Could not copy element {name}, "
                f"type has not been implemented yet: {type(node)}"
            )

    with (
        H5File(input_path, "r") as input_h5,
        H5File(output_path, "w", userblock_size=userblock_size) as output_h5,
    ):
        copy_attributes(input_h5, output_h5)
        input_h5.visititems(partial(visit_path, output_h5, compression))

    with open(input_path, "rb") as input_bytes:
        # Mudata puts metadata like this in the first 512 bytes:
        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
        # See mudata/_core/io.py, read_h5mu() function
        # Read the whole user block so long metadata strings are not cut off.
        starting_metadata = input_bytes.read(userblock_size)

    # The metadata is padded with extra null bytes up until 512 bytes.
    # ``find`` returns -1 when there is no null byte; slicing with -1 would
    # silently drop the last metadata byte, so only truncate when one is found.
    truncate_location = starting_metadata.find(b"\x00")
    if truncate_location != -1:
        starting_metadata = starting_metadata[:truncate_location]

    with open(output_path, "br+") as f:
        nbytes = f.write(starting_metadata)
        # Pad the remainder of the user block with null bytes.
        f.write(b"\0" * (userblock_size - nbytes))
|
||||
|
||||
|
||||
def write_h5ad_to_h5mu_with_compression(
    output_file: Union[str, Path],
    h5mu: Union[str, Path],
    modality_name: str,
    modality_data: AnnData,
    output_compression=None,
):
    """Copy an h5mu file and overwrite one modality, optionally compressing.

    The input file ``h5mu`` is copied, the modality ``modality_name`` in the
    copy is written from ``modality_data`` with ``write_h5ad``, and, when
    ``output_compression`` is set, the result is passed through
    ``compress_h5mu`` to produce ``output_file``.

    Parameters
    ----------
    output_file
        Path of the file to create.
    h5mu
        Path of the existing h5mu file that serves as the starting point.
    modality_name
        Name of the modality to write into the copy.
    modality_data
        Data to store under ``modality_name``.
    output_compression
        Compression passed on to ``compress_h5mu``. When falsy, no
        compression step is performed and the copy is written directly to
        ``output_file``.
    """
    output_file = Path(output_file)
    h5mu = Path(h5mu)

    if not output_compression:
        # No compression requested: work directly on the final output file.
        shutil.copyfile(h5mu, output_file)
        write_h5ad(filename=output_file, mod=modality_name, data=modality_data)
        return

    # With compression, the modality is first written to an intermediate
    # file located next to the requested output.
    output_file_uncompressed = output_file.with_name(
        output_file.stem + "_uncompressed.h5mu"
    )
    # The copy happens outside of the try block on purpose: if it fails
    # (for instance because source and destination are the same file), the
    # cleanup below must not delete anything.
    shutil.copyfile(h5mu, output_file_uncompressed)
    try:
        write_h5ad(
            filename=output_file_uncompressed, mod=modality_name, data=modality_data
        )
        compress_h5mu(
            output_file_uncompressed, output_file, compression=output_compression
        )
    finally:
        # Always remove the intermediate file, also when writing or
        # compressing raised, so no stray temporary file is left behind.
        output_file_uncompressed.unlink(missing_ok=True)
|
||||
48
target/executable/integrate/scarches/nextflow_labels.config
Normal file
48
target/executable/integrate/scarches/nextflow_labels.config
Normal file
@@ -0,0 +1,48 @@
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
|
||||
// The memory a task is assigned increases with each attempt
|
||||
// uncomment the line below and adjust the value to set a global upper limit on the memory.
|
||||
// resourceLimits = [ memory: 240.Gb ]
|
||||
|
||||
// CPU resources
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
|
||||
// Memory resources
|
||||
withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
|
||||
withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
|
||||
withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
|
||||
withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
|
||||
|
||||
// Disk space
|
||||
withLabel: lowdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: middisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: highdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: veryhighdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
|
||||
// NOTE: The above labels intentionally do not have an effect by default.
|
||||
// The user should set the disk space requirements by adding the following
|
||||
// to the compute environment:
|
||||
//
|
||||
// withLabel: lowdisk { disk = { 20.GB * task.attempt } }
|
||||
// withLabel: middisk { disk = { 100.GB * task.attempt } }
|
||||
// withLabel: highdisk { disk = { 200.GB * task.attempt } }
|
||||
// withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
|
||||
}
|
||||
2018
target/executable/integrate/scarches/scarches
Executable file
2018
target/executable/integrate/scarches/scarches
Executable file
File diff suppressed because it is too large
Load Diff
24
target/executable/integrate/scarches/set_var_index.py
Normal file
24
target/executable/integrate/scarches/set_var_index.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import anndata as ad
|
||||
import re
|
||||
|
||||
|
||||
def set_var_index(adata: ad.AnnData, var_name: str | None = None) -> ad.AnnData:
    """Sanitize gene names and set the index of the .var DataFrame.

    A trailing ``.<digits>`` suffix is removed from every name, e.g.
    ``"ENSG00000139618.2"`` becomes ``"ENSG00000139618"``.

    Parameters
    ----------
    adata : AnnData
        Annotated data object. Its ``.var`` index is modified in place.
    var_name : str | None
        Name of the column in `adata.var` that contains the gene names, if None
        (or empty), the existing index will be sanitized but not replaced.

    Returns
    -------
    AnnData
        The same `adata` object that was passed in (not a copy), with the
        sanitized index assigned to `adata.var.index`.
    """
    # Compile once per call instead of re-resolving the pattern for every name.
    trailing_number = re.compile(r"\.[0-9]+$")
    names = adata.var[var_name] if var_name else adata.var.index
    adata.var.index = [trailing_number.sub("", name) for name in names]
    return adata
|
||||
12
target/executable/integrate/scarches/setup_logger.py
Normal file
12
target/executable/integrate/scarches/setup_logger.py
Normal file
@@ -0,0 +1,12 @@
|
||||
def setup_logger():
    """Configure the root logger to log INFO messages to stdout.

    The root logger's level is set to ``INFO`` and a stream handler writing
    to ``sys.stdout`` is attached, using the format
    ``"%(asctime)s %(levelname)-8s %(message)s"``.

    The function is idempotent: when the root logger already has a stream
    handler bound to the current ``sys.stdout``, no additional handler is
    added, so calling it repeatedly does not duplicate log lines.

    Returns
    -------
    logging.Logger
        The root logger.
    """
    import logging
    from sys import stdout

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Only attach a handler when none is already writing to this stdout.
    has_stdout_handler = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not has_stdout_handler:
        console_handler = logging.StreamHandler(stdout)
        log_formatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(log_formatter)
        logger.addHandler(console_handler)

    return logger
|
||||
638
target/executable/integrate/scvi/.config.vsh.yaml
Normal file
638
target/executable/integrate/scvi/.config.vsh.yaml
Normal file
@@ -0,0 +1,638 @@
|
||||
name: "scvi"
|
||||
namespace: "integrate"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Malte D. Luecken"
|
||||
roles:
|
||||
- "author"
|
||||
info:
|
||||
role: "Core Team Member"
|
||||
links:
|
||||
email: "malte.luecken@helmholtz-muenchen.de"
|
||||
github: "LuckyMD"
|
||||
orcid: "0000-0001-7464-7921"
|
||||
linkedin: "malte-l%C3%BCcken-b8b21049"
|
||||
twitter: "MDLuecken"
|
||||
organizations:
|
||||
- name: "Helmholtz Munich"
|
||||
href: "https://www.helmholtz-munich.de"
|
||||
role: "Group Leader"
|
||||
- name: "Open Problems"
|
||||
href: "https://openproblems.bio"
|
||||
role: "Core Member"
|
||||
- name: "Dries Schaumont"
|
||||
roles:
|
||||
- "maintainer"
|
||||
info:
|
||||
role: "Core Team Member"
|
||||
links:
|
||||
email: "dries@data-intuitive.com"
|
||||
github: "DriesSchaumont"
|
||||
orcid: "0000-0002-4389-0440"
|
||||
linkedin: "dries-schaumont"
|
||||
organizations:
|
||||
- name: "Data Intuitive"
|
||||
href: "https://www.data-intuitive.com"
|
||||
role: "Data Scientist"
|
||||
- name: "Matthias Beyens"
|
||||
roles:
|
||||
- "contributor"
|
||||
info:
|
||||
role: "Contributor"
|
||||
links:
|
||||
github: "MatthiasBeyens"
|
||||
orcid: "0000-0003-3304-0706"
|
||||
email: "matthias.beyens@gmail.com"
|
||||
linkedin: "mbeyens"
|
||||
organizations:
|
||||
- name: "Janssen Pharmaceuticals"
|
||||
href: "https://www.janssen.com"
|
||||
role: "Principal Scientist"
|
||||
argument_groups:
|
||||
- name: "Inputs"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
alternatives:
|
||||
- "-i"
|
||||
description: "Input h5mu file"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--modality"
|
||||
description: "Which modality from the input MuData file to process.\n"
|
||||
info: null
|
||||
default:
|
||||
- "rna"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_layer"
|
||||
description: "Input layer to use. If None, X is used"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_batch"
|
||||
description: "Column name discriminating between your batches."
|
||||
info: null
|
||||
default:
|
||||
- "sample_id"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--var_gene_names"
|
||||
description: ".var column containing gene names. By default, use the index."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--var_input"
|
||||
description: ".var column containing highly variable genes. By default, do not\
|
||||
\ subset genes."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_labels"
|
||||
description: "Key in adata.obs for label information. Categories will automatically\
|
||||
\ be \nconverted into integer categories and saved to adata.obs['_scvi_labels'].\n\
|
||||
If None, assigns the same label to all the data.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_size_factor"
|
||||
description: "Key in adata.obs for size factor information. Instead of using library\
|
||||
\ size as a size factor,\nthe provided size factor column will be used as offset\
|
||||
\ in the mean of the likelihood.\nAssumed to be on linear scale.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_categorical_covariate"
|
||||
description: "Keys in adata.obs that correspond to categorical data. These covariates\
|
||||
\ can be added in\naddition to the batch covariate and are also treated as nuisance\
|
||||
\ factors\n(i.e., the model tries to minimize their effects on the latent space).\n\
|
||||
Thus, these should not be used for biologically-relevant factors that you do\
|
||||
\ _not_ want to correct for.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_continuous_covariate"
|
||||
description: "Keys in adata.obs that correspond to continuous data. These covariates\
|
||||
\ can be added in\naddition to the batch covariate and are also treated as nuisance\
|
||||
\ factors\n(i.e., the model tries to minimize their effects on the latent space).\
|
||||
\ Thus, these should not be\nused for biologically-relevant factors that you\
|
||||
\ do _not_ want to correct for.\n"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ";"
|
||||
- name: "Outputs"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
alternatives:
|
||||
- "-o"
|
||||
description: "Output h5mu file."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--output_model"
|
||||
description: "Folder where the state of the trained model will be saved to."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obsm_output"
|
||||
description: "In which .obsm slot to store the resulting integrated embedding."
|
||||
info: null
|
||||
default:
|
||||
- "X_scvi_integrated"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--output_compression"
|
||||
description: "Compression format to use for the output AnnData and/or Mudata objects.\n\
|
||||
By default no compression is applied.\n"
|
||||
info: null
|
||||
example:
|
||||
- "gzip"
|
||||
required: false
|
||||
choices:
|
||||
- "gzip"
|
||||
- "lzf"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "SCVI options"
|
||||
arguments:
|
||||
- type: "integer"
|
||||
name: "--n_hidden_nodes"
|
||||
description: "Number of nodes per hidden layer."
|
||||
info: null
|
||||
default:
|
||||
- 128
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--n_dimensions_latent_space"
|
||||
description: "Dimensionality of the latent space."
|
||||
info: null
|
||||
default:
|
||||
- 30
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--n_hidden_layers"
|
||||
description: "Number of hidden layers used for encoder and decoder neural-networks."
|
||||
info: null
|
||||
default:
|
||||
- 2
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--dropout_rate"
|
||||
description: "Dropout rate for the neural networks."
|
||||
info: null
|
||||
default:
|
||||
- 0.1
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--dispersion"
|
||||
description: "Set the behavior for the dispersion for negative binomial distributions:\n\
|
||||
- gene: dispersion parameter of negative binomial is constant per gene across\
|
||||
\ cells\n- gene-batch: dispersion can differ between different batches\n- gene-label:\
|
||||
\ dispersion can differ between different labels\n- gene-cell: dispersion can\
|
||||
\ differ for every gene in every cell\n"
|
||||
info: null
|
||||
default:
|
||||
- "gene"
|
||||
required: false
|
||||
choices:
|
||||
- "gene"
|
||||
- "gene-batch"
|
||||
- "gene-label"
|
||||
- "gene-cell"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--gene_likelihood"
|
||||
description: "Model used to generate the expression data from a count-based likelihood\
|
||||
\ distribution.\n- nb: Negative binomial distribution\n- zinb: Zero-inflated\
|
||||
\ negative binomial distribution\n- poisson: Poisson distribution\n"
|
||||
info: null
|
||||
default:
|
||||
- "nb"
|
||||
required: false
|
||||
choices:
|
||||
- "nb"
|
||||
- "zinb"
|
||||
- "poisson"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Variational auto-encoder model options"
|
||||
arguments:
|
||||
- type: "string"
|
||||
name: "--use_layer_normalization"
|
||||
description: "Neural networks for which to enable layer normalization. \n"
|
||||
info: null
|
||||
default:
|
||||
- "both"
|
||||
required: false
|
||||
choices:
|
||||
- "encoder"
|
||||
- "decoder"
|
||||
- "none"
|
||||
- "both"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--use_batch_normalization"
|
||||
description: "Neural networks for which to enable batch normalization. \n"
|
||||
info: null
|
||||
default:
|
||||
- "none"
|
||||
required: false
|
||||
choices:
|
||||
- "encoder"
|
||||
- "decoder"
|
||||
- "none"
|
||||
- "both"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean_false"
|
||||
name: "--encode_covariates"
|
||||
description: "Whether to concatenate covariates to expression in encoder"
|
||||
info: null
|
||||
direction: "input"
|
||||
- type: "boolean_true"
|
||||
name: "--deeply_inject_covariates"
|
||||
description: "Whether to concatenate covariates into output of hidden layers in\
|
||||
\ encoder/decoder. \nThis option only applies when n_layers > 1. The covariates\
|
||||
\ are concatenated to\nthe input of subsequent hidden layers.\n"
|
||||
info: null
|
||||
direction: "input"
|
||||
- type: "boolean_true"
|
||||
name: "--use_observed_lib_size"
|
||||
description: "Use observed library size for RNA as scaling factor in mean of conditional\
|
||||
\ distribution.\n"
|
||||
info: null
|
||||
direction: "input"
|
||||
- name: "Early stopping arguments"
|
||||
arguments:
|
||||
- type: "boolean"
|
||||
name: "--early_stopping"
|
||||
description: "Whether to perform early stopping with respect to the validation\
|
||||
\ set."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--early_stopping_monitor"
|
||||
description: "Metric logged during validation set epoch."
|
||||
info: null
|
||||
default:
|
||||
- "elbo_validation"
|
||||
required: false
|
||||
choices:
|
||||
- "elbo_validation"
|
||||
- "reconstruction_loss_validation"
|
||||
- "kl_local_validation"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--early_stopping_patience"
|
||||
description: "Number of validation epochs with no improvement after which training\
|
||||
\ will be stopped."
|
||||
info: null
|
||||
default:
|
||||
- 45
|
||||
required: false
|
||||
min: 1
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--early_stopping_min_delta"
|
||||
description: "Minimum change in the monitored quantity to qualify as an improvement,\
|
||||
\ i.e. an absolute change of less than min_delta, will count as no improvement."
|
||||
info: null
|
||||
default:
|
||||
- 0.0
|
||||
required: false
|
||||
min: 0.0
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Learning parameters"
|
||||
arguments:
|
||||
- type: "integer"
|
||||
name: "--max_epochs"
|
||||
description: "Number of passes through the dataset, defaults to (20000 / number\
|
||||
\ of cells) * 400 or 400; whichever is smallest."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean"
|
||||
name: "--reduce_lr_on_plateau"
|
||||
description: "Whether to monitor validation loss and reduce learning rate when\
|
||||
\ validation set `lr_scheduler_metric` plateaus."
|
||||
info: null
|
||||
default:
|
||||
- true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--lr_factor"
|
||||
description: "Factor to reduce learning rate."
|
||||
info: null
|
||||
default:
|
||||
- 0.6
|
||||
required: false
|
||||
min: 0.0
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--lr_patience"
|
||||
description: "Number of epochs with no improvement after which learning rate will\
|
||||
\ be reduced."
|
||||
info: null
|
||||
default:
|
||||
- 30.0
|
||||
required: false
|
||||
min: 0.0
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Data validation"
|
||||
arguments:
|
||||
- type: "integer"
|
||||
name: "--n_obs_min_count"
|
||||
description: "Minimum number of cells threshold ensuring that every obs_batch\
|
||||
\ category has sufficient observations (cells) for model training."
|
||||
info: null
|
||||
default:
|
||||
- 0
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--n_var_min_count"
|
||||
description: "Minimum number of genes threshold ensuring that every var_input\
|
||||
\ filter has sufficient observations (genes) for model training."
|
||||
info: null
|
||||
default:
|
||||
- 0
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "subset_vars.py"
|
||||
- type: "file"
|
||||
path: "compress_h5mu.py"
|
||||
- type: "file"
|
||||
path: "set_var_index.py"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Performs scvi integration as done in the human lung cell atlas https://github.com/LungCellAtlas/HLCA"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "pbmc_1k_protein_v3_mms.h5mu"
|
||||
- type: "file"
|
||||
path: "TS_Blood_filtered.h5mu"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
label:
|
||||
- "midcpu"
|
||||
- "midmem"
|
||||
- "gpu"
|
||||
- "highdisk"
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "nvcr.io/nvidia/pytorch:25.05-py3"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.11.1"
|
||||
- "mudata~=0.3.1"
|
||||
- "scanpy~=1.10.4"
|
||||
script:
|
||||
- "exec(\"try:\\n import awkward\\nexcept ModuleNotFoundError:\\n exit(0)\\\
|
||||
nelse: exit(1)\")"
|
||||
upgrade: true
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "jax[cuda]"
|
||||
- "scvi-tools~=1.3.1"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/integrate/scvi/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/integrate/scvi"
|
||||
executable: "target/executable/integrate/scvi/scvi"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
git_tag: "0.2.0-2055-g173327cc"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "main"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
\nIn terms of workflows, the following has been made available, but keep in mind\
|
||||
\ that\nindividual tools and functionality can be executed as standalone components\
|
||||
\ as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
|
||||
\ * Ingestion: Read mapping and generating a count matrix.\n * Single sample\
|
||||
\ processing: cell filtering and doublet detection.\n * Multisample processing:\
|
||||
\ Count transformation, normalization, QC metric calculations.\n * Integration:\
|
||||
\ Clustering, integration and batch correction using single and multimodal methods.\n\
|
||||
\ * Downstream analysis workflows\n"
|
||||
info:
|
||||
test_resources:
|
||||
- type: "s3"
|
||||
path: "s3://openpipelines-data"
|
||||
dest: "resources_test"
|
||||
nextflow_labels_ci:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
|
||||
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
|
||||
)'\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
homepage: "https://openpipelines.bio"
|
||||
documentation: "https://openpipelines.bio/fundamentals"
|
||||
issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
|
||||
87
target/executable/integrate/scvi/compress_h5mu.py
Normal file
87
target/executable/integrate/scvi/compress_h5mu.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import shutil
|
||||
from anndata import AnnData
|
||||
from mudata import write_h5ad
|
||||
from h5py import File as H5File
|
||||
from h5py import Group, Dataset
|
||||
from pathlib import Path
|
||||
from typing import Union, Literal
|
||||
from functools import partial
|
||||
|
||||
|
||||
def compress_h5mu(
    input_path: Union[str, Path],
    output_path: Union[str, Path],
    compression: Union[Literal["gzip"], Literal["lzf"]],
):
    """Write a compressed copy of an h5mu file.

    Every group and dataset of the input file is recreated in the output
    file, together with its attributes. Datasets that are not yet compressed
    and that have a non-scalar shape are rewritten with `compression`; all
    other datasets are copied unchanged. Finally, the leading metadata bytes
    of the input file are written into the 512-byte user block of the output
    file.

    Parameters
    ----------
    input_path : str | Path
        Location of the h5mu file to read.
    output_path : str | Path
        Location of the h5mu file to create (opened in "w" mode).
    compression : "gzip" | "lzf"
        Compression filter passed to h5py when datasets are re-created.

    Raises
    ------
    NotImplementedError
        When the input file contains an element that is neither a group nor
        a dataset.
    """
    input_path, output_path = str(input_path), str(output_path)

    # Size of the user block reserved at the start of the output file.
    userblock_size = 512
    # Number of leading bytes inspected to find the metadata string.
    header_probe_size = 100

    def copy_attributes(in_object, out_object):
        # Mirror all HDF5 attributes of one object onto another.
        for key, value in in_object.attrs.items():
            out_object.attrs[key] = value

    def visit_path(
        output_h5: H5File,
        compression: Union[Literal["gzip"], Literal["lzf"]],
        name: str,
        node: Union[Group, Dataset],
    ):
        # Callback for `visititems`: replicate one element in the output file.
        if isinstance(node, Group):
            new_group = output_h5.create_group(name)
            copy_attributes(node, new_group)
        elif isinstance(node, Dataset):
            # Compression only works for non-scalar Dataset objects
            # Scalar objects dont have a shape defined
            if not node.compression and node.shape not in [None, ()]:
                new_dataset = output_h5.create_dataset(
                    name, data=node, compression=compression
                )
                copy_attributes(node, new_dataset)
            else:
                output_h5.copy(node, name)
        else:
            raise NotImplementedError(
                f"Could not copy element {name}, "
                f"type has not been implemented yet: {type(node)}"
            )

    with (
        H5File(input_path, "r") as input_h5,
        H5File(output_path, "w", userblock_size=userblock_size) as output_h5,
    ):
        copy_attributes(input_h5, output_h5)
        input_h5.visititems(partial(visit_path, output_h5, compression))

    with open(input_path, "rb") as input_bytes:
        # Mudata puts metadata like this in the first 512 bytes:
        # MuData (format-version=0.1.0;creator=muon;creator-version=0.2.0)
        # See mudata/_core/io.py, read_h5mu() function
        probed = input_bytes.read(header_probe_size)
    # The metadata is padded with extra null bytes up until 512 bytes.
    # Keep everything before the first null byte; when no null byte is
    # present in the probed bytes, keep them all (slicing with the -1
    # returned by `find` would silently drop the last byte).
    starting_metadata = probed.split(b"\x00", 1)[0]

    with open(output_path, "br+") as f:
        nbytes = f.write(starting_metadata)
        f.write(b"\0" * (userblock_size - nbytes))
|
||||
|
||||
|
||||
def write_h5ad_to_h5mu_with_compression(
    output_file: Union[str, Path],
    h5mu: Union[str, Path],
    modality_name: str,
    modality_data: AnnData,
    output_compression=None,
):
    """Copy an h5mu file and overwrite one modality in the copy, optionally compressing it.

    The file at `h5mu` is copied, after which `modality_data` is written to
    the copy under `modality_name` using mudata's `write_h5ad`. Without
    compression the copy is made directly at `output_file`. With compression
    the copy is first made next to `output_file` as
    ``<stem>_uncompressed.h5mu``, compressed into `output_file` with
    `compress_h5mu`, and then removed.

    Parameters
    ----------
    output_file : str | Path
        Location of the resulting h5mu file.
    h5mu : str | Path
        Location of the h5mu file to start from.
    modality_name : str
        Name of the modality to write.
    modality_data : AnnData
        Data to store for that modality.
    output_compression : optional
        Compression passed on to `compress_h5mu`; when falsy, no compression
        is applied.
    """
    output_file = Path(output_file)
    h5mu = Path(h5mu)

    if not output_compression:
        shutil.copyfile(h5mu, output_file)
        write_h5ad(filename=output_file, mod=modality_name, data=modality_data)
        return

    output_file_uncompressed = output_file.with_name(
        output_file.stem + "_uncompressed.h5mu"
    )
    # The copy is made outside of the try block on purpose: when it fails
    # (for instance because source and destination are the same file), the
    # cleanup below must not delete anything.
    shutil.copyfile(h5mu, output_file_uncompressed)
    try:
        write_h5ad(
            filename=output_file_uncompressed, mod=modality_name, data=modality_data
        )
        compress_h5mu(
            output_file_uncompressed, output_file, compression=output_compression
        )
    finally:
        # Always remove the intermediate file, also when writing or
        # compressing raised, so that no temporary copy is left behind.
        output_file_uncompressed.unlink(missing_ok=True)
|
||||
48
target/executable/integrate/scvi/nextflow_labels.config
Normal file
48
target/executable/integrate/scvi/nextflow_labels.config
Normal file
@@ -0,0 +1,48 @@
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
|
||||
// The memory a task is assigned increases with each attempt
|
||||
// uncomment the line below and adjust the value to set a global upper limit on the memory.
|
||||
// resourceLimits = [ memory: 240.Gb ]
|
||||
|
||||
// CPU resources
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
|
||||
// Memory resources
|
||||
withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
|
||||
withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
|
||||
withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
|
||||
withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
|
||||
|
||||
// Disk space
|
||||
withLabel: lowdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: middisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: highdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: veryhighdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
|
||||
// NOTE: The above labels intentionally do not have an effect by default.
|
||||
// The user should set the disk space requirements by adding the following
|
||||
// to the compute environment:
|
||||
//
|
||||
// withLabel: lowdisk { disk = { 20.GB * task.attempt } }
|
||||
// withLabel: middisk { disk = { 100.GB * task.attempt } }
|
||||
// withLabel: highdisk { disk = { 200.GB * task.attempt } }
|
||||
// withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
|
||||
}
|
||||
2142
target/executable/integrate/scvi/scvi
Executable file
2142
target/executable/integrate/scvi/scvi
Executable file
File diff suppressed because it is too large
Load Diff
24
target/executable/integrate/scvi/set_var_index.py
Normal file
24
target/executable/integrate/scvi/set_var_index.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import anndata as ad
|
||||
import re
|
||||
|
||||
|
||||
def set_var_index(adata: ad.AnnData, var_name: str | None = None) -> ad.AnnData:
    """Strip trailing ``.<digits>`` suffixes from gene names and use them as the .var index.

    Parameters
    ----------
    adata : AnnData
        Annotated data object. Its ``.var`` index is overwritten in place.
    var_name : str | None
        Name of the column in `adata.var` holding the gene names. When it is
        None (or any other falsy value), the names are taken from the existing
        ``.var`` index instead.

    Returns
    -------
    AnnData
        The same `adata` object that was passed in (not a copy), with the
        sanitized names set as the index of ``.var``.
    """
    # Pick where the names come from first, then sanitize them in a single pass.
    raw_names = adata.var[var_name] if var_name else adata.var.index
    adata.var.index = [re.sub("\\.[0-9]+$", "", name) for name in raw_names]
    return adata
|
||||
31
target/executable/integrate/scvi/subset_vars.py
Normal file
31
target/executable/integrate/scvi/subset_vars.py
Normal file
@@ -0,0 +1,31 @@
|
||||
def subset_vars(adata, subset_col):
    """Keep only the features flagged in a boolean .var column.

    Parameters
    ----------
    adata : AnnData
        Annotated data object. When the selection column has the nullable
        "boolean" dtype, it is converted to plain `bool` in `adata.var`.
    subset_col : str
        Name of the boolean column in `adata.var` that marks which features
        should be kept.

    Returns
    -------
    AnnData
        Copy of `adata` restricted to the features for which the column is True.

    Raises
    ------
    ValueError
        When `subset_col` is not a column of `adata.var`.
    AssertionError
        When the column contains missing values or does not have a boolean dtype.
    """
    # NOTE: `adata.var` is looked up again for every access instead of being
    # cached in a local, so each read happens after the in-place assignment below.
    if subset_col not in adata.var.columns:
        raise ValueError(
            f"Requested to use .var column '{subset_col}' as a selection of genes, but the column is not available."
        )

    # Nullable boolean columns are turned into plain bool, provided there
    # are no missing entries.
    if adata.var[subset_col].dtype == "boolean":
        n_missing = adata.var[subset_col].isna().sum()
        assert n_missing == 0, (
            f"The .var column `{subset_col}` contains NaN values. Can not subset data."
        )
        adata.var[subset_col] = adata.var[subset_col].astype("bool")

    found_dtype = adata.var[subset_col].dtype
    assert found_dtype == "bool", (
        f"Expected dtype of .var column '{subset_col}' to be `bool`, but found {found_dtype}. Can not subset data."
    )

    selected = adata.var[subset_col]
    return adata[:, selected].copy()
|
||||
395
target/executable/integrate/totalvi/.config.vsh.yaml
Normal file
395
target/executable/integrate/totalvi/.config.vsh.yaml
Normal file
@@ -0,0 +1,395 @@
|
||||
name: "totalvi"
|
||||
namespace: "integrate"
|
||||
version: "main"
|
||||
authors:
|
||||
- name: "Vladimir Shitov"
|
||||
info:
|
||||
role: "Contributor"
|
||||
links:
|
||||
email: "vladimir.shitov@helmholtz-muenchen.de"
|
||||
github: "vladimirshitov"
|
||||
orcid: "0000-0002-1960-8812"
|
||||
linkedin: "vladimir-shitov-9a659513b"
|
||||
organizations:
|
||||
- name: "Helmholtz Munich"
|
||||
href: "https://www.helmholtz-munich.de"
|
||||
role: "PhD Candidate"
|
||||
argument_groups:
|
||||
- name: "Inputs"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--input"
|
||||
alternatives:
|
||||
- "-i"
|
||||
description: "Input h5mu file with query data to integrate with reference."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--reference"
|
||||
alternatives:
|
||||
- "-r"
|
||||
description: "Input h5mu file with reference data to train the TOTALVI model."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "boolean_true"
|
||||
name: "--force_retrain"
|
||||
alternatives:
|
||||
- "-f"
|
||||
description: "If true, retrain the model and save it to reference_model_path"
|
||||
info: null
|
||||
direction: "input"
|
||||
- type: "string"
|
||||
name: "--query_modality"
|
||||
info: null
|
||||
default:
|
||||
- "rna"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--query_proteins_modality"
|
||||
description: "Name of the modality in the input (query) h5mu file containing protein\
|
||||
\ data"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--reference_modality"
|
||||
info: null
|
||||
default:
|
||||
- "rna"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--reference_proteins_modality"
|
||||
description: "Name of the modality containing proteins in the reference"
|
||||
info: null
|
||||
default:
|
||||
- "prot"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--input_layer"
|
||||
description: "Input layer to use. If None, X is used"
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obs_batch"
|
||||
description: "Column name discriminating between your batches."
|
||||
info: null
|
||||
default:
|
||||
- "sample_id"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--var_input"
|
||||
description: ".var column containing highly variable genes. By default, do not\
|
||||
\ subset genes."
|
||||
info: null
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Outputs"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--output"
|
||||
alternatives:
|
||||
- "-o"
|
||||
description: "Output h5mu file."
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: true
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obsm_output"
|
||||
description: "In which .obsm slot to store the resulting integrated embedding."
|
||||
info: null
|
||||
default:
|
||||
- "X_integrated_totalvi"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obsm_normalized_rna_output"
|
||||
description: "In which .obsm slot to store the normalized RNA from TOTALVI."
|
||||
info: null
|
||||
default:
|
||||
- "X_totalvi_normalized_rna"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--obsm_normalized_protein_output"
|
||||
description: "In which .obsm slot to store the normalized protein data from TOTALVI."
|
||||
info: null
|
||||
default:
|
||||
- "X_totalvi_normalized_protein"
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--reference_model_path"
|
||||
description: "Directory with the reference model. If it does not exist, the trained model\
|
||||
\ will be saved there"
|
||||
info: null
|
||||
default:
|
||||
- "totalvi_model_reference"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--query_model_path"
|
||||
description: "Directory, where the query model will be saved"
|
||||
info: null
|
||||
default:
|
||||
- "totalvi_model_query"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Learning parameters"
|
||||
arguments:
|
||||
- type: "integer"
|
||||
name: "--max_epochs"
|
||||
description: "Number of passes through the dataset"
|
||||
info: null
|
||||
default:
|
||||
- 400
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "integer"
|
||||
name: "--max_query_epochs"
|
||||
description: "Number of passes through the dataset, when fine-tuning model for\
|
||||
\ query"
|
||||
info: null
|
||||
default:
|
||||
- 200
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "double"
|
||||
name: "--weight_decay"
|
||||
description: "Weight decay, when fine-tuning model for query"
|
||||
info: null
|
||||
default:
|
||||
- 0.0
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "python_script"
|
||||
path: "script.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "setup_logger.py"
|
||||
- type: "file"
|
||||
path: "nextflow_labels.config"
|
||||
dest: "nextflow_labels.config"
|
||||
description: "Performs mapping to the reference by totalvi model: https://docs.scvi-tools.org/en/stable/tutorials/notebooks/scarches_scvi_tools.html#Reference-mapping-with-TOTALVI"
|
||||
test_resources:
|
||||
- type: "python_script"
|
||||
path: "test.py"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "pbmc_1k_protein_v3_mms.h5mu"
|
||||
info: null
|
||||
status: "enabled"
|
||||
scope:
|
||||
image: "public"
|
||||
target: "public"
|
||||
license: "MIT"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
label:
|
||||
- "highmem"
|
||||
- "highcpu"
|
||||
- "highdisk"
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
script:
|
||||
- "includeConfig(\"nextflow_labels.config\")"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "nvcr.io/nvidia/pytorch:25.05-py3"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "anndata~=0.11.1"
|
||||
- "mudata~=0.3.1"
|
||||
- "jax[cuda]"
|
||||
- "scvi-tools~=1.3.1"
|
||||
script:
|
||||
- "exec(\"try:\\n import awkward\\nexcept ModuleNotFoundError:\\n exit(0)\\\
|
||||
nelse: exit(1)\")"
|
||||
upgrade: true
|
||||
test_setup:
|
||||
- type: "python"
|
||||
user: false
|
||||
packages:
|
||||
- "viashpy==0.8.0"
|
||||
upgrade: true
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/integrate/totalvi/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/integrate/totalvi"
|
||||
executable: "target/executable/integrate/totalvi/totalvi"
|
||||
viash_version: "0.9.4"
|
||||
git_commit: "173327cc5670aa8bd5cf473827de80b602c90092"
|
||||
git_remote: "https://github.com/openpipelines-bio/openpipeline"
|
||||
git_tag: "0.2.0-2055-g173327cc"
|
||||
package_config:
|
||||
name: "openpipeline"
|
||||
version: "main"
|
||||
summary: "Best-practice workflows for single-cell multi-omics analyses.\n"
|
||||
description: "OpenPipelines are extensible single cell analysis pipelines for reproducible\
|
||||
\ and large-scale single cell processing using [Viash](https://viash.io) and [Nextflow](https://www.nextflow.io/).\n\
|
||||
\nIn terms of workflows, the following has been made available, but keep in mind\
|
||||
\ that\nindividual tools and functionality can be executed as standalone components\
|
||||
\ as well.\n\n * Demultiplexing: conversion of raw sequencing data to FASTQ objects.\n\
|
||||
\ * Ingestion: Read mapping and generating a count matrix.\n * Single sample\
|
||||
\ processing: cell filtering and doublet detection.\n * Multisample processing:\
|
||||
\ Count transformation, normalization, QC metric calculations.\n * Integration:\
|
||||
\ Clustering, integration and batch correction using single and multimodal methods.\n\
|
||||
\ * Downstream analysis workflows\n"
|
||||
info:
|
||||
test_resources:
|
||||
- type: "s3"
|
||||
path: "s3://openpipelines-data"
|
||||
dest: "resources_test"
|
||||
nextflow_labels_ci:
|
||||
- path: "src/workflows/utils/labels_ci.config"
|
||||
description: "Adds the correct memory and CPU labels when running on the Viash\
|
||||
\ Hub CI."
|
||||
viash_version: "0.9.4"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}\n\
|
||||
.runners[.type == 'nextflow'].config.script := 'includeConfig(\"nextflow_labels.config\"\
|
||||
)'\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
keywords:
|
||||
- "single-cell"
|
||||
- "multimodal"
|
||||
license: "MIT"
|
||||
organization: "vsh"
|
||||
links:
|
||||
repository: "https://github.com/openpipelines-bio/openpipeline"
|
||||
docker_registry: "ghcr.io"
|
||||
homepage: "https://openpipelines.bio"
|
||||
documentation: "https://openpipelines.bio/fundamentals"
|
||||
issue_tracker: "https://github.com/openpipelines-bio/openpipeline/issues"
|
||||
48
target/executable/integrate/totalvi/nextflow_labels.config
Normal file
48
target/executable/integrate/totalvi/nextflow_labels.config
Normal file
@@ -0,0 +1,48 @@
|
||||
process {
|
||||
// Default resources for components that hardly do any processing
|
||||
memory = { 2.GB * task.attempt }
|
||||
cpus = 1
|
||||
|
||||
// Retry for exit codes that have something to do with memory issues
|
||||
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
|
||||
maxRetries = 3
|
||||
|
||||
// The memory a task is assigned increases with each attempt
|
||||
// uncomment the line below and adjust the value to set a global upper limit on the memory.
|
||||
// resourceLimits = [ memory: 240.Gb ]
|
||||
|
||||
// CPU resources
|
||||
withLabel: singlecpu { cpus = 1 }
|
||||
withLabel: lowcpu { cpus = 4 }
|
||||
withLabel: midcpu { cpus = 10 }
|
||||
withLabel: highcpu { cpus = 20 }
|
||||
|
||||
// Memory resources
|
||||
withLabel: lowmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 4.GB * task.attempt } }
|
||||
withLabel: midmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 25.GB * task.attempt } }
|
||||
withLabel: highmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 50.GB * task.attempt } }
|
||||
withLabel: veryhighmem { memory = { task?.resourceLimits?.memory && task?.maxRetries && task.attempt >= task.maxRetries ? task.resourceLimits.memory : 75.GB * task.attempt } }
|
||||
|
||||
// Disk space
|
||||
withLabel: lowdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: middisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: highdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
withLabel: veryhighdisk {
|
||||
disk = {process.disk ? process.disk : null}
|
||||
}
|
||||
|
||||
// NOTE: The above labels intentionally do not have an effect by default.
|
||||
// The user should set the disk space requirements by adding the following
|
||||
// to the compute environment:
|
||||
//
|
||||
// withLabel: lowdisk { disk = { 20.GB * task.attempt } }
|
||||
// withLabel: middisk { disk = { 100.GB * task.attempt } }
|
||||
// withLabel: highdisk { disk = { 200.GB * task.attempt } }
|
||||
// withLabel: veryhighdisk { disk = { 500.GB * task.attempt } }
|
||||
}
|
||||
12
target/executable/integrate/totalvi/setup_logger.py
Normal file
12
target/executable/integrate/totalvi/setup_logger.py
Normal file
@@ -0,0 +1,12 @@
|
||||
def setup_logger():
    """Configure the root logger to emit INFO-level records to stdout.

    The root logger's level is set to INFO and a stream handler writing to
    the current ``sys.stdout`` is attached, formatted as
    ``<asctime> <levelname> <message>``.

    Calling this function repeatedly is safe: the handler is installed only
    once per stdout stream, so records are not printed multiple times.

    Returns
    -------
    logging.Logger
        The configured root logger.
    """
    import logging
    from sys import stdout

    # Name used to recognise the handler installed by a previous call.
    handler_name = "setup_logger_console"

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # The stream is compared as well: if stdout has been replaced since the
    # previous call, a fresh handler is attached for the new stream.
    already_installed = any(
        handler.get_name() == handler_name
        and getattr(handler, "stream", None) is stdout
        for handler in logger.handlers
    )
    if not already_installed:
        console_handler = logging.StreamHandler(stdout)
        console_handler.set_name(handler_name)
        logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
        console_handler.setFormatter(logFormatter)
        logger.addHandler(console_handler)

    return logger
|
||||
1712
target/executable/integrate/totalvi/totalvi
Executable file
1712
target/executable/integrate/totalvi/totalvi
Executable file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user