Build branch openpipeline_spatial/v0.3 with version v0.3.0 to openpipeline_spatial on branch v0.3 (84e2b7c)

Build pipeline: openpipelines-bio.openpipeline-spatial.v0.3.0-t5q65

Source commit: 84e2b7cc63

Source message: update version
This commit is contained in:
CI
2026-02-18 11:25:05 +00:00
commit fd31f6f50d
505 changed files with 265583 additions and 0 deletions

View File

@@ -0,0 +1,116 @@
#!/bin/bash
# Builds the tiny Aviti Teton test resource:
#   1. downloads and unpacks the PLUT-0105 archive,
#   2. subsets its cells2stats output with an inline Python script,
#   3. converts the subset to H5MU with viash,
#   4. previews the upload to S3 (the sync runs with --dryrun).
# Tools invoked: git, wget, tar, python (pandas, pyarrow engine), viash, aws.

set -eo pipefail

# get the root of the directory
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

ID=aviti
DIR=resources_test/$ID/
OUT=$DIR/teton_cells2stats_tiny/

# Create directories (mkdir -p is a no-op for directories that already exist)
mkdir -p "$DIR" "$OUT"

echo "> Downloading Aviti Teton data"
wget "https://go.elementbiosciences.com/l/938263/28kddnj7/d59cp" -O "${DIR}/PLUT-0105.tar.gz"
tar -xzf "${DIR}/PLUT-0105.tar.gz" -C "$DIR"
rm "${DIR}/PLUT-0105.tar.gz"

echo "> Processing and subsetting Aviti Teton data"
# The heredoc delimiter is intentionally unquoted so that the shell substitutes
# DIR and OUT into the Python source below before python reads it.
python <<HEREDOC
import glob
import os
import shutil

import pandas as pd

src_dir = "${DIR}/PLUT-0105"
dest_dir = "${OUT}"

# Configuration of the subset.
subset_image_dirs = False
wells_to_keep = ["A1"]
max_cells_per_well = 1000

os.makedirs(dest_dir, exist_ok=True)
print(f"Processing data from {src_dir} to {dest_dir}")

# Copy images (disabled unless subset_image_dirs is switched on)
if subset_image_dirs:
    image_dirs = ["CellSegmentation", "Projection"]
    for image_dir in image_dirs:
        image_dir_path = os.path.join(src_dir, image_dir)
        if not os.path.exists(image_dir_path):
            print(f"Warning: Image directory not found: {image_dir_path}")
            continue
        if not os.path.isdir(image_dir_path):
            print(f"Warning: Path exists but is not a directory: {image_dir_path}")
            continue
        print(f"Processing image directory: {image_dir}")
        for well in wells_to_keep:
            dest_path = f"{dest_dir}/{image_dir}/Well{well}"
            os.makedirs(dest_path, exist_ok=True)
            src_path = glob.glob(os.path.join(src_dir, image_dir, f"Well{well}"))
            if len(src_path) != 1:
                print(f"Warning: Expected 1 path for Well{well}, found {len(src_path)}")
                continue
            shutil.copytree(src_path[0], dest_path, dirs_exist_ok=True)

# Copy count matrix
src_path = os.path.join(src_dir, "Cytoprofiling", "Instrument", "RawCellStats.parquet")
if os.path.exists(src_path):
    print(f"Processing count matrix: {src_path}")
    df = pd.read_parquet(src_path)
    print(f"Original data: {len(df)} rows")

    # Filter by wells
    df = df[df["Well"].isin(wells_to_keep)]
    print(f"After well filtering: {len(df)} rows")

    if max_cells_per_well:
        # Limit the number of cells per well. The cap is applied within each
        # well (not to the table as a whole) so it stays correct when
        # wells_to_keep lists more than one well; row order is preserved.
        df = df.groupby("Well", sort=False).head(max_cells_per_well)
        print(f"After cell limit: {len(df)} rows")

    dest_path = os.path.join(dest_dir, "Cytoprofiling", "Instrument")
    os.makedirs(dest_path, exist_ok=True)
    dest_file = os.path.join(dest_path, "RawCellStats.parquet")
    df.to_parquet(dest_file, engine="pyarrow")
    print(f"Saved processed count matrix to {dest_file}")
else:
    print(f"Warning: Count matrix not found at {src_path}")

# Copy Panel Metadata
panel_src_path = os.path.join(src_dir, "Panel.json")
if os.path.exists(panel_src_path):
    panel_dest_path = os.path.join(dest_dir, "Panel.json")
    shutil.copy2(panel_src_path, panel_dest_path)
    print("Copied Panel.json")
else:
    print(f"Warning: Panel.json not found at {panel_src_path}")

print("Processing complete!")
HEREDOC

echo "> Removing original aviti_teton folder"
# ${DIR:?} aborts instead of expanding to "/PLUT-0105" if DIR is ever empty.
rm -rf -- "${DIR:?}/PLUT-0105"

echo "> Aviti Teton tiny dataset created successfully at $OUT"

viash run src/convert/from_cells2stats_to_h5mu/config.vsh.yaml -- \
  --input "${OUT}" \
  --output "$DIR/aviti_teton_tiny.h5mu" \
  --output_compression "gzip"

echo "> Conversion to H5MU complete"

# --dryrun only prints what would be transferred or deleted; drop it to
# actually publish.
aws s3 sync \
  --profile di \
  "$DIR" \
  s3://openpipelines-bio/openpipeline_spatial/resources_test/aviti \
  --delete \
  --dryrun

View File

@@ -0,0 +1,52 @@
#!/bin/bash
# Builds the tiny CosMx test resource:
#   1. downloads and unpacks the Lung5_Rep2 flat-file archive (skipped when
#      the unpacked directory already exists),
#   2. subsets it to 3 FOVs with the subset_cosmx component,
#   3. converts the subset to H5MU,
#   4. removes the full dataset and previews the S3 upload (--dryrun).
# Tools invoked: git, wget, tar, viash, aws.
# Environment: VIASH_TEMP (optional) - parent of the scratch directory,
# defaults to /tmp.

set -eo pipefail

# get the root of the directory
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

DIR="resources_test/cosmx"
ID="Lung5_Rep2"
OUT="$DIR/$ID/"

# create tempdir
# NOTE(review): TMPDIR is also the conventional temp-dir environment variable;
# if it is already exported, child processes will see the value set here.
MY_TEMP="${VIASH_TEMP:-/tmp}"
TMPDIR=$(mktemp -d "$MY_TEMP/$ID-XXXXXX")
function clean_up {
  [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR"
}
trap clean_up EXIT

if [ ! -d "$OUT" ]; then
  flat_dataset="https://nanostring-public-share.s3.us-west-2.amazonaws.com/SMI-Compressed/Lung5_Rep2/Lung5_Rep2+SMI+Flat+data.tar.gz"
  wget "$flat_dataset" -O "$TMPDIR/Lung5_Rep2.tar.gz"
  mkdir -p "$TMPDIR/Lung5_Rep2"
  tar -xzf "$TMPDIR/Lung5_Rep2.tar.gz" -C "$TMPDIR/Lung5_Rep2"
  mkdir -p "$OUT"
  # OUT already ends in "/", so it is used as-is to avoid a doubled slash.
  mv "$TMPDIR/Lung5_Rep2/Lung5_Rep2/Lung5_Rep2-Flat_files_and_images/"* "$OUT"
fi

viash run src/filter/subset_cosmx/config.vsh.yaml -- \
  --input "$OUT" \
  --num_fovs 3 \
  --subset_transcripts_file True \
  --subset_polygons_file False \
  --output "${DIR}/${ID}_tiny"

viash run src/convert/from_cosmx_to_h5mu/config.vsh.yaml -- \
  --input "${DIR}/${ID}_tiny" \
  --output "$DIR/${ID}_tiny.h5mu" \
  --output_compression "gzip"

# The full dataset is removed before syncing so it is not part of the upload.
# ${OUT:?} aborts instead of running "rm -rf" on an empty path.
rm -rf -- "${OUT:?}"

# Sync to S3 (--dryrun only prints the planned operations)
aws s3 sync \
  --profile di \
  "$DIR" \
  s3://openpipelines-bio/openpipeline_spatial/resources_test/cosmx \
  --delete \
  --dryrun

View File

@@ -0,0 +1,19 @@
#!/bin/bash
# Previews a download of the GRCh38 reference test resource from S3 into
# resources_test/GRCh38. The sync direction is S3 -> local, and --dryrun means
# nothing is transferred or deleted until that flag is removed.
# Tools invoked: git, aws.

set -eo pipefail

# get the root of the directory
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

DIR="resources_test/GRCh38"

# Quoted so the path is always passed to mkdir as a single argument.
mkdir -p "$DIR"

aws s3 sync \
  --profile di \
  s3://openpipelines-bio/openpipeline_spatial/resources_test/GRCh38 \
  "$DIR" \
  --delete \
  --dryrun

View File

@@ -0,0 +1,60 @@
#!/bin/bash
# Builds the tiny Visium test resource:
#   1. downloads the FFPE Human Ovarian Cancer FASTQs, image and probe set,
#   2. downsizes the image and truncates the lane-1 FASTQs,
#   3. runs spaceranger count on the subset,
#   4. converts the spaceranger output to H5MU,
#   5. previews the S3 upload (--dryrun).
# Tools invoked: git, curl, tar, ImageMagick convert, gzip, viash, aws.
# All paths are absolute (anchored at the repository root), so the script does
# not depend on the current working directory.

set -eo pipefail

# get the root of the directory
REPO_ROOT=$(git rev-parse --show-toplevel)

# Define absolute directory path
DIR="$REPO_ROOT/resources_test/visium"
SAMPLE="Visium_FFPE_Human_Ovarian_Cancer"
ID="${SAMPLE}_tiny"

# from https://www.10xgenomics.com/resources/datasets/human-ovarian-cancer-1-standard
BASE_URL="https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/$SAMPLE"

mkdir -p "$DIR"

# Input Files - download to the specific directory.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as if it were the dataset; --location follows redirects.
curl --fail --location -o "$DIR/${SAMPLE}_fastqs.tar" "$BASE_URL/${SAMPLE}_fastqs.tar"
curl --fail --location -o "$DIR/${SAMPLE}_image.jpg" "$BASE_URL/${SAMPLE}_image.jpg"
curl --fail --location -o "$DIR/${SAMPLE}_probe_set.csv" "$BASE_URL/${SAMPLE}_probe_set.csv"

# Extract in the specific directory
tar xvf "$DIR/${SAMPLE}_fastqs.tar" -C "$DIR"

# Create subsampled dataset with ImageMagick
# https://imagemagick.org/index.php
mkdir -p "$DIR/$ID"
convert "$DIR/${SAMPLE}_image.jpg" -resize 2000x2000 "$DIR/${SAMPLE}_image_tiny.jpg"

for f in "$DIR/${SAMPLE}_fastqs"/*L001*R*; do
  # An unmatched glob stays literal; fail loudly rather than emit a bogus file.
  [[ -e "$f" ]] || { echo "No FASTQ files matched: $f" >&2; exit 1; }
  # Keep the first 40000 lines of each file. The decompressor is fed through
  # process substitution rather than a pipe stage: head exits early, the
  # decompressor is then killed by SIGPIPE, and with pipefail + errexit that
  # status (141) would abort the script if it were part of the pipeline.
  head -n 40000 < <(gzip -cdf "$f") | gzip -c > "$DIR/$ID/$(basename "$f")"
done

echo "> Downloading and subsampling of datasets complete"

# Run spaceranger. The output is written directly into $DIR, which is where
# the conversion step below reads it from.
viash run "$REPO_ROOT/src/mapping/spaceranger_count/config.vsh.yaml" -- \
  --input "$DIR/$ID" \
  --gex_reference "$REPO_ROOT/resources_test/GRCh38/" \
  --probe_set "$DIR/${SAMPLE}_probe_set.csv" \
  --image "$DIR/${SAMPLE}_image_tiny.jpg" \
  --slide "V10L13-020" \
  --area "D1" \
  --create_bam "false" \
  --output "$DIR/${ID}_spaceranger"

echo "> Running spaceranger complete"

# Drop the full-size inputs so they are not part of the sync below.
rm -rf -- "${DIR:?}/${SAMPLE}_fastqs"
rm -f -- "$DIR/${SAMPLE}_image.jpg"

viash run "$REPO_ROOT/src/convert/from_spaceranger_to_h5mu/config.vsh.yaml" -- \
  --input "$DIR/${ID}_spaceranger" \
  --output "$DIR/$ID.h5mu"

# --dryrun only prints the planned operations; drop it to actually publish.
aws s3 sync \
  --profile di \
  --exclude "*.yaml" \
  "$DIR" \
  s3://openpipelines-bio/openpipeline_spatial/resources_test/visium \
  --delete \
  --dryrun

View File

@@ -0,0 +1,44 @@
#!/bin/bash
# Builds the tiny Xenium test resource: fetches a pre-made tiny Xenium output
# bundle (unless it is already unpacked), converts it to SpatialData (zarr)
# and then to H5MU, and previews the S3 upload (--dryrun).
# Tools invoked: git, wget, unzip, viash, aws.
# Environment: VIASH_TEMP (optional) - parent of the scratch directory,
# defaults to /tmp.

set -eo pipefail

# Locate the repository root; every path below is anchored to it.
REPO_ROOT=$(git rev-parse --show-toplevel)

# Absolute locations of the resource directory and the unpacked dataset.
DIR="$REPO_ROOT/resources_test/xenium"
ID="xenium_tiny"
OUT="$DIR/$ID"

# Scratch directory, removed again when the script exits (for any reason).
MY_TEMP="${VIASH_TEMP:-/tmp}"
TMPDIR=$(mktemp -d "$MY_TEMP/$ID-XXXXXX")

clean_up() {
  [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR"
}
trap clean_up EXIT

# Download and unpack only when the dataset directory is not there yet.
if [[ ! -d "$OUT" ]]; then
  tiny_dataset="https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip"
  archive="$TMPDIR/xenium_tiny.zip"
  unpacked="$TMPDIR/xenium_tiny"

  wget "$tiny_dataset" -O "$archive"
  unzip -q "$archive" -d "$unpacked"

  mkdir -p "$OUT"
  mv "$unpacked/Xenium_Prime_Mouse_Ileum_tiny_outs/"* "$OUT/"
fi

# Xenium output bundle -> SpatialData zarr store
viash run "$REPO_ROOT/src/convert/from_xenium_to_spatialdata/config.vsh.yaml" -- \
  --input "$OUT" \
  --output "$DIR/$ID.zarr"

# SpatialData zarr store -> H5MU
viash run "$REPO_ROOT/src/convert/from_spatialdata_to_h5mu/config.vsh.yaml" -- \
  --input "$DIR/$ID.zarr" \
  --output "$DIR/$ID.h5mu"

# Sync to S3 (--dryrun only prints the planned operations)
aws s3 sync \
  --profile di \
  "$DIR" \
  s3://openpipelines-bio/openpipeline_spatial/resources_test/xenium \
  --delete \
  --dryrun