Build branch openpipeline_spatial/v0.3 with version v0.3.0 to openpipeline_spatial on branch v0.3 (84e2b7c)

Build pipeline: openpipelines-bio.openpipeline-spatial.v0.3.0-t5q65 Source commit: 84e2b7cc63 Source message: update version
2026-02-18 11:25:05 +00:00
commit fd31f6f50d
505 changed files with 265583 additions and 0 deletions
--- a/resources_test_scripts/aviti_teton_tiny.sh
+++ b/resources_test_scripts/aviti_teton_tiny.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+
+# Build the tiny Aviti Teton test resource: download and unpack the archive,
+# subset it with an inline Python script, convert the subset to H5MU and
+# sync the resource directory to S3 (as a dry run).
+
+set -eo pipefail
+
+# get the root of the directory
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# ensure that the command below is run from the root of the repository
+cd "$REPO_ROOT"
+
+ID=aviti
+DIR=resources_test/$ID/
+OUT=$DIR/teton_cells2stats_tiny/
+
+# Create directories (mkdir -p is a no-op for directories that already exist)
+mkdir -p "$DIR" "$OUT"
+
+echo "> Downloading Aviti Teton data"
+wget "https://go.elementbiosciences.com/l/938263/28kddnj7/d59cp" -O "${DIR}/PLUT-0105.tar.gz"
+tar -xzf "${DIR}/PLUT-0105.tar.gz" -C "$DIR"
+rm "${DIR}/PLUT-0105.tar.gz"
+
+echo "> Processing and subsetting Aviti Teton data"
+# The heredoc delimiter is deliberately unquoted: the shell substitutes
+# ${DIR} and ${OUT} into the Python source before it is executed.
+python <<HEREDOC
+import os
+import shutil
+import pandas as pd
+import glob
+import json
+
+src_dir = "${DIR}/PLUT-0105"
+dest_dir = "${OUT}"
+subset_image_dirs = False
+wells_to_keep = ["A1"]
+max_cells_per_well = 1000
+
+os.makedirs(dest_dir, exist_ok=True)
+
+print(f"Processing data from {src_dir} to {dest_dir}")
+
+# Copy images (skipped unless subset_image_dirs is switched on above)
+if subset_image_dirs:
+    image_dirs = ["CellSegmentation", "Projection"]
+    for image_dir in image_dirs:
+        image_dir_path = os.path.join(src_dir, image_dir)
+        if not os.path.exists(image_dir_path):
+            print(f"Warning: Image directory not found: {image_dir_path}")
+            continue
+        if not os.path.isdir(image_dir_path):
+            print(f"Warning: Path exists but is not a directory: {image_dir_path}")
+            continue
+        print(f"Processing image directory: {image_dir}")
+
+        for well in wells_to_keep:
+            dest_path = f"{dest_dir}/{image_dir}/Well{well}"
+            os.makedirs(dest_path, exist_ok=True)
+            src_path = glob.glob(os.path.join(src_dir, image_dir, f"Well{well}"))
+            if len(src_path) != 1:
+                print(f"Warning: Expected 1 path for Well{well}, found {len(src_path)}")
+                continue
+            shutil.copytree(src_path[0], dest_path, dirs_exist_ok=True)
+
+# Copy count matrix
+src_path = os.path.join(src_dir, "Cytoprofiling", "Instrument", "RawCellStats.parquet")
+if os.path.exists(src_path):
+    print(f"Processing count matrix: {src_path}")
+    df = pd.read_parquet(src_path)
+    print(f"Original data: {len(df)} rows")
+
+    # Filter by wells
+    df = df[df["Well"].isin(wells_to_keep)]
+    print(f"After well filtering: {len(df)} rows")
+
+    if max_cells_per_well:
+        # Keep at most max_cells_per_well rows for each well. A plain
+        # df.head() would cap the total across all kept wells instead.
+        df = df.groupby("Well", sort=False).head(max_cells_per_well)
+        print(f"After cell limit: {len(df)} rows")
+
+    dest_path = os.path.join(dest_dir, "Cytoprofiling", "Instrument")
+    os.makedirs(dest_path, exist_ok=True)
+    dest_file = os.path.join(dest_path, "RawCellStats.parquet")
+    df.to_parquet(dest_file, engine="pyarrow")
+    print(f"Saved processed count matrix to {dest_file}")
+else:
+    print(f"Warning: Count matrix not found at {src_path}")
+
+# Copy Panel Metadata
+panel_src_path = os.path.join(src_dir, "Panel.json")
+if os.path.exists(panel_src_path):
+    panel_dest_path = os.path.join(dest_dir, "Panel.json")
+    shutil.copy2(panel_src_path, panel_dest_path)
+    print(f"Copied Panel.json")
+else:
+    print(f"Warning: Panel.json not found at {panel_src_path}")
+print("Processing complete!")
+HEREDOC
+
+echo "> Removing original aviti_teton folder"
+rm -rf "$DIR/PLUT-0105"
+
+echo "> Aviti Teton tiny dataset created successfully at $OUT"
+
+viash run src/convert/from_cells2stats_to_h5mu/config.vsh.yaml -- \
+    --input "${OUT}" \
+    --output "$DIR/aviti_teton_tiny.h5mu" \
+    --output_compression "gzip"
+
+echo "> Conversion to H5MU complete"
+
+aws s3 sync \
+    --profile di \
+    "$DIR" \
+    s3://openpipelines-bio/openpipeline_spatial/resources_test/aviti \
+    --delete \
+    --dryrun
--- a/resources_test_scripts/cosmx_tiny.sh
+++ b/resources_test_scripts/cosmx_tiny.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+set -eo pipefail
+
+# All relative paths below are resolved against the repository root.
+REPO_ROOT=$(git rev-parse --show-toplevel)
+cd "$REPO_ROOT"
+
+DIR="resources_test/cosmx"
+ID="Lung5_Rep2"
+OUT="$DIR/$ID/"
+TINY="${DIR}/${ID}_tiny"
+
+# Scratch directory for the download; removed again when the script exits.
+temp_root="${VIASH_TEMP:-/tmp}"
+TMPDIR=$(mktemp -d "$temp_root/$ID-XXXXXX")
+clean_up() {
+  if [[ -d "$TMPDIR" ]]; then
+    rm -r "$TMPDIR"
+  fi
+}
+trap clean_up EXIT
+
+# Fetch and unpack the flat-file dataset unless it is already present.
+if [[ ! -d "$OUT" ]]; then
+    archive="$TMPDIR/Lung5_Rep2.tar.gz"
+    unpacked="$TMPDIR/Lung5_Rep2"
+    wget -O "$archive" "https://nanostring-public-share.s3.us-west-2.amazonaws.com/SMI-Compressed/Lung5_Rep2/Lung5_Rep2+SMI+Flat+data.tar.gz"
+    mkdir -p "$unpacked"
+    tar -xzf "$archive" -C "$unpacked"
+    mkdir -p "$OUT"
+    mv "$unpacked/Lung5_Rep2/Lung5_Rep2-Flat_files_and_images/"* "$OUT/"
+fi
+
+# Write a subset of the dataset next to the full download.
+viash run src/filter/subset_cosmx/config.vsh.yaml -- \
+    --input "$OUT" \
+    --num_fovs 3 \
+    --subset_transcripts_file True \
+    --subset_polygons_file False \
+    --output "$TINY"
+
+viash run src/convert/from_cosmx_to_h5mu/config.vsh.yaml -- \
+    --input "$TINY" \
+    --output "$DIR/${ID}_tiny.h5mu" \
+    --output_compression "gzip"
+
+# The full-size download is no longer needed once the subset exists.
+rm -rf "$OUT"
+
+# Sync to S3
+aws s3 sync --profile di --delete --dryrun \
+    "$DIR" \
+    s3://openpipelines-bio/openpipeline_spatial/resources_test/cosmx
--- a/resources_test_scripts/reference_tiny.sh
+++ b/resources_test_scripts/reference_tiny.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+set -eo pipefail
+
+# Work from the repository root so that the relative path below resolves.
+REPO_ROOT=$(git rev-parse --show-toplevel)
+cd "$REPO_ROOT"
+
+DIR="resources_test/GRCh38"
+mkdir -p "$DIR"
+
+# Mirror the reference from S3 into the local directory (dry run).
+aws s3 sync --profile di --delete --dryrun \
+    s3://openpipelines-bio/openpipeline_spatial/resources_test/GRCh38 \
+    "$DIR"
--- a/resources_test_scripts/visium_tiny.sh
+++ b/resources_test_scripts/visium_tiny.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+# Build the tiny Visium test resource: download the FFPE Human Ovarian Cancer
+# inputs, subsample the image and FASTQ files, run spaceranger, convert the
+# result to H5MU and sync the resource directory to S3 (as a dry run).
+
+set -eo pipefail
+
+# get the root of the directory
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# Define absolute directory path
+DIR="$REPO_ROOT/resources_test/visium"
+ID="Visium_FFPE_Human_Ovarian_Cancer_tiny"
+
+# from https://www.10xgenomics.com/resources/datasets/human-ovarian-cancer-1-standard
+mkdir -p "$DIR"
+
+# Input Files - download to the specific directory.
+# --fail makes curl exit non-zero on an HTTP error instead of saving the error page.
+curl --fail --location -o "$DIR/Visium_FFPE_Human_Ovarian_Cancer_fastqs.tar" https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Ovarian_Cancer/Visium_FFPE_Human_Ovarian_Cancer_fastqs.tar
+curl --fail --location -o "$DIR/Visium_FFPE_Human_Ovarian_Cancer_image.jpg" https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Ovarian_Cancer/Visium_FFPE_Human_Ovarian_Cancer_image.jpg
+curl --fail --location -o "$DIR/Visium_FFPE_Human_Ovarian_Cancer_probe_set.csv" https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Ovarian_Cancer/Visium_FFPE_Human_Ovarian_Cancer_probe_set.csv
+
+# Extract in the specific directory
+tar xvf "$DIR/Visium_FFPE_Human_Ovarian_Cancer_fastqs.tar" -C "$DIR"
+
+# Create subsampled dataset with ImageMagick
+# https://imagemagick.org/index.php
+mkdir -p "$DIR/$ID"
+convert "$DIR/Visium_FFPE_Human_Ovarian_Cancer_image.jpg" -resize 2000x2000 "$DIR/Visium_FFPE_Human_Ovarian_Cancer_image_tiny.jpg"
+# Keep the first 40000 lines of each matching FASTQ file. The decompressor runs
+# in a process substitution: head stops reading early, and inside a regular
+# pipeline the resulting SIGPIPE exit would abort the script under pipefail.
+for f in "$DIR"/Visium_FFPE_Human_Ovarian_Cancer_fastqs/*L001*R*; do
+  head -n 40000 <(gzip -cdf "$f") | gzip -c > "$DIR/$ID/$(basename "$f")"
+done
+
+echo "> Downloading and subsampling of datasets complete"
+
+# Run spaceranger; the output goes directly to the location in $DIR that the
+# conversion step below reads from.
+viash run "$REPO_ROOT/src/mapping/spaceranger_count/config.vsh.yaml" -- \
+  --input "$DIR/$ID" \
+  --gex_reference "$REPO_ROOT/resources_test/GRCh38/" \
+  --probe_set "$DIR/Visium_FFPE_Human_Ovarian_Cancer_probe_set.csv" \
+  --image "$DIR/Visium_FFPE_Human_Ovarian_Cancer_image_tiny.jpg" \
+  --slide "V10L13-020" \
+  --area "D1" \
+  --create_bam "false" \
+  --output "$DIR/${ID}_spaceranger"
+
+echo "> Running spaceranger complete"
+
+rm -rf "$DIR/Visium_FFPE_Human_Ovarian_Cancer_fastqs"
+rm -f "$DIR/Visium_FFPE_Human_Ovarian_Cancer_image.jpg"
+
+viash run "$REPO_ROOT/src/convert/from_spaceranger_to_h5mu/config.vsh.yaml" -- \
+    --input "$DIR/${ID}_spaceranger" \
+    --output "$DIR/$ID.h5mu"
+
+aws s3 sync \
+    --profile di \
+    --exclude "*.yaml" \
+    "$DIR" \
+    s3://openpipelines-bio/openpipeline_spatial/resources_test/visium \
+    --delete \
+    --dryrun
--- a/resources_test_scripts/xenium_tiny.sh
+++ b/resources_test_scripts/xenium_tiny.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+set -eo pipefail
+
+# get the root of the directory
+REPO_ROOT=$(git rev-parse --show-toplevel)
+
+# Absolute locations of the resource directory and of the raw dataset
+ID="xenium_tiny"
+DIR="$REPO_ROOT/resources_test/xenium"
+OUT="$DIR/$ID"
+
+# Scratch directory for the download; removed again when the script exits.
+temp_root="${VIASH_TEMP:-/tmp}"
+TMPDIR=$(mktemp -d "$temp_root/$ID-XXXXXX")
+clean_up() {
+  if [[ -d "$TMPDIR" ]]; then
+    rm -r "$TMPDIR"
+  fi
+}
+trap clean_up EXIT
+
+# Download and unpack the dataset unless it is already present.
+if [[ ! -d "$OUT" ]]; then
+    archive="$TMPDIR/xenium_tiny.zip"
+    unpacked="$TMPDIR/xenium_tiny"
+    wget -O "$archive" "https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip"
+    unzip -q "$archive" -d "$unpacked"
+    mkdir -p "$OUT"
+    mv "$unpacked/Xenium_Prime_Mouse_Ileum_tiny_outs/"* "$OUT/"
+fi
+
+# Two-step conversion: Xenium output to a zarr store, then zarr to H5MU.
+viash run "$REPO_ROOT/src/convert/from_xenium_to_spatialdata/config.vsh.yaml" -- \
+    --input "$OUT" \
+    --output "$DIR/$ID.zarr"
+
+viash run "$REPO_ROOT/src/convert/from_spatialdata_to_h5mu/config.vsh.yaml" -- \
+    --input "$DIR/$ID.zarr" \
+    --output "$DIR/$ID.h5mu"
+
+# Sync to S3
+aws s3 sync --profile di --delete --dryrun \
+    "$DIR" \
+    s3://openpipelines-bio/openpipeline_spatial/resources_test/xenium