Build branch openpipeline_spatial/v0.3 with version v0.3.0 to openpipeline_spatial on branch v0.3 (84e2b7c)
Build pipeline: openpipelines-bio.openpipeline-spatial.v0.3.0-t5q65
Source commit: 84e2b7cc63
Source message: update version
This commit is contained in:
116
resources_test_scripts/aviti_teton_tiny.sh
Normal file
116
resources_test_scripts/aviti_teton_tiny.sh
Normal file
@@ -0,0 +1,116 @@
|
||||
#!/bin/bash

set -eo pipefail

# Resolve the repository root and work from there, so that every relative
# path used below is interpreted relative to the top of the repository.
REPO_ROOT="$(git rev-parse --show-toplevel)"
cd "$REPO_ROOT"

# Dataset identifier, its resources_test folder, and the folder that will
# receive the subsetted output.
ID=aviti
DIR="resources_test/${ID}/"
OUT="${DIR}/teton_cells2stats_tiny/"

# Create directories (mkdir -p leaves already existing directories untouched)
mkdir -p "$DIR"
mkdir -p "$OUT"

echo "> Downloading Aviti Teton data"
# The archive is only needed until it has been unpacked into $DIR.
ARCHIVE="${DIR}/PLUT-0105.tar.gz"
wget "https://go.elementbiosciences.com/l/938263/28kddnj7/d59cp" -O "$ARCHIVE"
tar -xzf "$ARCHIVE" -C "$DIR"
rm "$ARCHIVE"
|
||||
|
||||
echo "> Processing and subsetting Aviti Teton data"
|
||||
# Subset the unpacked run with an inline Python program.
# The heredoc delimiter is intentionally unquoted: the shell substitutes the
# DIR and OUT variables into the Python source before Python sees it.
python <<HEREDOC
import glob
import os
import shutil

import pandas as pd

src_dir = "${DIR}/PLUT-0105"
dest_dir = "${OUT}"
subset_image_dirs = False
wells_to_keep = ["A1"]
max_cells_per_well = 1000

os.makedirs(dest_dir, exist_ok=True)

print(f"Processing data from {src_dir} to {dest_dir}")

# Copy images (disabled by default through subset_image_dirs)
if subset_image_dirs:
    image_dirs = ["CellSegmentation", "Projection"]
    for image_dir in image_dirs:
        image_dir_path = os.path.join(src_dir, image_dir)
        if not os.path.exists(image_dir_path):
            print(f"Warning: Image directory not found: {image_dir_path}")
            continue
        if not os.path.isdir(image_dir_path):
            print(f"Warning: Path exists but is not a directory: {image_dir_path}")
            continue
        print(f"Processing image directory: {image_dir}")

        for well in wells_to_keep:
            dest_path = f"{dest_dir}/{image_dir}/Well{well}"
            os.makedirs(dest_path, exist_ok=True)
            src_path = glob.glob(os.path.join(src_dir, image_dir, f"Well{well}"))
            if len(src_path) != 1:
                print(f"Warning: Expected 1 path for Well{well}, found {len(src_path)}")
                continue
            shutil.copytree(src_path[0], os.path.join(dest_path), dirs_exist_ok=True)

# Copy count matrix
src_path = os.path.join(src_dir, "Cytoprofiling", "Instrument", "RawCellStats.parquet")
if os.path.exists(src_path):
    print(f"Processing count matrix: {src_path}")
    df = pd.read_parquet(src_path)
    print(f"Original data: {len(df)} rows")

    # Filter by wells
    df = df[df["Well"].isin(wells_to_keep)]
    print(f"After well filtering: {len(df)} rows")

    if max_cells_per_well:
        # Limit the number of cells per well. A plain df.head(n) would cap the
        # total row count instead, so with several wells only the first one
        # would survive. Grouping keeps the first n rows of every well and
        # preserves the original row order.
        df = df.groupby("Well", sort=False, observed=True).head(max_cells_per_well)
        print(f"After cell limit: {len(df)} rows")

    dest_path = os.path.join(dest_dir, "Cytoprofiling", "Instrument")
    os.makedirs(dest_path, exist_ok=True)
    dest_file = os.path.join(dest_path, "RawCellStats.parquet")
    df.to_parquet(dest_file, engine="pyarrow")
    print(f"Saved processed count matrix to {dest_file}")
else:
    print(f"Warning: Count matrix not found at {src_path}")

# Copy Panel Metadata
panel_src_path = os.path.join(src_dir, "Panel.json")
if os.path.exists(panel_src_path):
    panel_dest_path = os.path.join(dest_dir, "Panel.json")
    shutil.copy2(panel_src_path, panel_dest_path)
    print(f"Copied Panel.json")
else:
    print(f"Warning: Panel.json not found at {panel_src_path}")

print("Processing complete!")
HEREDOC
|
||||
|
||||
# The full unpacked run is no longer needed once the subset has been written.
echo "> Removing original aviti_teton folder"
rm -rf "$DIR/PLUT-0105"

echo "> Aviti Teton tiny dataset created successfully at $OUT"

# Convert the subsetted cells2stats output to H5MU.
convert_args=(
  --input "${OUT}"
  --output "$DIR/aviti_teton_tiny.h5mu"
  --output_compression "gzip"
)
viash run src/convert/from_cells2stats_to_h5mu/config.vsh.yaml -- "${convert_args[@]}"

echo "> Conversion to H5MU complete"

# Mirror the local folder to S3. --dryrun only prints what would be done.
sync_args=(
  --profile di
  "$DIR"
  s3://openpipelines-bio/openpipeline_spatial/resources_test/aviti
  --delete
  --dryrun
)
aws s3 sync "${sync_args[@]}"
|
||||
52
resources_test_scripts/cosmx_tiny.sh
Executable file
52
resources_test_scripts/cosmx_tiny.sh
Executable file
@@ -0,0 +1,52 @@
|
||||
#!/bin/bash

set -eo pipefail

# Resolve the repository root and run everything below from there.
REPO_ROOT="$(git rev-parse --show-toplevel)"
cd "$REPO_ROOT"

# Target folder, dataset name, and the folder holding the full raw dataset.
DIR="resources_test/cosmx"
ID="Lung5_Rep2"
OUT="${DIR}/${ID}/"

# Scratch directory: placed under VIASH_TEMP when that is set, /tmp otherwise.
MY_TEMP="${VIASH_TEMP:-/tmp}"
TMPDIR="$(mktemp -d "${MY_TEMP}/${ID}-XXXXXX")"
|
||||
# Remove the scratch directory referenced by TMPDIR, if it still exists.
# Globals: TMPDIR (read)
# Returns: 0 when the directory was removed or was already absent, so that
#          running this from an EXIT trap under `set -e` cannot turn a
#          successful run into a failing one.
function clean_up {
  if [[ -d "$TMPDIR" ]]; then
    # ${TMPDIR:?} aborts instead of expanding to an empty path.
    rm -rf -- "${TMPDIR:?}"
  fi
}
|
||||
# Always remove the scratch directory when the script exits.
trap clean_up EXIT

# Fetch and unpack the full dataset only when it is not present yet.
if [[ ! -d "$OUT" ]]; then
  flat_dataset="https://nanostring-public-share.s3.us-west-2.amazonaws.com/SMI-Compressed/Lung5_Rep2/Lung5_Rep2+SMI+Flat+data.tar.gz"
  archive="$TMPDIR/Lung5_Rep2.tar.gz"
  extract_dir="$TMPDIR/Lung5_Rep2"

  wget "$flat_dataset" -O "$archive"
  mkdir -p "$extract_dir"
  tar -xzf "$archive" -C "$extract_dir"

  # Move the flat files and images out of the scratch area into $OUT.
  mkdir -p "$OUT"
  mv "$extract_dir/Lung5_Rep2/Lung5_Rep2-Flat_files_and_images/"* "$OUT/"
fi
|
||||
|
||||
# Subset the full dataset to a few fields of view.
viash run src/filter/subset_cosmx/config.vsh.yaml -- \
  --input "$OUT" \
  --num_fovs 3 \
  --subset_transcripts_file True \
  --subset_polygons_file False \
  --output "${DIR}/${ID}_tiny"

# Convert the subset to H5MU. The input path is quoted like every other path
# argument so it is never subject to word splitting or globbing.
viash run src/convert/from_cosmx_to_h5mu/config.vsh.yaml -- \
  --input "${DIR}/${ID}_tiny" \
  --output "$DIR/${ID}_tiny.h5mu" \
  --output_compression "gzip"

# The full dataset is not part of the test resources; drop it before syncing.
rm -rf "$OUT"

# Sync to S3 (--dryrun only prints what would be done)
aws s3 sync \
  --profile di \
  "$DIR" \
  s3://openpipelines-bio/openpipeline_spatial/resources_test/cosmx \
  --delete \
  --dryrun
|
||||
19
resources_test_scripts/reference_tiny.sh
Executable file
19
resources_test_scripts/reference_tiny.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/bin/bash

set -eo pipefail

# get the root of the directory
REPO_ROOT=$(git rev-parse --show-toplevel)

# ensure that the command below is run from the root of the repository
cd "$REPO_ROOT"

# Local folder that receives the reference files.
DIR="resources_test/GRCh38"

# Quoted so the path is never subject to word splitting or globbing.
mkdir -p "$DIR"

# Pull the reference from S3 into the local folder.
# --dryrun only prints what would be done.
aws s3 sync \
  --profile di \
  s3://openpipelines-bio/openpipeline_spatial/resources_test/GRCh38 \
  "$DIR" \
  --delete \
  --dryrun
|
||||
60
resources_test_scripts/visium_tiny.sh
Executable file
60
resources_test_scripts/visium_tiny.sh
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/bin/bash

set -eo pipefail

# get the root of the directory
REPO_ROOT=$(git rev-parse --show-toplevel)

# Define absolute directory path
DIR="$REPO_ROOT/resources_test/visium"
ID="Visium_FFPE_Human_Ovarian_Cancer_tiny"

# from https://www.10xgenomics.com/resources/datasets/human-ovarian-cancer-1-standard
mkdir -p "$DIR"

# Input files - download into $DIR.
# --fail makes an HTTP error abort the script instead of saving the error
# page as if it were data; --location follows redirects.
BASE_URL="https://cf.10xgenomics.com/samples/spatial-exp/1.3.0/Visium_FFPE_Human_Ovarian_Cancer"
for name in \
  Visium_FFPE_Human_Ovarian_Cancer_fastqs.tar \
  Visium_FFPE_Human_Ovarian_Cancer_image.jpg \
  Visium_FFPE_Human_Ovarian_Cancer_probe_set.csv; do
  curl --fail --location -o "$DIR/$name" "$BASE_URL/$name"
done

# Extract in the specific directory
tar xvf "$DIR/Visium_FFPE_Human_Ovarian_Cancer_fastqs.tar" -C "$DIR"

# Create subsampled dataset with ImageMagick
# https://imagemagick.org/index.php
mkdir -p "$DIR/$ID"
convert "$DIR/Visium_FFPE_Human_Ovarian_Cancer_image.jpg" -resize 2000x2000 "$DIR/Visium_FFPE_Human_Ovarian_Cancer_image_tiny.jpg"

# Keep the first 40000 lines of every lane-1 read file.
for f in "$DIR"/Visium_FFPE_Human_Ovarian_Cancer_fastqs/*L001*R*; do
  # head exits after 40000 lines, which terminates the decompressor with
  # SIGPIPE (status 141). Under `set -eo pipefail` that would abort the
  # script, so exactly this status is tolerated; any other decompression
  # failure still aborts.
  { gzip -cdf "$f" || [[ $? -eq 141 ]]; } \
    | head -n 40000 \
    | gzip -c > "$DIR/$ID/$(basename "$f")"
done
|
||||
|
||||
echo "> Downloading and subsampling of datasets complete"

# Run spaceranger.
# The component config is addressed through $REPO_ROOT because this script
# never changes into the repository root. The output is written directly to
# the location the later conversion step reads from, replacing an incomplete
# `mv` (no operands) that always failed and aborted the script.
viash run "$REPO_ROOT/src/mapping/spaceranger_count/config.vsh.yaml" -- \
  --input "$DIR/Visium_FFPE_Human_Ovarian_Cancer_tiny" \
  --gex_reference "$REPO_ROOT/resources_test/GRCh38/" \
  --probe_set "$DIR/Visium_FFPE_Human_Ovarian_Cancer_probe_set.csv" \
  --image "$DIR/Visium_FFPE_Human_Ovarian_Cancer_image_tiny.jpg" \
  --slide "V10L13-020" \
  --area "D1" \
  --create_bam "false" \
  --output "$DIR/Visium_FFPE_Human_Ovarian_Cancer_tiny_spaceranger"

echo "> Running spaceranger complete"
|
||||
|
||||
# Remove the full-size inputs so that only the tiny dataset remains in $DIR.
# The downloaded tar archive is removed together with its extracted copy;
# otherwise it would be picked up by the sync below.
rm -rf "$DIR/Visium_FFPE_Human_Ovarian_Cancer_fastqs"
rm -f "$DIR/Visium_FFPE_Human_Ovarian_Cancer_fastqs.tar"
rm -f "$DIR/Visium_FFPE_Human_Ovarian_Cancer_image.jpg"

# Convert the spaceranger output to H5MU.
viash run "$REPO_ROOT/src/convert/from_spaceranger_to_h5mu/config.vsh.yaml" -- \
  --input "$DIR/Visium_FFPE_Human_Ovarian_Cancer_tiny_spaceranger" \
  --output "$DIR/$ID.h5mu"

# Mirror the local folder to S3, skipping YAML files.
# --dryrun only prints what would be done.
aws s3 sync \
  --profile di \
  --exclude "*.yaml" \
  "$DIR" \
  s3://openpipelines-bio/openpipeline_spatial/resources_test/visium \
  --delete \
  --dryrun
|
||||
44
resources_test_scripts/xenium_tiny.sh
Executable file
44
resources_test_scripts/xenium_tiny.sh
Executable file
@@ -0,0 +1,44 @@
|
||||
#!/bin/bash

set -eo pipefail

# Resolve the repository root; all paths below are built from it.
REPO_ROOT="$(git rev-parse --show-toplevel)"

# Absolute target folder, dataset name, and folder for the raw dataset.
DIR="${REPO_ROOT}/resources_test/xenium"
ID="xenium_tiny"
OUT="${DIR}/${ID}"

# Scratch directory: placed under VIASH_TEMP when that is set, /tmp otherwise.
MY_TEMP="${VIASH_TEMP:-/tmp}"
TMPDIR="$(mktemp -d "${MY_TEMP}/${ID}-XXXXXX")"
|
||||
# Remove the scratch directory referenced by TMPDIR, if it still exists.
# Globals: TMPDIR (read)
# Returns: 0 when the directory was removed or was already absent, so that
#          running this from an EXIT trap under `set -e` cannot turn a
#          successful run into a failing one.
function clean_up {
  if [[ -d "$TMPDIR" ]]; then
    # ${TMPDIR:?} aborts instead of expanding to an empty path.
    rm -rf -- "${TMPDIR:?}"
  fi
}
|
||||
# Always remove the scratch directory when the script exits.
trap clean_up EXIT

# Fetch and unpack the tiny dataset only when it is not present yet.
if [[ ! -d "$OUT" ]]; then
  tiny_dataset="https://raw.githubusercontent.com/nf-core/test-datasets/spatialxe/Xenium_Prime_Mouse_Ileum_tiny_outs.zip"
  archive="$TMPDIR/xenium_tiny.zip"
  extract_dir="$TMPDIR/xenium_tiny"

  wget "$tiny_dataset" -O "$archive"
  unzip -q "$archive" -d "$extract_dir"

  # Move the unpacked output bundle out of the scratch area into $OUT.
  mkdir -p "$OUT"
  mv "$extract_dir/Xenium_Prime_Mouse_Ileum_tiny_outs/"* "$OUT/"
fi

# Xenium output bundle -> SpatialData (zarr)
to_zarr_args=(
  --input "$OUT"
  --output "$DIR/$ID.zarr"
)
viash run "$REPO_ROOT/src/convert/from_xenium_to_spatialdata/config.vsh.yaml" -- "${to_zarr_args[@]}"

# SpatialData (zarr) -> H5MU
to_h5mu_args=(
  --input "$DIR/$ID.zarr"
  --output "$DIR/$ID.h5mu"
)
viash run "$REPO_ROOT/src/convert/from_spatialdata_to_h5mu/config.vsh.yaml" -- "${to_h5mu_args[@]}"

# Sync to S3 (--dryrun only prints what would be done)
sync_args=(
  --profile di
  "$DIR"
  s3://openpipelines-bio/openpipeline_spatial/resources_test/xenium
  --delete
  --dryrun
)
aws s3 sync "${sync_args[@]}"
|
||||
Reference in New Issue
Block a user