Build pipeline: viash-hub.htrnaseq.main-mjsqg
Source commit: cb58990a33
Source message: Fix create_report build after R update (#52)
1513 lines
58 KiB
Bash
Executable File
1513 lines
58 KiB
Bash
Executable File
#!/usr/bin/env bash

# combine_star_logs main
#
# This wrapper script is auto-generated by viash 0.9.2 and is thus a derivative
# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data
# Intuitive.
#
# The component may contain files which fall under a different license. The
# authors of this component should specify the license in the header of such
# files, or include a separate license file detailing the licenses of all included
# files.
#
# Component authors:
# * Dries Schaumont (author, maintainer)

# Abort the script as soon as an unguarded command fails.
set -e

# Resolve the temporary directory. An already populated VIASH_TEMP wins;
# otherwise the first non-empty candidate below is used, ending at /tmp.
if [ -z "$VIASH_TEMP" ]; then
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TMP}
  VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR}
  VIASH_TEMP=${VIASH_TEMP:-$TEMP}
  VIASH_TEMP=${VIASH_TEMP:-/tmp}
fi

# define helper functions

# ViashQuote: put quotes around non flag values
# $1     : unquoted string
# return : possibly quoted string
# examples:
#   ViashQuote --foo      # returns --foo
#   ViashQuote bar        # returns 'bar'
#   ViashQuote --foo=bar  # returns --foo='bar'
function ViashQuote {
  if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then
    # flag with a value: only the part after the first '=' is quoted
    printf '%s\n' "$1" | sed "s#=\(.*\)#='\1'#"
  elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then
    # bare flag: printf is used because 'echo -n' would print nothing
    printf '%s\n' "$1"
  else
    printf '%s\n' "'$1'"
  fi
}

# ViashRemoveFlags: Remove leading flag
# $1     : string with a possible leading flag
# return : string without possible leading flag
# examples:
#   ViashRemoveFlags --foo=bar  # returns bar
function ViashRemoveFlags {
  # printf instead of echo so option-like input (e.g. '-n') is passed through
  printf '%s\n' "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//'
}

# ViashSourceDir: return the directory of a bash file, following symlinks
# usage   : ViashSourceDir ${BASH_SOURCE[0]}
# $1      : Should always be set to ${BASH_SOURCE[0]}
# returns : The absolute, physical path of the directory containing the file
function ViashSourceDir {
  local source="$1"
  local dir
  # resolve the chain of symlinks one hop at a time
  while [ -h "$source" ]; do
    dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )"
    source="$(readlink "$source")"
    # a relative link target is relative to the directory holding the link
    if [[ $source != /* ]]; then
      source="$dir/$source"
    fi
  done
  cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd
}

# ViashFindTargetDir: return the directory containing the '.build.yaml' file
# usage   : ViashFindTargetDir 'ScriptPath'
# $1      : The location from where to start the upward search
# returns : The closest ancestor directory (or $1 itself) that holds a
#           '.build.yaml' file, or an empty string when none is found
function ViashFindTargetDir {
  local source="$1"
  local parent
  while [[ "$source" != "" && ! -e "$source/.build.yaml" ]]; do
    parent=${source%/*}
    # a path without '/' cannot be shortened; give up instead of looping forever
    if [[ "$parent" == "$source" ]]; then
      source=""
      break
    fi
    source=$parent
  done
  echo "$source"
}

# Log severity levels, lower is more severe.
# see https://en.wikipedia.org/wiki/Syslog#Severity_level
VIASH_LOGCODE_EMERGENCY=0
VIASH_LOGCODE_ALERT=1
VIASH_LOGCODE_CRITICAL=2
VIASH_LOGCODE_ERROR=3
VIASH_LOGCODE_WARNING=4
VIASH_LOGCODE_NOTICE=5
VIASH_LOGCODE_INFO=6
VIASH_LOGCODE_DEBUG=7
# default verbosity: messages up to and including 'notice' are shown
VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE

# ViashLog: Log events depending on the verbosity level
# usage: ViashLog 1 alert Oh no something went wrong!
# $1: required verbosity level
# $2: display tag
# $3+: messages to display
# stderr: Your input, prepended by '[$2] '. Nothing is printed when
#         VIASH_VERBOSITY is lower than the required level.
function ViashLog {
  local required_level="$1"
  local display_tag="$2"
  shift 2
  if [ "$VIASH_VERBOSITY" -ge "$required_level" ]; then
    >&2 echo "[$display_tag]" "$@"
  fi
}

# ViashEmergency: log events when the system is unstable
# usage: ViashEmergency Oh no something went wrong.
# stderr: Your input, prepended by '[emergency] '.
function ViashEmergency {
  ViashLog "$VIASH_LOGCODE_EMERGENCY" emergency "$@"
}

# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database)
# usage: ViashAlert Oh no something went wrong.
# stderr: Your input, prepended by '[alert] '.
function ViashAlert {
  ViashLog "$VIASH_LOGCODE_ALERT" alert "$@"
}

# ViashCritical: log events when a critical condition occurs
# usage: ViashCritical Oh no something went wrong.
# stderr: Your input, prepended by '[critical] '.
function ViashCritical {
  ViashLog "$VIASH_LOGCODE_CRITICAL" critical "$@"
}

# ViashError: log events when an error condition occurs
# usage: ViashError Oh no something went wrong.
# stderr: Your input, prepended by '[error] '.
function ViashError {
  ViashLog "$VIASH_LOGCODE_ERROR" error "$@"
}

# ViashWarning: log potentially abnormal events
# usage: ViashWarning Something may have gone wrong.
# stderr: Your input, prepended by '[warning] '.
function ViashWarning {
  ViashLog "$VIASH_LOGCODE_WARNING" warning "$@"
}

# ViashNotice: log significant but normal events
# usage: ViashNotice This just happened.
# stderr: Your input, prepended by '[notice] '.
function ViashNotice {
  ViashLog "$VIASH_LOGCODE_NOTICE" notice "$@"
}

# ViashInfo: log normal events
# usage: ViashInfo This just happened.
# stderr: Your input, prepended by '[info] '.
function ViashInfo {
  ViashLog "$VIASH_LOGCODE_INFO" info "$@"
}

# ViashDebug: log all events, for debugging purposes
# usage: ViashDebug This just happened.
# stderr: Your input, prepended by '[debug] '.
function ViashDebug {
  ViashLog "$VIASH_LOGCODE_DEBUG" debug "$@"
}

# find source folder of this component
# (quoted so that an installation path containing spaces survives)
VIASH_META_RESOURCES_DIR=$(ViashSourceDir "${BASH_SOURCE[0]}")

# find the root of the built components & dependencies
VIASH_TARGET_DIR=$(ViashFindTargetDir "$VIASH_META_RESOURCES_DIR")

# define meta fields
VIASH_META_NAME="combine_star_logs"
VIASH_META_FUNCTIONALITY_NAME="combine_star_logs"
VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME"
VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml"
VIASH_META_TEMP_DIR="$VIASH_TEMP"


# initialise variables
VIASH_MODE='run'
VIASH_ENGINE_ID='docker'

######## Helper functions for setting up Docker images for viash ########
# expects: ViashDockerBuild

# ViashDockerInstallationCheck: check whether Docker is installed correctly
#
# Exits the script with status 1 when the 'docker' command cannot be found or
# when 'docker version' reports a failure.
#
# examples:
#   ViashDockerInstallationCheck
function ViashDockerInstallationCheck {
  ViashDebug "Checking whether Docker is installed"
  # 'command -v' must be run as a command, not as an argument of '['
  if ! command -v docker &> /dev/null; then
    ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions."
    exit 1
  fi

  ViashDebug "Checking whether the Docker daemon is running"
  # temporarily disable errexit so that the exit code can be inspected
  local save=$-; set +e
  # declared separately: 'local var=$(cmd)' would report the status of 'local'
  local docker_version
  docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null)
  local out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashCritical "Docker daemon does not seem to be running. Try one of the following:"
    ViashCritical "- Try running 'dockerd' in the command line"
    ViashCritical "- See https://docs.docker.com/config/daemon/"
    exit 1
  fi
}

# ViashDockerRemoteTagCheck: check whether a Docker image is available
# on a remote. Assumes `docker login` has been performed, if relevant.
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerRemoteTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerRemoteTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerRemoteTagCheck {
  docker manifest inspect "$1" > /dev/null 2> /dev/null
}

# ViashDockerLocalTagCheck: check whether a Docker image is available locally
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   docker pull python:latest
#   ViashDockerLocalTagCheck python:latest
#   echo $?                                     # returns '0'
#   ViashDockerLocalTagCheck sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerLocalTagCheck {
  # 'docker images -q' prints nothing when no image matches
  [ -n "$(docker images -q "$1")" ]
}

# ViashDockerPull: pull a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the image was found
# examples:
#   ViashDockerPull python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPull sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPull {
  ViashNotice "Checking if Docker image is available at '$1'"
  if [ "$VIASH_VERBOSITY" -ge "$VIASH_LOGCODE_INFO" ]; then
    # verbose: let docker print its own progress and errors
    if docker pull "$1"; then
      return 0
    else
      return 1
    fi
  else
    # quiet: hide docker output and report a failure as a warning
    local save=$-; set +e
    docker pull "$1" 2> /dev/null > /dev/null
    local out=$?
    [[ $save =~ e ]] && set -e
    if [ $out -ne 0 ]; then
      ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible."
    fi
    return $out
  fi
}

# ViashDockerPush: push a Docker image
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# exit code $?        : whether or not the push succeeded
# examples:
#   ViashDockerPush python:latest
#   echo $?                                     # returns '0'
#   ViashDockerPush sdaizudceahifu
#   echo $?                                     # returns '1'
function ViashDockerPush {
  ViashNotice "Pushing image to '$1'"
  local save=$-; set +e
  local out
  if [ "$VIASH_VERBOSITY" -ge "$VIASH_LOGCODE_INFO" ]; then
    docker push "$1"
    out=$?
  else
    # quiet: hide docker output
    docker push "$1" 2> /dev/null > /dev/null
    out=$?
  fi
  [[ $save =~ e ]] && set -e
  if [ $out -eq 0 ]; then
    ViashNotice "Container '$1' push succeeded."
  else
    ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions."
  fi
  return $out
}

# ViashDockerPullElseBuild: pull a Docker image, else build it
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $...                : additional arguments, forwarded to ViashDockerBuild
# ViashDockerBuild    : a Bash function which builds a docker image, takes image identifier as argument.
# examples:
#   ViashDockerPullElseBuild mynewcomponent
function ViashDockerPullElseBuild {
  local save=$-; set +e
  ViashDockerPull "$1"
  local out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    # forward every argument unchanged, image identifier included
    ViashDockerBuild "$@"
  fi
}

# ViashDockerSetup: create a Docker image, according to specified docker setup strategy
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $2                  : docker setup strategy, see DockerSetupStrategy.scala
# $VIASH_ENGINE_ID    : engine identifier, used to look up the docker build arguments
# Exits with status 1 when the strategy is not recognised.
# examples:
#   ViashDockerSetup mynewcomponent alwaysbuild
function ViashDockerSetup {
  local image_id="$1"
  local setup_strategy="$2"
  # NOTE: $(ViashDockerBuildArgs ...) is left unquoted on purpose; its output is
  # a whitespace-separated list of arguments for 'docker build'.
  case "$setup_strategy" in
    alwaysbuild|build|b)
      ViashDockerBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
      ;;
    alwayspull|pull|p)
      ViashDockerPull "$image_id"
      ;;
    alwayspullelsebuild|pullelsebuild)
      ViashDockerPullElseBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
      ;;
    alwayspullelsecachedbuild|pullelsecachedbuild)
      ViashDockerPullElseBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
      ;;
    alwayscachedbuild|cachedbuild|cb)
      ViashDockerBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
      ;;
    ifneedbe*)
      # only act when the image is not yet available locally
      local save=$-; set +e
      ViashDockerLocalTagCheck "$image_id"
      local outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashInfo "Image $image_id already exists"
      else
        case "$setup_strategy" in
          ifneedbebuild)
            ViashDockerBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
            ;;
          ifneedbecachedbuild)
            ViashDockerBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
            ;;
          ifneedbepull)
            ViashDockerPull "$image_id"
            ;;
          ifneedbepullelsebuild)
            ViashDockerPullElseBuild "$image_id" --no-cache $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
            ;;
          ifneedbepullelsecachedbuild)
            ViashDockerPullElseBuild "$image_id" $(ViashDockerBuildArgs "$VIASH_ENGINE_ID")
            ;;
          *)
            ViashError "Unrecognised Docker strategy: $setup_strategy"
            exit 1
            ;;
        esac
      fi
      ;;
    push|forcepush|alwayspush)
      ViashDockerPush "$image_id"
      ;;
    pushifnotpresent|gentlepush|maybepush)
      # only push when the remote does not have the image yet
      local save=$-; set +e
      ViashDockerRemoteTagCheck "$image_id"
      local outCheck=$?
      [[ $save =~ e ]] && set -e
      if [ $outCheck -eq 0 ]; then
        ViashNotice "Container '$image_id' exists, doing nothing."
      else
        ViashNotice "Container '$image_id' does not yet exist."
        ViashDockerPush "$image_id"
      fi
      ;;
    donothing|meh)
      ViashNotice "Skipping setup."
      ;;
    *)
      ViashError "Unrecognised Docker strategy: $setup_strategy"
      exit 1
      ;;
  esac
}

# ViashDockerCheckCommands: Check whether a docker container has the required commands
#
# $1                  : image identifier with format `[registry/]image[:tag]`
# $@                  : commands to verify being present
# Exits with status 1 when the check inside the container fails.
# examples:
#   ViashDockerCheckCommands bash:4.0 bash ps foo
function ViashDockerCheckCommands {
  local image_id="$1"
  shift 1
  # joined into one string because it is spliced into the 'sh -c' script below
  local commands="$*"
  local save=$-; set +e
  local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0'
  missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done")
  local outCheck=$?
  [[ $save =~ e ]] && set -e
  if [ $outCheck -ne 0 ]; then
    ViashError "Docker container '$image_id' does not contain command '$missing'."
    exit 1
  fi
}

# ViashDockerBuild: build a docker image
# $1                               : image identifier with format `[registry/]image[:tag]`
# $...                             : additional arguments to pass to docker build
# $VIASH_META_TEMP_DIR             : temporary directory to store dockerfile & optional resources in
# $VIASH_META_NAME                 : name of the component
# $VIASH_META_RESOURCES_DIR        : directory containing the resources
# $VIASH_VERBOSITY                 : verbosity level
# exit code $?                     : whether or not the image was built successfully
#                                    (the script exits with status 1 on a failed build)
function ViashDockerBuild {
  local image_id="$1"
  shift 1

  # create temporary directory to store dockerfile & optional resources in
  # (declared separately so that a failing mktemp is not masked by 'local')
  local tmpdir
  tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") || exit 1
  local dockerfile="$tmpdir/Dockerfile"
  # safety net in case the script terminates while the build is running
  function clean_up {
    rm -rf "$tmpdir"
  }
  trap clean_up EXIT

  # store dockerfile and resources
  ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile"

  # generate the build command
  local docker_build_cmd="docker build -t '$image_id' $* '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'"

  # build the container
  ViashNotice "Building container '$image_id' with Dockerfile"
  ViashInfo "$docker_build_cmd"
  local save=$-; set +e
  if [ "$VIASH_VERBOSITY" -ge "$VIASH_LOGCODE_INFO" ]; then
    eval "$docker_build_cmd"
  else
    # quiet: keep the transcript so it can be shown when the build fails
    eval "$docker_build_cmd" &> "$tmpdir/docker_build.log"
  fi

  # check exit code
  local out=$?
  [[ $save =~ e ]] && set -e
  if [ $out -ne 0 ]; then
    ViashError "Error occurred while building container '$image_id'"
    if [ "$VIASH_VERBOSITY" -lt "$VIASH_LOGCODE_INFO" ]; then
      ViashError "Transcript: --------------------------------"
      cat "$tmpdir/docker_build.log"
      ViashError "End of transcript --------------------------"
    fi
    rm -rf "$tmpdir"
    exit 1
  fi

  # remove the build directory now: once this function has returned, the local
  # 'tmpdir' is no longer visible to the EXIT trap
  rm -rf "$tmpdir"
}

######## End of helper functions for setting up Docker images for viash ########

# ViashDockerfile: print the dockerfile to stdout
# $1     : engine identifier
# return : dockerfile required to run this component; nothing is printed for
#          an engine identifier other than 'docker'
# examples:
#   ViashDockerfile docker
function ViashDockerfile {
  local engine_id="$1"

  if [[ "$engine_id" == "docker" ]]; then
    # quoted delimiter: the Dockerfile text is emitted literally, without expansion
    cat << 'VIASHDOCKER'
FROM python:3.12-slim
ENTRYPOINT []
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y procps && \
rm -rf /var/lib/apt/lists/*

RUN pip install --upgrade pip && \
pip install --upgrade --no-cache-dir "pandas"

LABEL org.opencontainers.image.authors="Dries Schaumont"
LABEL org.opencontainers.image.description="Companion container for running component stats combine_star_logs"
LABEL org.opencontainers.image.created="2025-04-29T11:22:39Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="cb58990a33f6ea6e46e474f2095b10218cf08912"
LABEL org.opencontainers.image.version="main"

VIASHDOCKER
  fi
}

# ViashDockerBuildArgs: return the arguments to pass to docker build
# $1     : engine identifier
# return : arguments to pass to docker build (an empty line for the 'docker'
#          engine, nothing at all for any other identifier)
function ViashDockerBuildArgs {
  local engine_id="$1"

  if [[ "$engine_id" == "docker" ]]; then
    echo ""
  fi
}

# ViashAbsolutePath: generate absolute path from relative path
# borrowed from https://stackoverflow.com/a/21951256
# $1     : relative filename
# return : absolute path, with '.', '..' and empty components resolved
#          textually (symlinks are not followed)
# examples:
#   ViashAbsolutePath some_file.txt   # returns /path/to/some_file.txt
#   ViashAbsolutePath /foo/bar/..     # returns /foo
function ViashAbsolutePath {
  local thePath
  local -a parr=()
  local -a outp=()
  local part
  local last
  if [[ "$1" == /* ]]; then
    thePath="$1"
  else
    thePath="$PWD/$1"
  fi
  # split on '/'; -r keeps backslashes in file names intact
  IFS=/ read -r -a parr <<< "$thePath"
  for part in "${parr[@]}"; do
    case "$part" in
      ''|.)
        ;;
      ..)
        # drop the previous component; '..' at the root is ignored
        if (( ${#outp[@]} > 0 )); then
          last=$(( ${#outp[@]} - 1 ))
          unset "outp[$last]"
        fi
        ;;
      *)
        outp+=("$part")
        ;;
    esac
  done
  # "${outp[*]}" joins the components with the first character of IFS
  local IFS=/
  printf '/%s\n' "${outp[*]}"
}

# ViashDockerAutodetectMount: auto configuring docker mounts from parameters
# $1                             : The parameter value
# returns                        : New parameter
# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts
# examples:
#   ViashDockerAutodetectMount /path/to/bar      # returns '/viash_automount/path/to/bar'
#   ViashDockerAutodetectMountArg /path/to/bar   # returns '--volume="/path/to:/viash_automount/path/to"'
function ViashDockerAutodetectMount {
  local abs_path
  abs_path=$(ViashAbsolutePath "$1")
  local mount_source
  local base_name
  if [ -d "$abs_path" ]; then
    # an existing directory is mounted as a whole
    mount_source="$abs_path"
    base_name=""
  else
    # anything else: mount the parent directory and re-append the file name
    mount_source=$(dirname "$abs_path")
    base_name=$(basename "$abs_path")
  fi
  local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source"
  if [ -z "$base_name" ]; then
    echo "$mount_target"
  else
    echo "$mount_target/$base_name"
  fi
}

# ViashDockerAutodetectMountArg: print the docker '--volume' argument for a parameter value
# $1                             : The parameter value
# returns                        : '--volume="<source>:<target>"', where the source is the
#                                  value itself when it is an existing directory and its
#                                  parent directory otherwise
# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts
function ViashDockerAutodetectMountArg {
  local abs_path
  abs_path=$(ViashAbsolutePath "$1")
  local mount_source
  local base_name
  if [ -d "$abs_path" ]; then
    mount_source="$abs_path"
    base_name=""
  else
    mount_source=$(dirname "$abs_path")
    base_name=$(basename "$abs_path")
  fi
  local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source"
  ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target"
  echo "--volume=\"$mount_source:$mount_target\""
}

# ViashDockerStripAutomount: remove the automount prefix from a path
# $1                             : A path, possibly starting with the automount prefix
# returns                        : The absolute path without the leading prefix
# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be removed
function ViashDockerStripAutomount {
  local abs_path
  abs_path=$(ViashAbsolutePath "$1")
  # the prefix is quoted so that it is matched literally, not as a glob pattern
  echo "${abs_path#"$VIASH_DOCKER_AUTOMOUNT_PREFIX"}"
}

# initialise variables
VIASH_DIRECTORY_MOUNTS=()

# configure default docker automount prefix if it is unset
# (a prefix that is set but empty is respected)
if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then
  VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount"
fi

# initialise docker variables
VIASH_DOCKER_RUN_ARGS=(-i --rm)


# ViashHelp: Display helpful explanation about this executable
# stdout: the component name and version, followed by a description of every
#         component argument and of the viash built-in options
function ViashHelp {
  echo "combine_star_logs main"
  echo ""
  echo "Arguments:"
  echo " --barcodes"
  echo " type: string, required parameter, multiple values allowed"
  echo " Barcodes responding to the respective log files."
  echo ""
  echo " --star_logs"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " example: Log.final.out"
  echo " Paths to the STAR log files (most frequently called Log.final.out)"
  echo ""
  echo " --gene_summary_logs"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " example: Summary.txt"
  echo " Paths to the Summary.csv files from the STAR Solo output. Can be found"
  echo " in"
  echo " the 'Solo.out/Gene' folder relative to the root of the STAR output"
  echo " directory."
  echo ""
  echo " --reads_per_gene_logs"
  echo " type: file, required parameter, multiple values allowed, file must exist"
  echo " Paths to the 'ReadsPerGene.out.tab' files as output by STAR."
  echo ""
  echo " --output"
  echo " type: file, output, file must exist"
  echo " default: starLogs.txt"
  echo " Tab-delimited file describing for each barcode (as the rows), the"
  echo " metrics (as columns)"
  echo " gathered from the different input files."
  echo ""
  echo "Viash built in Computational Requirements:"
  echo " ---cpus=INT"
  echo " Number of CPUs to use"
  echo " ---memory=STRING"
  echo " Amount of memory to use. Examples: 4GB, 3MiB."
  echo ""
  echo "Viash built in Docker:"
  echo " ---setup=STRATEGY"
  echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing."
  echo " Default: ifneedbepullelsecachedbuild"
  echo " ---dockerfile"
  echo " Print the dockerfile to stdout."
  echo " ---docker_run_args=ARG"
  echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information."
  echo " ---docker_image_id"
  echo " Print the docker image id to stdout."
  echo " ---debug"
  echo " Enter the docker container for debugging purposes."
  echo ""
  echo "Viash built in Engines:"
  echo " ---engine=ENGINE_ID"
  echo " Specify the engine to use. Options are: docker, native."
  echo " Default: docker"
}

# initialise the collected positional arguments
# (a string of shell-quoted words that is eval'd after the loop)
VIASH_POSITIONAL_ARGS=''

# Parse the command line.
#  - component parameters ('--name value' or '--name=value') end up in VIASH_PAR_*;
#    repeated multi-value parameters are joined with ';'
#  - viash built-in options ('---name') set VIASH_* control variables
#  - everything else is collected as a positional argument
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      ViashHelp
      exit
      ;;
    ---v|---verbose)
      VIASH_VERBOSITY=$((VIASH_VERBOSITY + 1))
      shift 1
      ;;
    ---verbosity)
      VIASH_VERBOSITY="$2"
      shift 2
      ;;
    ---verbosity=*)
      VIASH_VERBOSITY="$(ViashRemoveFlags "$1")"
      shift 1
      ;;
    --version)
      echo "combine_star_logs main"
      exit
      ;;
    --barcodes)
      # the value must be present before it is consumed
      if [ $# -lt 2 ]; then
        ViashError "Not enough arguments passed to --barcodes. Use --help to get more information on the parameters."
        exit 1
      fi
      if [ -z "$VIASH_PAR_BARCODES" ]; then
        VIASH_PAR_BARCODES="$2"
      else
        VIASH_PAR_BARCODES="$VIASH_PAR_BARCODES;""$2"
      fi
      shift 2
      ;;
    --barcodes=*)
      if [ -z "$VIASH_PAR_BARCODES" ]; then
        VIASH_PAR_BARCODES=$(ViashRemoveFlags "$1")
      else
        VIASH_PAR_BARCODES="$VIASH_PAR_BARCODES;"$(ViashRemoveFlags "$1")
      fi
      shift 1
      ;;
    --star_logs)
      if [ $# -lt 2 ]; then
        ViashError "Not enough arguments passed to --star_logs. Use --help to get more information on the parameters."
        exit 1
      fi
      if [ -z "$VIASH_PAR_STAR_LOGS" ]; then
        VIASH_PAR_STAR_LOGS="$2"
      else
        VIASH_PAR_STAR_LOGS="$VIASH_PAR_STAR_LOGS;""$2"
      fi
      shift 2
      ;;
    --star_logs=*)
      if [ -z "$VIASH_PAR_STAR_LOGS" ]; then
        VIASH_PAR_STAR_LOGS=$(ViashRemoveFlags "$1")
      else
        VIASH_PAR_STAR_LOGS="$VIASH_PAR_STAR_LOGS;"$(ViashRemoveFlags "$1")
      fi
      shift 1
      ;;
    --gene_summary_logs)
      if [ $# -lt 2 ]; then
        ViashError "Not enough arguments passed to --gene_summary_logs. Use --help to get more information on the parameters."
        exit 1
      fi
      if [ -z "$VIASH_PAR_GENE_SUMMARY_LOGS" ]; then
        VIASH_PAR_GENE_SUMMARY_LOGS="$2"
      else
        VIASH_PAR_GENE_SUMMARY_LOGS="$VIASH_PAR_GENE_SUMMARY_LOGS;""$2"
      fi
      shift 2
      ;;
    --gene_summary_logs=*)
      if [ -z "$VIASH_PAR_GENE_SUMMARY_LOGS" ]; then
        VIASH_PAR_GENE_SUMMARY_LOGS=$(ViashRemoveFlags "$1")
      else
        VIASH_PAR_GENE_SUMMARY_LOGS="$VIASH_PAR_GENE_SUMMARY_LOGS;"$(ViashRemoveFlags "$1")
      fi
      shift 1
      ;;
    --reads_per_gene_logs)
      if [ $# -lt 2 ]; then
        ViashError "Not enough arguments passed to --reads_per_gene_logs. Use --help to get more information on the parameters."
        exit 1
      fi
      if [ -z "$VIASH_PAR_READS_PER_GENE_LOGS" ]; then
        VIASH_PAR_READS_PER_GENE_LOGS="$2"
      else
        VIASH_PAR_READS_PER_GENE_LOGS="$VIASH_PAR_READS_PER_GENE_LOGS;""$2"
      fi
      shift 2
      ;;
    --reads_per_gene_logs=*)
      if [ -z "$VIASH_PAR_READS_PER_GENE_LOGS" ]; then
        VIASH_PAR_READS_PER_GENE_LOGS=$(ViashRemoveFlags "$1")
      else
        VIASH_PAR_READS_PER_GENE_LOGS="$VIASH_PAR_READS_PER_GENE_LOGS;"$(ViashRemoveFlags "$1")
      fi
      shift 1
      ;;
    --output)
      if [ -n "$VIASH_PAR_OUTPUT" ]; then
        ViashError "Bad arguments for option '--output': '$VIASH_PAR_OUTPUT' & '$2' - you should provide exactly one argument for this option."
        exit 1
      fi
      if [ $# -lt 2 ]; then
        ViashError "Not enough arguments passed to --output. Use --help to get more information on the parameters."
        exit 1
      fi
      VIASH_PAR_OUTPUT="$2"
      shift 2
      ;;
    --output=*)
      if [ -n "$VIASH_PAR_OUTPUT" ]; then
        # report the value that was actually supplied with this flag
        ViashError "Bad arguments for option '--output=*': '$VIASH_PAR_OUTPUT' & '$(ViashRemoveFlags "$1")' - you should provide exactly one argument for this option."
        exit 1
      fi
      VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1")
      shift 1
      ;;
    ---engine)
      VIASH_ENGINE_ID="$2"
      shift 2
      ;;
    ---engine=*)
      VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")"
      shift 1
      ;;
    ---setup)
      VIASH_MODE='setup'
      VIASH_SETUP_STRATEGY="$2"
      shift 2
      ;;
    ---setup=*)
      VIASH_MODE='setup'
      VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")"
      shift 1
      ;;
    ---dockerfile)
      VIASH_MODE='dockerfile'
      shift 1
      ;;
    ---docker_run_args)
      VIASH_DOCKER_RUN_ARGS+=("$2")
      shift 2
      ;;
    ---docker_run_args=*)
      VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")")
      shift 1
      ;;
    ---docker_image_id)
      VIASH_MODE='docker_image_id'
      shift 1
      ;;
    ---debug)
      VIASH_MODE='debug'
      shift 1
      ;;
    ---cpus)
      if [ -n "$VIASH_META_CPUS" ]; then
        ViashError "Bad arguments for option '---cpus': '$VIASH_META_CPUS' & '$2' - you should provide exactly one argument for this option."
        exit 1
      fi
      if [ $# -lt 2 ]; then
        ViashError "Not enough arguments passed to ---cpus. Use --help to get more information on the parameters."
        exit 1
      fi
      VIASH_META_CPUS="$2"
      shift 2
      ;;
    ---cpus=*)
      if [ -n "$VIASH_META_CPUS" ]; then
        ViashError "Bad arguments for option '---cpus=*': '$VIASH_META_CPUS' & '$(ViashRemoveFlags "$1")' - you should provide exactly one argument for this option."
        exit 1
      fi
      VIASH_META_CPUS=$(ViashRemoveFlags "$1")
      shift 1
      ;;
    ---memory)
      if [ -n "$VIASH_META_MEMORY" ]; then
        ViashError "Bad arguments for option '---memory': '$VIASH_META_MEMORY' & '$2' - you should provide exactly one argument for this option."
        exit 1
      fi
      if [ $# -lt 2 ]; then
        ViashError "Not enough arguments passed to ---memory. Use --help to get more information on the parameters."
        exit 1
      fi
      VIASH_META_MEMORY="$2"
      shift 2
      ;;
    ---memory=*)
      if [ -n "$VIASH_META_MEMORY" ]; then
        ViashError "Bad arguments for option '---memory=*': '$VIASH_META_MEMORY' & '$(ViashRemoveFlags "$1")' - you should provide exactly one argument for this option."
        exit 1
      fi
      VIASH_META_MEMORY=$(ViashRemoveFlags "$1")
      shift 1
      ;;
    *)  # positional arg or unknown option
      # the positional args are eval'd below, so every value is shell-quoted
      # with %q; this also keeps values containing a single quote intact
      VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS $(printf '%q' "$1")"
      if [[ $1 == -* ]]; then
        ViashWarning "$1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use --help to get more information on the parameters."
      fi
      shift # past argument
      ;;
  esac
done

# parse positional parameters
eval "set -- $VIASH_POSITIONAL_ARGS"


# map the selected engine identifier onto its engine type
case "$VIASH_ENGINE_ID" in
  native)
    VIASH_ENGINE_TYPE='native'
    ;;
  docker)
    VIASH_ENGINE_TYPE='docker'
    ;;
  *)
    ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native."
    exit 1
    ;;
esac

# Docker engine: verify the installation, then either handle one of the
# special modes (each of which ends the script) or make sure the image is
# available for a normal run.
if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
  # check if docker is installed properly
  ViashDockerInstallationCheck

  # determine docker image id
  if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then
    VIASH_DOCKER_IMAGE_ID='images.viash-hub.com/vsh/htrnaseq/stats/combine_star_logs:main'
  fi

  case "$VIASH_MODE" in
    dockerfile)
      # print dockerfile
      ViashDockerfile "$VIASH_ENGINE_ID"
      exit 0
      ;;
    docker_image_id)
      echo "$VIASH_DOCKER_IMAGE_ID"
      exit 0
      ;;
    debug)
      # enter docker container
      # the command is kept as a string because it is also logged verbatim
      VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[*]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID"
      ViashNotice "+ $VIASH_CMD"
      eval "$VIASH_CMD"
      exit
      ;;
    setup)
      # build docker image
      ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY"
      ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash'
      exit 0
      ;;
  esac

  # check if docker image exists
  ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild
  ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'ps' 'bash'
fi

# setting computational defaults

# helper function for parsing memory strings
# ViashMemoryAsBytes: convert a memory string to a number of bytes
# $1     : amount of memory: an integer followed by a unit (b, k/kb, ki/kib,
#          m/mb, ... up to p/pb, pi/pib); case and whitespace are ignored
# return : the amount in bytes, or nothing when the string cannot be parsed
# examples:
#   ViashMemoryAsBytes 4GB    # returns 4000000000
#   ViashMemoryAsBytes 3MiB   # returns 3145728
function ViashMemoryAsBytes {
  local memory
  memory=$(echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]')
  local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$'
  if [[ $memory =~ $memory_regex ]]; then
    # 10# forces base 10, so a leading zero is not read as octal
    local number=$(( 10#${BASH_REMATCH[1]} ))
    local symbol=${BASH_REMATCH[2]}
    local memory_b

    case $symbol in
      b)      memory_b=$number ;;
      kb|k)   memory_b=$(( number * 1000 )) ;;
      mb|m)   memory_b=$(( number * 1000 * 1000 )) ;;
      gb|g)   memory_b=$(( number * 1000 * 1000 * 1000 )) ;;
      tb|t)   memory_b=$(( number * 1000 * 1000 * 1000 * 1000 )) ;;
      pb|p)   memory_b=$(( number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;;
      kib|ki) memory_b=$(( number * 1024 )) ;;
      mib|mi) memory_b=$(( number * 1024 * 1024 )) ;;
      gib|gi) memory_b=$(( number * 1024 * 1024 * 1024 )) ;;
      tib|ti) memory_b=$(( number * 1024 * 1024 * 1024 * 1024 )) ;;
      pib|pi) memory_b=$(( number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;;
    esac
    echo "$memory_b"
  fi
}

# compute memory in different units
# Runs only when a memory limit was given. Every derived unit is rounded up
# from the next smaller one, so a derived value never under-reports the request.
if [ -n "${VIASH_META_MEMORY+x}" ]; then
  # Quoted so that a value with inner whitespace (e.g. "4 GB") reaches the
  # helper as a single argument; the helper strips the whitespace itself.
  VIASH_META_MEMORY_B=$(ViashMemoryAsBytes "$VIASH_META_MEMORY")
  # do not define other variables if memory_b is an empty string
  if [ -n "$VIASH_META_MEMORY_B" ]; then
    VIASH_META_MEMORY_KB=$(( (VIASH_META_MEMORY_B + 999) / 1000 ))
    VIASH_META_MEMORY_MB=$(( (VIASH_META_MEMORY_KB + 999) / 1000 ))
    VIASH_META_MEMORY_GB=$(( (VIASH_META_MEMORY_MB + 999) / 1000 ))
    VIASH_META_MEMORY_TB=$(( (VIASH_META_MEMORY_GB + 999) / 1000 ))
    VIASH_META_MEMORY_PB=$(( (VIASH_META_MEMORY_TB + 999) / 1000 ))
    VIASH_META_MEMORY_KIB=$(( (VIASH_META_MEMORY_B + 1023) / 1024 ))
    VIASH_META_MEMORY_MIB=$(( (VIASH_META_MEMORY_KIB + 1023) / 1024 ))
    VIASH_META_MEMORY_GIB=$(( (VIASH_META_MEMORY_MIB + 1023) / 1024 ))
    VIASH_META_MEMORY_TIB=$(( (VIASH_META_MEMORY_GIB + 1023) / 1024 ))
    VIASH_META_MEMORY_PIB=$(( (VIASH_META_MEMORY_TIB + 1023) / 1024 ))
  else
    # unset memory if string is empty
    # unset takes the variable NAME. Passing the (empty) value expands to a bare
    # "unset", which leaves the variable set-but-empty so that later
    # ${VIASH_META_MEMORY_B+x} checks still treat it as provided.
    unset VIASH_META_MEMORY_B
  fi
fi
|
|
# unset nproc if string is empty
# An empty cpus value counts as "not provided". unset needs the variable NAME:
# passing the (empty) value expands to a bare "unset" and leaves the variable
# set-but-empty, which later ${VIASH_META_CPUS+x} checks read as provided.
if [ -z "$VIASH_META_CPUS" ]; then
  unset VIASH_META_CPUS
fi
|
|
|
|
|
|
# check whether required parameters exist
# Each guard stops the script as soon as its variable is unset. A variable that
# is set to the empty string passes, because only "set or not" is tested.
[[ -n "${VIASH_PAR_BARCODES+x}" ]] || {
  ViashError '--barcodes' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -n "${VIASH_PAR_STAR_LOGS+x}" ]] || {
  ViashError '--star_logs' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -n "${VIASH_PAR_GENE_SUMMARY_LOGS+x}" ]] || {
  ViashError '--gene_summary_logs' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -n "${VIASH_PAR_READS_PER_GENE_LOGS+x}" ]] || {
  ViashError '--reads_per_gene_logs' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -n "${VIASH_META_NAME+x}" ]] || {
  ViashError 'name' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -n "${VIASH_META_FUNCTIONALITY_NAME+x}" ]] || {
  ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -n "${VIASH_META_RESOURCES_DIR+x}" ]] || {
  ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -n "${VIASH_META_EXECUTABLE+x}" ]] || {
  ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -n "${VIASH_META_CONFIG+x}" ]] || {
  ViashError 'config' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -n "${VIASH_META_TEMP_DIR+x}" ]] || {
  ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters.
  exit 1
}
|
|
|
|
# filling in defaults
# The "=" form assigns only when the variable is unset, so an output that was
# explicitly given as an empty string is left as it is.
: "${VIASH_PAR_OUTPUT=starLogs.txt}"
|
|
|
|
# check whether required files exist
# Every list is a ';'-separated string. Globbing is switched off while a list is
# split so wildcard characters in a path are taken literally, and IFS is
# dropped again on the first iteration so the loop body runs with default
# splitting.
for viash_input_list in "$VIASH_PAR_STAR_LOGS" "$VIASH_PAR_GENE_SUMMARY_LOGS" "$VIASH_PAR_READS_PER_GENE_LOGS"; do
  if [[ -n "$viash_input_list" ]]; then
    IFS=';'
    set -f
    for file in $viash_input_list; do
      unset IFS
      [[ -e "$file" ]] || {
        ViashError "Input file '$file' does not exist."
        exit 1
      }
    done
    set +f
  fi
done
unset viash_input_list
|
|
|
|
# check whether parameters values are of the right type
# Each guard lets an empty value through and otherwise insists on an optionally
# signed run of digits; the first value that fails ends the script.
[[ -z "$VIASH_META_CPUS" || "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -z "$VIASH_META_MEMORY_B" || "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -z "$VIASH_META_MEMORY_KB" || "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -z "$VIASH_META_MEMORY_MB" || "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -z "$VIASH_META_MEMORY_GB" || "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -z "$VIASH_META_MEMORY_TB" || "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -z "$VIASH_META_MEMORY_PB" || "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -z "$VIASH_META_MEMORY_KIB" || "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -z "$VIASH_META_MEMORY_MIB" || "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -z "$VIASH_META_MEMORY_GIB" || "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -z "$VIASH_META_MEMORY_TIB" || "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters.
  exit 1
}
[[ -z "$VIASH_META_MEMORY_PIB" || "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]] || {
  ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters.
  exit 1
}
|
|
|
|
# create parent directories of output files, if so desired
# Nothing happens for an empty output path; otherwise the directory that will
# hold the output file is created when it is not there yet.
if [[ -n "$VIASH_PAR_OUTPUT" ]]; then
  viash_output_parent=$(dirname "$VIASH_PAR_OUTPUT")
  [[ -d "$viash_output_parent" ]] || mkdir -p "$viash_output_parent"
  unset viash_output_parent
fi
|
|
|
|
# The native engine simply pipes the script into bash, and "run" is the only
# mode it supports.
if [[ "$VIASH_ENGINE_ID" == "native" ]]; then
  case "$VIASH_MODE" in
    run)
      VIASH_CMD="bash"
      ;;
    *)
      ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'."
      exit 1
      ;;
  esac
fi
|
|
|
|
# Docker engine only: work out which mounts the container needs and rewrite
# every file argument to the value returned by ViashDockerAutodetectMount
# (presumably the container-side path; the helper is defined elsewhere).
if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
  # detect volumes from file arguments
  # Paths collected here are handed to chown when the script exits.
  VIASH_CHOWN_VARS=()
if [ ! -z "$VIASH_PAR_STAR_LOGS" ]; then
  # Multi-value argument: split on ';', convert each entry, re-join with ';'.
  # NOTE(review): unlike the input-file existence checks earlier in this script,
  # globbing is not disabled (no set -f) while the list is split.
  VIASH_TEST_STAR_LOGS=()
  IFS=';'
  for var in $VIASH_PAR_STAR_LOGS; do
    # the list has been split by now, so go back to default word splitting
    unset IFS
    VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" )
    var=$(ViashDockerAutodetectMount "$var")
    VIASH_TEST_STAR_LOGS+=( "$var" )
  done
  # IFS is changed inside the command substitution only, to join with ';'
  VIASH_PAR_STAR_LOGS=$(IFS=';' ; echo "${VIASH_TEST_STAR_LOGS[*]}")
fi
if [ ! -z "$VIASH_PAR_GENE_SUMMARY_LOGS" ]; then
  # same split / convert / re-join treatment as for the star logs
  VIASH_TEST_GENE_SUMMARY_LOGS=()
  IFS=';'
  for var in $VIASH_PAR_GENE_SUMMARY_LOGS; do
    unset IFS
    VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" )
    var=$(ViashDockerAutodetectMount "$var")
    VIASH_TEST_GENE_SUMMARY_LOGS+=( "$var" )
  done
  VIASH_PAR_GENE_SUMMARY_LOGS=$(IFS=';' ; echo "${VIASH_TEST_GENE_SUMMARY_LOGS[*]}")
fi
if [ ! -z "$VIASH_PAR_READS_PER_GENE_LOGS" ]; then
  # same split / convert / re-join treatment as for the star logs
  VIASH_TEST_READS_PER_GENE_LOGS=()
  IFS=';'
  for var in $VIASH_PAR_READS_PER_GENE_LOGS; do
    unset IFS
    VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$var")" )
    var=$(ViashDockerAutodetectMount "$var")
    VIASH_TEST_READS_PER_GENE_LOGS+=( "$var" )
  done
  VIASH_PAR_READS_PER_GENE_LOGS=$(IFS=';' ; echo "${VIASH_TEST_READS_PER_GENE_LOGS[*]}")
fi
if [ ! -z "$VIASH_PAR_OUTPUT" ]; then
  VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" )
  VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT")
  # the output is the only path registered for the ownership fix at exit
  VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" )
fi
# The meta paths below are single values: mount and convert, nothing to split.
if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then
  VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" )
  VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR")
fi
if [ ! -z "$VIASH_META_EXECUTABLE" ]; then
  VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" )
  VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE")
fi
if [ ! -z "$VIASH_META_CONFIG" ]; then
  VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" )
  VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG")
fi
if [ ! -z "$VIASH_META_TEMP_DIR" ]; then
  VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" )
  VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR")
fi

  # get unique mounts
  # The unquoted command substitution word-splits the sorted output, so a mount
  # argument containing whitespace ends up as several array elements.
  VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u))
fi
|
|
|
|
if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
  # change file ownership
  # ViashPerformChown: hand the registered output paths back to the calling user
  # Globals : VIASH_CHOWN_VARS (read), VIASH_UNIQUE_MOUNTS (read),
  #           VIASH_DOCKER_IMAGE_ID (read), VIASH_CMD (overwritten)
  # Starts a throw-away container that runs chown with the uid and gid of the
  # user invoking this script on every path in VIASH_CHOWN_VARS. Does nothing
  # when no path was registered. Installed as the EXIT trap below.
  function ViashPerformChown {
    if (( ${#VIASH_CHOWN_VARS[@]} )); then
      # best effort: a failing chown must not abort the exit handler
      set +e
      VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'"
      ViashDebug "+ $VIASH_CMD"
      # Quoted so the command string is not word-split or globbed by the shell
      # before eval parses it.
      eval "$VIASH_CMD"
      set -e
    fi
  }
  trap ViashPerformChown EXIT
fi
|
|
|
|
# Docker engine only: turn the resource limits into extra run arguments and
# assemble the command the component script is piped into.
if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
  # helper function for filling in extra docker args
  [[ -z "$VIASH_META_MEMORY_B" ]] || VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}")
  [[ -z "$VIASH_META_CPUS" ]] || VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}")

  VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID"
fi
|
|
|
|
|
|
# set dependency paths
|
|
|
|
|
|
ViashDebug "Running command: $(echo $VIASH_CMD)"
|
|
cat << VIASHEOF | eval $VIASH_CMD
|
|
set -e
|
|
tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-combine_star_logs-XXXXXX").py
|
|
function clean_up {
|
|
rm "\$tempscript"
|
|
}
|
|
function interrupt {
|
|
echo -e "\nCTRL-C Pressed..."
|
|
exit 1
|
|
}
|
|
trap clean_up EXIT
|
|
trap interrupt INT SIGINT
|
|
cat > "\$tempscript" << 'VIASHMAIN'
|
|
import logging
|
|
import pandas as pd
|
|
from itertools import batched, starmap
|
|
|
|
### VIASH START
|
|
# The following code has been auto-generated by Viash.
|
|
par = {
|
|
'barcodes': $( if [ ! -z ${VIASH_PAR_BARCODES+x} ]; then echo "r'${VIASH_PAR_BARCODES//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ),
|
|
'star_logs': $( if [ ! -z ${VIASH_PAR_STAR_LOGS+x} ]; then echo "r'${VIASH_PAR_STAR_LOGS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ),
|
|
'gene_summary_logs': $( if [ ! -z ${VIASH_PAR_GENE_SUMMARY_LOGS+x} ]; then echo "r'${VIASH_PAR_GENE_SUMMARY_LOGS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ),
|
|
'reads_per_gene_logs': $( if [ ! -z ${VIASH_PAR_READS_PER_GENE_LOGS+x} ]; then echo "r'${VIASH_PAR_READS_PER_GENE_LOGS//\'/\'\"\'\"r\'}'.split(';')"; else echo None; fi ),
|
|
'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi )
|
|
}
|
|
meta = {
|
|
'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
|
'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
|
'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
|
'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
|
'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
|
'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ),
|
|
'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ),
|
|
'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ),
|
|
'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ),
|
|
'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ),
|
|
'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ),
|
|
'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ),
|
|
'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ),
|
|
'memory_kib': $( if [ ! -z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\'/\'\"\'\"r\'}')"; else echo None; fi ),
|
|
'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\'/\'\"\'\"r\'}')"; else echo None; fi ),
|
|
'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\'/\'\"\'\"r\'}')"; else echo None; fi ),
|
|
'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\'/\'\"\'\"r\'}')"; else echo None; fi ),
|
|
'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\'/\'\"\'\"r\'}')"; else echo None; fi )
|
|
}
|
|
dep = {
|
|
|
|
}
|
|
|
|
### VIASH END
|
|
|
|
logger = logging.getLogger()
|
|
console_handler = logging.StreamHandler()
|
|
logger.addHandler(console_handler)
|
|
logger.setLevel(logging.DEBUG)
|
|
|
|
|
|
def handle_percentages(column_value):
    """Remove percent signs from both ends of a parsed value.

    Falsy input (for example an empty string) is handed back unchanged.
    """
    # TODO: handle this more gracefully
    return column_value.strip('%') if column_value else column_value
|
|
|
|
def star_log_to_dataframe(barcode: str, log_path) -> pd.DataFrame:
    """Load one STAR log file into a single-column frame.

    The first three lines of the file are skipped and blank lines are ignored.
    Each remaining line is split into a "Category" label, which becomes the
    index, and a "Value" that is passed through handle_percentages.

    Args:
        barcode: Barcode the log belongs to; only used in the log messages.
        log_path: Location of the STAR log, handed to pandas.read_table as is.

    Returns:
        A frame indexed by "Category" with one "Value" column.
    """
    logger.info("Reading STAR log %s for barcode '%s'", log_path, barcode)
    # The backslashes are doubled because this script is emitted through an
    # unquoted shell here-document, which halves them again.
    reader_options = {
        "sep": r"\\|\\t+",
        "converters": {"Value": handle_percentages},
        "engine": "python",
        "header": None,
        "skip_blank_lines": True,
        "skipinitialspace": True,
        "names": ["Category", "Value"],
        "index_col": 0,
        "skiprows": [0, 1, 2],
    }
    star_log = pd.read_table(log_path, **reader_options)
    n_rows, n_columns = star_log.shape
    logger.info("Read %d row(s) and %d column(s) from STAR logs at %s",
                n_rows, n_columns, log_path)
    return star_log
|
|
|
|
|
|
def summary_to_dataframe(barcode: str, summary_path) -> pd.DataFrame:
    """Load a two-column, comma-separated summary file as strings.

    Args:
        barcode: Barcode the summary belongs to; only used in the log messages.
        summary_path: Location of the summary, handed to pandas.read_table as is.

    Returns:
        A frame indexed by "Category" with one "Value" column; nothing is
        converted to a numeric type here.
    """
    logger.info("Reading summary log %s for barcode %s", summary_path, barcode)
    summary = pd.read_table(
        summary_path,
        sep=",",
        header=None,
        names=["Category", "Value"],
        index_col=0,
        dtype=pd.StringDtype(),
    )
    n_rows, n_columns = summary.shape
    logger.info("Read %d row(s) and %d column(s) from summary file at %s",
                n_rows, n_columns, summary_path)
    return summary
|
|
|
|
|
|
def reads_per_gene_to_dataframe(barcode, read_per_gene_path) -> pd.DataFrame:
    """Sum the unstranded counts of a reads-per-gene table.

    The first four lines of the file are skipped; the rest is read as
    tab-separated geneID, Unstranded, posStrand and negStrand columns.

    Args:
        barcode: Barcode the table belongs to; only used in the log messages.
        read_per_gene_path: Location of the table, handed to pandas.read_table.

    Returns:
        A one-row frame with index "Category", whose single entry
        "NumberOfCountedReads" holds the summed "Unstranded" column under
        "Value".
    """
    logger.info("Reading reads per gene file %s for barcode %s", read_per_gene_path, barcode)
    column_types = {
        "geneID": pd.StringDtype(),
        "Unstranded": pd.Int64Dtype(),
        "posStrand": pd.Int64Dtype(),
        "negStrand": pd.Int64Dtype(),
    }
    per_gene = pd.read_table(read_per_gene_path, skiprows=[0, 1, 2, 3], header=None, sep="\\t",
                             dtype=column_types, index_col=0, names=list(column_types))
    # A list of labels keeps this a DataFrame; a single label would give a Series.
    unstranded = per_gene[["Unstranded"]]
    totals = pd.DataFrame({"Value": unstranded.sum()})
    totals = totals.rename({"Unstranded": "NumberOfCountedReads"}, errors="raise")
    totals.index.name = "Category"
    n_rows, n_columns = totals.shape
    logger.info("Read %d row(s) and %d column(s) from reads per gene file at %s",
                n_rows, n_columns, read_per_gene_path)
    return totals
|
|
|
|
def star_log_remove_unwanted_entries_and_adjust_format(barcode, df: pd.DataFrame) -> pd.DataFrame:
    """
    Trim a single STAR log (Log.final.out) in dataframe form down to the
    metrics that are reported and normalise their labels.

    - Labels ending with ':' are dropped
      (mostly section separators like 'MULTI-MAPPING READS:' and 'UNMAPPED READS:').
    - Metrics that are not needed are dropped, based on these keywords:
      Mapping speed, Average, Number of splices, per base, chimeric reads, average
    - In the remaining labels '%' becomes 'pect', ':' is removed, the first
      letter of every word is upper-cased and spaces are removed.
    - 'UniquelyMappedReadsNumber' and 'UniquelyMappedReadsPect' are renamed to
      'NumberOfMappedReads' and 'PctMappedReads'; a KeyError is raised when
      either label is missing.

    The input dataframe must have a single-level index holding the metric names.
    """
    logger.info("Filtering STAR logs for barcode %s. Starting with %d row(s) and %d column(s)", barcode, *df.shape)
    labels = df.index.to_series()
    # Rows like 'MULTI-MAPPING READS:' or 'UNIQUE READS:' are section headers.
    is_section_header = labels.str.endswith(":")
    # Any label containing one of these substrings is dropped as well.
    regex_columns_to_remove = "Mapping speed|Average|Number of splices|per base|chimeric reads|average"
    is_unwanted_metric = labels.str.contains(regex_columns_to_remove, regex=True)
    to_keep = ~is_section_header & ~is_unwanted_metric
    removed_labels = to_keep.index[~to_keep].to_list()
    logger.info("Removed the following log entries for barcode '%s':\\n\\t%s",
                barcode,
                "\\n\\t".join(removed_labels))
    result = df.loc[to_keep]

    # Do not reach for .title() to capitalise the words: characters that are
    # already upper case have to stay that way
    # (example: NumberOfUMIs and not NumberOfUmis).
    new_index = result.index.str.replace("%", "pect")
    new_index = new_index.str.replace(":", "")
    new_index = new_index.str.replace(r"(?:^|\\s).", lambda m: m.group(0).upper(), regex=True)
    new_index = new_index.str.replace(" ", "")
    result.index = new_index
    result = result.rename({"UniquelyMappedReadsNumber": "NumberOfMappedReads",
                            "UniquelyMappedReadsPect": "PctMappedReads"}, errors="raise")
    logger.info("Done filtering STAR logs for barcode %s. Result has %d row(s) and %d column(s). "
                "Found entries:\\n\\t%s",
                barcode, *result.shape, "\\n\\t".join(result.index.to_list()))
    return result
|
|
|
|
|
|
def summary_remove_unwanted_entries_and_adjust_format(barcode, df: pd.DataFrame) -> pd.DataFrame:
    """
    Trim a single summary log in dataframe form down to the metrics that are
    reported and normalise their labels.

    - Entries listed in columns_to_remove below are dropped.
    - In the remaining labels the first letter of every word is upper-cased
      and spaces are removed.
    - 'UMIsInCells' and 'TotalGenesDetected' are renamed to 'NumberOfUMIs' and
      'NumberOfGenes'.

    Args:
        barcode: Barcode the summary belongs to; used in log and error messages.
        df: Summary frame with a single-level index holding the metric names.

    Returns:
        The filtered frame with normalised index labels.

    Raises:
        KeyError: when one of the entries to rename is missing from the summary.
    """
    logger.info("Filtering and formatting summary logs for barcode %s. "
                "Starting with %d row(s) and %d column(s)", barcode, *df.shape)
    columns_to_remove = (
        "Number of Reads",
        "Q30 Bases in RNA read",
        "Reads Mapped to Genome: Unique",
        "Reads Mapped to Transcriptome: Unique Genes",
        "Reads in Cells Mapped to Unique Genes",
        "Median UMI per Cell",
        "Median Genes per Cell",
        "Reads Mapped to Genome: Unique+Multiple",
        "Median Reads per Cell",
        "Mean UMI per Cell",
        "Mean Genes per Cell",
    )

    to_keep = ~df.index.isin(columns_to_remove)
    logger.info("Removed the following summary entries for barcode '%s':\\n\\t%s",
                barcode,
                "\\n\\t".join(df.loc[~to_keep].index.to_list()))
    result = df.loc[to_keep]
    # Upper-case only the first letter of each word; characters that are already
    # upper case stay untouched.
    result.index = result.index.str.replace(r"(?:^|\\s).", lambda m: m.group(0).upper(),
                                            regex=True).str.replace(" ", "")
    to_rename = {"UMIsInCells": "NumberOfUMIs",
                 "TotalGenesDetected": "NumberOfGenes"}
    try:
        result = result.rename(to_rename, errors="raise")
    except KeyError as e:
        # The sentence about the STAR version now ends with a space so it no
        # longer runs into "Available entries".
        raise KeyError(f"Tried to rename log entries ({','.join(to_rename)}) in the summary "
                       f"log for barcode {barcode}, but an entry was not found in the file. "
                       "Make sure that you are using the correct version of STAR. "
                       f"Available entries: {', '.join(result.index.to_list())}") from e
    logger.info("Done filtering summary logs for barcode %s. Result has %d row(s) and %d column(s). "
                "Found entries:\\n\\t%s",
                barcode, *result.shape, "\\n\\t".join(result.index.to_list()))
    return result
|
|
|
|
|
|
def join_dfs(df_list, barcodes) -> pd.DataFrame:
    """Combine per-barcode metric frames into one wide table.

    Args:
        df_list: Frames indexed by "Category", one per barcode.
        barcodes: Barcodes, in the same order as df_list.

    Returns:
        A frame with one row per barcode (index "WellBC") and one column per
        metric.
    """
    # Stack the frames under their barcode, which gives a 2-level index:
    # the barcode first, the metric second.
    per_barcode = {barcode: frame for barcode, frame in zip(barcodes, df_list)}
    stacked = pd.concat(per_barcode, names=["WellBC"])
    # Move the metrics to the columns. They are added as an extra column level,
    # so the original value level can simply be dropped.
    wide = stacked.unstack(level="Category")
    return wide.droplevel(0, axis="columns")
|
|
|
|
def main(par):
    """Gather the per-barcode log statistics into one tab-separated table.

    Reads the STAR logs, summary logs and reads-per-gene files listed in par,
    joins them per barcode, casts the known columns to fixed dtypes and writes
    the result to the path stored under "output".

    Raises:
        ValueError: when the four input lists differ in length.
    """
    logger.info("Component started.")
    # Provide an overview of the parameters in the logs
    parameter_lines = "".join(f'\\t{param}: {param_val}\\n' for param, param_val in par.items())
    logger.info("Parameters:\\n%s", parameter_lines.rstrip())

    star_logs = par["star_logs"]
    gene_summary_logs = par["gene_summary_logs"]
    reads_per_gene_logs = par["reads_per_gene_logs"]
    barcodes = par["barcodes"]
    number_of_inputs = tuple(map(len, (star_logs, gene_summary_logs,
                                       reads_per_gene_logs, barcodes)))
    if len(set(number_of_inputs)) != 1:
        raise ValueError("Expected the same number of inputs for 'star_logs' (%d), "
                         "'gene_summary_logs' (%d), 'reads_per_gene_logs' (%d) "
                         "and 'barcodes' (%d)." % number_of_inputs)

    # Per kind of log: the reader, an optional formatter, and the files to read.
    logs_to_process = [
        (star_log_to_dataframe, star_log_remove_unwanted_entries_and_adjust_format, star_logs),
        (summary_to_dataframe, summary_remove_unwanted_entries_and_adjust_format, gene_summary_logs),
        (reads_per_gene_to_dataframe, None, reads_per_gene_logs),
    ]
    logger.info("Formatting the contents of the log files.")
    all_logs_data = []
    for df_generator, formatter, data in logs_to_process:
        frames = [df_generator(barcode, path) for barcode, path in zip(barcodes, data)]
        if formatter:
            frames = [formatter(barcode, frame) for barcode, frame in zip(barcodes, frames)]
        all_logs_data.append(join_dfs(frames, barcodes))

    logger.info("Joining entries across the different logs together.")
    all_stats = pd.concat(all_logs_data, axis=1)
    logger.info("Log statistics were gathered for the following barcodes: %s",
                ", ".join(all_stats.index.to_list()))
    dtypes = {
        'NumberOfInputReads': pd.UInt64Dtype(),
        'NumberOfMappedReads': pd.UInt64Dtype(),
        'PctMappedReads': pd.Float64Dtype(),
        'NumberOfReadsMappedToMultipleLoci': pd.UInt64Dtype(),
        'PectOfReadsMappedToMultipleLoci': pd.Float64Dtype(),
        'NumberOfReadsMappedToTooManyLoci': pd.UInt64Dtype(),
        'PectOfReadsMappedToTooManyLoci': pd.Float64Dtype(),
        'NumberOfReadsUnmappedTooManyMismatches': pd.UInt64Dtype(),
        'PectOfReadsUnmappedTooManyMismatches': pd.Float64Dtype(),
        'NumberOfReadsUnmappedTooShort': pd.UInt64Dtype(),
        'PectOfReadsUnmappedTooShort': pd.Float64Dtype(),
        'NumberOfReadsUnmappedOther': pd.UInt64Dtype(),
        'PectOfReadsUnmappedOther': pd.Float64Dtype(),
        'ReadsWithValidBarcodes': pd.Float64Dtype(),
        'SequencingSaturation': pd.Float64Dtype(),
        'Q30BasesInCB+UMI': pd.Float64Dtype(),
        'ReadsMappedToTranscriptome:Unique+MultipeGenes': pd.Float64Dtype(),
        'EstimatedNumberOfCells': pd.UInt64Dtype(),
        'FractionOfReadsInCells': pd.Float64Dtype(),
        'MeanReadsPerCell': pd.UInt64Dtype(),
        'NumberOfUMIs': pd.UInt64Dtype(),
        'NumberOfGenes': pd.UInt64Dtype(),
        'NumberOfCountedReads': pd.UInt64Dtype(),
    }
    all_stats = all_stats.astype(dtypes)
    # batched() is used here to describe a limited number of columns at a time,
    # so that all of them are displayed (pandas might limit the view for readability)
    summary_chunks = [repr(all_stats.loc[:, columns].describe())
                      for columns in batched(all_stats.columns, 3)]
    logger.info("Summary of final output:\\n%s\\n", "\\n".join(summary_chunks))
    logger.info("Writing output to %s", par["output"])
    table = all_stats.reset_index("WellBC")
    table.to_csv(par["output"], sep="\\t", header=True,
                 index=False, float_format='%g')
    logger.info("Finished %s.", meta["name"])
|
|
|
|
if __name__ == "__main__":
|
|
main(par)
|
|
VIASHMAIN
|
|
python -B "\$tempscript" &
|
|
wait "\$!"
|
|
|
|
VIASHEOF
|
|
|
|
|
|
# Docker engine only: after the run, pass every path argument through
# ViashDockerStripAutomount again so the remaining checks use the stripped value.
if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then
  # strip viash automount from file paths

  # Multi-value arguments: split on ';', strip each entry and rebuild the list.
  # The accumulator only gets a ';' in front of the next entry once it holds
  # something.
  if [[ -n "$VIASH_PAR_STAR_LOGS" ]]; then
    unset VIASH_TEST_STAR_LOGS
    IFS=';'
    for var in $VIASH_PAR_STAR_LOGS; do
      unset IFS
      VIASH_TEST_STAR_LOGS="${VIASH_TEST_STAR_LOGS:+$VIASH_TEST_STAR_LOGS;}$(ViashDockerStripAutomount "$var")"
    done
    VIASH_PAR_STAR_LOGS="$VIASH_TEST_STAR_LOGS"
  fi
  if [[ -n "$VIASH_PAR_GENE_SUMMARY_LOGS" ]]; then
    unset VIASH_TEST_GENE_SUMMARY_LOGS
    IFS=';'
    for var in $VIASH_PAR_GENE_SUMMARY_LOGS; do
      unset IFS
      VIASH_TEST_GENE_SUMMARY_LOGS="${VIASH_TEST_GENE_SUMMARY_LOGS:+$VIASH_TEST_GENE_SUMMARY_LOGS;}$(ViashDockerStripAutomount "$var")"
    done
    VIASH_PAR_GENE_SUMMARY_LOGS="$VIASH_TEST_GENE_SUMMARY_LOGS"
  fi
  if [[ -n "$VIASH_PAR_READS_PER_GENE_LOGS" ]]; then
    unset VIASH_TEST_READS_PER_GENE_LOGS
    IFS=';'
    for var in $VIASH_PAR_READS_PER_GENE_LOGS; do
      unset IFS
      VIASH_TEST_READS_PER_GENE_LOGS="${VIASH_TEST_READS_PER_GENE_LOGS:+$VIASH_TEST_READS_PER_GENE_LOGS;}$(ViashDockerStripAutomount "$var")"
    done
    VIASH_PAR_READS_PER_GENE_LOGS="$VIASH_TEST_READS_PER_GENE_LOGS"
  fi

  # Single-value arguments: strip in place when they are not empty.
  [[ -z "$VIASH_PAR_OUTPUT" ]] || VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT")
  [[ -z "$VIASH_META_RESOURCES_DIR" ]] || VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR")
  [[ -z "$VIASH_META_EXECUTABLE" ]] || VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE")
  [[ -z "$VIASH_META_CONFIG" ]] || VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG")
  [[ -z "$VIASH_META_TEMP_DIR" ]] || VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR")
fi
|
|
|
|
|
|
# check whether required files exist
# The component must have produced its output file by now; an empty output
# path is not checked.
if [[ -n "$VIASH_PAR_OUTPUT" && ! -e "$VIASH_PAR_OUTPUT" ]]; then
  ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist."
  exit 1
fi
|
|
|
|
|
|
exit 0
|