Build branch main with version main (3c05b79)

Build pipeline: viash-hub.htrnaseq.main-bc45k

Source commit: 3c05b7950b

Source message: Remove duplicate entries from feature data (#41)
This commit is contained in:
CI
2025-02-17 15:45:09 +00:00
parent 1f84d46798
commit 6468e4a5de
61 changed files with 144 additions and 76 deletions

View File

@@ -177,7 +177,7 @@ build_info:
output: "target/executable/eset/create_fdata"
executable: "target/executable/eset/create_fdata/create_fdata"
viash_version: "0.9.0"
git_commit: "a0a780a9c1554e0e02de9e05bc88313594a08c6c"
git_commit: "3c05b7950b9627acdaa70687e0915d5ee69b6d1b"
git_remote: "https://github.com/viash-hub/htrnaseq"
package_config:
name: "htrnaseq"

View File

@@ -478,9 +478,9 @@ RUN pip install --upgrade pip && \
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
LABEL org.opencontainers.image.description="Companion container for running component eset create_fdata"
LABEL org.opencontainers.image.created="2025-02-17T07:49:05Z"
LABEL org.opencontainers.image.created="2025-02-17T14:58:42Z"
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
LABEL org.opencontainers.image.revision="a0a780a9c1554e0e02de9e05bc88313594a08c6c"
LABEL org.opencontainers.image.revision="3c05b7950b9627acdaa70687e0915d5ee69b6d1b"
LABEL org.opencontainers.image.version="main"
VIASHDOCKER
@@ -1162,10 +1162,17 @@ def main(par):
idx, cols = pd.factorize(column_to_get)
symbol_values = annotation.reindex(cols, axis=1).to_numpy()[np.arange(len(annotation)), idx]
annotation["SYMBOL"] = symbol_values
logger.info("Writing to %s", par["output"])
logger.info("Dropping unused columns")
annotation = annotation.drop(["score", "source", "frame", "feature"], axis=1)
logger.info("Looking for duplicate rows and removing them. Starting with %i entries", annotation.shape[0])
annotation = annotation.drop_duplicates(keep="first", ignore_index=True)
logger.info("After removing duplicates: %i entries", annotation.shape[0])
logger.info("Writing to %s", par["output"])
annotation.to_csv(par["output"], sep="\\t", header=True, index=False, na_rep="NA")
# Run these checks *after* writing the CSV so that the written output is available for inspection if a check fails
logger.info("Checking for unique gene IDs")
if not annotation["gene_id"].is_unique:
raise ValueError("Values from the 'gene_id' column are not unique after processing!")
logger.info("%s finished", meta['name'])