Build branch main with version main (3c05b79)
Build pipeline: viash-hub.htrnaseq.main-bc45k
Source commit: 3c05b7950b
Source message: Remove duplicate entries from feature data (#41)
This commit is contained in:
@@ -177,7 +177,7 @@ build_info:
|
||||
output: "target/executable/eset/create_fdata"
|
||||
executable: "target/executable/eset/create_fdata/create_fdata"
|
||||
viash_version: "0.9.0"
|
||||
- git_commit: "a0a780a9c1554e0e02de9e05bc88313594a08c6c"
|
||||
+ git_commit: "3c05b7950b9627acdaa70687e0915d5ee69b6d1b"
|
||||
git_remote: "https://github.com/viash-hub/htrnaseq"
|
||||
package_config:
|
||||
name: "htrnaseq"
|
||||
|
||||
@@ -478,9 +478,9 @@ RUN pip install --upgrade pip && \
|
||||
|
||||
LABEL org.opencontainers.image.authors="Dries Schaumont, Marijke Van Moerbeke"
|
||||
LABEL org.opencontainers.image.description="Companion container for running component eset create_fdata"
|
||||
- LABEL org.opencontainers.image.created="2025-02-17T07:49:05Z"
|
||||
+ LABEL org.opencontainers.image.created="2025-02-17T14:58:42Z"
|
||||
LABEL org.opencontainers.image.source="https://github.com/viash-hub/htrnaseq"
|
||||
- LABEL org.opencontainers.image.revision="a0a780a9c1554e0e02de9e05bc88313594a08c6c"
|
||||
+ LABEL org.opencontainers.image.revision="3c05b7950b9627acdaa70687e0915d5ee69b6d1b"
|
||||
LABEL org.opencontainers.image.version="main"
|
||||
|
||||
VIASHDOCKER
|
||||
@@ -1162,10 +1162,17 @@ def main(par):
|
||||
idx, cols = pd.factorize(column_to_get)
|
||||
symbol_values = annotation.reindex(cols, axis=1).to_numpy()[np.arange(len(annotation)), idx]
|
||||
annotation["SYMBOL"] = symbol_values
|
||||
|
||||
logger.info("Writing to %s", par["output"])
|
||||
logger.info("Dropping unused columns")
|
||||
annotation = annotation.drop(["score", "source", "frame", "feature"], axis=1)
|
||||
logger.info("Looking for duplicate rows and removing them. Starting with %i entries", annotation.shape[0])
|
||||
annotation = annotation.drop_duplicates(keep="first", ignore_index=True)
|
||||
logger.info("After removing duplicates: %i entries", annotation.shape[0])
|
||||
logger.info("Writing to %s", par["output"])
|
||||
annotation.to_csv(par["output"], sep="\\t", header=True, index=False, na_rep="NA")
|
||||
# Do these checks *after* writing the csv in order to be able to check the data
|
||||
logger.info("Checking for unique gene IDs")
|
||||
if not annotation["gene_id"].is_unique:
|
||||
raise ValueError("Values from the 'gene_id' column are not unique after processing!")
|
||||
logger.info("%s finished", meta['name'])
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user