Build branch main with version main (1e1ffb3)
Build pipeline: vsh-ci-dev-jsbwk
Source commit: 1e1ffb315f
Source message: Merge pull request #17 from viash-hub/add_biobox_modules
- Migrate a number of components to biobox
- Fix tests
- Reduce size of test resources
- Prepare for Viash Hub
This commit is contained in:
258
target/executable/tximport/.config.vsh.yaml
Normal file
258
target/executable/tximport/.config.vsh.yaml
Normal file
@@ -0,0 +1,258 @@
|
||||
name: "tximport"
|
||||
version: "main"
|
||||
argument_groups:
|
||||
- name: "Input"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--quant_results"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: true
|
||||
multiple_sep: ","
|
||||
- type: "file"
|
||||
name: "--tx2gene_tsv"
|
||||
info: null
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "string"
|
||||
name: "--quant_type"
|
||||
description: "Method used for quantification"
|
||||
info: null
|
||||
required: false
|
||||
choices:
|
||||
- "salmon"
|
||||
- "kallisto"
|
||||
direction: "input"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- name: "Output"
|
||||
arguments:
|
||||
- type: "file"
|
||||
name: "--tpm_gene"
|
||||
info: null
|
||||
default:
|
||||
- "merged.gene_tpm.tsv"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--counts_gene"
|
||||
info: null
|
||||
default:
|
||||
- "merged.gene_counts.tsv"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--counts_gene_length_scaled"
|
||||
info: null
|
||||
default:
|
||||
- "merged.gene_counts_length_scaled.tsv"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--counts_gene_scaled"
|
||||
info: null
|
||||
default:
|
||||
- "merged.gene_counts_scaled.tsv"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--lengths_gene"
|
||||
info: null
|
||||
default:
|
||||
- "merged.gene_length.tsv"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--tpm_transcript"
|
||||
info: null
|
||||
default:
|
||||
- "merged.transcript_tpm.tsv"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--counts_transcript"
|
||||
info: null
|
||||
default:
|
||||
- "merged.transcript_counts.tsv"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
- type: "file"
|
||||
name: "--lengths_transcript"
|
||||
info: null
|
||||
default:
|
||||
- "merged.transcript_length.tsv"
|
||||
must_exist: true
|
||||
create_parent: true
|
||||
required: false
|
||||
direction: "output"
|
||||
multiple: false
|
||||
multiple_sep: ";"
|
||||
resources:
|
||||
- type: "bash_script"
|
||||
path: "script.sh"
|
||||
is_executable: true
|
||||
- type: "file"
|
||||
path: "tximport.r"
|
||||
description: "Get dataframe linking transcript ID, gene ID, and gene name"
|
||||
info:
|
||||
migration_info:
|
||||
git_repo: "https://github.com/nf-core/rnaseq.git"
|
||||
paths:
|
||||
- "modules/local/tximport/main.nf"
|
||||
last_sha: "489bcb4efdc7bd58839b22b0360d26b4d80b87a8"
|
||||
status: "enabled"
|
||||
requirements:
|
||||
commands:
|
||||
- "ps"
|
||||
runners:
|
||||
- type: "executable"
|
||||
id: "executable"
|
||||
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
||||
- type: "nextflow"
|
||||
id: "nextflow"
|
||||
directives:
|
||||
tag: "$id"
|
||||
auto:
|
||||
simplifyInput: true
|
||||
simplifyOutput: false
|
||||
transcript: false
|
||||
publish: false
|
||||
config:
|
||||
labels:
|
||||
mem1gb: "memory = 1000000000.B"
|
||||
mem2gb: "memory = 2000000000.B"
|
||||
mem5gb: "memory = 5000000000.B"
|
||||
mem10gb: "memory = 10000000000.B"
|
||||
mem20gb: "memory = 20000000000.B"
|
||||
mem50gb: "memory = 50000000000.B"
|
||||
mem100gb: "memory = 100000000000.B"
|
||||
mem200gb: "memory = 200000000000.B"
|
||||
mem500gb: "memory = 500000000000.B"
|
||||
mem1tb: "memory = 1000000000000.B"
|
||||
mem2tb: "memory = 2000000000000.B"
|
||||
mem5tb: "memory = 5000000000000.B"
|
||||
mem10tb: "memory = 10000000000000.B"
|
||||
mem20tb: "memory = 20000000000000.B"
|
||||
mem50tb: "memory = 50000000000000.B"
|
||||
mem100tb: "memory = 100000000000000.B"
|
||||
mem200tb: "memory = 200000000000000.B"
|
||||
mem500tb: "memory = 500000000000000.B"
|
||||
mem1gib: "memory = 1073741824.B"
|
||||
mem2gib: "memory = 2147483648.B"
|
||||
mem4gib: "memory = 4294967296.B"
|
||||
mem8gib: "memory = 8589934592.B"
|
||||
mem16gib: "memory = 17179869184.B"
|
||||
mem32gib: "memory = 34359738368.B"
|
||||
mem64gib: "memory = 68719476736.B"
|
||||
mem128gib: "memory = 137438953472.B"
|
||||
mem256gib: "memory = 274877906944.B"
|
||||
mem512gib: "memory = 549755813888.B"
|
||||
mem1tib: "memory = 1099511627776.B"
|
||||
mem2tib: "memory = 2199023255552.B"
|
||||
mem4tib: "memory = 4398046511104.B"
|
||||
mem8tib: "memory = 8796093022208.B"
|
||||
mem16tib: "memory = 17592186044416.B"
|
||||
mem32tib: "memory = 35184372088832.B"
|
||||
mem64tib: "memory = 70368744177664.B"
|
||||
mem128tib: "memory = 140737488355328.B"
|
||||
mem256tib: "memory = 281474976710656.B"
|
||||
mem512tib: "memory = 562949953421312.B"
|
||||
cpu1: "cpus = 1"
|
||||
cpu2: "cpus = 2"
|
||||
cpu5: "cpus = 5"
|
||||
cpu10: "cpus = 10"
|
||||
cpu20: "cpus = 20"
|
||||
cpu50: "cpus = 50"
|
||||
cpu100: "cpus = 100"
|
||||
cpu200: "cpus = 200"
|
||||
cpu500: "cpus = 500"
|
||||
cpu1000: "cpus = 1000"
|
||||
debug: false
|
||||
container: "docker"
|
||||
engines:
|
||||
- type: "docker"
|
||||
id: "docker"
|
||||
image: "ubuntu:22.04"
|
||||
target_registry: "images.viash-hub.com"
|
||||
target_tag: "main"
|
||||
namespace_separator: "/"
|
||||
setup:
|
||||
- type: "apt"
|
||||
packages:
|
||||
- "r-base"
|
||||
- "libcurl4-openssl-dev"
|
||||
- "libssl-dev"
|
||||
- "libxml2-dev"
|
||||
interactive: false
|
||||
- type: "r"
|
||||
cran:
|
||||
- "jsonlite"
|
||||
bioc:
|
||||
- "SummarizedExperiment"
|
||||
- "tximport"
|
||||
- "tximeta"
|
||||
bioc_force_install: false
|
||||
entrypoint: []
|
||||
cmd: null
|
||||
- type: "native"
|
||||
id: "native"
|
||||
build_info:
|
||||
config: "src/tximport/config.vsh.yaml"
|
||||
runner: "executable"
|
||||
engine: "docker|native"
|
||||
output: "target/executable/tximport"
|
||||
executable: "target/executable/tximport/tximport"
|
||||
viash_version: "0.9.0"
|
||||
git_commit: "1e1ffb315fefec05db2ee0c62e1c98ce4b49929c"
|
||||
git_remote: "https://github.com/viash-hub/rnaseq"
|
||||
package_config:
|
||||
version: "main"
|
||||
info:
|
||||
test_resources:
|
||||
- path: "gs://viash-hub-test-data/rnaseq/v1"
|
||||
dest: "testData"
|
||||
viash_version: "0.9.0"
|
||||
source: "src"
|
||||
target: "target"
|
||||
config_mods:
|
||||
- ".requirements.commands := ['ps']\n.runners[.type == 'nextflow'].directives.tag\
|
||||
\ := '$id'\n"
|
||||
- ".engines += { type: \"native\" }"
|
||||
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
||||
- ".engines[.type == 'docker'].target_tag := 'main'"
|
||||
organization: "vsh"
|
||||
1424
target/executable/tximport/tximport
Executable file
1424
target/executable/tximport/tximport
Executable file
File diff suppressed because it is too large
Load Diff
141
target/executable/tximport/tximport.r
Executable file
141
target/executable/tximport/tximport.r
Executable file
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env Rscript
|
||||
|
||||
# Script for importing and processing transcript-level quantifications.
|
||||
# Written by Lorena Pantano, later modified by Jonathan Manning, and released under the MIT license.
|
||||
|
||||
# Loading required libraries
|
||||
library(SummarizedExperiment)
|
||||
library(tximport)
|
||||
|
||||
# Parsing command line arguments
#
# Five positional arguments are expected, in this order:
#   1. coldata_path  - sample sheet handed to read_coldata()
#   2. path          - directory searched (recursively) for quantification files
#   3. prefix        - prefix prepended to every output file name
#   4. quant_type    - quantification method, forwarded to tximport(type = ...)
#   5. tx2gene_path  - transcript/gene mapping handed to read_transcript_info()
#
# All five are required: tx2gene_path is read unconditionally further down,
# so a call with only four arguments could never succeed.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 5) {
    stop("Usage: tximport.r <coldata_path> <path> <prefix> <quant_type> <tx2gene_path>",
         call. = FALSE)
}

# Assigning command line arguments to variables
coldata_path <- args[1]
path <- args[2]
prefix <- args[3]
quant_type <- args[4]
tx2gene_path <- args[5]
|
||||
|
||||
## Functions
|
||||
|
||||
# Build a table from a SummarizedExperiment object
#
# Arguments:
#   se.obj: object that rowData() and assays() are called on.
#   slot:   name (or index) of the assay to extract.
#
# Returns the first two rowData columns column-bound to the selected assay.
build_table <- function(se.obj, slot) {
    cbind(
        rowData(se.obj)[, 1:2],
        assays(se.obj)[[slot]]
    )
}
|
||||
|
||||
# Write a table to a file with given parameters
#
# Arguments:
#   params: list with elements `obj` (passed to build_table()), `slot`
#           (assay to export) and `suffix` (file name suffix).
#
# The output file is "<prefix>.<suffix>", where `prefix` is the script-level
# variable taken from the command line. The table is tab-separated, unquoted,
# and written without row names.
write_se_table <- function(params) {
    out_file <- paste0(prefix, ".", params$suffix)
    tbl <- build_table(params$obj, params$slot)
    write.table(tbl, out_file, sep = "\t", quote = FALSE, row.names = FALSE)
}
|
||||
|
||||
# Read transcript metadata from a given path
#
# Arguments:
#   tinfo_path: path to a headerless, tab-separated file whose columns are
#               read as "tx", "gene_id" and "gene_name".
#   tx_ids:     transcript IDs the result is aligned to. Defaults to the row
#               names of the first element of the script-level `txi` object,
#               which is what the script used implicitly before.
#
# Returns a list with three data frames:
#   transcript - one row per entry of tx_ids (tx, gene_id, gene_name),
#                with the transcript ID as row name
#   gene       - the unique (gene_id, gene_name) pairs
#   tx2gene    - the (tx, gene_id) columns
#
# Stops with an error if the file does not exist or is empty.
read_transcript_info <- function(tinfo_path, tx_ids = rownames(txi[[1]])) {
    info <- file.info(tinfo_path)
    # file.info() reports an NA size for a missing path; without this check the
    # comparison below would fail with an unhelpful "missing value" error.
    if (is.na(info$size)) {
        stop("tx2gene file does not exist: ", tinfo_path)
    }
    if (info$size == 0) {
        stop("tx2gene file is empty")
    }

    transcript_info <- read.csv(tinfo_path, sep = "\t", header = FALSE,
                                col.names = c("tx", "gene_id", "gene_name"))

    # Transcripts that were quantified but are absent from the mapping are
    # added with their own ID standing in for the gene ID and gene name.
    extra <- setdiff(tx_ids, as.character(transcript_info[["tx"]]))
    transcript_info <- rbind(transcript_info,
                             data.frame(tx = extra, gene_id = extra, gene_name = extra))

    # Reorder (and subset) the rows to follow the order of tx_ids.
    transcript_info <- transcript_info[match(tx_ids, transcript_info[["tx"]]), ]
    rownames(transcript_info) <- transcript_info[["tx"]]

    list(transcript = transcript_info,
         gene = unique(transcript_info[, 2:3]),
         tx2gene = transcript_info[, 1:2])
}
|
||||
|
||||
# Read and process sample/column data from a given path
#
# Arguments:
#   coldata_path: path to a tab-separated sample sheet with a header row.
#
# Relies on two script-level variables: `fns` (the quantification file paths)
# and `names` (the matching sample names).
#
# If the file exists, its rows are reordered to follow `names` (matched
# against the first column) and a `files` column is prepended. Otherwise a
# two-column data frame (files, names) is built from the script-level values.
#
# Returns the data frame with its row names set from the "names" column.
# NOTE(review): a sample sheet without a "names" column would leave the row
# names unset - confirm the expected schema against the caller.
read_coldata <- function(coldata_path) {
    if (file.exists(coldata_path)) {
        coldata <- read.csv(coldata_path, sep = "\t")
        coldata <- coldata[match(names, coldata[, 1]), ]
        coldata <- cbind(files = fns, coldata)
    } else {
        message("ColData not available: ", coldata_path)
        coldata <- data.frame(files = fns, names = names)
    }
    rownames(coldata) <- coldata[["names"]]

    # Return the data frame explicitly: the value of the assignment above is
    # only the vector of names, which is what callers used to receive.
    coldata
}
|
||||
|
||||
# Create a SummarizedExperiment object with given data
#
# Arguments:
#   counts, abundance, length: stored as the assays named "counts",
#                              "abundance" and "length".
#   col_data: passed through as colData.
#   row_data: passed through as rowData.
#
# Returns the result of the SummarizedExperiment() constructor.
create_summarized_experiment <- function(counts, abundance, length, col_data, row_data) {
    assay_list <- list(counts = counts, abundance = abundance, length = length)
    SummarizedExperiment(assays = assay_list,
                         colData = col_data,
                         rowData = row_data)
}
|
||||
|
||||
# Main script starts here

# Define pattern for file names based on quantification type
pattern <- if (quant_type == "kallisto") "abundance.tsv" else ".*quant_results\\.sf"
fns <- list.files(path, pattern = pattern, recursive = TRUE, full.names = TRUE)
if (length(fns) == 0) {
    # Fail early with a readable message instead of a downstream import error.
    stop("No quantification files matching '", pattern, "' found under: ", path,
         call. = FALSE)
}
# NOTE: `names` and `fns` are also read as globals by read_coldata().
names <- basename(fns)
names(fns) <- names
dropInfReps <- quant_type == "kallisto"

# Import transcript-level quantifications
txi <- tximport(fns, type = quant_type, txOut = TRUE, dropInfReps = dropInfReps)

# Read transcript and sample data
transcript_info <- read_transcript_info(tx2gene_path)
coldata <- read_coldata(coldata_path)

# Create initial SummarizedExperiment object
se <- create_summarized_experiment(txi[["counts"]], txi[["abundance"]], txi[["length"]],
                                   DataFrame(coldata), transcript_info$transcript)

# Setting parameters for writing tables
params <- list(
    list(obj = se, slot = "abundance", suffix = "transcript_tpm.tsv"),
    list(obj = se, slot = "counts", suffix = "transcript_counts.tsv"),
    list(obj = se, slot = "length", suffix = "transcript_lengths.tsv")
)

# Process gene-level data if tx2gene mapping is available
if ("tx2gene" %in% names(transcript_info) && !is.null(transcript_info$tx2gene)) {
    tx2gene <- transcript_info$tx2gene
    gi <- summarizeToGene(txi, tx2gene = tx2gene)
    gi.ls <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "lengthScaledTPM")
    gi.s <- summarizeToGene(txi, tx2gene = tx2gene, countsFromAbundance = "scaledTPM")

    # Align the gene annotation with the row order of the summarized matrices.
    gene_info <- transcript_info$gene[match(rownames(gi[[1]]), transcript_info$gene[["gene_id"]]), ]
    # gene_info only has the gene_id and gene_name columns, so the gene ID is
    # the row key (there is no "tx" column at this level).
    rownames(gene_info) <- gene_info[["gene_id"]]

    col_data_frame <- DataFrame(coldata)

    # Create gene-level SummarizedExperiment objects
    gse <- create_summarized_experiment(gi[["counts"]], gi[["abundance"]], gi[["length"]],
                                        col_data_frame, gene_info)
    gse.ls <- create_summarized_experiment(gi.ls[["counts"]], gi.ls[["abundance"]], gi.ls[["length"]],
                                           col_data_frame, gene_info)
    gse.s <- create_summarized_experiment(gi.s[["counts"]], gi.s[["abundance"]], gi.s[["length"]],
                                          col_data_frame, gene_info)

    params <- c(params, list(
        list(obj = gse, slot = "length", suffix = "gene_lengths.tsv"),
        list(obj = gse, slot = "abundance", suffix = "gene_tpm.tsv"),
        list(obj = gse, slot = "counts", suffix = "gene_counts.tsv"),
        list(obj = gse.ls, slot = "abundance", suffix = "gene_tpm_length_scaled.tsv"),
        list(obj = gse.ls, slot = "counts", suffix = "gene_counts_length_scaled.tsv"),
        list(obj = gse.s, slot = "abundance", suffix = "gene_tpm_scaled.tsv"),
        list(obj = gse.s, slot = "counts", suffix = "gene_counts_scaled.tsv")
    ))
}

# Writing tables for each set of parameters
done <- lapply(params, write_se_table)

# Output session information and citations
citation("tximeta")
sessionInfo()
|
||||
Reference in New Issue
Block a user