Files
biobox/target/executable/rsem/rsem_calculate_expression/.config.vsh.yaml
CI 6a8cd85cf3 Build branch main with version main (bc9cc0a)
Build pipeline: viash-hub.biobox.main-bd96b

Source commit: bc9cc0a6ce

Source message: Kallisto quant (#152)

* initial commit dedup

* Revert "initial commit dedup"

This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2.

* complete component

* Update changelog

* add help.txt

* apply suggested changes (changelog, config)
2024-09-19 04:13:10 +00:00

880 lines
31 KiB
YAML

name: "rsem_calculate_expression"
namespace: "rsem"
version: "main"
argument_groups:
- name: "Input"
arguments:
- type: "string"
name: "--id"
description: "Sample ID."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--strandedness"
description: "Sample strand-specificity. Must be one of unstranded, forward, reverse"
info: null
required: false
choices:
- "forward"
- "reverse"
- "unstranded"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--paired"
description: "Paired-end reads or not?"
info: null
direction: "input"
- type: "file"
name: "--input"
description: "Input reads for quantification."
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: true
multiple_sep: ";"
- type: "file"
name: "--index"
description: "RSEM index."
info: null
must_exist: false
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--extra_args"
description: "Extra rsem-calculate-expression arguments in addition to the examples."
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- name: "Output"
arguments:
- type: "file"
name: "--counts_gene"
description: "Expression counts on gene level"
info: null
example:
- "$id.genes.results"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--counts_transcripts"
description: "Expression counts on transcript level"
info: null
example:
- "$id.isoforms.results"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--stat"
description: "RSEM statistics"
info: null
example:
- "$id.stat"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--logs"
description: "RSEM logs"
info: null
example:
- "$id.log"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bam_star"
description: "BAM file generated by STAR (optional)"
info: null
example:
- "$id.STAR.genome.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bam_genome"
description: "Genome BAM file (optional)"
info: null
example:
- "$id.genome.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--bam_transcript"
description: "Transcript BAM file (optional)"
info: null
example:
- "$id.transcript.bam"
must_exist: true
create_parent: true
required: false
direction: "output"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--sort_bam_by_read_name"
description: "Sort BAM file aligned under transcript coordinate by read name.\
\ Setting this option on will produce \ndeterministic maximum likelihood estimations\
\ from independent runs. Note that sorting will take long \ntime and lots of\
\ memory.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--no_bam_output"
description: "Do not output any BAM file."
info: null
direction: "input"
- type: "boolean_true"
name: "--sampling_for_bam"
description: "When RSEM generates a BAM file, instead of outputting all alignments\
\ a read has with their posterior \nprobabilities, one alignment is sampled\
\ according to the posterior probabilities. The sampling procedure \nincludes\
\ the alignment to the \"noise\" transcript, which does not appear in the BAM\
\ file. Only the \nsampled alignment has a weight of 1. All other alignments\
\ have weight 0. If the \"noise\" transcript is \nsampled, all alignments appeared\
\ in the BAM file should have weight 0.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--output_genome_bam"
description: "Generate a BAM file, 'sample_name.genome.bam', with alignments mapped\
\ to genomic coordinates and \nannotated with their posterior probabilities.\
\ In addition, RSEM will call samtools (included in RSEM \npackage) to sort\
\ and index the bam file. 'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai'\
\ \nwill be generated.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--sort_bam_by_coordinate"
description: "Sort RSEM generated transcript and genome BAM files by coordinates\
\ and build associated indices.\n"
info: null
direction: "input"
- name: "Basic Options"
arguments:
- type: "boolean_true"
name: "--no_qualities"
description: "Input reads do not contain quality scores."
info: null
direction: "input"
- type: "boolean_true"
name: "--alignments"
description: "Input file contains alignments in SAM/BAM/CRAM format. The exact\
\ file format will be determined \nautomatically.\n"
info: null
direction: "input"
- type: "file"
name: "--fai"
description: "If the header section of input alignment file does not contain reference\
\ sequence information, \nthis option should be turned on. <file> is a FAI format\
\ file containing each reference sequence's \nname and length. Please refer\
\ to the SAM official website for the details of FAI format.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--bowtie2"
description: "Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM\
\ does not handle indel, local \nand discordant alignments, the Bowtie2 parameters\
\ are set in a way to avoid those alignments. In \nparticular, we use options\
\ '--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1'\
\ \nby default. The last parameter of '--score_min', '-0.1', is the negative\
\ of maximum mismatch rate. \nThis rate can be set by option '--bowtie2_mismatch_rate'.\
\ If reads are paired-end, we additionally \nuse options '--no_mixed' and '--no_discordant'.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--star"
description: "Use STAR to align reads. Alignment parameters are from ENCODE3's\
\ STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR's\
\ Output BAM file is unsorted. It is stored in RSEM's \ntemporary directory\
\ with name as 'sample_name.bam'. Each STAR job will have its own private copy\
\ of \nthe genome in memory.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--hisat2_hca"
description: "Use HISAT2 to align reads to the transcriptome according to Human\
\ Cell Atlas.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--append_names"
description: "If gene_name/transcript_name is available, append it to the end\
\ of gene_id/transcript_id (separated \nby '_') in files 'sample_name.isoforms.results'\
\ and 'sample_name.genes.results'.\n"
info: null
direction: "input"
- type: "integer"
name: "--seed"
description: "Set the seed for the random number generators used in calculating\
\ posterior mean estimates and \ncredibility intervals. The seed must be a non-negative\
\ 32 bit integer.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--single_cell_prior"
description: "By default, RSEM uses Dirichlet(1) as the prior to calculate posterior\
\ mean estimates and credibility \nintervals. However, much less genes are expressed\
\ in single cell RNA-Seq data. Thus, if you want to \ncompute posterior mean\
\ estimates and/or credibility intervals and you have single-cell RNA-Seq data,\
\ \nyou are recommended to turn on this option. Then RSEM will use Dirichlet(0.1)\
\ as the prior which \nencourage the sparsity of the expression levels.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--calc_pme"
description: "Run RSEM's collapsed Gibbs sampler to calculate posterior mean estimates."
info: null
direction: "input"
- type: "boolean_true"
name: "--calc_ci"
description: "Calculate 95% credibility intervals and posterior mean estimates.\
\ The credibility level can be \nchanged by setting '--ci_credibility_level'.\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--quiet"
alternatives:
- "-q"
description: "Suppress the output of logging information."
info: null
direction: "input"
- name: "Aligner Options"
arguments:
- type: "integer"
name: "--seed_length"
description: "Seed length used by the read aligner. Providing the correct value\
\ is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie's\
\ seed length parameter. Any read with its or at least \none of its mates' (for\
\ paired-end reads) length less than this value will be ignored. If the \nreferences\
\ are not added poly(A) tails, the minimum allowed value is 5, otherwise, the\
\ minimum \nallowed value is 25. Note that this script will only check if the\
\ value >= 5 and give a warning \nmessage if the value < 25 but >= 5. (Default:\
\ 25)\n"
info: null
example:
- 25
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--phred64_quals"
description: "Input quality scores are encoded as Phred+64 (default for GA Pipeline\
\ ver. >= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise,\
\ quality score will be encoded as Phred+33. (Default: false)\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--solexa_quals"
description: "Input quality scores are solexa encoded (from GA Pipeline ver. <\
\ 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality\
\ score will be encoded as Phred+33. (Default: false)\n"
info: null
direction: "input"
- type: "integer"
name: "--bowtie_n"
description: "(Bowtie parameter) max # of mismatches in the seed. (Range: 0-3,\
\ Default: 2)\n"
info: null
example:
- 2
required: false
choices:
- 0
- 1
- 2
- 3
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bowtie_e"
description: "(Bowtie parameter) max sum of mismatch quality scores across the\
\ alignment. (Default: 99999999)\n"
info: null
example:
- 99999999
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bowtie_m"
description: "(Bowtie parameter) suppress all alignments for a read if > <int>\
\ valid alignments exist. (Default: 200)\n"
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bowtie_chunkmbs"
description: "(Bowtie parameter) memory allocated for best first alignment calculation\
\ (Default: 0 - use Bowtie's default)\n"
info: null
example:
- 0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--bowtie2_mismatch_rate"
description: "(Bowtie 2 parameter) The maximum mismatch rate allowed. (Default:\
\ 0.1)\n"
info: null
example:
- 0.1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--bowtie2_k"
description: "(Bowtie 2 parameter) Find up to <int> alignments per read. (Default:\
\ 200)\n"
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--bowtie2_sensitivity_level"
description: "(Bowtie 2 parameter) Set Bowtie 2's preset options in --end-to-end\
\ mode. This option controls how \nhard Bowtie 2 tries to find alignments. <string>\
\ must be one of \"very_fast\", \"fast\", \"sensitive\" \nand \"very_sensitive\"\
. The four candidates correspond to Bowtie 2's \"--very-fast\", \"--fast\",\
\ \n\"--sensitive\" and \"--very-sensitive\" options. (Default: \"sensitive\"\
\ - use Bowtie 2's default)\n"
info: null
example:
- "sensitive"
required: false
choices:
- "very_fast"
- "fast"
- "sensitive"
- "very_sensitive"
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--star_gzipped_read_file"
description: "Input read file(s) is compressed by gzip. (Default: false)\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--star_bzipped_read_file"
description: "Input read file(s) is compressed by bzip2. (Default: false)\n"
info: null
direction: "input"
- type: "boolean_true"
name: "--star_output_genome_bam"
description: "Save the BAM file from STAR alignment under genomic coordinate to\
\ 'sample_name.STAR.genome.bam'. \nThis file is NOT sorted by genomic coordinate.\
\ In this file, according to STAR's manual, 'paired \nends of an alignment are\
\ always adjacent, and multiple alignments of a read are adjacent as well'.\
\ \n(Default: false)\n"
info: null
direction: "input"
- name: "Advanced Options"
arguments:
- type: "string"
name: "--tag"
description: "The name of the optional field used in the SAM input for identifying\
\ a read with too many valid \nalignments. The field should have the format\
\ <tagName>:i:<value>, where a <value> bigger than 0 \nindicates a read with\
\ too many alignments. (Default: \"\")\n"
info: null
example:
- ""
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length_min"
description: "Minimum read/insert length allowed. This is also the value for the\
\ Bowtie/Bowtie2 -I option. \n(Default: 1)\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length_max"
description: "Maximum read/insert length allowed. This is also the value for the\
\ Bowtie/Bowtie 2 -X option. \n(Default: 1000)\n"
info: null
example:
- 1000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--fragment_length_mean"
description: "(single-end data only) The mean of the fragment length distribution,\
\ which is assumed to be a \nGaussian. (Default: -1, which disables use of the\
\ fragment length distribution)\n"
info: null
example:
- -1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
# NOTE(review): "gragment" is a typo for "fragment" (compare the sibling
# option --fragment_length_mean). The misspelled name stays primary so that
# existing callers and the wrapped script keep working; the correctly spelled
# form is accepted as an alternative. Fix the spelling in the source config
# together with the script in a follow-up.
name: "--gragment_length_sd"
alternatives:
- "--fragment_length_sd"
description: "(single-end data only) The standard deviation of the fragment length\
\ distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes\
\ that all fragments are of the same length, \ngiven by the rounded value of\
\ --fragment_length_mean).\n"
info: null
example:
- 0.0
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--estimate_rspd"
description: "Set this option if you want to estimate the read start position\
\ distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n"
info: null
direction: "input"
- type: "integer"
name: "--num_rspd_bins"
description: "Number of bins in the RSPD. Only relevant when '--estimate_rspd'\
\ is specified. Use of the default \nsetting is recommended. (Default: 20)\n"
info: null
example:
- 20
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gibbs_burnin"
description: "The number of burn-in rounds for RSEM's Gibbs sampler. Each round\
\ passes over the entire data set \nonce. If RSEM can use multiple threads,\
\ multiple Gibbs samplers will start at the same time and all \nsamplers share\
\ the same burn-in number. (Default: 200)\n"
info: null
example:
- 200
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gibbs_number_of_samples"
description: "The total number of count vectors RSEM will collect from its Gibbs\
\ samplers. (Default: 1000)\n"
info: null
example:
- 1000
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--gibbs_sampling_gap"
description: "The number of rounds between two successive count vectors RSEM collects.\
\ If the count vector after \nround N is collected, the count vector after round\
\ N + <int> will also be collected. (Default: 1)\n"
info: null
example:
- 1
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "double"
name: "--ci_credibility_level"
description: "The credibility level for credibility intervals. (Default: 0.95)\n"
info: null
example:
- 0.95
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "integer"
name: "--ci_number_of_samples_per_count_vector"
description: "The number of read generating probability vectors sampled per sampled\
\ count vector. The credibility \nintervals are calculated by first sampling P(C\
\ | D) and then sampling P(Theta | C) for each sampled \ncount vector. This\
\ option controls how many Theta vectors are sampled per sampled count vector.\
\ \n(Default: 50)\n"
info: null
example:
- 50
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--keep_intermediate_files"
description: "Keep temporary files generated by RSEM. RSEM creates a temporary\
\ directory, 'sample_name.temp', \ninto which it puts all intermediate output\
\ files. If this directory already exists, RSEM overwrites \nall files generated\
\ by previous RSEM runs inside of it. By default, after RSEM finishes, the \n\
temporary directory is deleted. Set this option to prevent the deletion of this\
\ directory and the \nintermediate files inside of it.\n"
info: null
direction: "input"
- type: "string"
name: "--temporary_folder"
description: "Set where to put the temporary files generated by RSEM. If the folder\
\ specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n"
info: null
example:
- "sample_name.temp"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--time"
description: "Output time consumed by each step of RSEM to 'sample_name.time'.\n"
info: null
direction: "input"
- name: "Prior-Enhanced RSEM Options"
arguments:
- type: "boolean_true"
name: "--run_pRSEM"
description: "Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform's\
\ initial pseudo-count for \nRSEM's Gibbs sampling, will be learned from input\
\ RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq\
\ peak information to partition isoforms (e.g. in pRSEM's default \npartition\
\ model), either ChIP-seq peak file (with the '--chipseq_peak_file' option)\
\ or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables\
\ are required (with the \n'--chipseq_target_read_files <string>', '--chipseq_control_read_files\
\ <string>', and '--bowtie_path \n<path> options), otherwise, ChIP-seq FASTQ\
\ files for target and control and the path to Bowtie \nexecutables are required.\n"
info: null
direction: "input"
- type: "file"
name: "--chipseq_peak_file"
description: "Full path to a ChIP-seq peak file in ENCODE's narrowPeak, i.e. BED6+4,\
\ format. This file is used \nwhen running prior-enhanced RSEM in the default\
\ two-partition model. It partitions isoforms by \nwhether they have ChIP-seq\
\ overlapping with their transcription start site region or not. Each \npartition\
\ will have its own prior parameter learned from a training set. This file can\
\ be either \ngzipped or ungzipped.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chipseq_target_read_files"
description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq target.\
\ This option is used when running \nprior-enhanced RSEM. It provides information\
\ to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped\
\ or gzipped with a suffix '.gz' or '.gzip'. The options '--bowtie_path <path>'\
\ \nand '--chipseq_control_read_files <string>' must be defined when this option\
\ is specified.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chipseq_control_read_files"
description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq control.\
\ This option is used when running \nprior-enhanced RSEM. It provides information\
\ to call ChIP-seq peaks. The file(s) can be either \nungzipped or gzipped with\
\ a suffix '.gz' or '.gzip'. The options '--bowtie_path <path>' and \n'--chipseq_target_read_files\
\ <string>' must be defined when this option is specified.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chipseq_read_files_multi_targets"
description: "Comma-separated full path of FASTQ read files for multiple ChIP-seq\
\ targets. This option is used when \nrunning prior-enhanced RSEM, where prior\
\ is learned from multiple complementary data sets. It provides \ninformation\
\ to calculate ChIP-seq signals. All files can be either ungzipped or gzipped\
\ with a suffix \n'.gz' or '.gzip'. When this option is specified, the option\
\ '--bowtie_path <path>' must be defined and \nthe option '--partition_model\
\ <string>' will be set to 'cmb_lgt' automatically.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "file"
name: "--chipseq_bed_files_multi_targets"
description: "Comma-separated full path of BED files for multiple ChIP-seq targets.\
\ This option is used when running \nprior-enhanced RSEM, where prior is learned\
\ from multiple complementary data sets. It provides information \nof ChIP-seq\
\ signals and must have at least the first six BED columns. All files can be\
\ either ungzipped \nor gzipped with a suffix '.gz' or '.gzip'. When this option\
\ is specified, the option '--partition_model \n<string>' will be set to 'cmb_lgt'\
\ automatically.\n"
info: null
must_exist: true
create_parent: true
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "boolean_true"
name: "--cap_stacked_chipseq_reads"
description: "Keep a maximum number of ChIP-seq reads that aligned to the same\
\ genomic interval. This option is used \nwhen running prior-enhanced RSEM,\
\ where prior is learned from multiple complementary data sets. This \noption\
\ is only in use when either '--chipseq_read_files_multi_targets <string>' or\
\ \n'--chipseq_bed_files_multi_targets <string>' is specified.\n"
info: null
direction: "input"
- type: "integer"
name: "--n_max_stacked_chipseq_reads"
description: "The maximum number of stacked ChIP-seq reads to keep. This option\
\ is used when running prior-enhanced \nRSEM, where prior is learned from multiple\
\ complementary data sets. This option is only in use when the \noption '--cap_stacked_chipseq_reads'\
\ is set.\n"
info: null
required: false
direction: "input"
multiple: false
multiple_sep: ";"
- type: "string"
name: "--partition_model"
description: "A keyword to specify the partition model used by prior-enhanced\
\ RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3,\
\ lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk,\
\ pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above\
\ models are learned from a training set. For detailed explanations, please\
\ \nsee prior-enhanced RSEM's paper. (Default: 'pk')\n"
info: null
example:
- "pk"
required: false
direction: "input"
multiple: false
multiple_sep: ";"
resources:
- type: "bash_script"
path: "script.sh"
is_executable: true
description: "Calculate expression with RSEM. \n"
test_resources:
- type: "bash_script"
path: "test.sh"
is_executable: true
info: null
status: "enabled"
requirements:
commands:
- "ps"
keywords:
- "Transcriptome"
- "Index"
- "Alignment"
- "RSEM"
license: "GPL-3.0"
references:
doi:
- "https://doi.org/10.1186/1471-2105-12-323"
links:
repository: "https://github.com/deweylab/RSEM"
homepage: "https://deweylab.github.io/RSEM/"
documentation: "https://deweylab.github.io/RSEM/rsem-calculate-expression.html"
runners:
- type: "executable"
id: "executable"
docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
id: "nextflow"
directives:
tag: "$id"
auto:
simplifyInput: true
simplifyOutput: false
transcript: false
publish: false
config:
labels:
mem1gb: "memory = 1000000000.B"
mem2gb: "memory = 2000000000.B"
mem5gb: "memory = 5000000000.B"
mem10gb: "memory = 10000000000.B"
mem20gb: "memory = 20000000000.B"
mem50gb: "memory = 50000000000.B"
mem100gb: "memory = 100000000000.B"
mem200gb: "memory = 200000000000.B"
mem500gb: "memory = 500000000000.B"
mem1tb: "memory = 1000000000000.B"
mem2tb: "memory = 2000000000000.B"
mem5tb: "memory = 5000000000000.B"
mem10tb: "memory = 10000000000000.B"
mem20tb: "memory = 20000000000000.B"
mem50tb: "memory = 50000000000000.B"
mem100tb: "memory = 100000000000000.B"
mem200tb: "memory = 200000000000000.B"
mem500tb: "memory = 500000000000000.B"
mem1gib: "memory = 1073741824.B"
mem2gib: "memory = 2147483648.B"
mem4gib: "memory = 4294967296.B"
mem8gib: "memory = 8589934592.B"
mem16gib: "memory = 17179869184.B"
mem32gib: "memory = 34359738368.B"
mem64gib: "memory = 68719476736.B"
mem128gib: "memory = 137438953472.B"
mem256gib: "memory = 274877906944.B"
mem512gib: "memory = 549755813888.B"
mem1tib: "memory = 1099511627776.B"
mem2tib: "memory = 2199023255552.B"
mem4tib: "memory = 4398046511104.B"
mem8tib: "memory = 8796093022208.B"
mem16tib: "memory = 17592186044416.B"
mem32tib: "memory = 35184372088832.B"
mem64tib: "memory = 70368744177664.B"
mem128tib: "memory = 140737488355328.B"
mem256tib: "memory = 281474976710656.B"
mem512tib: "memory = 562949953421312.B"
cpu1: "cpus = 1"
cpu2: "cpus = 2"
cpu5: "cpus = 5"
cpu10: "cpus = 10"
cpu20: "cpus = 20"
cpu50: "cpus = 50"
cpu100: "cpus = 100"
cpu200: "cpus = 200"
cpu500: "cpus = 500"
cpu1000: "cpus = 1000"
debug: false
container: "docker"
engines:
- type: "docker"
id: "docker"
image: "ubuntu:22.04"
target_registry: "images.viash-hub.com"
target_tag: "main"
namespace_separator: "/"
setup:
- type: "apt"
packages:
- "build-essential"
- "gcc"
- "g++"
- "make"
- "wget"
- "zlib1g-dev"
- "unzip"
interactive: false
- type: "docker"
run:
- "apt-get update && \\\napt-get clean && \\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip\
\ && \\\nunzip 2.7.11a.zip && \\\ncp STAR-2.7.11a/bin/Linux_x86_64_static/STAR\
\ /usr/local/bin && \\\ncd && \\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip\
\ && \\\nunzip v1.3.3.zip && \\\ncd RSEM-1.3.3 && \\\nmake && \\\nmake install\n"
# Version metadata exposed in the image. STAR_VERSION must match the release
# that the accompanying run command downloads and installs (2.7.11a).
env:
- "STAR_VERSION=2.7.11a"
- "RSEM_VERSION=1.3.3"
- type: "docker"
run:
- "echo \"RSEM: `rsem-calculate-expression --version | sed -e 's/Current version:\
\ RSEM v//g'`\" > /var/software_versions.txt && \\\necho \"STAR: `STAR --version`\"\
\ >> /var/software_versions.txt && \\\necho \"bowtie2: `bowtie2 --version |\
\ grep -oP '\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho\
\ \"bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \\K\\d+\\.\\\
d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho \"HISAT2: `hisat2 --version\
\ | grep -oP 'hisat2-align-s version \\K\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt\n"
entrypoint: []
cmd: null
- type: "native"
id: "native"
# Provenance of this generated config: source config path, runner/engine used,
# output location, and the git state of the build.
build_info:
config: "src/rsem/rsem_calculate_expression/config.vsh.yaml"
runner: "executable"
engine: "docker|native"
output: "target/executable/rsem/rsem_calculate_expression"
executable: "target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression"
viash_version: "0.9.0"
git_commit: "bc9cc0a6ce4e0b87a4ce47561b4812b449e101ca"
# SECURITY: the remote URL must never embed credentials. The access token that
# was previously recorded here has been stripped; it must also be revoked, and
# the CI job should sanitize the remote before building.
git_remote: "https://github.com/viash-hub/biobox"
git_tag: "v0.2.0-5-gbc9cc0a"
package_config:
name: "biobox"
version: "main"
description: "A collection of bioinformatics tools for working with sequence data.\n"
info: null
viash_version: "0.9.0"
source: "src"
target: "target"
config_mods:
- ".requirements.commands := ['ps']\n"
- ".engines += { type: \"native\" }"
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
- ".engines[.type == 'docker'].target_tag := 'main'"
keywords:
- "bioinformatics"
- "modules"
- "sequencing"
license: "MIT"
organization: "vsh"
links:
repository: "https://github.com/viash-hub/biobox"
issue_tracker: "https://github.com/viash-hub/biobox/issues"