Build pipeline: viash-hub.biobox.main-bd96b
Source commit: bc9cc0a6ce
Source message: Kallisto quant (#152)
* initial commit dedup
* Revert "initial commit dedup"
This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2.
* complete component
* Update changelog
* add help.txt
* apply suggested changes (changelog, config)
880 lines
31 KiB
YAML
880 lines
31 KiB
YAML
name: "rsem_calculate_expression"
|
|
namespace: "rsem"
|
|
version: "main"
|
|
argument_groups:
|
|
- name: "Input"
|
|
arguments:
|
|
- type: "string"
|
|
name: "--id"
|
|
description: "Sample ID."
|
|
info: null
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--strandedness"
|
|
description: "Sample strand-specificity. Must be one of unstranded, forward, reverse"
|
|
info: null
|
|
required: false
|
|
choices:
|
|
- "forward"
|
|
- "reverse"
|
|
- "unstranded"
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean_true"
|
|
name: "--paired"
|
|
description: "Paired-end reads or not?"
|
|
info: null
|
|
direction: "input"
|
|
- type: "file"
|
|
name: "--input"
|
|
description: "Input reads for quantification."
|
|
info: null
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: true
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--index"
|
|
description: "RSEM index."
|
|
info: null
|
|
must_exist: false
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--extra_args"
|
|
description: "Extra rsem-calculate-expression arguments in addition to the examples."
|
|
info: null
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- name: "Output"
|
|
arguments:
|
|
- type: "file"
|
|
name: "--counts_gene"
|
|
description: "Expression counts on gene level"
|
|
info: null
|
|
example:
|
|
- "$id.genes.results"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--counts_transcripts"
|
|
description: "Expression counts on transcript level"
|
|
info: null
|
|
example:
|
|
- "$id.isoforms.results"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--stat"
|
|
description: "RSEM statistics"
|
|
info: null
|
|
example:
|
|
- "$id.stat"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--logs"
|
|
description: "RSEM logs"
|
|
info: null
|
|
example:
|
|
- "$id.log"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--bam_star"
|
|
description: "BAM file generated by STAR (optional)"
|
|
info: null
|
|
example:
|
|
- "$id.STAR.genome.bam"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--bam_genome"
|
|
description: "Genome BAM file (optional)"
|
|
info: null
|
|
example:
|
|
- "$id.genome.bam"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--bam_transcript"
|
|
description: "Transcript BAM file (optional)"
|
|
info: null
|
|
example:
|
|
- "$id.transcript.bam"
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "output"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean_true"
|
|
name: "--sort_bam_by_read_name"
|
|
description: "Sort BAM file aligned under transcript coordinate by read name.\
|
|
\ Setting this option on will produce \ndeterministic maximum likelihood estimations\
|
|
\ from independent runs. Note that sorting will take long \ntime and lots of\
|
|
\ memory.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--no_bam_output"
|
|
description: "Do not output any BAM file."
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--sampling_for_bam"
|
|
description: "When RSEM generates a BAM file, instead of outputting all alignments\
|
|
\ a read has with their posterior \nprobabilities, one alignment is sampled\
|
|
\ according to the posterior probabilities. The sampling procedure \nincludes\
|
|
\ the alignment to the \"noise\" transcript, which does not appear in the BAM\
|
|
\ file. Only the \nsampled alignment has a weight of 1. All other alignments\
|
|
\ have weight 0. If the \"noise\" transcript is \nsampled, all alignments appeared\
|
|
\ in the BAM file should have weight 0.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--output_genome_bam"
|
|
description: "Generate a BAM file, 'sample_name.genome.bam', with alignments mapped\
|
|
\ to genomic coordinates and \nannotated with their posterior probabilities.\
|
|
\ In addition, RSEM will call samtools (included in RSEM \npackage) to sort\
|
|
\ and index the bam file. 'sample_name.genome.sorted.bam' and 'sample_name.genome.sorted.bam.bai'\
|
|
\ \nwill be generated.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--sort_bam_by_coordinate"
|
|
description: "Sort RSEM generated transcript and genome BAM files by coordinates\
|
|
\ and build associated indices.\n"
|
|
info: null
|
|
direction: "input"
|
|
- name: "Basic Options"
|
|
arguments:
|
|
- type: "boolean_true"
|
|
name: "--no_qualities"
|
|
description: "Input reads do not contain quality scores."
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--alignments"
|
|
description: "Input file contains alignments in SAM/BAM/CRAM format. The exact\
|
|
\ file format will be determined \nautomatically.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "file"
|
|
name: "--fai"
|
|
description: "If the header section of input alignment file does not contain reference\
|
|
\ sequence information, \nthis option should be turned on. <file> is a FAI format\
|
|
\ file containing each reference sequence's \nname and length. Please refer\
|
|
\ to the SAM official website for the details of FAI format.\n"
|
|
info: null
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean_true"
|
|
name: "--bowtie2"
|
|
description: "Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM\
|
|
\ does not handle indel, local \nand discordant alignments, the Bowtie2 parameters\
|
|
\ are set in a way to avoid those alignments. In \nparticular, we use options\
|
|
\ '--sensitive --dpad 0 --gbar 99999999 --mp 1,1 --np 1 --score_min L,0,-0.1'\
|
|
\ \nby default. The last parameter of '--score_min', '-0.1', is the negative\
|
|
\ of maximum mismatch rate. \nThis rate can be set by option '--bowtie2_mismatch_rate'.\
|
|
\ If reads are paired-end, we additionally \nuse options '--no_mixed' and '--no_discordant'.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--star"
|
|
description: "Use STAR to align reads. Alignment parameters are from ENCODE3's\
|
|
\ STAR-RSEM pipeline. To save \ncomputational time and memory resources, STAR's\
|
|
\ Output BAM file is unsorted. It is stored in RSEM's \ntemporary directory\
|
|
\ with name as 'sample_name.bam'. Each STAR job will have its own private copy\
|
|
\ of \nthe genome in memory.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--hisat2_hca"
|
|
description: "Use HISAT2 to align reads to the transcriptome according to Human\
|
|
\ Cell Atlas.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--append_names"
|
|
description: "If gene_name/transcript_name is available, append it to the end\
|
|
\ of gene_id/transcript_id (separated \nby '_') in files 'sample_name.isoforms.results'\
|
|
\ and 'sample_name.genes.results'.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "integer"
|
|
name: "--seed"
|
|
description: "Set the seed for the random number generators used in calculating\
|
|
\ posterior mean estimates and \ncredibility intervals. The seed must be a non-negative\
|
|
\ 32 bit integer.\n"
|
|
info: null
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean_true"
|
|
name: "--single_cell_prior"
|
|
description: "By default, RSEM uses Dirichlet(1) as the prior to calculate posterior\
|
|
\ mean estimates and credibility \nintervals. However, far fewer genes are expressed\
|
|
\ in single cell RNA-Seq data. Thus, if you want to \ncompute posterior mean\
|
|
\ estimates and/or credibility intervals and you have single-cell RNA-Seq data,\
|
|
\ \nyou are recommended to turn on this option. Then RSEM will use Dirichlet(0.1)\
|
|
\ as the prior which \nencourages the sparsity of the expression levels.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--calc_pme"
|
|
description: "Run RSEM's collapsed Gibbs sampler to calculate posterior mean estimates."
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--calc_ci"
|
|
description: "Calculate 95% credibility intervals and posterior mean estimates.\
|
|
\ The credibility level can be \nchanged by setting '--ci_credibility_level'.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--quiet"
|
|
alternatives:
|
|
- "-q"
|
|
description: "Suppress the output of logging information."
|
|
info: null
|
|
direction: "input"
|
|
- name: "Aligner Options"
|
|
arguments:
|
|
- type: "integer"
|
|
name: "--seed_length"
|
|
description: "Seed length used by the read aligner. Providing the correct value\
|
|
\ is important for RSEM. If RSEM \nruns Bowtie, it uses this value for Bowtie's\
|
|
\ seed length parameter. Any read with its or at least \none of its mates' (for\
|
|
\ paired-end reads) length less than this value will be ignored. If the \nreferences\
|
|
\ are not added poly(A) tails, the minimum allowed value is 5, otherwise, the\
|
|
\ minimum \nallowed value is 25. Note that this script will only check if the\
|
|
\ value >= 5 and give a warning \nmessage if the value < 25 but >= 5. (Default:\
|
|
\ 25)\n"
|
|
info: null
|
|
example:
|
|
- 25
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean_true"
|
|
name: "--phred64_quals"
|
|
description: "Input quality scores are encoded as Phred+64 (default for GA Pipeline\
|
|
\ ver. >= 1.3). This option is \nused by Bowtie, Bowtie 2 and HISAT2. Otherwise,\
|
|
\ quality score will be encoded as Phred+33. (Default: false)\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--solexa_quals"
|
|
description: "Input quality scores are solexa encoded (from GA Pipeline ver. <\
|
|
\ 1.3). This option is used by \nBowtie, Bowtie 2 and HISAT2. Otherwise, quality\
|
|
\ score will be encoded as Phred+33. (Default: false)\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "integer"
|
|
name: "--bowtie_n"
|
|
description: "(Bowtie parameter) max # of mismatches in the seed. (Range: 0-3,\
|
|
\ Default: 2)\n"
|
|
info: null
|
|
example:
|
|
- 2
|
|
required: false
|
|
choices:
|
|
- 0
|
|
- 1
|
|
- 2
|
|
- 3
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--bowtie_e"
|
|
description: "(Bowtie parameter) max sum of mismatch quality scores across the\
|
|
\ alignment. (Default: 99999999)\n"
|
|
info: null
|
|
example:
|
|
- 99999999
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--bowtie_m"
|
|
description: "(Bowtie parameter) suppress all alignments for a read if > <int>\
|
|
\ valid alignments exist. (Default: 200)\n"
|
|
info: null
|
|
example:
|
|
- 200
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--bowtie_chunkmbs"
|
|
description: "(Bowtie parameter) memory allocated for best first alignment calculation\
|
|
\ (Default: 0 - use Bowtie's default)\n"
|
|
info: null
|
|
example:
|
|
- 0
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "double"
|
|
name: "--bowtie2_mismatch_rate"
|
|
description: "(Bowtie 2 parameter) The maximum mismatch rate allowed. (Default:\
|
|
\ 0.1)\n"
|
|
info: null
|
|
example:
|
|
- 0.1
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--bowtie2_k"
|
|
description: "(Bowtie 2 parameter) Find up to <int> alignments per read. (Default:\
|
|
\ 200)\n"
|
|
info: null
|
|
example:
|
|
- 200
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--bowtie2_sensitivity_level"
|
|
description: "(Bowtie 2 parameter) Set Bowtie 2's preset options in --end-to-end\
|
|
\ mode. This option controls how \nhard Bowtie 2 tries to find alignments. <string>\
|
|
\ must be one of \"very_fast\", \"fast\", \"sensitive\" \nand \"very_sensitive\"\
|
|
. The four candidates correspond to Bowtie 2's \"--very-fast\", \"--fast\",\
|
|
\ \n\"--sensitive\" and \"--very-sensitive\" options. (Default: \"sensitive\"\
|
|
\ - use Bowtie 2's default)\n"
|
|
info: null
|
|
example:
|
|
- "sensitive"
|
|
required: false
|
|
choices:
|
|
- "very_fast"
|
|
- "fast"
|
|
- "sensitive"
|
|
- "very_sensitive"
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean_true"
|
|
name: "--star_gzipped_read_file"
|
|
description: "Input read file(s) is compressed by gzip. (Default: false)\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--star_bzipped_read_file"
|
|
description: "Input read file(s) is compressed by bzip2. (Default: false)\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "boolean_true"
|
|
name: "--star_output_genome_bam"
|
|
description: "Save the BAM file from STAR alignment under genomic coordinate to\
|
|
\ 'sample_name.STAR.genome.bam'. \nThis file is NOT sorted by genomic coordinate.\
|
|
\ In this file, according to STAR's manual, 'paired \nends of an alignment are\
|
|
\ always adjacent, and multiple alignments of a read are adjacent as well'.\
|
|
\ \n(Default: false)\n"
|
|
info: null
|
|
direction: "input"
|
|
- name: "Advanced Options"
|
|
arguments:
|
|
- type: "string"
|
|
name: "--tag"
|
|
description: "The name of the optional field used in the SAM input for identifying\
|
|
\ a read with too many valid \nalignments. The field should have the format\
|
|
\ <tagName>:i:<value>, where a <value> bigger than 0 \nindicates a read with\
|
|
\ too many alignments. (Default: \"\")\n"
|
|
info: null
|
|
example:
|
|
- ""
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--fragment_length_min"
|
|
description: "Minimum read/insert length allowed. This is also the value for the\
|
|
\ Bowtie/Bowtie2 -I option. \n(Default: 1)\n"
|
|
info: null
|
|
example:
|
|
- 1
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--fragment_length_max"
|
|
description: "Maximum read/insert length allowed. This is also the value for the\
|
|
\ Bowtie/Bowtie 2 -X option. \n(Default: 1000)\n"
|
|
info: null
|
|
example:
|
|
- 1000
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--fragment_length_mean"
|
|
description: "(single-end data only) The mean of the fragment length distribution,\
|
|
\ which is assumed to be a \nGaussian. (Default: -1, which disables use of the\
|
|
\ fragment length distribution)\n"
|
|
info: null
|
|
example:
|
|
- -1
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "double"
|
|
name: "--gragment_length_sd"
|
|
description: "(single-end data only) The standard deviation of the fragment length\
|
|
\ distribution, which is \nassumed to be a Gaussian. (Default: 0, which assumes\
|
|
\ that all fragments are of the same length, \ngiven by the rounded value of\
|
|
\ --fragment_length_mean).\n"
|
|
info: null
|
|
example:
|
|
- 0.0
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean_true"
|
|
name: "--estimate_rspd"
|
|
description: "Set this option if you want to estimate the read start position\
|
|
\ distribution (RSPD) from data.\nOtherwise, RSEM will use a uniform RSPD.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "integer"
|
|
name: "--num_rspd_bins"
|
|
description: "Number of bins in the RSPD. Only relevant when '--estimate_rspd'\
|
|
\ is specified. Use of the default \nsetting is recommended. (Default: 20)\n"
|
|
info: null
|
|
example:
|
|
- 20
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--gibbs_burnin"
|
|
description: "The number of burn-in rounds for RSEM's Gibbs sampler. Each round\
|
|
\ passes over the entire data set \nonce. If RSEM can use multiple threads,\
|
|
\ multiple Gibbs samplers will start at the same time and all \nsamplers share\
|
|
\ the same burn-in number. (Default: 200)\n"
|
|
info: null
|
|
example:
|
|
- 200
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--gibbs_number_of_samples"
|
|
description: "The total number of count vectors RSEM will collect from its Gibbs\
|
|
\ samplers. (Default: 1000)\n"
|
|
info: null
|
|
example:
|
|
- 1000
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--gibbs_sampling_gap"
|
|
description: "The number of rounds between two successive count vectors RSEM collects.\
|
|
\ If the count vector after \nround N is collected, the count vector after round\
|
|
\ N + <int> will also be collected. (Default: 1)\n"
|
|
info: null
|
|
example:
|
|
- 1
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "double"
|
|
name: "--ci_credibility_level"
|
|
description: "The credibility level for credibility intervals. (Default: 0.95)\n"
|
|
info: null
|
|
example:
|
|
- 0.95
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "integer"
|
|
name: "--ci_number_of_samples_per_count_vector"
|
|
description: "The number of read generating probability vectors sampled per sampled\
|
|
\ count vector. The credibility \nintervals are calculated by first sampling P(C\
|
|
\ | D) and then sampling P(Theta | C) for each sampled \ncount vector. This\
|
|
\ option controls how many Theta vectors are sampled per sampled count vector.\
|
|
\ \n(Default: 50)\n"
|
|
info: null
|
|
example:
|
|
- 50
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean_true"
|
|
name: "--keep_intermediate_files"
|
|
description: "Keep temporary files generated by RSEM. RSEM creates a temporary\
|
|
\ directory, 'sample_name.temp', \ninto which it puts all intermediate output\
|
|
\ files. If this directory already exists, RSEM overwrites \nall files generated\
|
|
\ by previous RSEM runs inside of it. By default, after RSEM finishes, the \n\
|
|
temporary directory is deleted. Set this option to prevent the deletion of this\
|
|
\ directory and the \nintermediate files inside of it.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "string"
|
|
name: "--temporary_folder"
|
|
description: "Set where to put the temporary files generated by RSEM. If the folder\
|
|
\ specified does not exist, \nRSEM will try to create it. (Default: sample_name.temp)\n"
|
|
info: null
|
|
example:
|
|
- "sample_name.temp"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean_true"
|
|
name: "--time"
|
|
description: "Output time consumed by each step of RSEM to 'sample_name.time'.\n"
|
|
info: null
|
|
direction: "input"
|
|
- name: "Prior-Enhanced RSEM Options"
|
|
arguments:
|
|
- type: "boolean_true"
|
|
name: "--run_pRSEM"
|
|
description: "Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e. isoform's\
|
|
\ initial pseudo-count for \nRSEM's Gibbs sampling, will be learned from input\
|
|
\ RNA-seq data and an external data set. When pRSEM \nneeds and only needs ChIP-seq\
|
|
\ peak information to partition isoforms (e.g. in pRSEM's default \npartition\
|
|
\ model), either ChIP-seq peak file (with the '--chipseq_peak_file' option)\
|
|
\ or ChIP-seq \nFASTQ files for target and input and the path for Bowtie executables\
|
|
\ are required (with the \n'--chipseq_target_read_files <string>', '--chipseq_control_read_files\
|
|
\ <string>', and '--bowtie_path \n<path>' options), otherwise, ChIP-seq FASTQ\
|
|
\ files for target and control and the path to Bowtie \nexecutables are required.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "file"
|
|
name: "--chipseq_peak_file"
|
|
description: "Full path to a ChIP-seq peak file in ENCODE's narrowPeak, i.e. BED6+4,\
|
|
\ format. This file is used \nwhen running prior-enhanced RSEM in the default\
|
|
\ two-partition model. It partitions isoforms by \nwhether they have ChIP-seq\
|
|
\ overlapping with their transcription start site region or not. Each \npartition\
|
|
\ will have its own prior parameter learned from a training set. This file can\
|
|
\ be either \ngzipped or ungzipped.\n"
|
|
info: null
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--chipseq_target_read_files"
|
|
description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq target.\
|
|
\ This option is used when running \nprior-enhanced RSEM. It provides information\
|
|
\ to calculate ChIP-seq peaks and signals. The file(s) \ncan be either ungzipped\
|
|
\ or gzipped with a suffix '.gz' or '.gzip'. The options '--bowtie_path <path>'\
|
|
\ \nand '--chipseq_control_read_files <string>' must be defined when this option\
|
|
\ is specified.\n"
|
|
info: null
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--chipseq_control_read_files"
|
|
description: "Comma-separated full path of FASTQ read file(s) for ChIP-seq control.\
|
|
\ This option is used when running \nprior-enhanced RSEM. It provides information\
|
|
\ to call ChIP-seq peaks. The file(s) can be either \nungzipped or gzipped with\
|
|
\ a suffix '.gz' or '.gzip'. The options '--bowtie_path <path>' and \n'--chipseq_target_read_files\
|
|
\ <string>' must be defined when this option is specified.\n"
|
|
info: null
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--chipseq_read_files_multi_targets"
|
|
description: "Comma-separated full path of FASTQ read files for multiple ChIP-seq\
|
|
\ targets. This option is used when \nrunning prior-enhanced RSEM, where prior\
|
|
\ is learned from multiple complementary data sets. It provides \ninformation\
|
|
\ to calculate ChIP-seq signals. All files can be either ungzipped or gzipped\
|
|
\ with a suffix \n'.gz' or '.gzip'. When this option is specified, the option\
|
|
\ '--bowtie_path <path>' must be defined and \nthe option '--partition_model\
|
|
\ <string>' will be set to 'cmb_lgt' automatically.\n"
|
|
info: null
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "file"
|
|
name: "--chipseq_bed_files_multi_targets"
|
|
description: "Comma-separated full path of BED files for multiple ChIP-seq targets.\
|
|
\ This option is used when running \nprior-enhanced RSEM, where prior is learned\
|
|
\ from multiple complementary data sets. It provides information \nof ChIP-seq\
|
|
\ signals and must have at least the first six BED columns. All files can be\
|
|
\ either ungzipped \nor gzipped with a suffix '.gz' or '.gzip'. When this option\
|
|
\ is specified, the option '--partition_model \n<string>' will be set to 'cmb_lgt'\
|
|
\ automatically.\n"
|
|
info: null
|
|
must_exist: true
|
|
create_parent: true
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "boolean_true"
|
|
name: "--cap_stacked_chipseq_reads"
|
|
description: "Keep a maximum number of ChIP-seq reads that aligned to the same\
|
|
\ genomic interval. This option is used \nwhen running prior-enhanced RSEM,\
|
|
\ where prior is learned from multiple complementary data sets. This \noption\
|
|
\ is only in use when either '--chipseq_read_files_multi_targets <string>' or\
|
|
\ \n'--chipseq_bed_files_multi_targets <string>' is specified.\n"
|
|
info: null
|
|
direction: "input"
|
|
- type: "integer"
|
|
name: "--n_max_stacked_chipseq_reads"
|
|
description: "The maximum number of stacked ChIP-seq reads to keep. This option\
|
|
\ is used when running prior-enhanced \nRSEM, where prior is learned from multiple\
|
|
\ complementary data sets. This option is only in use when the \noption '--cap_stacked_chipseq_reads'\
|
|
\ is set.\n"
|
|
info: null
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
- type: "string"
|
|
name: "--partition_model"
|
|
description: "A keyword to specify the partition model used by prior-enhanced\
|
|
\ RSEM. It must be one of the following \nkeywords:\n* pk\n* pk_lgtnopk\n* lm3,\
|
|
\ lm4, lm5, or lm6\n* nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk\n* pk_lm2nopk,\
|
|
\ pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk\n* cmb_lgt\nParameters for all the above\
|
|
\ models are learned from a training set. For detailed explanations, please\
|
|
\ \nsee prior-enhanced RSEM's paper. (Default: 'pk')\n"
|
|
info: null
|
|
example:
|
|
- "pk"
|
|
required: false
|
|
direction: "input"
|
|
multiple: false
|
|
multiple_sep: ";"
|
|
resources:
|
|
- type: "bash_script"
|
|
path: "script.sh"
|
|
is_executable: true
|
|
description: "Calculate expression with RSEM. \n"
|
|
test_resources:
|
|
- type: "bash_script"
|
|
path: "test.sh"
|
|
is_executable: true
|
|
info: null
|
|
status: "enabled"
|
|
requirements:
|
|
commands:
|
|
- "ps"
|
|
keywords:
|
|
- "Transcriptome"
|
|
- "Index"
|
|
- "Alignment"
|
|
- "RSEM"
|
|
license: "GPL-3.0"
|
|
references:
|
|
doi:
|
|
- "https://doi.org/10.1186/1471-2105-12-323"
|
|
links:
|
|
repository: "https://github.com/deweylab/RSEM"
|
|
homepage: "https://deweylab.github.io/RSEM/"
|
|
documentation: "https://deweylab.github.io/RSEM/rsem-calculate-expression.html"
|
|
runners:
|
|
- type: "executable"
|
|
id: "executable"
|
|
docker_setup_strategy: "ifneedbepullelsecachedbuild"
|
|
- type: "nextflow"
|
|
id: "nextflow"
|
|
directives:
|
|
tag: "$id"
|
|
auto:
|
|
simplifyInput: true
|
|
simplifyOutput: false
|
|
transcript: false
|
|
publish: false
|
|
config:
|
|
labels:
|
|
mem1gb: "memory = 1000000000.B"
|
|
mem2gb: "memory = 2000000000.B"
|
|
mem5gb: "memory = 5000000000.B"
|
|
mem10gb: "memory = 10000000000.B"
|
|
mem20gb: "memory = 20000000000.B"
|
|
mem50gb: "memory = 50000000000.B"
|
|
mem100gb: "memory = 100000000000.B"
|
|
mem200gb: "memory = 200000000000.B"
|
|
mem500gb: "memory = 500000000000.B"
|
|
mem1tb: "memory = 1000000000000.B"
|
|
mem2tb: "memory = 2000000000000.B"
|
|
mem5tb: "memory = 5000000000000.B"
|
|
mem10tb: "memory = 10000000000000.B"
|
|
mem20tb: "memory = 20000000000000.B"
|
|
mem50tb: "memory = 50000000000000.B"
|
|
mem100tb: "memory = 100000000000000.B"
|
|
mem200tb: "memory = 200000000000000.B"
|
|
mem500tb: "memory = 500000000000000.B"
|
|
mem1gib: "memory = 1073741824.B"
|
|
mem2gib: "memory = 2147483648.B"
|
|
mem4gib: "memory = 4294967296.B"
|
|
mem8gib: "memory = 8589934592.B"
|
|
mem16gib: "memory = 17179869184.B"
|
|
mem32gib: "memory = 34359738368.B"
|
|
mem64gib: "memory = 68719476736.B"
|
|
mem128gib: "memory = 137438953472.B"
|
|
mem256gib: "memory = 274877906944.B"
|
|
mem512gib: "memory = 549755813888.B"
|
|
mem1tib: "memory = 1099511627776.B"
|
|
mem2tib: "memory = 2199023255552.B"
|
|
mem4tib: "memory = 4398046511104.B"
|
|
mem8tib: "memory = 8796093022208.B"
|
|
mem16tib: "memory = 17592186044416.B"
|
|
mem32tib: "memory = 35184372088832.B"
|
|
mem64tib: "memory = 70368744177664.B"
|
|
mem128tib: "memory = 140737488355328.B"
|
|
mem256tib: "memory = 281474976710656.B"
|
|
mem512tib: "memory = 562949953421312.B"
|
|
cpu1: "cpus = 1"
|
|
cpu2: "cpus = 2"
|
|
cpu5: "cpus = 5"
|
|
cpu10: "cpus = 10"
|
|
cpu20: "cpus = 20"
|
|
cpu50: "cpus = 50"
|
|
cpu100: "cpus = 100"
|
|
cpu200: "cpus = 200"
|
|
cpu500: "cpus = 500"
|
|
cpu1000: "cpus = 1000"
|
|
debug: false
|
|
container: "docker"
|
|
engines:
|
|
- type: "docker"
|
|
id: "docker"
|
|
image: "ubuntu:22.04"
|
|
target_registry: "images.viash-hub.com"
|
|
target_tag: "main"
|
|
namespace_separator: "/"
|
|
setup:
|
|
- type: "apt"
|
|
packages:
|
|
- "build-essential"
|
|
- "gcc"
|
|
- "g++"
|
|
- "make"
|
|
- "wget"
|
|
- "zlib1g-dev"
|
|
- "unzip"
|
|
interactive: false
|
|
- type: "docker"
|
|
run:
|
|
- "apt-get update && \\\napt-get clean && \\\nwget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/2.7.11a.zip\
|
|
\ && \\\nunzip 2.7.11a.zip && \\\ncp STAR-2.7.11a/bin/Linux_x86_64_static/STAR\
|
|
\ /usr/local/bin && \\\ncd && \\\nwget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v1.3.3.zip\
|
|
\ && \\\nunzip v1.3.3.zip && \\\ncd RSEM-1.3.3 && \\\nmake && \\\nmake install\n"
|
|
env:
|
|
- "STAR_VERSION=2.7.11a"
|
|
- "RSEM_VERSION=1.3.3"
|
|
- type: "docker"
|
|
run:
|
|
- "echo \"RSEM: `rsem-calculate-expression --version | sed -e 's/Current version:\
|
|
\ RSEM v//g'`\" > /var/software_versions.txt && \\\necho \"STAR: `STAR --version`\"\
|
|
\ >> /var/software_versions.txt && \\\necho \"bowtie2: `bowtie2 --version |\
|
|
\ grep -oP '\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho\
|
|
\ \"bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \\K\\d+\\.\\\
|
|
d+\\.\\d+'`\" >> /var/software_versions.txt && \\\necho \"HISAT2: `hisat2 --version\
|
|
\ | grep -oP 'hisat2-align-s version \\K\\d+\\.\\d+\\.\\d+'`\" >> /var/software_versions.txt\n"
|
|
entrypoint: []
|
|
cmd: null
|
|
- type: "native"
|
|
id: "native"
|
|
build_info:
|
|
config: "src/rsem/rsem_calculate_expression/config.vsh.yaml"
|
|
runner: "executable"
|
|
engine: "docker|native"
|
|
output: "target/executable/rsem/rsem_calculate_expression"
|
|
executable: "target/executable/rsem/rsem_calculate_expression/rsem_calculate_expression"
|
|
viash_version: "0.9.0"
|
|
git_commit: "bc9cc0a6ce4e0b87a4ce47561b4812b449e101ca"
|
|
git_remote: "https://github.com/viash-hub/biobox"
|
|
git_tag: "v0.2.0-5-gbc9cc0a"
|
|
package_config:
|
|
name: "biobox"
|
|
version: "main"
|
|
description: "A collection of bioinformatics tools for working with sequence data.\n"
|
|
info: null
|
|
viash_version: "0.9.0"
|
|
source: "src"
|
|
target: "target"
|
|
config_mods:
|
|
- ".requirements.commands := ['ps']\n"
|
|
- ".engines += { type: \"native\" }"
|
|
- ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
|
|
- ".engines[.type == 'docker'].target_tag := 'main'"
|
|
keywords:
|
|
- "bioinformatics"
|
|
- "modules"
|
|
- "sequencing"
|
|
license: "MIT"
|
|
organization: "vsh"
|
|
links:
|
|
repository: "https://github.com/viash-hub/biobox"
|
|
issue_tracker: "https://github.com/viash-hub/biobox/issues"
|