biobox/target/executable/star/star_align_reads/.config.vsh.yaml

name: "star_align_reads"
namespace: "star"
version: "main"
authors:
- name: "Angela Oliveira Pisco"
  roles:
  - "author"
  info:
    role: "Contributor"
    links:
      github: "aopisco"
      orcid: "0000-0003-0142-2355"
      linkedin: "aopisco"
    organizations:
    - name: "Insitro"
      href: "https://insitro.com"
      role: "Director of Computational Biology"
    - name: "Open Problems"
      href: "https://openproblems.bio"
      role: "Core Member"
- name: "Robrecht Cannoodt"
  roles:
  - "author"
  - "maintainer"
  info:
    links:
      email: "robrecht@data-intuitive.com"
      github: "rcannood"
      orcid: "0000-0003-3641-729X"
      linkedin: "robrechtcannoodt"
    organizations:
    - name: "Data Intuitive"
      href: "https://www.data-intuitive.com"
      role: "Data Science Engineer"
    - name: "Open Problems"
      href: "https://openproblems.bio"
      role: "Core Member"
argument_groups:
- name: "Run Parameters"
  arguments:
  - type: "integer"
    name: "--run_rng_seed"
    description: "random number generator seed."
    info:
      orig_name: "--runRNGseed"
    example:
    - 777
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Genome Parameters"
  arguments:
  - type: "file"
    name: "--genome_dir"
    description: "path to the directory where genome files are stored (for --runMode\
      \ alignReads) or will be generated (for --runMode generateGenome)"
    info:
      orig_name: "--genomeDir"
    example:
    - "./GenomeDir"
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--genome_load"
    description: "mode of shared memory usage for the genome files. Only used with\
      \ --runMode alignReads.\n\n- LoadAndKeep     ... load genome into shared and\
      \ keep it in memory after run\n- LoadAndRemove   ... load genome into shared\
      \ but remove it after run\n- LoadAndExit     ... load genome into shared memory\
      \ and exit, keeping the genome in memory for future runs\n- Remove         \
      \ ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory\
      \  ... do not use shared memory, each job will have its own private copy of\
      \ the genome"
    info:
      orig_name: "--genomeLoad"
    example:
    - "NoSharedMemory"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--genome_fasta_files"
    description: "path(s) to the fasta files with the genome sequences, separated\
      \ by spaces. These files should be plain text FASTA files, they *cannot* be\
      \ zipped.\n\nRequired for the genome generation (--runMode genomeGenerate).\
      \ Can also be used in the mapping (--runMode alignReads) to add extra (new)\
      \ sequences to the genome (e.g. spike-ins)."
    info:
      orig_name: "--genomeFastaFiles"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--genome_file_sizes"
    description: "genome files exact sizes in bytes. Typically, this should not be\
      \ defined by the user."
    info:
      orig_name: "--genomeFileSizes"
    example:
    - 0
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--genome_transform_output"
    description: "which output to transform back to original genome\n\n- SAM     ...\
      \ SAM/BAM alignments\n- SJ      ... splice junctions (SJ.out.tab)\n- Quant \
      \  ... quantifications (from --quant_mode option)\n- None    ... no transformation\
      \ of the output"
    info:
      orig_name: "--genomeTransformOutput"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--genome_chr_set_mitochondrial"
    description: "names of the mitochondrial chromosomes. Presently only used for\
      \ STARsolo statistics output/"
    info:
      orig_name: "--genomeChrSetMitochondrial"
    example:
    - "chrM"
    - "M"
    - "MT"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
- name: "Splice Junctions Database"
  arguments:
  - type: "string"
    name: "--sjdb_file_chr_start_end"
    description: "path to the files with genomic coordinates (chr <tab> start <tab>\
      \ end <tab> strand) for the splice junction introns. Multiple files can be supplied\
      \ and will be concatenated."
    info:
      orig_name: "--sjdbFileChrStartEnd"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "file"
    name: "--sjdb_gtf_file"
    description: "path to the GTF file with annotations"
    info:
      orig_name: "--sjdbGTFfile"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--sjdb_gtf_chr_prefix"
    description: "prefix for chromosome names in a GTF file (e.g. 'chr' for using\
      \ ENSMEBL annotations with UCSC genomes)"
    info:
      orig_name: "--sjdbGTFchrPrefix"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--sjdb_gtf_feature_exon"
    description: "feature type in GTF file to be used as exons for building transcripts"
    info:
      orig_name: "--sjdbGTFfeatureExon"
    example:
    - "exon"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--sjdb_gtf_tag_exon_parent_transcript"
    description: "GTF attribute name for parent transcript ID (default \"transcript_id\"\
      \ works for GTF files)"
    info:
      orig_name: "--sjdbGTFtagExonParentTranscript"
    example:
    - "transcript_id"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--sjdb_gtf_tag_exon_parent_gene"
    description: "GTF attribute name for parent gene ID (default \"gene_id\" works\
      \ for GTF files)"
    info:
      orig_name: "--sjdbGTFtagExonParentGene"
    example:
    - "gene_id"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--sjdb_gtf_tag_exon_parent_gene_name"
    description: "GTF attribute name for parent gene name"
    info:
      orig_name: "--sjdbGTFtagExonParentGeneName"
    example:
    - "gene_name"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--sjdb_gtf_tag_exon_parent_gene_type"
    description: "GTF attribute name for parent gene type"
    info:
      orig_name: "--sjdbGTFtagExonParentGeneType"
    example:
    - "gene_type"
    - "gene_biotype"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--sjdb_overhang"
    description: "length of the donor/acceptor sequence on each side of the junctions,\
      \ ideally = (mate_length - 1)"
    info:
      orig_name: "--sjdbOverhang"
    example:
    - 100
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--sjdb_score"
    description: "extra alignment score for alignments that cross database junctions"
    info:
      orig_name: "--sjdbScore"
    example:
    - 2
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--sjdb_insert_save"
    description: "which files to save when sjdb junctions are inserted on the fly\
      \ at the mapping step\n\n- Basic ... only small junction / transcript files\n\
      - All   ... all files including big Genome, SA and SAindex - this will create\
      \ a complete genome directory"
    info:
      orig_name: "--sjdbInsertSave"
    example:
    - "Basic"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Variation parameters"
  arguments:
  - type: "string"
    name: "--var_vcf_file"
    description: "path to the VCF file that contains variation data. The 10th column\
      \ should contain the genotype information, e.g. 0/1"
    info:
      orig_name: "--varVCFfile"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Read Parameters"
  arguments:
  - type: "string"
    name: "--read_files_type"
    description: "format of input read files\n\n- Fastx       ... FASTA or FASTQ\n\
      - SAM SE      ... SAM or BAM single-end reads; for BAM use --read_files_command\
      \ samtools view\n- SAM PE      ... SAM or BAM paired-end reads; for BAM use\
      \ --read_files_command samtools view"
    info:
      orig_name: "--readFilesType"
    example:
    - "Fastx"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--read_files_sam_attr_keep"
    description: "for --read_files_type SAM SE/PE, which SAM tags to keep in the output\
      \ BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All     ... keep all tags\n-\
      \ None    ... do not keep any tags"
    info:
      orig_name: "--readFilesSAMattrKeep"
    example:
    - "All"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "file"
    name: "--read_files_manifest"
    description: "path to the \"manifest\" file with the names of read files. The\
      \ manifest file should contain 3 tab-separated columns:\n\npaired-end reads:\
      \ read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads:\
      \ read1_file_name $tab$ -               $tab$ read_group_line.\nSpaces, but\
      \ not tabs are allowed in file names.\nIf read_group_line does not start with\
      \ ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line\
      \ starts with ID:, it can contain several fields separated by $tab$, and all\
      \ fields will be be copied verbatim into SAM @RG header line."
    info:
      orig_name: "--readFilesManifest"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--read_files_prefix"
    description: "prefix for the read files names, i.e. it will be added in front\
      \ of the strings in --readFilesIn"
    info:
      orig_name: "--readFilesPrefix"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--read_files_command"
    description: "command line to execute for each of the input file. This command\
      \ should generate FASTA or FASTQ text and send it to stdout\n\nFor example:\
      \ zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc."
    info:
      orig_name: "--readFilesCommand"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--read_map_number"
    description: "number of reads to map from the beginning of the file\n\n-1: map\
      \ all reads"
    info:
      orig_name: "--readMapNumber"
    example:
    - -1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--read_mates_lengths_in"
    description: "Equal/NotEqual - lengths of names,sequences,qualities for both mates\
      \ are the same  / not the same. NotEqual is safe in all situations."
    info:
      orig_name: "--readMatesLengthsIn"
    example:
    - "NotEqual"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--read_name_separator"
    description: "character(s) separating the part of the read names that will be\
      \ trimmed in output (read name after space is always trimmed)"
    info:
      orig_name: "--readNameSeparator"
    example:
    - "/"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--read_quality_score_base"
    description: "number to be subtracted from the ASCII code to get Phred quality\
      \ score"
    info:
      orig_name: "--readQualityScoreBase"
    example:
    - 33
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Read Clipping"
  arguments:
  - type: "string"
    name: "--clip_adapter_type"
    description: "adapter clipping type\n\n- Hamming ... adapter clipping based on\
      \ Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n\
      - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes\
      \ Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ...\
      \ no adapter clipping, all other clip* parameters are disregarded"
    info:
      orig_name: "--clipAdapterType"
    example:
    - "Hamming"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--clip3p_nbases"
    description: "number(s) of bases to clip from 3p of each mate. If one value is\
      \ given, it will be assumed the same for both mates."
    info:
      orig_name: "--clip3pNbases"
    example:
    - 0
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--clip3p_adapter_seq"
    description: "adapter sequences to clip from 3p of each mate.  If one value is\
      \ given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence\
      \ with the length equal to read length"
    info:
      orig_name: "--clip3pAdapterSeq"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "double"
    name: "--clip3p_adapter_mm_p"
    description: "max proportion of mismatches for 3p adapter clipping for each mate.\
      \  If one value is given, it will be assumed the same for both mates."
    info:
      orig_name: "--clip3pAdapterMMp"
    example:
    - 0.1
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--clip3p_after_adapter_nbases"
    description: "number of bases to clip from 3p of each mate after the adapter clipping.\
      \ If one value is given, it will be assumed the same for both mates."
    info:
      orig_name: "--clip3pAfterAdapterNbases"
    example:
    - 0
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--clip5p_nbases"
    description: "number(s) of bases to clip from 5p of each mate. If one value is\
      \ given, it will be assumed the same for both mates."
    info:
      orig_name: "--clip5pNbases"
    example:
    - 0
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
- name: "Limits"
  arguments:
  - type: "long"
    name: "--limit_genome_generate_ram"
    description: "maximum available RAM (bytes) for genome generation"
    info:
      orig_name: "--limitGenomeGenerateRAM"
    example:
    - 31000000000
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "long"
    name: "--limit_io_buffer_size"
    description: "max available buffers size (bytes) for input/output, per thread"
    info:
      orig_name: "--limitIObufferSize"
    example:
    - 30000000
    - 50000000
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "long"
    name: "--limit_out_sam_one_read_bytes"
    description: "max size of the SAM record (bytes) for one read. Recommended value:\
      \ >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax"
    info:
      orig_name: "--limitOutSAMoneReadBytes"
    example:
    - 100000
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--limit_out_sj_one_read"
    description: "max number of junctions for one read (including all multi-mappers)"
    info:
      orig_name: "--limitOutSJoneRead"
    example:
    - 1000
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--limit_out_sj_collapsed"
    description: "max number of collapsed junctions"
    info:
      orig_name: "--limitOutSJcollapsed"
    example:
    - 1000000
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "long"
    name: "--limit_bam_sort_ram"
    description: "maximum available RAM (bytes) for sorting BAM. If =0, it will be\
      \ set to the genome index size. 0 value can only be used with --genome_load\
      \ NoSharedMemory option."
    info:
      orig_name: "--limitBAMsortRAM"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--limit_sjdb_insert_nsj"
    description: "maximum number of junctions to be inserted to the genome on the\
      \ fly at the mapping stage, including those from annotations and those detected\
      \ in the 1st step of the 2-pass run"
    info:
      orig_name: "--limitSjdbInsertNsj"
    example:
    - 1000000
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--limit_nreads_soft"
    description: "soft limit on the number of reads"
    info:
      orig_name: "--limitNreadsSoft"
    example:
    - -1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Output: general"
  arguments:
  - type: "string"
    name: "--out_tmp_keep"
    description: "whether to keep the temporary files after STAR runs is finished\n\
      \n- None ... remove all temporary files\n- All ... keep all files"
    info:
      orig_name: "--outTmpKeep"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_std"
    description: "which output will be directed to stdout (standard out)\n\n- Log\
      \                    ... log messages\n- SAM                    ... alignments\
      \ in SAM format (which normally are output to Aligned.out.sam file), normal\
      \ standard output will go into Log.std.out\n- BAM_Unsorted           ... alignments\
      \ in BAM format, unsorted. Requires --out_sam_type BAM Unsorted\n- BAM_SortedByCoordinate\
      \ ... alignments in BAM format, sorted by coordinate. Requires --out_sam_type\
      \ BAM SortedByCoordinate\n- BAM_Quant              ... alignments to transcriptome\
      \ in BAM format, unsorted. Requires --quant_mode TranscriptomeSAM"
    info:
      orig_name: "--outStd"
    example:
    - "Log"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_reads_unmapped"
    description: "output of unmapped and partially mapped (i.e. mapped only one mate\
      \ of a paired end read) reads in separate file(s).\n\n- None    ... no output\n\
      - Fastx   ... output in separate fasta/fastq files, Unmapped.out.mate1/2"
    info:
      orig_name: "--outReadsUnmapped"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_qs_conversion_add"
    description: "add this number to the quality score (e.g. to convert from Illumina\
      \ to Sanger, use -31)"
    info:
      orig_name: "--outQSconversionAdd"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_multimapper_order"
    description: "order of multimapping alignments in the output files\n\n- Old_2.4\
      \             ... quasi-random order used before 2.5.0\n- Random           \
      \   ... random order of alignments for each multi-mapper. Read mates (pairs)\
      \ are always adjacent, all alignment for each read stay together. This option\
      \ will become default in the future releases."
    info:
      orig_name: "--outMultimapperOrder"
    example:
    - "Old_2.4"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Output: SAM and BAM"
  arguments:
  - type: "string"
    name: "--out_sam_type"
    description: "type of SAM/BAM output\n\n1st word:\n- BAM  ... output BAM without\
      \ sorting\n- SAM  ... output SAM without sorting\n- None ... no SAM/BAM output\n\
      2nd, 3rd:\n- Unsorted           ... standard unsorted\n- SortedByCoordinate\
      \ ... sorted by coordinate. This option will allocate extra memory for sorting\
      \ which can be specified by --limit_bam_sort_ram."
    info:
      orig_name: "--outSAMtype"
    example:
    - "SAM"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_mode"
    description: "mode of SAM output\n\n- None ... no SAM output\n- Full ... full\
      \ SAM output\n- NoQS ... full SAM but without quality scores"
    info:
      orig_name: "--outSAMmode"
    example:
    - "Full"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_strand_field"
    description: "Cufflinks-like strand field flag\n\n- None        ... not used\n\
      - intronMotif ... strand derived from the intron motif. This option changes\
      \ the output alignments: reads with inconsistent and/or non-canonical introns\
      \ are filtered out."
    info:
      orig_name: "--outSAMstrandField"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_attributes"
    description: "a string of desired SAM attributes, in the order desired for the\
      \ output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n\
      - None        ... no attributes\n- Standard    ... NH HI AS nM\n- All      \
      \   ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH          ... number\
      \ of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard\
      \ SAM tag.\n- HI          ... multiple alignment index, starts with --out_sam_attr_ih_start\
      \ (=1 by default). Standard SAM tag.\n- AS          ... local alignment score,\
      \ +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE\
      \ reads, total score for two mates. Stadnard SAM tag.\n- nM          ... number\
      \ of mismatches. For PE reads, sum over two mates.\n- NM          ... edit distance\
      \ to the reference (number of mismatched + inserted + deleted bases) for each\
      \ mate. Standard SAM tag.\n- MD          ... string encoding mismatched and\
      \ deleted reference bases (see standard SAM specifications). Standard SAM tag.\n\
      - jM          ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical;\
      \ 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions\
      \ database is used, and a junction is annotated, 20 is added to its motif value.\n\
      - jI          ... start and end of introns for all junctions (1-based).\n- XS\
      \          ... alignment strand according to --out_sam_strand_field.\n- MC \
      \         ... mate's CIGAR string. Standard SAM tag.\n- ch          ... marks\
      \ all segment of all chimeric alingments for --chim_out_type WithinBAM output.\n\
      - cN          ... number of bases clipped from the read ends: 5' and 3'\n***Variation:\n\
      - vA          ... variant allele\n- vG          ... genomic coordinate of the\
      \ variant overlapped by the read.\n- vW          ... 1 - alignment passes WASP\
      \ filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires\
      \ --wasp_output_mode SAMtag.\n- ha          ... haplotype (1/2) when mapping\
      \ to the diploid genome. Requires genome generated with --genomeTransformType\
      \ Diploid .\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of\
      \ cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN       ... gene\
      \ ID and gene name for unique-gene reads.\n- gx gn       ... gene IDs and gene\
      \ names for unique- and multi-gene reads.\n- CB UB       ... error-corrected\
      \ cell barcodes and UMIs for solo* demultiplexing. Requires --out_sam_type BAM\
      \ SortedByCoordinate.\n- sM          ... assessment of CB and UMI.\n- sS   \
      \       ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ         \
      \ ... quality of the entire barcode.\n- sF          ... type of feature overlap\
      \ and number of features for each alignment\n***Unsupported/undocumented:\n\
      - rB          ... alignment block read/genomic coordinates.\n- vR          ...\
      \ read coordinate of the variant."
    info:
      orig_name: "--outSAMattributes"
    example:
    - "Standard"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--out_sam_attr_ih_start"
    description: "start value for the IH attribute. 0 may be required by some downstream\
      \ software, such as Cufflinks or StringTie."
    info:
      orig_name: "--outSAMattrIHstart"
    example:
    - 1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_unmapped"
    description: "output of unmapped reads in the SAM format\n\n1st word:\n- None\
      \   ... no output\n- Within ... output unmapped reads within the main SAM file\
      \ (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for\
      \ each alignment, and, in case of unsorted output, keep it adjacent to its mapped\
      \ mate. Only affects multi-mapping reads."
    info:
      orig_name: "--outSAMunmapped"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_order"
    description: "type of sorting for the SAM output\n\nPaired: one mate after the\
      \ other for all paired alignments\nPairedKeepInputOrder: one mate after the\
      \ other for all paired alignments, the order is kept the same as in the input\
      \ FASTQ files"
    info:
      orig_name: "--outSAMorder"
    example:
    - "Paired"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_primary_flag"
    description: "which alignments are considered primary - all others will be marked\
      \ with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with the\
      \ best score is primary\n- AllBestScore ... all alignments with the best score\
      \ are primary"
    info:
      orig_name: "--outSAMprimaryFlag"
    example:
    - "OneBestScore"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_read_id"
    description: "read ID record type\n\n- Standard ... first word (until space) from\
      \ the FASTx read ID line, removing /1,/2 from the end\n- Number   ... read number\
      \ (index) in the FASTx file"
    info:
      orig_name: "--outSAMreadID"
    example:
    - "Standard"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_sam_mapq_unique"
    description: "0 to 255: the MAPQ value for unique mappers"
    info:
      orig_name: "--outSAMmapqUnique"
    example:
    - 255
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_sam_flag_or"
    description: "0 to 65535: sam FLAG will be bitwise OR'd with this value, i.e.\
      \ FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by\
      \ STAR, and after outSAMflagAND. Can be used to set specific bits that are not\
      \ set otherwise."
    info:
      orig_name: "--outSAMflagOR"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_sam_flag_and"
    description: "0 to 65535: sam FLAG will be bitwise AND'd with this value, i.e.\
      \ FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by\
      \ STAR, but before outSAMflagOR. Can be used to unset specific bits that are\
      \ not set otherwise."
    info:
      orig_name: "--outSAMflagAND"
    example:
    - 65535
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_attr_rg_line"
    description: "SAM/BAM read group line. The first word contains the read group\
      \ identifier and must start with \"ID:\", e.g. --out_sam_attr_rg_line ID:xxx\
      \ CN:yy \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment.\
      \ Any spaces in the tag values have to be double quoted.\nComma separated RG\
      \ lines correspons to different (comma separated) input files in --readFilesIn.\
      \ Commas have to be surrounded by spaces, e.g.\n--out_sam_attr_rg_line ID:xxx\
      \ , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy"
    info:
      orig_name: "--outSAMattrRGline"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_header_hd"
    description: "@HD (header) line of the SAM header"
    info:
      orig_name: "--outSAMheaderHD"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_header_pg"
    description: "extra @PG (software) line of the SAM header (in addition to STAR)"
    info:
      orig_name: "--outSAMheaderPG"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_header_comment_file"
    description: "path to the file with @CO (comment) lines of the SAM header"
    info:
      orig_name: "--outSAMheaderCommentFile"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_sam_filter"
    description: "filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences\
      \ ... only keep the reads for which all alignments are to the extra reference\
      \ sequences added with --genome_fasta_files at the mapping stage.\n- KeepAllAddedReferences\
      \ ...  keep all alignments to the extra reference sequences added with --genome_fasta_files\
      \ at the mapping stage."
    info:
      orig_name: "--outSAMfilter"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--out_sam_mult_nmax"
    description: "max number of multiple alignments for a read that will be output\
      \ to the SAM/BAM files. Note that if this value is not equal to -1, the top\
      \ scoring alignment will be output first\n\n- -1 ... all alignments (up to --out_filter_multimap_nmax)\
      \ will be output"
    info:
      orig_name: "--outSAMmultNmax"
    example:
    - -1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_sam_tlen"
    description: "calculation method for the TLEN field in the SAM/BAM files\n\n-\
      \ 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate.\
      \ (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost\
      \ base of any mate. (+)sign for the mate with the leftmost base. This is different\
      \ from 1 for overlapping mates with protruding ends"
    info:
      orig_name: "--outSAMtlen"
    example:
    - 1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_bam_compression"
    description: "-1 to 10  BAM compression level, -1=default compression (6?), 0=no\
      \ compression, 10=maximum compression"
    info:
      orig_name: "--outBAMcompression"
    example:
    - 1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_bam_sorting_thread_n"
    description: ">=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)."
    info:
      orig_name: "--outBAMsortingThreadN"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_bam_sorting_bins_n"
    description: ">0:  number of genome bins for coordinate-sorting"
    info:
      orig_name: "--outBAMsortingBinsN"
    example:
    - 50
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "BAM processing"
  arguments:
  - type: "string"
    name: "--bam_remove_duplicates_type"
    description: "mark duplicates in the BAM file, for now only works with (i) sorted\
      \ BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- -\
      \                       ... no duplicate removal/marking\n- UniqueIdentical\
      \         ... mark all multimappers, and duplicate unique mappers. The coordinates,\
      \ FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti  ... mark duplicate\
      \ unique mappers but not multimappers."
    info:
      orig_name: "--bamRemoveDuplicatesType"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--bam_remove_duplicates_mate2bases_n"
    description: "number of bases from the 5' of mate 2 to use in collapsing (e.g.\
      \ for RAMPAGE)"
    info:
      orig_name: "--bamRemoveDuplicatesMate2basesN"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Output Wiggle"
  arguments:
  - type: "string"
    name: "--out_wig_type"
    description: "type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\"\
      . Requires sorted BAM: --out_sam_type BAM SortedByCoordinate .\n\n1st word:\n\
      - None       ... no signal output\n- bedGraph   ... bedGraph format\n- wiggle\
      \     ... wiggle format\n2nd word:\n- read1_5p   ... signal from only 5' of\
      \ the 1st read, useful for CAGE/RAMPAGE etc\n- read2      ... signal from only\
      \ 2nd read"
    info:
      orig_name: "--outWigType"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--out_wig_strand"
    description: "strandedness of wiggle/bedGraph output\n\n- Stranded   ...  separate\
      \ strands, str1 and str2\n- Unstranded ...  collapsed strands"
    info:
      orig_name: "--outWigStrand"
    example:
    - "Stranded"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_wig_references_prefix"
    description: "prefix matching reference names to include in the output wiggle\
      \ file, e.g. \"chr\", default \"-\" - include all references"
    info:
      orig_name: "--outWigReferencesPrefix"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_wig_norm"
    description: "type of normalization for the signal\n\n- RPM    ... reads per million\
      \ of mapped reads\n- None   ... no normalization, \"raw\" counts"
    info:
      orig_name: "--outWigNorm"
    example:
    - "RPM"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Output Filtering"
  arguments:
  - type: "string"
    name: "--out_filter_type"
    description: "type of filtering\n\n- Normal  ... standard filtering using only\
      \ current alignment\n- BySJout ... keep only those reads that contain junctions\
      \ that passed filtering into SJ.out.tab"
    info:
      orig_name: "--outFilterType"
    example:
    - "Normal"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_filter_multimap_score_range"
    description: "the score range below the maximum score for multimapping alignments"
    info:
      orig_name: "--outFilterMultimapScoreRange"
    example:
    - 1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_filter_multimap_nmax"
    description: "maximum number of loci the read is allowed to map to. Alignments\
      \ (all of them) will be output only if the read maps to no more loci than this\
      \ value.\n\nOtherwise no alignments will be output, and the read will be counted\
      \ as \"mapped to too many loci\" in the Log.final.out ."
    info:
      orig_name: "--outFilterMultimapNmax"
    example:
    - 10
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_filter_mismatch_nmax"
    description: "alignment will be output only if it has no more mismatches than\
      \ this value."
    info:
      orig_name: "--outFilterMismatchNmax"
    example:
    - 10
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--out_filter_mismatch_nover_lmax"
    description: "alignment will be output only if its ratio of mismatches to *mapped*\
      \ length is less than or equal to this value."
    info:
      orig_name: "--outFilterMismatchNoverLmax"
    example:
    - 0.3
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--out_filter_mismatch_nover_read_lmax"
    description: "alignment will be output only if its ratio of mismatches to *read*\
      \ length is less than or equal to this value."
    info:
      orig_name: "--outFilterMismatchNoverReadLmax"
    example:
    - 1.0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_filter_score_min"
    description: "alignment will be output only if its score is higher than or equal\
      \ to this value."
    info:
      orig_name: "--outFilterScoreMin"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--out_filter_score_min_over_lread"
    description: "same as outFilterScoreMin, but normalized to read length (sum of\
      \ mates' lengths for paired-end reads)"
    info:
      orig_name: "--outFilterScoreMinOverLread"
    example:
    - 0.66
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_filter_match_nmin"
    description: "alignment will be output only if the number of matched bases is\
      \ higher than or equal to this value."
    info:
      orig_name: "--outFilterMatchNmin"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--out_filter_match_nmin_over_lread"
    description: "sam as outFilterMatchNmin, but normalized to the read length (sum\
      \ of mates' lengths for paired-end reads)."
    info:
      orig_name: "--outFilterMatchNminOverLread"
    example:
    - 0.66
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_filter_intron_motifs"
    description: "filter alignment using their motifs\n\n- None                  \
      \         ... no filtering\n- RemoveNoncanonical             ... filter out\
      \ alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated\
      \  ... filter out alignments that contain non-canonical unannotated junctions\
      \ when using annotated splice junctions database. The annotated non-canonical\
      \ junctions will be kept."
    info:
      orig_name: "--outFilterIntronMotifs"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--out_filter_intron_strands"
    description: "filter alignments\n\n- RemoveInconsistentStrands      ... remove\
      \ alignments that have junctions with inconsistent strands\n- None         \
      \                  ... no filtering"
    info:
      orig_name: "--outFilterIntronStrands"
    example:
    - "RemoveInconsistentStrands"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Output splice junctions (SJ.out.tab)"
  arguments:
  - type: "string"
    name: "--out_sj_type"
    description: "type of splice junction output\n\n- Standard    ... standard SJ.out.tab\
      \ output\n- None        ... no splice junction output"
    info:
      orig_name: "--outSJtype"
    example:
    - "Standard"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Output Filtering: Splice Junctions"
  arguments:
  - type: "string"
    name: "--out_sj_filter_reads"
    description: "which reads to consider for collapsed splice junctions output\n\n\
      - All     ... all reads, unique- and multi-mappers\n- Unique  ... uniquely mapping\
      \ reads only"
    info:
      orig_name: "--outSJfilterReads"
    example:
    - "All"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--out_sj_filter_overhang_min"
    description: "minimum overhang length for splice junctions on both sides for:\
      \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif,\
      \ (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\ndoes not apply\
      \ to annotated junctions"
    info:
      orig_name: "--outSJfilterOverhangMin"
    example:
    - 30
    - 12
    - 12
    - 12
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--out_sj_filter_count_unique_min"
    description: "minimum uniquely mapping read count per junction for: (1) non-canonical\
      \ motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and\
      \ GT/AT motif. -1 means no output for that motif\n\nJunctions are output if\
      \ one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are\
      \ satisfied\ndoes not apply to annotated junctions"
    info:
      orig_name: "--outSJfilterCountUniqueMin"
    example:
    - 3
    - 1
    - 1
    - 1
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--out_sj_filter_count_total_min"
    description: "minimum total (multi-mapping+unique) read count per junction for:\
      \ (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif,\
      \ (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions\
      \ are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin\
      \ conditions are satisfied\ndoes not apply to annotated junctions"
    info:
      orig_name: "--outSJfilterCountTotalMin"
    example:
    - 3
    - 1
    - 1
    - 1
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--out_sj_filter_dist_to_other_sj_min"
    description: "minimum allowed distance to other junctions' donor/acceptor\n\n\
      does not apply to annotated junctions"
    info:
      orig_name: "--outSJfilterDistToOtherSJmin"
    example:
    - 10
    - 0
    - 5
    - 10
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--out_sj_filter_intron_max_vs_read_n"
    description: "maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\
      \ni.e. by default junctions supported by 1 read can have gaps <=50000b, by 2\
      \ reads: <=100000b, by 3 reads: <=200000. by >=4 reads any gap <=alignIntronMax\n\
      does not apply to annotated junctions"
    info:
      orig_name: "--outSJfilterIntronMaxVsReadN"
    example:
    - 50000
    - 100000
    - 200000
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
- name: "Scoring"
  arguments:
  - type: "integer"
    name: "--score_gap"
    description: "splice junction penalty (independent on intron motif)"
    info:
      orig_name: "--scoreGap"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--score_gap_noncan"
    description: "non-canonical junction penalty (in addition to scoreGap)"
    info:
      orig_name: "--scoreGapNoncan"
    example:
    - -8
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--score_gap_gcag"
    description: "GC/AG and CT/GC junction penalty (in addition to scoreGap)"
    info:
      orig_name: "--scoreGapGCAG"
    example:
    - -4
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--score_gap_atac"
    description: "AT/AC  and GT/AT junction penalty  (in addition to scoreGap)"
    info:
      orig_name: "--scoreGapATAC"
    example:
    - -8
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--score_genomic_length_log2scale"
    description: "extra score logarithmically scaled with genomic length of the alignment:\
      \ scoreGenomicLengthLog2scale*log2(genomicLength)"
    info:
      orig_name: "--scoreGenomicLengthLog2scale"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--score_del_open"
    description: "deletion open penalty"
    info:
      orig_name: "--scoreDelOpen"
    example:
    - -2
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--score_del_base"
    description: "deletion extension penalty per base (in addition to scoreDelOpen)"
    info:
      orig_name: "--scoreDelBase"
    example:
    - -2
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--score_ins_open"
    description: "insertion open penalty"
    info:
      orig_name: "--scoreInsOpen"
    example:
    - -2
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--score_ins_base"
    description: "insertion extension penalty per base (in addition to scoreInsOpen)"
    info:
      orig_name: "--scoreInsBase"
    example:
    - -2
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--score_stitch_sj_shift"
    description: "maximum score reduction while searching for SJ boundaries in the\
      \ stitching step"
    info:
      orig_name: "--scoreStitchSJshift"
    example:
    - 1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Alignments and Seeding"
  arguments:
  - type: "integer"
    name: "--seed_search_start_lmax"
    description: "defines the search start point through the read - the read is split\
      \ into pieces no longer than this value"
    info:
      orig_name: "--seedSearchStartLmax"
    example:
    - 50
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--seed_search_start_lmax_over_lread"
    description: "seedSearchStartLmax normalized to read length (sum of mates' lengths\
      \ for paired-end reads)"
    info:
      orig_name: "--seedSearchStartLmaxOverLread"
    example:
    - 1.0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--seed_search_lmax"
    description: "defines the maximum length of the seeds, if =0 seed length is not\
      \ limited"
    info:
      orig_name: "--seedSearchLmax"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--seed_multimap_nmax"
    description: "only pieces that map fewer than this value are utilized in the stitching\
      \ procedure"
    info:
      orig_name: "--seedMultimapNmax"
    example:
    - 10000
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--seed_per_read_nmax"
    description: "max number of seeds per read"
    info:
      orig_name: "--seedPerReadNmax"
    example:
    - 1000
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--seed_per_window_nmax"
    description: "max number of seeds per window"
    info:
      orig_name: "--seedPerWindowNmax"
    example:
    - 50
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--seed_none_loci_per_window"
    description: "max number of one seed loci per window"
    info:
      orig_name: "--seedNoneLociPerWindow"
    example:
    - 10
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--seed_split_min"
    description: "min length of the seed sequences split by Ns or mate gap"
    info:
      orig_name: "--seedSplitMin"
    example:
    - 12
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--seed_map_min"
    description: "min length of seeds to be mapped"
    info:
      orig_name: "--seedMapMin"
    example:
    - 5
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--align_intron_min"
    description: "minimum intron size, genomic gap is considered intron if its length>=alignIntronMin,\
      \ otherwise it is considered Deletion"
    info:
      orig_name: "--alignIntronMin"
    example:
    - 21
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--align_intron_max"
    description: "maximum intron size, if 0, max intron size will be determined by\
      \ (2^winBinNbits)*winAnchorDistNbins"
    info:
      orig_name: "--alignIntronMax"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--align_mates_gap_max"
    description: "maximum gap between two mates, if 0, max intron gap will be determined\
      \ by (2^winBinNbits)*winAnchorDistNbins"
    info:
      orig_name: "--alignMatesGapMax"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--align_sj_overhang_min"
    description: "minimum overhang (i.e. block size) for spliced alignments"
    info:
      orig_name: "--alignSJoverhangMin"
    example:
    - 5
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--align_sj_stitch_mismatch_nmax"
    description: "maximum number of mismatches for stitching of the splice junctions\
      \ (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3)\
      \ GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif."
    info:
      orig_name: "--alignSJstitchMismatchNmax"
    example:
    - 0
    - -1
    - 0
    - 0
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--align_sjdb_overhang_min"
    description: "minimum overhang (i.e. block size) for annotated (sjdb) spliced\
      \ alignments"
    info:
      orig_name: "--alignSJDBoverhangMin"
    example:
    - 3
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--align_spliced_mate_map_lmin"
    description: "minimum mapped length for a read mate that is spliced"
    info:
      orig_name: "--alignSplicedMateMapLmin"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--align_spliced_mate_map_lmin_over_lmate"
    description: "alignSplicedMateMapLmin normalized to mate length"
    info:
      orig_name: "--alignSplicedMateMapLminOverLmate"
    example:
    - 0.66
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--align_windows_per_read_nmax"
    description: "max number of windows per read"
    info:
      orig_name: "--alignWindowsPerReadNmax"
    example:
    - 10000
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--align_transcripts_per_window_nmax"
    description: "max number of transcripts per window"
    info:
      orig_name: "--alignTranscriptsPerWindowNmax"
    example:
    - 100
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--align_transcripts_per_read_nmax"
    description: "max number of different alignments per read to consider"
    info:
      orig_name: "--alignTranscriptsPerReadNmax"
    example:
    - 10000
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--align_ends_type"
    description: "type of read ends alignment\n\n- Local             ... standard\
      \ local alignment with soft-clipping allowed\n- EndToEnd          ... force\
      \ end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1   ... fully\
      \ extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12\
      \ ... fully extend only the 5p of the both read1 and read2, all other ends:\
      \ local alignment"
    info:
      orig_name: "--alignEndsType"
    example:
    - "Local"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--align_ends_protrude"
    description: "allow protrusion of alignment ends, i.e. start (end) of the +strand\
      \ mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum\
      \ number of protrusion bases allowed\n2nd word: string:\n-                 \
      \    ConcordantPair ... report alignments with non-zero protrusion as concordant\
      \ pairs\n-                     DiscordantPair ... report alignments with non-zero\
      \ protrusion as discordant pairs"
    info:
      orig_name: "--alignEndsProtrude"
    example:
    - "0    ConcordantPair"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--align_soft_clip_at_reference_ends"
    description: "allow the soft-clipping of the alignments past the end of the chromosomes\n\
      \n- Yes ... allow\n- No  ... prohibit, useful for compatibility with Cufflinks"
    info:
      orig_name: "--alignSoftClipAtReferenceEnds"
    example:
    - "Yes"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--align_insertion_flush"
    description: "how to flush ambiguous insertion positions\n\n- None    ... insertions\
      \ are not flushed\n- Right   ... insertions are flushed to the right"
    info:
      orig_name: "--alignInsertionFlush"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Paired-End reads"
  arguments:
  - type: "integer"
    name: "--pe_overlap_nbases_min"
    description: "minimum number of overlapping bases to trigger mates merging and\
      \ realignment. Specify >0 value to switch on the \"merginf of overlapping mates\"\
      \ algorithm."
    info:
      orig_name: "--peOverlapNbasesMin"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--pe_overlap_mm_p"
    description: "maximum proportion of mismatched bases in the overlap area"
    info:
      orig_name: "--peOverlapMMp"
    example:
    - 0.01
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Windows, Anchors, Binning"
  arguments:
  - type: "integer"
    name: "--win_anchor_multimap_nmax"
    description: "max number of loci anchors are allowed to map to"
    info:
      orig_name: "--winAnchorMultimapNmax"
    example:
    - 50
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--win_bin_nbits"
    description: "=log2(winBin), where winBin is the size of the bin for the windows/clustering,\
      \ each window will occupy an integer number of bins."
    info:
      orig_name: "--winBinNbits"
    example:
    - 16
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--win_anchor_dist_nbins"
    description: "max number of bins between two anchors that allows aggregation of\
      \ anchors into one window"
    info:
      orig_name: "--winAnchorDistNbins"
    example:
    - 9
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--win_flank_nbins"
    description: "log2(winFlank), where win Flank is the size of the left and right\
      \ flanking regions for each window"
    info:
      orig_name: "--winFlankNbins"
    example:
    - 4
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "double"
    name: "--win_read_coverage_relative_min"
    description: "minimum relative coverage of the read sequence by the seeds in a\
      \ window, for STARlong algorithm only."
    info:
      orig_name: "--winReadCoverageRelativeMin"
    example:
    - 0.5
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--win_read_coverage_bases_min"
    description: "minimum number of bases covered by the seeds in a window , for STARlong\
      \ algorithm only."
    info:
      orig_name: "--winReadCoverageBasesMin"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Chimeric Alignments"
  arguments:
  - type: "string"
    name: "--chim_out_type"
    description: "type of chimeric output\n\n- Junctions       ... Chimeric.out.junction\n\
      - SeparateSAMold  ... output old SAM into separate Chimeric.out.sam file\n-\
      \ WithinBAM       ... output into main aligned BAM files (Aligned.*.bam)\n-\
      \ WithinBAM HardClip  ... (default) hard-clipping in the CIGAR for supplemental\
      \ chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip\
      \  ... soft-clipping in the CIGAR for supplemental chimeric alignments"
    info:
      orig_name: "--chimOutType"
    example:
    - "Junctions"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_segment_min"
    description: "minimum length of chimeric segment length, if ==0, no chimeric output"
    info:
      orig_name: "--chimSegmentMin"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_score_min"
    description: "minimum total (summed) score of the chimeric segments"
    info:
      orig_name: "--chimScoreMin"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_score_drop_max"
    description: "max drop (difference) of chimeric score (the sum of scores of all\
      \ chimeric segments) from the read length"
    info:
      orig_name: "--chimScoreDropMax"
    example:
    - 20
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_score_separation"
    description: "minimum difference (separation) between the best chimeric score\
      \ and the next one"
    info:
      orig_name: "--chimScoreSeparation"
    example:
    - 10
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_score_junction_non_gtag"
    description: "penalty for a non-GT/AG chimeric junction"
    info:
      orig_name: "--chimScoreJunctionNonGTAG"
    example:
    - -1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_junction_overhang_min"
    description: "minimum overhang for a chimeric junction"
    info:
      orig_name: "--chimJunctionOverhangMin"
    example:
    - 20
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_segment_read_gap_max"
    description: "maximum gap in the read sequence between chimeric segments"
    info:
      orig_name: "--chimSegmentReadGapMax"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--chim_filter"
    description: "different filters for chimeric alignments\n\n- None ... no filtering\n\
      - banGenomicN ... Ns are not allowed in the genome sequence around the chimeric\
      \ junction"
    info:
      orig_name: "--chimFilter"
    example:
    - "banGenomicN"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_main_segment_mult_nmax"
    description: "maximum number of multi-alignments for the main chimeric segment.\
      \ =1 will prohibit multimapping main segments."
    info:
      orig_name: "--chimMainSegmentMultNmax"
    example:
    - 10
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_multimap_nmax"
    description: "maximum number of chimeric multi-alignments\n\n- 0 ... use the old\
      \ scheme for chimeric detection which only considered unique alignments"
    info:
      orig_name: "--chimMultimapNmax"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_multimap_score_range"
    description: "the score range for multi-mapping chimeras below the best chimeric\
      \ score. Only works with --chim_multimap_nmax > 1"
    info:
      orig_name: "--chimMultimapScoreRange"
    example:
    - 1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_nonchim_score_drop_min"
    description: "to trigger chimeric detection, the drop in the best non-chimeric\
      \ alignment score with respect to the read length has to be greater than this\
      \ value"
    info:
      orig_name: "--chimNonchimScoreDropMin"
    example:
    - 20
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--chim_out_junction_format"
    description: "formatting type for the Chimeric.out.junction file\n\n- 0 ... no\
      \ comment lines/headers\n- 1 ... comment lines at the end of the file: command\
      \ line and Nreads: total, unique/multi-mapping"
    info:
      orig_name: "--chimOutJunctionFormat"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Quantification of Annotations"
  arguments:
  - type: "string"
    name: "--quant_mode"
    description: "types of quantification requested\n\n- -                ... none\n\
      - TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate\
      \ file\n- GeneCounts       ... count reads per gene"
    info:
      orig_name: "--quantMode"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--quant_transcriptome_bam_compression"
    description: "-2 to 10  transcriptome BAM compression level\n\n- -2  ... no BAM\
      \ output\n- -1  ... default compression (6?)\n-  0  ... no compression\n-  10\
      \ ... maximum compression"
    info:
      orig_name: "--quantTranscriptomeBAMcompression"
    example:
    - 1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--quant_transcriptome_sam_output"
    description: "alignment filtering for TranscriptomeSAM output\n\n- BanSingleEnd_BanIndels_ExtendSoftclip\
      \ ... prohibit indels and single-end alignments, extend softclips - compatible\
      \ with RSEM\n- BanSingleEnd               ... prohibit single-end alignments,\
      \ allow indels and softclips\n- BanSingleEnd_ExtendSoftclip ... prohibit single-end\
      \ alignments, extend softclips, allow indels"
    info:
      orig_name: "--quantTranscriptomeSAMoutput"
    example:
    - "BanSingleEnd_BanIndels_ExtendSoftclip"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "2-pass Mapping"
  arguments:
  - type: "string"
    name: "--twopass_mode"
    description: "2-pass mapping mode.\n\n- None        ... 1-pass mapping\n- Basic\
      \       ... basic 2-pass mapping, with all 1st pass junctions inserted into\
      \ the genome indices on the fly"
    info:
      orig_name: "--twopassMode"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--twopass1reads_n"
    description: "number of reads to process for the 1st step. Use very large number\
      \ (or default -1) to map all reads in the first step."
    info:
      orig_name: "--twopass1readsN"
    example:
    - -1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "WASP parameters"
  arguments:
  - type: "string"
    name: "--wasp_output_mode"
    description: "WASP allele-specific output type. This is re-implementation of the\
      \ original WASP mappability filtering by Bryce van de Geijn, Graham McVicker,\
      \ Yoav Gilad & Jonathan K Pritchard. Please cite the original WASP paper: Nature\
      \ Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\
      \n- SAMtag      ... add WASP tags to the alignments that pass WASP filtering"
    info:
      orig_name: "--waspOutputMode"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "STARsolo (single cell RNA-seq) parameters"
  arguments:
  - type: "string"
    name: "--solo_type"
    description: "type of single-cell RNA-seq\n\n- CB_UMI_Simple   ... (a.k.a. Droplet)\
      \ one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X\
      \ Chromium.\n- CB_UMI_Complex  ... multiple Cell Barcodes of varying length,\
      \ one UMI of fixed length and one adapter sequence of fixed length are allowed\
      \ in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut    ... output Cell Barcode\
      \ as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2\
      \ if paired-end] CellBarcode_read . Requires --out_sam_type BAM Unsorted [and/or\
      \ SortedByCoordinate]\n- SmartSeq        ... Smart-seq: each cell in a separate\
      \ FASTQ (paired- or single-end), barcodes are corresponding read-groups, no\
      \ UMI sequences, alignments deduplicated according to alignment start and end\
      \ (after extending soft-clipped bases)"
    info:
      orig_name: "--soloType"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--solo_cb_type"
    description: "cell barcode type\n\nSequence: cell barcode is a sequence (standard\
      \ option)\nString: cell barcode is an arbitrary string"
    info:
      orig_name: "--soloCBtype"
    example:
    - "Sequence"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--solo_cb_whitelist"
    description: "file(s) with whitelist(s) of cell barcodes. Only --solo_type CB_UMI_Complex\
      \ allows more than one whitelist file.\n\n- None            ... no whitelist:\
      \ all cell barcodes are allowed"
    info:
      orig_name: "--soloCBwhitelist"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "integer"
    name: "--solo_cb_start"
    description: "cell barcode start base"
    info:
      orig_name: "--soloCBstart"
    example:
    - 1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--solo_cb_len"
    description: "cell barcode length"
    info:
      orig_name: "--soloCBlen"
    example:
    - 16
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--solo_umi_start"
    description: "UMI start base"
    info:
      orig_name: "--soloUMIstart"
    example:
    - 17
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--solo_umi_len"
    description: "UMI length"
    info:
      orig_name: "--soloUMIlen"
    example:
    - 10
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--solo_barcode_read_length"
    description: "length of the barcode read\n\n- 1   ... equal to sum of soloCBlen+soloUMIlen\n\
      - 0   ... not defined, do not check"
    info:
      orig_name: "--soloBarcodeReadLength"
    example:
    - 1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--solo_barcode_mate"
    description: "identifies which read mate contains the barcode (CB+UMI) sequence\n\
      \n- 0   ... barcode sequence is on separate read, which should always be the\
      \ last file in the --readFilesIn listed\n- 1   ... barcode sequence is a part\
      \ of mate 1\n- 2   ... barcode sequence is a part of mate 2"
    info:
      orig_name: "--soloBarcodeMate"
    example:
    - 0
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--solo_cb_position"
    description: "position of Cell Barcode(s) on the barcode read.\n\nPresently only\
      \ works with --solo_type CB_UMI_Complex, and barcodes are assumed to be on Read2.\n\
      Format for each barcode: startAnchor_startPosition_endAnchor_endPosition\nstart(end)Anchor\
      \ defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter\
      \ start; 3: adapter end\nstart(end)Position is the 0-based position with of\
      \ the CB start(end) with respect to the Anchor Base\nString for different barcodes\
      \ are separated by space.\nExample: inDrop (Zilionis et al, Nat. Protocols,\
      \ 2017):\n--solo_cb_position  0_0_2_-1  3_1_3_8"
    info:
      orig_name: "--soloCBposition"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--solo_umi_position"
    description: "position of the UMI on the barcode read, same as soloCBposition\n\
      \nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--solo_cb_position\
      \  3_9_3_14"
    info:
      orig_name: "--soloUMIposition"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--solo_adapter_sequence"
    description: "adapter sequence to anchor barcodes. Only one adapter sequence is\
      \ allowed."
    info:
      orig_name: "--soloAdapterSequence"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "integer"
    name: "--solo_adapter_mismatches_nmax"
    description: "maximum number of mismatches allowed in adapter sequence."
    info:
      orig_name: "--soloAdapterMismatchesNmax"
    example:
    - 1
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--solo_cb_match_wl_type"
    description: "matching the Cell Barcodes to the WhiteList\n\n- Exact         \
      \                  ... only exact matches allowed\n- 1MM                   \
      \          ... only one match in whitelist with 1 mismatched base allowed. Allowed\
      \ CBs have to have at least one read with exact match.\n- 1MM_multi        \
      \               ... multiple matches in whitelist with 1 mismatched base allowed,\
      \ posterior probability calculation is used choose one of the matches.\nAllowed\
      \ CBs have to have at least one read with exact match. This option matches best\
      \ with CellRanger 2.2.0\n- 1MM_multi_pseudocounts          ... same as 1MM_Multi,\
      \ but pseudocounts of 1 are added to all whitelist barcodes.\n- 1MM_multi_Nbase_pseudocounts\
      \    ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for\
      \ CBs with N-bases. This option matches best with CellRanger >= 3.0.0\n- EditDist_2\
      \                    ... allow up to edit distance of 3 fpr each of the barcodes.\
      \ May include one deletion + one insertion. Only works with --solo_type CB_UMI_Complex.\
      \ Matches to multiple passlist barcdoes are not allowed. Similar to ParseBio\
      \ Split-seq pipeline."
    info:
      orig_name: "--soloCBmatchWLtype"
    example:
    - "1MM_multi"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--solo_input_sam_attr_barcode_seq"
    description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\
      \ these SAM attributes mark the barcode sequence (in proper order).\n\nFor instance,\
      \ for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_seq\
      \ CR UR .\nThis parameter is required when running STARsolo with input from\
      \ SAM."
    info:
      orig_name: "--soloInputSAMattrBarcodeSeq"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--solo_input_sam_attr_barcode_qual"
    description: "when inputting reads from a SAM file (--readsFileType SAM SE/PE),\
      \ these SAM attributes mark the barcode qualities (in proper order).\n\nFor\
      \ instance, for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_qual\
      \ CY UY .\nIf this parameter is '-' (default), the quality 'H' will be assigned\
      \ to all bases."
    info:
      orig_name: "--soloInputSAMattrBarcodeQual"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--solo_strand"
    description: "strandedness of the solo libraries:\n\n- Unstranded  ... no strand\
      \ information\n- Forward     ... read strand same as the original RNA molecule\n\
      - Reverse     ... read strand opposite to the original RNA molecule"
    info:
      orig_name: "--soloStrand"
    example:
    - "Forward"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
  - type: "string"
    name: "--solo_features"
    description: "genomic features for which the UMI counts per Cell Barcode are collected\n\
      \n- Gene            ... genes: reads match the gene transcript\n- SJ       \
      \       ... splice junctions: reported in SJ.out.tab\n- GeneFull        ...\
      \ full gene (pre-mRNA): count all reads overlapping genes' exons and introns\n\
      - GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping\
      \ genes' exons and introns: prioritize 100% overlap with exons\n- GeneFull_Ex50pAS\
      \        ... full gene (pre-RNA): count all reads overlapping genes' exons and\
      \ introns: prioritize >50% overlap with exons. Do not count reads with 100%\
      \ exonic overlap in the antisense direction."
    info:
      orig_name: "--soloFeatures"
    example:
    - "Gene"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--solo_multi_mappers"
    description: "counting method for reads mapping to multiple genes\n\n- Unique\
      \     ... count only reads that map to unique genes\n- Uniform    ... uniformly\
      \ distribute multi-genic UMIs to all genes\n- Rescue     ... distribute UMIs\
      \ proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique\
      \ ... distribute UMIs proportionally to unique mappers, if present, and uniformly\
      \ if not.\n- EM         ... multi-gene UMIs are distributed using Expectation\
      \ Maximization algorithm"
    info:
      orig_name: "--soloMultiMappers"
    example:
    - "Unique"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--solo_umi_dedup"
    description: "type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All  \
      \                   ... all UMIs with 1 mismatch distance to each other are\
      \ collapsed (i.e. counted once).\n- 1MM_Directional_UMItools    ... follows\
      \ the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery\
      \ (Genome Research 2017).\n- 1MM_Directional             ... same as 1MM_Directional_UMItools,\
      \ but with more stringent criteria for duplicate UMIs\n- Exact             \
      \          ... only exactly matching UMIs are collapsed.\n- NoDedup        \
      \             ... no deduplication of UMIs, count all reads.\n- 1MM_CR     \
      \                 ... CellRanger2-4 algorithm for 1MM UMI collapsing."
    info:
      orig_name: "--soloUMIdedup"
    example:
    - "1MM_All"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--solo_umi_filtering"
    description: "type of UMI filtering (for reads uniquely mapping to genes)\n\n\
      - -                  ... basic filtering: remove UMIs with N and homopolymers\
      \ (similar to CellRanger 2.2.0).\n- MultiGeneUMI       ... basic + remove lower-count\
      \ UMIs that map to more than one gene.\n- MultiGeneUMI_All   ... basic + remove\
      \ all UMIs that map to more than one gene.\n- MultiGeneUMI_CR    ... basic +\
      \ remove lower-count UMIs that map to more than one gene, matching CellRanger\
      \ > 3.0.0 .\nOnly works with --solo_umi_dedup 1MM_CR"
    info:
      orig_name: "--soloUMIfiltering"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--solo_out_file_names"
    description: "file names for STARsolo output:\n\nfile_name_prefix   gene_names\
      \   barcode_sequences   cell_feature_count_matrix"
    info:
      orig_name: "--soloOutFileNames"
    example:
    - "Solo.out/"
    - "features.tsv"
    - "barcodes.tsv"
    - "matrix.mtx"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--solo_cell_filter"
    description: "cell filtering type and parameters\n\n- None            ... do not\
      \ output filtered cells\n- TopCells        ... only report top cells by UMI\
      \ count, followed by the exact number of cells\n- CellRanger2.2   ... simple\
      \ filtering of CellRanger 2.2.\nCan be followed by numbers: number of expected\
      \ cells, robust maximum percentile for UMI count, maximum to minimum ratio for\
      \ UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000; \
      \ maxPercentile=0.99;  maxMinRatio=10\n- EmptyDrops_CR   ... EmptyDrops filtering\
      \ in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun\
      \ et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\n\
      Can be followed by 10 numeric parameters:  nExpectedCells   maxPercentile  \
      \ maxMinRatio   indMin   indMax   umiMin   umiMinFracMedian   candMaxN   FDR\
      \   simN\nThe harcoded values are from CellRanger:             3000        \
      \    0.99            10    45000    90000      500               0.01      20000\
      \  0.01  10000"
    info:
      orig_name: "--soloCellFilter"
    example:
    - "CellRanger2.2"
    - "3000"
    - "0.99"
    - "10"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--solo_out_format_features_gene_field3"
    description: "field 3 in the Gene features.tsv file. If \"-\", then no 3rd field\
      \ is output."
    info:
      orig_name: "--soloOutFormatFeaturesGeneField3"
    example:
    - "Gene Expression"
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "string"
    name: "--solo_cell_read_stats"
    description: "Output reads statistics for each CB\n\n- Standard    ... standard\
      \ output"
    info:
      orig_name: "--soloCellReadStats"
    required: false
    direction: "input"
    multiple: false
    multiple_sep: ";"
- name: "Inputs"
  arguments:
  - type: "file"
    name: "--input"
    alternatives:
    - "--readFilesIn"
    description: "The single-end or paired-end R1 FastQ files to be processed."
    info: null
    example:
    - "mysample_S1_L001_R1_001.fastq.gz"
    must_exist: true
    create_parent: true
    required: true
    direction: "input"
    multiple: true
    multiple_sep: ";"
  - type: "file"
    name: "--input_r2"
    description: "The paired-end R2 FastQ files to be processed. Only required if\
      \ --input is a paired-end R1 file."
    info: null
    example:
    - "mysample_S1_L001_R2_001.fastq.gz"
    must_exist: true
    create_parent: true
    required: false
    direction: "input"
    multiple: true
    multiple_sep: ";"
- name: "Outputs"
  arguments:
  - type: "file"
    name: "--aligned_reads"
    description: "The output file containing the aligned reads."
    info: null
    example:
    - "aligned_reads.bam"
    must_exist: true
    create_parent: true
    required: true
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--reads_per_gene"
    description: "The output file containing the number of reads per gene."
    info: null
    example:
    - "reads_per_gene.tsv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--unmapped"
    description: "The output file containing the unmapped reads."
    info: null
    example:
    - "unmapped.fastq"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--unmapped_r2"
    description: "The output file containing the unmapped R2 reads."
    info: null
    example:
    - "unmapped_r2.fastq"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--chimeric_junctions"
    description: "The output file containing the chimeric junctions."
    info: null
    example:
    - "chimeric_junctions.tsv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--log"
    description: "The output file containing the log of the alignment process."
    info: null
    example:
    - "log.txt"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--splice_junctions"
    description: "The output file containing the splice junctions."
    info: null
    example:
    - "splice_junctions.tsv"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
  - type: "file"
    name: "--reads_aligned_to_transcriptome"
    description: "The output file containing the alignments to transcriptome in BAM\
      \ formats. This file is generated when --quantMode is set to TranscriptomeSAM."
    info: null
    example:
    - "transcriptome_aligned.bam"
    must_exist: true
    create_parent: true
    required: false
    direction: "output"
    multiple: false
    multiple_sep: ";"
resources:
- type: "python_script"
  path: "script.py"
  is_executable: true
description: "Aligns reads to a reference genome using STAR.\n"
test_resources:
- type: "bash_script"
  path: "test.sh"
  is_executable: true
info: null
status: "enabled"
requirements:
  commands:
  - "ps"
keywords:
- "align"
- "fasta"
- "genome"
license: "MIT"
references:
  doi:
  - "10.1093/bioinformatics/bts635"
links:
  repository: "https://github.com/alexdobin/STAR"
  documentation: "https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf"
runners:
- type: "executable"
  id: "executable"
  docker_setup_strategy: "ifneedbepullelsecachedbuild"
- type: "nextflow"
  id: "nextflow"
  directives:
    tag: "$id"
  auto:
    simplifyInput: true
    simplifyOutput: false
    transcript: false
    publish: false
  config:
    labels:
      mem1gb: "memory = 1000000000.B"
      mem2gb: "memory = 2000000000.B"
      mem5gb: "memory = 5000000000.B"
      mem10gb: "memory = 10000000000.B"
      mem20gb: "memory = 20000000000.B"
      mem50gb: "memory = 50000000000.B"
      mem100gb: "memory = 100000000000.B"
      mem200gb: "memory = 200000000000.B"
      mem500gb: "memory = 500000000000.B"
      mem1tb: "memory = 1000000000000.B"
      mem2tb: "memory = 2000000000000.B"
      mem5tb: "memory = 5000000000000.B"
      mem10tb: "memory = 10000000000000.B"
      mem20tb: "memory = 20000000000000.B"
      mem50tb: "memory = 50000000000000.B"
      mem100tb: "memory = 100000000000000.B"
      mem200tb: "memory = 200000000000000.B"
      mem500tb: "memory = 500000000000000.B"
      mem1gib: "memory = 1073741824.B"
      mem2gib: "memory = 2147483648.B"
      mem4gib: "memory = 4294967296.B"
      mem8gib: "memory = 8589934592.B"
      mem16gib: "memory = 17179869184.B"
      mem32gib: "memory = 34359738368.B"
      mem64gib: "memory = 68719476736.B"
      mem128gib: "memory = 137438953472.B"
      mem256gib: "memory = 274877906944.B"
      mem512gib: "memory = 549755813888.B"
      mem1tib: "memory = 1099511627776.B"
      mem2tib: "memory = 2199023255552.B"
      mem4tib: "memory = 4398046511104.B"
      mem8tib: "memory = 8796093022208.B"
      mem16tib: "memory = 17592186044416.B"
      mem32tib: "memory = 35184372088832.B"
      mem64tib: "memory = 70368744177664.B"
      mem128tib: "memory = 140737488355328.B"
      mem256tib: "memory = 281474976710656.B"
      mem512tib: "memory = 562949953421312.B"
      cpu1: "cpus = 1"
      cpu2: "cpus = 2"
      cpu5: "cpus = 5"
      cpu10: "cpus = 10"
      cpu20: "cpus = 20"
      cpu50: "cpus = 50"
      cpu100: "cpus = 100"
      cpu200: "cpus = 200"
      cpu500: "cpus = 500"
      cpu1000: "cpus = 1000"
  debug: false
  container: "docker"
engines:
- type: "docker"
  id: "docker"
  image: "python:3.12-slim"
  target_registry: "images.viash-hub.com"
  target_tag: "main"
  namespace_separator: "/"
  setup:
  - type: "apt"
    packages:
    - "procps"
    - "gzip"
    - "bzip2"
    interactive: false
  - type: "docker"
    run:
    - "apt-get update && \\\n  apt-get install -y --no-install-recommends ${PACKAGES}\
      \ && \\\n  cd /tmp && \\\n  wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip\
      \ && \\\n  unzip ${STAR_VERSION}.zip && \\\n  cd STAR-${STAR_VERSION}/source\
      \ && \\\n  make STARstatic CXXFLAGS_SIMD=-std=c++11 && \\\n  cp STAR /usr/local/bin\
      \ && \\\n  cd / && \\\n  rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip\
      \ && \\\n  apt-get --purge autoremove -y ${PACKAGES} && \\\n  apt-get clean\n"
    env:
    - "STAR_VERSION 2.7.11b"
    - "PACKAGES gcc g++ make wget zlib1g-dev unzip xxd"
  - type: "python"
    user: false
    packages:
    - "pyyaml"
    upgrade: true
  - type: "docker"
    run:
    - "STAR --version | sed 's#\\(.*\\)#star: \"\\1\"#' > /var/software_versions.txt\n"
  entrypoint: []
  cmd: null
- type: "native"
  id: "native"
build_info:
  config: "src/star/star_align_reads/config.vsh.yaml"
  runner: "executable"
  engine: "docker|native"
  output: "target/executable/star/star_align_reads"
  executable: "target/executable/star/star_align_reads/star_align_reads"
  viash_version: "0.9.0-RC6"
  git_commit: "766ab6c9c3059004c7c3f205621909b2d8b0b26d"
  git_remote: "https://github.com/viash-hub/biobox"
package_config:
  name: "biobox"
  version: "main"
  description: "A collection of bioinformatics tools for working with sequence data.\n"
  info: null
  viash_version: "0.9.0-RC6"
  source: "src"
  target: "target"
  config_mods:
  - ".requirements.commands := ['ps']\n"
  - ".engines += { type: \"native\" }"
  - ".engines[.type == 'docker'].target_registry := 'images.viash-hub.com'"
  - ".engines[.type == 'docker'].target_tag := 'main'"
  keywords:
  - "bioinformatics"
  - "modules"
  - "sequencing"
  license: "MIT"
  organization: "vsh"
  links:
    repository: "https://github.com/viash-hub/biobox"
    issue_tracker: "https://github.com/viash-hub/biobox/issues"