Build pipeline: viash-hub.biobox.main-dkgvl
Source commit: ad89f43726
Source message: Add authors to package config and update author information (#180)
* Add authors to package config and update author information
* update changelog
* add linkedin
468 lines
21 KiB
JSON
468 lines
21 KiB
JSON
{
|
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
"title": "gffread",
|
|
"description": "Validate, filter, convert and perform various other operations on GFF files.",
|
|
"type": "object",
|
|
"$defs": {
|
|
"inputs": {
|
|
"title": "Inputs",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
"input": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"exists": true,
|
|
"description": "A reference file in either the GFF3, GFF2 or GTF format.\n",
|
|
"help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"annotation.gff\"`. "
|
|
},
|
|
"chr_mapping": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"description": "<chr_replace> is a name mapping table for converting reference sequence names, \nhaving this 2-column format: <original_ref_ID> <new_ref_ID>.\n",
|
|
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
|
|
},
|
|
"seq_info": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"description": "<seq_info.fsize> is a tab-delimited file providing this info for each of the mapped \nsequences: <seq-name> <seq-length> <seq-description> (useful for --description option with \nmRNA/EST/protein mappings).\n",
|
|
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
|
|
},
|
|
"genome": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"description": "Full path to a multi-fasta file with the genomic sequences for all input mappings, \nOR a directory with single-fasta files (one per genomic sequence, with file names \nmatching sequence names).\n",
|
|
"help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"genome.fa\"`. "
|
|
}
|
|
}
|
|
},
|
|
"outputs": {
|
|
"title": "Outputs",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
"outfile": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"description": "Write the output records into <outfile>.\n",
|
|
"help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.outfile.gff\"`, direction: `output`, example: `\"output.gff\"`. ",
|
|
"default": "$id.$key.outfile.gff"
|
|
},
|
|
"force_exons": {
|
|
"type": "boolean",
|
|
"description": "Make sure that the lowest level GFF features are considered \"exon\" features.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"gene2exon": {
|
|
"type": "boolean",
|
|
"description": "For single-line genes not parenting any transcripts, add an exon feature spanning \nthe entire gene (treat it as a transcript).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"t_adopt": {
|
|
"type": "boolean",
|
|
"description": "Try to find a parent gene overlapping/containing a transcript that does not have \nany explicit gene Parent.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"decode": {
|
|
"type": "boolean",
|
|
"description": "Decode url encoded characters within attributes.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"merge_exons": {
|
|
"type": "boolean",
|
|
"description": "Merge very close exons into a single exon (when intron size<4).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"junctions": {
|
|
"type": "boolean",
|
|
"description": "Output the junctions and the corresponding transcripts.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"spliced_exons": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"description": "Write a fasta file with spliced exons for each transcript.\n",
|
|
"help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.spliced_exons.fa\"`, direction: `output`, example: `\"exons.fa\"`. ",
|
|
"default": "$id.$key.spliced_exons.fa"
|
|
},
|
|
"w_add": {
|
|
"type": "integer",
|
|
"description": "For the --spliced_exons option, extract additional <N> bases both upstream and \ndownstream of the transcript boundaries.\n",
|
|
"help_text": "Type: `integer`, multiple: `False`. "
|
|
},
|
|
"w_nocds": {
|
|
"type": "boolean",
|
|
"description": "For --spliced_exons, disable the output of CDS info in the FASTA file.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"spliced_cds": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"description": "Write a fasta file with spliced CDS for each GFF transcript.\n",
|
|
"help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"cds.fa\"`. "
|
|
},
|
|
"tr_cds": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"description": "Write a protein fasta file with the translation of CDS for each record.\n",
|
|
"help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"tr_cds.fa\"`. "
|
|
},
|
|
"w_coords": {
|
|
"type": "boolean",
|
|
"description": "For --spliced_exons, --spliced_cds and --tr_cds options, write in the FASTA defline \nall the exon coordinates projected onto the spliced sequence.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"stop_dot": {
|
|
"type": "boolean",
|
|
"description": "For --tr_cds option, use '*' instead of '.' as stop codon translation.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"id_version": {
|
|
"type": "boolean",
|
|
"description": "Ensembl GTF to GFF3 conversion, adds version to IDs.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"trackname": {
|
|
"type": "string",
|
|
"description": "Use <trackname> in the 2nd column of each GFF/GTF output line.\n",
|
|
"help_text": "Type: `string`, multiple: `False`. "
|
|
},
|
|
"gtf_output": {
|
|
"type": "boolean",
|
|
"description": "Main output will be GTF instead of GFF3.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"bed": {
|
|
"type": "boolean",
|
|
"description": "Output records in BED format instead of default GFF3.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"tlf": {
|
|
"type": "boolean",
|
|
"description": "Output \"transcript line format\" which is like GFF but with exons and CDS related \nfeatures stored as GFF attributes in the transcript feature line, like this:\n exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords>\n<exons> is a comma-delimited list of exon_start-exon_end coordinates;\n<CDScoords> is CDS_start:CDS_end coordinates or a list like <exons>.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"table": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"description": "Output a simple tab delimited format instead of GFF, with columns having the values \nof GFF attributes given in <attrlist>; special pseudo-attributes (prefixed by @) are \nrecognized:\n @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen\nIf any of --spliced_exons/--tr_cds/--spliced_cds FASTA output files are enabled, the \nsame fields (excluding @id) are appended to the definition line of corresponding FASTA\nrecords.\n",
|
|
"help_text": "Type: `string`, multiple: `True`. "
|
|
},
|
|
"expose_dups": {
|
|
"type": "boolean",
|
|
"description": "Expose (warn about) duplicate transcript IDs and other potential problems with the \ngiven GFF/GTF records.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
}
|
|
}
|
|
},
|
|
"options": {
|
|
"title": "Options",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
"ids": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"description": "Discard records/transcripts if their IDs are not listed in <IDs.lst>.\n",
|
|
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
|
|
},
|
|
"nids": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"description": "Discard records/transcripts if their IDs are listed in <IDs.lst>.\n",
|
|
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
|
|
},
|
|
"maxintron": {
|
|
"type": "integer",
|
|
"description": "Discard transcripts having an intron larger than <maxintron>.\n",
|
|
"help_text": "Type: `integer`, multiple: `False`. "
|
|
},
|
|
"minlen": {
|
|
"type": "integer",
|
|
"description": "Discard transcripts shorter than <minlen> bases.\n",
|
|
"help_text": "Type: `integer`, multiple: `False`. "
|
|
},
|
|
"range": {
|
|
"type": "string",
|
|
"description": "Only show transcripts overlapping coordinate range <start>..<end> (on chromosome/contig \n<chr>, strand <strand> if provided).\n",
|
|
"help_text": "Type: `string`, multiple: `False`. "
|
|
},
|
|
"strict_range": {
|
|
"type": "boolean",
|
|
"description": "For --range option, discard all transcripts that are not fully contained within the given \nrange.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"jmatch": {
|
|
"type": "string",
|
|
"description": "Only output transcripts matching the given junction.\n",
|
|
"help_text": "Type: `string`, multiple: `False`. "
|
|
},
|
|
"no_single_exon": {
|
|
"type": "boolean",
|
|
"description": "Discard single-exon transcripts.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"coding": {
|
|
"type": "boolean",
|
|
"description": "Coding only: discard mRNAs that have no CDS features.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"nc": {
|
|
"type": "boolean",
|
|
"description": "Non-coding only: discard mRNAs that have CDS features.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"ignore_locus": {
|
|
"type": "boolean",
|
|
"description": "Discard locus features and attributes found in the input.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"description": {
|
|
"type": "boolean",
|
|
"description": "Use the description field from <seq_info.fsize> and add it as the value for a 'descr' \nattribute to the GFF record.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
}
|
|
}
|
|
},
|
|
"sorting": {
|
|
"title": "Sorting",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
"sort_alpha": {
|
|
"type": "boolean",
|
|
"description": "Chromosomes (reference sequences) are sorted alphabetically.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"sort_by": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"description": "Sort the reference sequences by the order in which their names are given in the \n<refseq.lst> file.\n",
|
|
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
|
|
}
|
|
}
|
|
},
|
|
"misc options": {
|
|
"title": "Misc options",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
"keep_attrs": {
|
|
"type": "boolean",
|
|
"description": "Keep all GFF attributes (for non-exon features).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"keep_exon_attrs": {
|
|
"type": "boolean",
|
|
"description": "For --keep_attrs option, do not attempt to reduce redundant exon/CDS attributes.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"no_exon_attrs": {
|
|
"type": "boolean",
|
|
"description": "Do not keep exon attributes, move them to the transcript feature (for GFF3 output).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"attrs": {
|
|
"type": "string",
|
|
"description": "Only output the GTF/GFF attributes listed in <attr-list> which is a comma delimited \nlist of attribute names to keep.\n",
|
|
"help_text": "Type: `string`, multiple: `False`. "
|
|
},
|
|
"keep_genes": {
|
|
"type": "boolean",
|
|
"description": "In transcript-only mode (default), also preserve gene records.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"keep_comments": {
|
|
"type": "boolean",
|
|
"description": "For GFF3 input/output, try to preserve comments.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"process_other": {
|
|
"type": "boolean",
|
|
"description": "Process other non-transcript GFF records (by default non-transcript records are ignored).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"rm_stop_codons": {
|
|
"type": "boolean",
|
|
"description": "Discard any mRNAs with CDS having in-frame stop codons (requires --genome).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"adj_cds_start": {
|
|
"type": "boolean",
|
|
"description": "For --rm_stop_codons option, check and adjust the starting CDS phase if the original phase\nleads to a translation with an in-frame stop codon.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"opposite_strand": {
|
|
"type": "boolean",
|
|
"description": "For --rm_stop_codons option, single-exon transcripts are also checked on the opposite strand (requires \n--genome).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"coding_status": {
|
|
"type": "boolean",
|
|
"description": "Add transcript level GFF attributes about the coding status of each transcript, including \npartialness or in-frame stop codons (requires --genome).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"add_hasCDS": {
|
|
"type": "boolean",
|
|
"description": "Add a \"hasCDS\" attribute with value \"true\" for transcripts that have CDS features.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"adj_stop": {
|
|
"type": "boolean",
|
|
"description": "Stop codon adjustment: enables --coding_status and performs automatic adjustment of the CDS stop \ncoordinate if premature or downstream.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"rm_noncanon": {
|
|
"type": "boolean",
|
|
"description": "Discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus \n(i.e. not GT-AG, GC-AG or AT-AC).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"complete_cds": {
|
|
"type": "boolean",
|
|
"description": "Discard any mRNAs that either lack initial START codon or the terminal STOP codon, or \nhave an in-frame stop codon (i.e. only print mRNAs with a complete CDS).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"no_pseudo": {
|
|
"type": "boolean",
|
|
"description": "Filter out records matching the 'pseudo' keyword.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"in_bed": {
|
|
"type": "boolean",
|
|
"description": "Input should be parsed as BED format (automatic if the input filename ends with .bed*).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"in_tlf": {
|
|
"type": "boolean",
|
|
"description": "Input GFF-like one-line-per-transcript format without exon/CDS features (see --tlf option); \nautomatic if the input filename ends with .tlf.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"stream": {
|
|
"type": "boolean",
|
|
"description": "Fast processing of input GFF/BED transcripts as they are received (no sorting, exons must \nbe grouped by transcript in the input data).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
}
|
|
}
|
|
},
|
|
"clustering": {
|
|
"title": "Clustering",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
"merge": {
|
|
"type": "boolean",
|
|
"description": "Cluster the input transcripts into loci, discarding \"redundant\" transcripts (those with \nthe same exact introns and fully contained or equal boundaries).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"dupinfo": {
|
|
"type": "string",
|
|
"format": "path",
|
|
"description": "For --merge option, write duplication info to file <dupinfo>.\n",
|
|
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
|
|
},
|
|
"cluster_only": {
|
|
"type": "boolean",
|
|
"description": "Same as --merge but without discarding any of the \"duplicate\" transcripts, only create \n\"locus\" features.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"rm_redundant": {
|
|
"type": "boolean",
|
|
"description": "For --merge option: also discard as redundant the shorter, fully contained transcripts (intron \nchains matching a part of the container).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"no_boundary": {
|
|
"type": "boolean",
|
|
"description": "For --merge option, no longer require boundary containment when assessing redundancy (can be \ncombined with --rm_redundant); only introns have to match for multi-exon transcripts, and >=80%\noverlap for single-exon transcripts.\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
},
|
|
"no_overlap": {
|
|
"type": "boolean",
|
|
"description": "For --merge option, enforce --no_boundary but also discard overlapping single-exon transcripts,\neven on the opposite strand (can be combined with --rm_redundant).\n",
|
|
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
|
|
"default": false
|
|
}
|
|
}
|
|
},
|
|
"nextflow input-output arguments": {
|
|
"title": "Nextflow input-output arguments",
|
|
"type": "object",
|
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
|
"properties": {
|
|
"publish_dir": {
|
|
"type": "string",
|
|
"description": "Path to an output directory.",
|
|
"help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"allOf": [
|
|
{
|
|
"$ref": "#/$defs/inputs"
|
|
},
|
|
{
|
|
"$ref": "#/$defs/outputs"
|
|
},
|
|
{
|
|
"$ref": "#/$defs/options"
|
|
},
|
|
{
|
|
"$ref": "#/$defs/sorting"
|
|
},
|
|
{
|
|
"$ref": "#/$defs/misc options"
|
|
},
|
|
{
|
|
"$ref": "#/$defs/clustering"
|
|
},
|
|
{
|
|
"$ref": "#/$defs/nextflow input-output arguments"
|
|
}
|
|
]
|
|
}
|