Files
biobox/target/nextflow/gffread/nextflow_schema.json
CI 93f55cc2ee Build branch main with version main (ad89f43)
Build pipeline: viash-hub.biobox.main-dkgvl

Source commit: ad89f43726

Source message: Add authors to package config and update author information (#180)

* Add authors to package config and update author information

* update changelog

* add linkedin
2025-06-16 10:27:22 +00:00

468 lines
21 KiB
JSON

{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "gffread",
"description": "Validate, filter, convert and perform various other operations on GFF files.",
"type": "object",
"$defs": {
"inputs": {
"title": "Inputs",
"type": "object",
"description": "No description",
"properties": {
"input": {
"type": "string",
"format": "path",
"exists": true,
"description": "A reference file in either the GFF3, GFF2 or GTF format.\n",
"help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"annotation.gff\"`. "
},
"chr_mapping": {
"type": "string",
"format": "path",
"description": "<chr_replace> is a name mapping table for converting reference sequence names, \nhaving this 2-column format: <original_ref_ID> <new_ref_ID>.\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
},
"seq_info": {
"type": "string",
"format": "path",
"description": "<seq_info.fsize> is a tab-delimited file providing this info for each of the mapped \nsequences: <seq-name> <seq-length> <seq-description> (useful for --description option with \nmRNA/EST/protein mappings).\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
},
"genome": {
"type": "string",
"format": "path",
"description": "Full path to a multi-fasta file with the genomic sequences for all input mappings, \nOR a directory with single-fasta files (one per genomic sequence, with file names \nmatching sequence names).\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"genome.fa\"`. "
}
}
},
"outputs": {
"title": "Outputs",
"type": "object",
"description": "No description",
"properties": {
"outfile": {
"type": "string",
"format": "path",
"description": "Write the output records into <outfile>.\n",
"help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.outfile.gff\"`, direction: `output`, example: `\"output.gff\"`. ",
"default": "$id.$key.outfile.gff"
},
"force_exons": {
"type": "boolean",
"description": "Make sure that the lowest level GFF features are considered \"exon\" features.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"gene2exon": {
"type": "boolean",
"description": "For single-line genes not parenting any transcripts, add an exon feature spanning \nthe entire gene (treat it as a transcript).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"t_adopt": {
"type": "boolean",
"description": "Try to find a parent gene overlapping/containing a transcript that does not have \nany explicit gene Parent.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"decode": {
"type": "boolean",
"description": "Decode url encoded characters within attributes.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"merge_exons": {
"type": "boolean",
"description": "Merge very close exons into a single exon (when intron size<4).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"junctions": {
"type": "boolean",
"description": "Output the junctions and the corresponding transcripts.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"spliced_exons": {
"type": "string",
"format": "path",
"description": "Write a fasta file with spliced exons for each transcript.\n",
"help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.spliced_exons.fa\"`, direction: `output`, example: `\"exons.fa\"`. ",
"default": "$id.$key.spliced_exons.fa"
},
"w_add": {
"type": "integer",
"description": "For the --spliced_exons option, extract additional <N> bases both upstream and \ndownstream of the transcript boundaries.\n",
"help_text": "Type: `integer`, multiple: `False`. "
},
"w_nocds": {
"type": "boolean",
"description": "For --spliced_exons, disable the output of CDS info in the FASTA file.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"spliced_cds": {
"type": "string",
"format": "path",
"description": "Write a fasta file with spliced CDS for each GFF transcript.\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"cds.fa\"`. "
},
"tr_cds": {
"type": "string",
"format": "path",
"description": "Write a protein fasta file with the translation of CDS for each record.\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"tr_cds.fa\"`. "
},
"w_coords": {
"type": "boolean",
"description": "For --spliced_exons, --spliced_cds and -tr_cds options, write in the FASTA defline \nall the exon coordinates projected onto the spliced sequence.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"stop_dot": {
"type": "boolean",
"description": "For --tr_cds option, use '*' instead of '.' as stop codon translation.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"id_version": {
"type": "boolean",
"description": "Ensembl GTF to GFF3 conversion, adds version to IDs.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"trackname": {
"type": "string",
"description": "Use <trackname> in the 2nd column of each GFF/GTF output line.\n",
"help_text": "Type: `string`, multiple: `False`. "
},
"gtf_output": {
"type": "boolean",
"description": "Main output will be GTF instead of GFF3.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"bed": {
"type": "boolean",
"description": "Output records in BED format instead of default GFF3.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"tlf": {
"type": "boolean",
"description": "Output \"transcript line format\" which is like GFF but with exons and CDS related \nfeatures stored as GFF attributes in the transcript feature line, like this:\n exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords>\n<exons> is a comma-delimited list of exon_start-exon_end coordinates;\n<CDScoords> is CDS_start:CDS_end coordinates or a list like <exons>.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"table": {
"type": "array",
"items": {
"type": "string"
},
"description": "Output a simple tab delimited format instead of GFF, with columns having the values \nof GFF attributes given in <attrlist>; special pseudo-attributes (prefixed by @) are \nrecognized:\n @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen\nIf any of --spliced_exons/--tr_cds/--spliced_cds FASTA output files are enabled, the \nsame fields (excluding @id) are appended to the definition line of corresponding FASTA\nrecords.\n",
"help_text": "Type: `string`, multiple: `True`. "
},
"expose_dups": {
"type": "boolean",
"description": "Expose (warn about) duplicate transcript IDs and other potential problems with the \ngiven GFF/GTF records.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
}
}
},
"options": {
"title": "Options",
"type": "object",
"description": "No description",
"properties": {
"ids": {
"type": "string",
"format": "path",
"description": "Discard records/transcripts if their IDs are not listed in <IDs.lst>.\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
},
"nids": {
"type": "string",
"format": "path",
"description": "Discard records/transcripts if their IDs are listed in <IDs.lst>.\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
},
"maxintron": {
"type": "integer",
"description": "Discard transcripts having an intron larger than <maxintron>.\n",
"help_text": "Type: `integer`, multiple: `False`. "
},
"minlen": {
"type": "integer",
"description": "Discard transcripts shorter than <minlen> bases.\n",
"help_text": "Type: `integer`, multiple: `False`. "
},
"range": {
"type": "string",
"description": "Only show transcripts overlapping coordinate range <start>..<end> (on chromosome/contig \n<chr>, strand <strand> if provided).\n",
"help_text": "Type: `string`, multiple: `False`. "
},
"strict_range": {
"type": "boolean",
"description": "For --range option, discard all transcripts that are not fully contained within the given \nrange.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"jmatch": {
"type": "string",
"description": "Only output transcripts matching the given junction.\n",
"help_text": "Type: `string`, multiple: `False`. "
},
"no_single_exon": {
"type": "boolean",
"description": "Discard single-exon transcripts.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"coding": {
"type": "boolean",
"description": "Coding only: discard mRNAs that have no CDS features.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"nc": {
"type": "boolean",
"description": "Non-coding only: discard mRNAs that have CDS features.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"ignore_locus": {
"type": "boolean",
"description": "Discard locus features and attributes found in the input.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"description": {
"type": "boolean",
"description": "Use the description field from <seq_info.fsize> and add it as the value for a 'descr' \nattribute to the GFF record.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
}
}
},
"sorting": {
"title": "Sorting",
"type": "object",
"description": "No description",
"properties": {
"sort_alpha": {
"type": "boolean",
"description": "Chromosomes (reference sequences) are sorted alphabetically.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"sort_by": {
"type": "string",
"format": "path",
"description": "Sort the reference sequences by the order in which their names are given in the \n<refseq.lst> file.\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
}
}
},
"misc options": {
"title": "Misc options",
"type": "object",
"description": "No description",
"properties": {
"keep_attrs": {
"type": "boolean",
"description": "Keep all GFF attributes (for non-exon features).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"keep_exon_attrs": {
"type": "boolean",
"description": "For -F option, do not attempt to reduce redundant exon/CDS attributes.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"no_exon_attrs": {
"type": "boolean",
"description": "Do not keep exon attributes, move them to the transcript feature (for GFF3 output).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"attrs": {
"type": "string",
"description": "Only output the GTF/GFF attributes listed in <attr-list> which is a comma delimited \nlist of attribute names to.\n",
"help_text": "Type: `string`, multiple: `False`. "
},
"keep_genes": {
"type": "boolean",
"description": "In transcript-only mode (default), also preserve gene records.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"keep_comments": {
"type": "boolean",
"description": "For GFF3 input/output, try to preserve comments.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"process_other": {
"type": "boolean",
"description": "process other non-transcript GFF records (by default non-transcript records are ignored).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"rm_stop_codons": {
"type": "boolean",
"description": "Discard any mRNAs with CDS having in-frame stop codons (requires --genome).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"adj_cds_start": {
"type": "boolean",
"description": "For --rm_stop_codons option, check and adjust the starting CDS phase if the original phase\nleads to a translation with an in-frame stop codon.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"opposite_strand": {
"type": "boolean",
"description": "For -V option, single-exon transcripts are also checked on the opposite strand (requires \n--genome)",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"coding_status": {
"type": "boolean",
"description": "Add transcript level GFF attributes about the coding status of each transcript, including \npartialness or in-frame stop codons (requires --genome).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"add_hasCDS": {
"type": "boolean",
"description": "Add a \"hasCDS\" attribute with value \"true\" for transcripts that have CDS features",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"adj_stop": {
"type": "boolean",
"description": "Stop codon adjustment: enables --coding_status and performs automatic adjustment of the CDS stop \ncoordinate if premature or downstream.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"rm_noncanon": {
"type": "boolean",
"description": "Discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus \n(i.e",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"complete_cds": {
"type": "boolean",
"description": "Discard any mRNAs that either lack initial START codon or the terminal STOP codon, or \nhave an in-frame stop codon (i.e",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"no_pseudo": {
"type": "boolean",
"description": "Filter out records matching the 'pseudo' keyword.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"in_bed": {
"type": "boolean",
"description": "Input should be parsed as BED format (automatic if the input filename ends with .bed*).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"in_tlf": {
"type": "boolean",
"description": "Input GFF-like one-line-per-transcript format without exon/CDS features (see --tlf option \nbelow); automatic if the input filename ends with .tlf).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"stream": {
"type": "boolean",
"description": "Fast processing of input GFF/BED transcripts as they are received (no sorting, exons must \nbe grouped by transcript in the input data).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
}
}
},
"clustering": {
"title": "Clustering",
"type": "object",
"description": "No description",
"properties": {
"merge": {
"type": "boolean",
"description": "Cluster the input transcripts into loci, discarding \"redundant\" transcripts (those with \nthe same exact introns and fully contained or equal boundaries).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"dupinfo": {
"type": "string",
"format": "path",
"description": "For --merge option, write duplication info to file <dupinfo>.\n",
"help_text": "Type: `file`, multiple: `False`, direction: `input`. "
},
"cluster_only": {
"type": "boolean",
"description": "Same as --merge but without discarding any of the \"duplicate\" transcripts, only create \n\"locus\" features.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"rm_redundant": {
"type": "boolean",
"description": "For --merge option: also discard as redundant the shorter, fully contained transcripts (intron \nchains matching a part of the container).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"no_boundary": {
"type": "boolean",
"description": "For --merge option, no longer require boundary containment when assessing redundancy (can be \ncombined with --rm_redundant); only introns have to match for multi-exon transcripts, and >=80%\noverlap for single-exon transcripts.\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
},
"no_overlap": {
"type": "boolean",
"description": "For --merge option, enforce --no_boundary but also discard overlapping single-exon transcripts,\neven on the opposite strand (can be combined with --rm_redudant).\n",
"help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
"default": false
}
}
},
"nextflow input-output arguments": {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type": "string",
"description": "Path to an output directory.",
"help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
}
}
}
},
"allOf": [
{
"$ref": "#/$defs/inputs"
},
{
"$ref": "#/$defs/outputs"
},
{
"$ref": "#/$defs/options"
},
{
"$ref": "#/$defs/sorting"
},
{
"$ref": "#/$defs/misc options"
},
{
"$ref": "#/$defs/clustering"
},
{
"$ref": "#/$defs/nextflow input-output arguments"
}
]
}