biobox/target/nextflow/gffread/nextflow_schema.json

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "gffread",
  "description": "Validate, filter, convert and perform various other operations on GFF files.",
  "type": "object",
  "$defs": {
    "inputs": {
      "title": "Inputs",
      "type": "object",
      "description": "No description",
      "properties": {
        "input": {
          "type": "string",
          "format": "path",
          "exists": true,
          "description": "A reference file in either the GFF3, GFF2 or GTF format.\n",
          "help_text": "Type: `file`, multiple: `False`, required, direction: `input`, example: `\"annotation.gff\"`. "
        },
        "chr_mapping": {
          "type": "string",
          "format": "path",
          "description": "<chr_replace> is a name mapping table for converting reference sequence names, \nhaving this 2-column format: <original_ref_ID> <new_ref_ID>.\n",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`. "
        },
        "seq_info": {
          "type": "string",
          "format": "path",
          "description": "<seq_info.fsize> is a tab-delimited file providing this info for each of the mapped \nsequences: <seq-name> <seq-length> <seq-description> (useful for --description option with \nmRNA/EST/protein mappings).\n",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`. "
        },
        "genome": {
          "type": "string",
          "format": "path",
          "description": "Full path to a multi-fasta file with the genomic sequences for all input mappings, \nOR a directory with single-fasta files (one per genomic sequence, with file names \nmatching sequence names).\n",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"genome.fa\"`. "
        }
      }
    },
    "outputs": {
      "title": "Outputs",
      "type": "object",
      "description": "No description",
      "properties": {
        "outfile": {
          "type": "string",
          "format": "path",
          "description": "Write the output records into <outfile>.\n",
          "help_text": "Type: `file`, multiple: `False`, required, default: `\"$id.$key.outfile.gff\"`, direction: `output`, example: `\"output.gff\"`. ",
          "default": "$id.$key.outfile.gff"
        },
        "force_exons": {
          "type": "boolean",
          "description": "Make sure that the lowest level GFF features are considered \"exon\" features.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "gene2exon": {
          "type": "boolean",
          "description": "For single-line genes not parenting any transcripts, add an exon feature spanning \nthe entire gene (treat it as a transcript).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "t_adopt": {
          "type": "boolean",
          "description": "Try to find a parent gene overlapping/containing a transcript that does not have \nany explicit gene Parent.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "decode": {
          "type": "boolean",
          "description": "Decode url encoded characters within attributes.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "merge_exons": {
          "type": "boolean",
          "description": "Merge very close exons into a single exon (when intron size<4).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "junctions": {
          "type": "boolean",
          "description": "Output the junctions and the corresponding transcripts.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "spliced_exons": {
          "type": "string",
          "format": "path",
          "description": "Write a fasta file with spliced exons for each transcript.\n",
          "help_text": "Type: `file`, multiple: `False`, default: `\"$id.$key.spliced_exons.fa\"`, direction: `output`, example: `\"exons.fa\"`. ",
          "default": "$id.$key.spliced_exons.fa"
        },
        "w_add": {
          "type": "integer",
          "description": "For the --spliced_exons option, extract additional <N> bases both upstream and \ndownstream of the transcript boundaries.\n",
          "help_text": "Type: `integer`, multiple: `False`. "
        },
        "w_nocds": {
          "type": "boolean",
          "description": "For --spliced_exons, disable the output of CDS info in the FASTA file.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "spliced_cds": {
          "type": "string",
          "format": "path",
          "description": "Write a fasta file with spliced CDS for each GFF transcript.\n",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"cds.fa\"`. "
        },
        "tr_cds": {
          "type": "string",
          "format": "path",
          "description": "Write a protein fasta file with the translation of CDS for each record.\n",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`, example: `\"tr_cds.fa\"`. "
        },
        "w_coords": {
          "type": "boolean",
          "description": "For --spliced_exons, --spliced_cds and --tr_cds options, write in the FASTA defline \nall the exon coordinates projected onto the spliced sequence.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "stop_dot": {
          "type": "boolean",
          "description": "For --tr_cds option, use '*' instead of '.' as stop codon translation.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "id_version": {
          "type": "boolean",
          "description": "Ensembl GTF to GFF3 conversion, adds version to IDs.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "trackname": {
          "type": "string",
          "description": "Use <trackname> in the 2nd column of each GFF/GTF output line.\n",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "gtf_output": {
          "type": "boolean",
          "description": "Main output will be GTF instead of GFF3.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "bed": {
          "type": "boolean",
          "description": "Output records in BED format instead of default GFF3.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "tlf": {
          "type": "boolean",
          "description": "Output \"transcript line format\" which is like GFF but with exons and CDS related \nfeatures stored as GFF attributes in the transcript feature line, like this:\n  exoncount=N;exons=<exons>;CDSphase=<N>;CDS=<CDScoords>\n<exons> is a comma-delimited list of exon_start-exon_end coordinates;\n<CDScoords> is CDS_start:CDS_end coordinates or a list like <exons>.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "table": {
          "type": "array",
          "items": {
            "type": "string"
          },
          "description": "Output a simple tab delimited format instead of GFF, with columns having the values \nof GFF attributes given in <attrlist>; special pseudo-attributes (prefixed by @) are \nrecognized:\n  @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen\nIf any of --spliced_exons/--tr_cds/--spliced_cds FASTA output files are enabled, the \nsame fields (excluding @id) are appended to the definition line of corresponding FASTA\nrecords.\n",
          "help_text": "Type: `string`, multiple: `True`. "
        },
        "expose_dups": {
          "type": "boolean",
          "description": "Expose (warn about) duplicate transcript IDs and other potential problems with the \ngiven GFF/GTF records.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        }
      }
    },
    "options": {
      "title": "Options",
      "type": "object",
      "description": "No description",
      "properties": {
        "ids": {
          "type": "string",
          "format": "path",
          "description": "Discard records/transcripts if their IDs are not listed in <IDs.lst>.\n",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`. "
        },
        "nids": {
          "type": "string",
          "format": "path",
          "description": "Discard records/transcripts if their IDs are listed in <IDs.lst>.\n",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`. "
        },
        "maxintron": {
          "type": "integer",
          "description": "Discard transcripts having an intron larger than <maxintron>.\n",
          "help_text": "Type: `integer`, multiple: `False`. "
        },
        "minlen": {
          "type": "integer",
          "description": "Discard transcripts shorter than <minlen> bases.\n",
          "help_text": "Type: `integer`, multiple: `False`. "
        },
        "range": {
          "type": "string",
          "description": "Only show transcripts overlapping coordinate range <start>..<end> (on chromosome/contig \n<chr>, strand <strand> if provided).\n",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "strict_range": {
          "type": "boolean",
          "description": "For --range option, discard all transcripts that are not fully contained within the given \nrange.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "jmatch": {
          "type": "string",
          "description": "Only output transcripts matching the given junction.\n",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "no_single_exon": {
          "type": "boolean",
          "description": "Discard single-exon transcripts.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "coding": {
          "type": "boolean",
          "description": "Coding only: discard mRNAs that have no CDS features.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "nc": {
          "type": "boolean",
          "description": "Non-coding only: discard mRNAs that have CDS features.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "ignore_locus": {
          "type": "boolean",
          "description": "Discard locus features and attributes found in the input.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "description": {
          "type": "boolean",
          "description": "Use the description field from <seq_info.fsize> and add it as the value for a 'descr' \nattribute to the GFF record.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        }
      }
    },
    "sorting": {
      "title": "Sorting",
      "type": "object",
      "description": "No description",
      "properties": {
        "sort_alpha": {
          "type": "boolean",
          "description": "Chromosomes (reference sequences) are sorted alphabetically.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "sort_by": {
          "type": "string",
          "format": "path",
          "description": "Sort the reference sequences by the order in which their names are given in the \n<refseq.lst> file.\n",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`. "
        }
      }
    },
    "misc options": {
      "title": "Misc options",
      "type": "object",
      "description": "No description",
      "properties": {
        "keep_attrs": {
          "type": "boolean",
          "description": "Keep all GFF attributes (for non-exon features).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "keep_exon_attrs": {
          "type": "boolean",
          "description": "For --keep_attrs option, do not attempt to reduce redundant exon/CDS attributes.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "no_exon_attrs": {
          "type": "boolean",
          "description": "Do not keep exon attributes, move them to the transcript feature (for GFF3 output).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "attrs": {
          "type": "string",
          "description": "Only output the GTF/GFF attributes listed in <attr-list>, which is a comma-delimited \nlist of attribute names.\n",
          "help_text": "Type: `string`, multiple: `False`. "
        },
        "keep_genes": {
          "type": "boolean",
          "description": "In transcript-only mode (default), also preserve gene records.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "keep_comments": {
          "type": "boolean",
          "description": "For GFF3 input/output, try to preserve comments.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "process_other": {
          "type": "boolean",
          "description": "Process other non-transcript GFF records (by default non-transcript records are ignored).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "rm_stop_codons": {
          "type": "boolean",
          "description": "Discard any mRNAs with CDS having in-frame stop codons (requires --genome).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "adj_cds_start": {
          "type": "boolean",
          "description": "For --rm_stop_codons option, check and adjust the starting CDS phase if the original phase\nleads to a translation with an in-frame stop codon.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "opposite_strand": {
          "type": "boolean",
          "description": "For --rm_stop_codons option, single-exon transcripts are also checked on the opposite strand (requires \n--genome).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "coding_status": {
          "type": "boolean",
          "description": "Add transcript level GFF attributes about the coding status of each transcript, including \npartialness or in-frame stop codons (requires --genome).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "add_hasCDS": {
          "type": "boolean",
          "description": "Add a \"hasCDS\" attribute with value \"true\" for transcripts that have CDS features.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "adj_stop": {
          "type": "boolean",
          "description": "Stop codon adjustment: enables --coding_status and performs automatic adjustment of the CDS stop \ncoordinate if premature or downstream.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "rm_noncanon": {
          "type": "boolean",
          "description": "Discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus \n(i.e. not GT-AG, GC-AG or AT-AC).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "complete_cds": {
          "type": "boolean",
          "description": "Discard any mRNAs that either lack initial START codon or the terminal STOP codon, or \nhave an in-frame stop codon (i.e. only print mRNAs with a complete CDS).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "no_pseudo": {
          "type": "boolean",
          "description": "Filter out records matching the 'pseudo' keyword.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "in_bed": {
          "type": "boolean",
          "description": "Input should be parsed as BED format (automatic if the input filename ends with .bed*).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "in_tlf": {
          "type": "boolean",
          "description": "Input GFF-like one-line-per-transcript format without exon/CDS features (see --tlf option); \nautomatic if the input filename ends with .tlf.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "stream": {
          "type": "boolean",
          "description": "Fast processing of input GFF/BED transcripts as they are received (no sorting, exons must \nbe grouped by transcript in the input data).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        }
      }
    },
    "clustering": {
      "title": "Clustering",
      "type": "object",
      "description": "No description",
      "properties": {
        "merge": {
          "type": "boolean",
          "description": "Cluster the input transcripts into loci, discarding \"redundant\" transcripts (those with \nthe same exact introns and fully contained or equal boundaries).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "dupinfo": {
          "type": "string",
          "format": "path",
          "description": "For --merge option, write duplication info to file <dupinfo>.\n",
          "help_text": "Type: `file`, multiple: `False`, direction: `input`. "
        },
        "cluster_only": {
          "type": "boolean",
          "description": "Same as --merge but without discarding any of the \"duplicate\" transcripts, only create \n\"locus\" features.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "rm_redundant": {
          "type": "boolean",
          "description": "For --merge option: also discard as redundant the shorter, fully contained transcripts (intron \nchains matching a part of the container).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "no_boundary": {
          "type": "boolean",
          "description": "For --merge option, no longer require boundary containment when assessing redundancy (can be \ncombined with --rm_redundant); only introns have to match for multi-exon transcripts, and >=80%\noverlap for single-exon transcripts.\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        },
        "no_overlap": {
          "type": "boolean",
          "description": "For --merge option, enforce --no_boundary but also discard overlapping single-exon transcripts,\neven on the opposite strand (can be combined with --rm_redundant).\n",
          "help_text": "Type: `boolean_true`, multiple: `False`, default: `false`. ",
          "default": false
        }
      }
    },
    "nextflow input-output arguments": {
      "title": "Nextflow input-output arguments",
      "type": "object",
      "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
      "properties": {
        "publish_dir": {
          "type": "string",
          "description": "Path to an output directory.",
          "help_text": "Type: `string`, multiple: `False`, required, example: `\"output/\"`. "
        }
      }
    }
  },
  "allOf": [
    {
      "$ref": "#/$defs/inputs"
    },
    {
      "$ref": "#/$defs/outputs"
    },
    {
      "$ref": "#/$defs/options"
    },
    {
      "$ref": "#/$defs/sorting"
    },
    {
      "$ref": "#/$defs/misc options"
    },
    {
      "$ref": "#/$defs/clustering"
    },
    {
      "$ref": "#/$defs/nextflow input-output arguments"
    }
  ]
}