Build pipeline: viash-hub.biobox.main-bjgnf
Source commit: 2b29a47575
Source message: Bedtools GroupBY (#123)
* Initial Commit
* Update config.vsh.yaml
* config file
* script.sh
* adding some tests
* more test
* Update CHANGELOG.md
* deleted test_data
* bug fix
* Update config.vsh.yaml
* adding more links
* exit on error
* $TMPDIR
* Update script.sh
* Update config.vsh.yaml
* Suggested change on column option
---------
Co-authored-by: Jakub Majercik <57993790+jakubmajercik@users.noreply.github.com>
817 lines
35 KiB
JSON
817 lines
35 KiB
JSON
{
|
|
"$schema": "http://json-schema.org/draft-07/schema",
|
|
"title": "gffread",
|
|
"description": "Validate, filter, convert and perform various other operations on GFF files.",
|
|
"type": "object",
|
|
"definitions": {
|
|
|
|
|
|
|
|
"inputs" : {
|
|
"title": "Inputs",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"input": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required, example: `annotation.gff`. A reference file in either the GFF3, GFF2 or GTF format",
|
|
"help_text": "Type: `file`, required, example: `annotation.gff`. A reference file in either the GFF3, GFF2 or GTF format.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chr_mapping": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. \u003cchr_replace\u003e is a name mapping table for converting reference sequence names, \nhaving this 2-column format: \u003coriginal_ref_ID\u003e \u003cnew_ref_ID\u003e",
|
|
"help_text": "Type: `file`. \u003cchr_replace\u003e is a name mapping table for converting reference sequence names, \nhaving this 2-column format: \u003coriginal_ref_ID\u003e \u003cnew_ref_ID\u003e.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"seq_info": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. \u003cseq_info",
|
|
"help_text": "Type: `file`. \u003cseq_info.fsize\u003e is a tab-delimited file providing this info for each of the mapped \nsequences: \u003cseq-name\u003e \u003cseq-length\u003e \u003cseq-description\u003e (useful for --description option with \nmRNA/EST/protein mappings).\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"genome": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, example: `genome.fa`. Full path to a multi-fasta file with the genomic sequences for all input mappings, \nOR a directory with single-fasta files (one per genomic sequence, with file names \nmatching sequence names)",
|
|
"help_text": "Type: `file`, example: `genome.fa`. Full path to a multi-fasta file with the genomic sequences for all input mappings, \nOR a directory with single-fasta files (one per genomic sequence, with file names \nmatching sequence names).\n"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"outputs" : {
|
|
"title": "Outputs",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"outfile": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required, default: `$id.$key.outfile.gff`, example: `output.gff`. Write the output records into \u003coutfile\u003e",
|
|
"help_text": "Type: `file`, required, default: `$id.$key.outfile.gff`, example: `output.gff`. Write the output records into \u003coutfile\u003e.\n"
|
|
,
|
|
"default": "$id.$key.outfile.gff"
|
|
}
|
|
|
|
|
|
,
|
|
"force_exons": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Make sure that the lowest level GFF features are considered \"exon\" features",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Make sure that the lowest level GFF features are considered \"exon\" features.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"gene2exon": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For single-line genes not parenting any transcripts, add an exon feature spanning \nthe entire gene (treat it as a transcript)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For single-line genes not parenting any transcripts, add an exon feature spanning \nthe entire gene (treat it as a transcript).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"t_adopt": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Try to find a parent gene overlapping/containing a transcript that does not have \nany explicit gene Parent",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Try to find a parent gene overlapping/containing a transcript that does not have \nany explicit gene Parent.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"decode": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Decode url encoded characters within attributes",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Decode url encoded characters within attributes.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"merge_exons": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Merge very close exons into a single exon (when intron size\u003c4)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Merge very close exons into a single exon (when intron size\u003c4).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"junctions": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Output the junctions and the corresponding transcripts",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Output the junctions and the corresponding transcripts.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"spliced_exons": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.spliced_exons.fa`, example: `exons.fa`. Write a fasta file with spliced exons for each transcript",
|
|
"help_text": "Type: `file`, default: `$id.$key.spliced_exons.fa`, example: `exons.fa`. Write a fasta file with spliced exons for each transcript.\n"
|
|
,
|
|
"default": "$id.$key.spliced_exons.fa"
|
|
}
|
|
|
|
|
|
,
|
|
"w_add": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`. For the --spliced_exons option, extract additional \u003cN\u003e bases both upstream and \ndownstream of the transcript boundaries",
|
|
"help_text": "Type: `integer`. For the --spliced_exons option, extract additional \u003cN\u003e bases both upstream and \ndownstream of the transcript boundaries.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"w_nocds": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For --spliced_exons, disable the output of CDS info in the FASTA file",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For --spliced_exons, disable the output of CDS info in the FASTA file.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"spliced_cds": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, example: `cds.fa`. Write a fasta file with spliced CDS for each GFF transcript",
|
|
"help_text": "Type: `file`, example: `cds.fa`. Write a fasta file with spliced CDS for each GFF transcript.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"tr_cds": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, example: `tr_cds.fa`. Write a protein fasta file with the translation of CDS for each record",
|
|
"help_text": "Type: `file`, example: `tr_cds.fa`. Write a protein fasta file with the translation of CDS for each record.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"w_coords": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For --spliced_exons, --spliced_cds and -tr_cds options, write in the FASTA defline \nall the exon coordinates projected onto the spliced sequence",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For --spliced_exons, --spliced_cds and -tr_cds options, write in the FASTA defline \nall the exon coordinates projected onto the spliced sequence.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"stop_dot": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For --tr_cds option, use \u0027*\u0027 instead of \u0027",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For --tr_cds option, use \u0027*\u0027 instead of \u0027.\u0027 as stop codon translation.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"id_version": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Ensembl GTF to GFF3 conversion, adds version to IDs",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Ensembl GTF to GFF3 conversion, adds version to IDs.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"trackname": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. Use \u003ctrackname\u003e in the 2nd column of each GFF/GTF output line",
|
|
"help_text": "Type: `string`. Use \u003ctrackname\u003e in the 2nd column of each GFF/GTF output line.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"gtf_output": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Main output will be GTF instead of GFF3",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Main output will be GTF instead of GFF3.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"bed": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Output records in BED format instead of default GFF3",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Output records in BED format instead of default GFF3.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"tlf": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Output \"transcript line format\" which is like GFF but with exons and CDS related \nfeatures stored as GFF attributes in the transcript feature line, like this:\n exoncount=N;exons=\u003cexons\u003e;CDSphase=\u003cN\u003e;CDS=\u003cCDScoords\u003e\n\u003cexons\u003e is a comma-delimited list of exon_start-exon_end coordinates;\n\u003cCDScoords\u003e is CDS_start:CDS_end coordinates or a list like \u003cexons\u003e",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Output \"transcript line format\" which is like GFF but with exons and CDS related \nfeatures stored as GFF attributes in the transcript feature line, like this:\n exoncount=N;exons=\u003cexons\u003e;CDSphase=\u003cN\u003e;CDS=\u003cCDScoords\u003e\n\u003cexons\u003e is a comma-delimited list of exon_start-exon_end coordinates;\n\u003cCDScoords\u003e is CDS_start:CDS_end coordinates or a list like \u003cexons\u003e.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"table": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. Output a simple tab delimited format instead of GFF, with columns having the values \nof GFF attributes given in \u003cattrlist\u003e; special pseudo-attributes (prefixed by @) are \nrecognized:\n @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen\nIf any of --spliced_exons/--tr_cds/--spliced_cds FASTA output files are enabled, the \nsame fields (excluding @id) are appended to the definition line of corresponding FASTA\nrecords",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. Output a simple tab delimited format instead of GFF, with columns having the values \nof GFF attributes given in \u003cattrlist\u003e; special pseudo-attributes (prefixed by @) are \nrecognized:\n @id, @geneid, @chr, @start, @end, @strand, @numexons, @exons, @cds, @covlen, @cdslen\nIf any of --spliced_exons/--tr_cds/--spliced_cds FASTA output files are enabled, the \nsame fields (excluding @id) are appended to the definition line of corresponding FASTA\nrecords.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"expose_dups": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Expose (warn about) duplicate transcript IDs and other potential problems with the \ngiven GFF/GTF records",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Expose (warn about) duplicate transcript IDs and other potential problems with the \ngiven GFF/GTF records.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"options" : {
|
|
"title": "Options",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"ids": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Discard records/transcripts if their IDs are not listed in \u003cIDs",
|
|
"help_text": "Type: `file`. Discard records/transcripts if their IDs are not listed in \u003cIDs.lst\u003e.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"nids": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Discard records/transcripts if their IDs are listed in \u003cIDs",
|
|
"help_text": "Type: `file`. Discard records/transcripts if their IDs are listed in \u003cIDs.lst\u003e.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"maxintron": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`. Discard transcripts having an intron larger than \u003cmaxintron\u003e",
|
|
"help_text": "Type: `integer`. Discard transcripts having an intron larger than \u003cmaxintron\u003e.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"minlen": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`. Discard transcripts shorter than \u003cminlen\u003e bases",
|
|
"help_text": "Type: `integer`. Discard transcripts shorter than \u003cminlen\u003e bases.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"range": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. Only show transcripts overlapping coordinate range \u003cstart\u003e",
|
|
"help_text": "Type: `string`. Only show transcripts overlapping coordinate range \u003cstart\u003e..\u003cend\u003e (on chromosome/contig \n\u003cchr\u003e, strand \u003cstrand\u003e if provided).\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"strict_range": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For --range option, discard all transcripts that are not fully contained within the given \nrange",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For --range option, discard all transcripts that are not fully contained within the given \nrange.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"jmatch": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. Only output transcripts matching the given junction",
|
|
"help_text": "Type: `string`. Only output transcripts matching the given junction.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"no_single_exon": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Discard single-exon transcripts",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Discard single-exon transcripts.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"coding": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Coding only: discard mRNAs that have no CDS features",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Coding only: discard mRNAs that have no CDS features.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"nc": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Non-coding only: discard mRNAs that have CDS features",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Non-coding only: discard mRNAs that have CDS features.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"ignore_locus": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Discard locus features and attributes found in the input",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Discard locus features and attributes found in the input.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"description": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Use the description field from \u003cseq_info",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Use the description field from \u003cseq_info.fsize\u003e and add it as the value for a \u0027descr\u0027 \nattribute to the GFF record.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"sorting" : {
|
|
"title": "Sorting",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"sort_alpha": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Chromosomes (reference sequences) are sorted alphabetically",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Chromosomes (reference sequences) are sorted alphabetically.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"sort_by": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. Sort the reference sequences by the order in which their names are given in the \n\u003crefseq",
|
|
"help_text": "Type: `file`. Sort the reference sequences by the order in which their names are given in the \n\u003crefseq.lst\u003e file.\n"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"misc options" : {
|
|
"title": "Misc options",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"keep_attrs": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Keep all GFF attributes (for non-exon features)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Keep all GFF attributes (for non-exon features).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"keep_exon_attrs": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For -F option, do not attempt to reduce redundant exon/CDS attributes",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For -F option, do not attempt to reduce redundant exon/CDS attributes.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"no_exon_attrs": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Do not keep exon attributes, move them to the transcript feature (for GFF3 output)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Do not keep exon attributes, move them to the transcript feature (for GFF3 output).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"attrs": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. Only output the GTF/GFF attributes listed in \u003cattr-list\u003e which is a comma delimited \nlist of attribute names to",
|
|
"help_text": "Type: `string`. Only output the GTF/GFF attributes listed in \u003cattr-list\u003e which is a comma delimited \nlist of attribute names to.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"keep_genes": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. In transcript-only mode (default), also preserve gene records",
|
|
"help_text": "Type: `boolean_true`, default: `false`. In transcript-only mode (default), also preserve gene records.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"keep_comments": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For GFF3 input/output, try to preserve comments",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For GFF3 input/output, try to preserve comments.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"process_other": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. process other non-transcript GFF records (by default non-transcript records are ignored)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. process other non-transcript GFF records (by default non-transcript records are ignored).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"rm_stop_codons": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Discard any mRNAs with CDS having in-frame stop codons (requires --genome)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Discard any mRNAs with CDS having in-frame stop codons (requires --genome).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"adj_cds_start": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For --rm_stop_codons option, check and adjust the starting CDS phase if the original phase\nleads to a translation with an in-frame stop codon",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For --rm_stop_codons option, check and adjust the starting CDS phase if the original phase\nleads to a translation with an in-frame stop codon.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"opposite_strand": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For -V option, single-exon transcripts are also checked on the opposite strand (requires \n--genome)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For -V option, single-exon transcripts are also checked on the opposite strand (requires \n--genome). \n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"coding_status": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Add transcript level GFF attributes about the coding status of each transcript, including \npartialness or in-frame stop codons (requires --genome)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Add transcript level GFF attributes about the coding status of each transcript, including \npartialness or in-frame stop codons (requires --genome).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"add_hasCDS": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Add a \"hasCDS\" attribute with value \"true\" for transcripts that have CDS features",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Add a \"hasCDS\" attribute with value \"true\" for transcripts that have CDS features. \n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"adj_stop": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Stop codon adjustment: enables --coding_status and performs automatic adjustment of the CDS stop \ncoordinate if premature or downstream",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Stop codon adjustment: enables --coding_status and performs automatic adjustment of the CDS stop \ncoordinate if premature or downstream.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"rm_noncanon": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus \n(i",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Discard multi-exon mRNAs that have any intron with a non-canonical splice site consensus \n(i.e. not GT-AG, GC-AG or AT-AC).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"complete_cds": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Discard any mRNAs that either lack initial START codon or the terminal STOP codon, or \nhave an in-frame stop codon (i",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Discard any mRNAs that either lack initial START codon or the terminal STOP codon, or \nhave an in-frame stop codon (i.e. only print mRNAs with a complete CDS).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"no_pseudo": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Filter out records matching the \u0027pseudo\u0027 keyword",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Filter out records matching the \u0027pseudo\u0027 keyword.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"in_bed": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Input should be parsed as BED format (automatic if the input filename ends with ",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Input should be parsed as BED format (automatic if the input filename ends with .bed*).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"in_tlf": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Input GFF-like one-line-per-transcript format without exon/CDS features (see --tlf option \nbelow); automatic if the input filename ends with ",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Input GFF-like one-line-per-transcript format without exon/CDS features (see --tlf option \nbelow); automatic if the input filename ends with .tlf).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"stream": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Fast processing of input GFF/BED transcripts as they are received (no sorting, exons must \nbe grouped by transcript in the input data)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Fast processing of input GFF/BED transcripts as they are received (no sorting, exons must \nbe grouped by transcript in the input data).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"clustering" : {
|
|
"title": "Clustering",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"merge": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Cluster the input transcripts into loci, discarding \"redundant\" transcripts (those with \nthe same exact introns and fully contained or equal boundaries)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Cluster the input transcripts into loci, discarding \"redundant\" transcripts (those with \nthe same exact introns and fully contained or equal boundaries).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"dupinfo": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. For --merge option, write duplication info to file \u003cdupinfo\u003e",
|
|
"help_text": "Type: `file`. For --merge option, write duplication info to file \u003cdupinfo\u003e.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"cluster_only": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. Same as --merge but without discarding any of the \"duplicate\" transcripts, only create \n\"locus\" features",
|
|
"help_text": "Type: `boolean_true`, default: `false`. Same as --merge but without discarding any of the \"duplicate\" transcripts, only create \n\"locus\" features.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"rm_redundant": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For --merge option: also discard as redundant the shorter, fully contained transcripts (intron \nchains matching a part of the container)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For --merge option: also discard as redundant the shorter, fully contained transcripts (intron \nchains matching a part of the container).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"no_boundary": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For --merge option, no longer require boundary containment when assessing redundancy (can be \ncombined with --rm_redundant); only introns have to match for multi-exon transcripts, and \u003e=80%\noverlap for single-exon transcripts",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For --merge option, no longer require boundary containment when assessing redundancy (can be \ncombined with --rm_redundant); only introns have to match for multi-exon transcripts, and \u003e=80%\noverlap for single-exon transcripts.\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
,
|
|
"no_overlap": {
|
|
"type":
|
|
"boolean",
|
|
"description": "Type: `boolean_true`, default: `false`. For --merge option, enforce --no_boundary but also discard overlapping single-exon transcripts,\neven on the opposite strand (can be combined with --rm_redudant)",
|
|
"help_text": "Type: `boolean_true`, default: `false`. For --merge option, enforce --no_boundary but also discard overlapping single-exon transcripts,\neven on the opposite strand (can be combined with --rm_redudant).\n"
|
|
,
|
|
"default": "False"
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"nextflow input-output arguments" : {
|
|
"title": "Nextflow input-output arguments",
|
|
"type": "object",
|
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
|
"properties": {
|
|
|
|
|
|
"publish_dir": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
|
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"param_list": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
|
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
|
"hidden": true
|
|
|
|
}
|
|
|
|
|
|
}
|
|
}
|
|
},
|
|
"allOf": [
|
|
|
|
{
|
|
"$ref": "#/definitions/inputs"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/outputs"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/options"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/sorting"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/misc options"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/clustering"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/nextflow input-output arguments"
|
|
}
|
|
]
|
|
}
|