Files
biobox/target/nextflow/featurecounts/nextflow_schema.json
CI 47c9784250 Build branch v0.3 with version v0.3.0 (d86bd5c)
Build pipeline: viash-hub.biobox.v0.3-cl68g

Source commit: d86bd5cf62

Source message: Merge remote-tracking branch 'origin/main' into v0.3
2024-12-03 11:09:46 +00:00

727 lines
29 KiB
JSON

{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "featurecounts",
"description": "featureCounts is a read summarization program for counting reads generated from either RNA or genomic DNA sequencing experiments by implementing highly efficient chromosome hashing and feature blocking techniques. It works with either single or paired-end reads and provides a wide range of options appropriate for different sequencing applications.\n",
"type": "object",
"definitions": {
"inputs" : {
"title": "Inputs",
"type": "object",
"description": "No description",
"properties": {
"annotation": {
"type":
"string",
"description": "Type: `file`, required, example: `annotation.gtf`. Name of an annotation file",
"help_text": "Type: `file`, required, example: `annotation.gtf`. Name of an annotation file. GTF/GFF format by default. See \u0027--format\u0027 option for more format information.\n"
}
,
"input": {
"type":
"string",
"description": "Type: List of `file`, required, example: `input_file1.bam`, multiple_sep: `\";\"`. A list of SAM or BAM format files separated by semi-colon (;)",
"help_text": "Type: List of `file`, required, example: `input_file1.bam`, multiple_sep: `\";\"`. A list of SAM or BAM format files separated by semi-colon (;). They can be either name or location sorted. Location-sorted paired-end reads are automatically sorted by read names.\n"
}
}
},
"outputs" : {
"title": "Outputs",
"type": "object",
"description": "No description",
"properties": {
"counts": {
"type":
"string",
"description": "Type: `file`, required, default: `$id.$key.counts.tsv`, example: `features.tsv`. Name of output file including read counts in tab delimited format",
"help_text": "Type: `file`, required, default: `$id.$key.counts.tsv`, example: `features.tsv`. Name of output file including read counts in tab delimited format.\n"
,
"default":"$id.$key.counts.tsv"
}
,
"summary": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.summary.tsv`, example: `summary.tsv`. Summary statistics of counting results in tab delimited format",
"help_text": "Type: `file`, default: `$id.$key.summary.tsv`, example: `summary.tsv`. Summary statistics of counting results in tab delimited format.\n"
,
"default":"$id.$key.summary.tsv"
}
,
"junctions": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.junctions.txt`, example: `junctions.txt`. Count number of reads supporting each exon-exon junction",
"help_text": "Type: `file`, default: `$id.$key.junctions.txt`, example: `junctions.txt`. Count number of reads supporting each exon-exon junction. Junctions were identified from those exon-spanning reads in the input (containing \u0027N\u0027 in CIGAR string).\n"
,
"default":"$id.$key.junctions.txt"
}
}
},
"annotation" : {
"title": "Annotation",
"type": "object",
"description": "No description",
"properties": {
"format": {
"type":
"string",
"description": "Type: `string`, example: `GTF`, choices: ``GTF`, `GFF`, `SAF``. Specify format of the provided annotation file",
"help_text": "Type: `string`, example: `GTF`, choices: ``GTF`, `GFF`, `SAF``. Specify format of the provided annotation file. Acceptable formats include \u0027GTF\u0027 (or compatible GFF format) and \u0027SAF\u0027. \u0027GTF\u0027 by default. \n",
"enum": ["GTF", "GFF", "SAF"]
}
,
"feature_type": {
"type":
"string",
"description": "Type: List of `string`, example: `exon`, multiple_sep: `\";\"`. Specify feature type(s) in a GTF annotation",
"help_text": "Type: List of `string`, example: `exon`, multiple_sep: `\";\"`. Specify feature type(s) in a GTF annotation. If multiple types are provided, they should be separated by \u0027;\u0027 with no space in between. \u0027exon\u0027 by default. Rows in the annotation with a matched feature will be extracted and used for read mapping.\n"
}
,
"attribute_type": {
"type":
"string",
"description": "Type: `string`, example: `gene_id`. Specify attribute type in GTF annotation",
"help_text": "Type: `string`, example: `gene_id`. Specify attribute type in GTF annotation. \u0027gene_id\u0027 by default. Meta-features used for read counting will be extracted from annotation using the provided value.\n"
}
,
"extra_attributes": {
"type":
"string",
"description": "Type: List of `string`, multiple_sep: `\";\"`. Extract extra attribute types from the provided GTF annotation and include them in the counting output",
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. Extract extra attribute types from the provided GTF annotation and include them in the counting output. These attribute types will not be used to group features. If more than one attribute type is provided they should be separated by semicolon (;).\n"
}
,
"chrom_alias": {
"type":
"string",
"description": "Type: `file`, example: `chrom_alias.csv`. Provide a chromosome name alias file to match chr names in annotation with those in the reads",
"help_text": "Type: `file`, example: `chrom_alias.csv`. Provide a chromosome name alias file to match chr names in annotation with those in the reads. This should be a two-column comma-delimited text file. Its first column should include chr names in the annotation and its second column should include chr names in the reads. Chr names are case sensitive. No column header should be included in the file.\n"
}
}
},
"level of summarization" : {
"title": "Level of summarization",
"type": "object",
"description": "No description",
"properties": {
"feature_level": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Perform read counting at feature level (eg",
"help_text": "Type: `boolean_true`, default: `false`. Perform read counting at feature level (eg. counting reads for exons rather than genes).\n"
,
"default":false
}
}
},
"overlap between reads and features" : {
"title": "Overlap between reads and features",
"type": "object",
"description": "No description",
"properties": {
"overlapping": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Assign reads to all their overlapping meta-features (or features if \u0027--feature_level\u0027 is specified)",
"help_text": "Type: `boolean_true`, default: `false`. Assign reads to all their overlapping meta-features (or features if \u0027--feature_level\u0027 is specified).\n"
,
"default":false
}
,
"min_overlap": {
"type":
"integer",
"description": "Type: `integer`, example: `1`. Minimum number of overlapping bases in a read that is required for read assignment",
"help_text": "Type: `integer`, example: `1`. Minimum number of overlapping bases in a read that is required for read assignment. 1 by default. Number of overlapping bases is counted from both reads if paired end. If a negative value is provided, then a gap of up to specified size will be allowed between read and the feature that the read is assigned to.\n"
}
,
"frac_overlap": {
"type":
"number",
"description": "Type: `double`, example: `0.0`. Minimum fraction of overlapping bases in a read that is required for read assignment",
"help_text": "Type: `double`, example: `0.0`. Minimum fraction of overlapping bases in a read that is required for read assignment. Value should be within range [0,1]. 0 by default. Number of overlapping bases is counted from both reads if paired end. Both this option and \u0027--min_overlap\u0027 option need to be satisfied for read assignment.\n"
}
,
"frac_overlap_feature": {
"type":
"number",
"description": "Type: `double`, example: `0.0`. Minimum fraction of overlapping bases in a feature that is required for read assignment",
"help_text": "Type: `double`, example: `0.0`. Minimum fraction of overlapping bases in a feature that is required for read assignment. Value should be within range [0,1]. 0 by default.\n"
}
,
"largest_overlap": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Assign reads to a meta-feature/feature that has the largest number of overlapping bases",
"help_text": "Type: `boolean_true`, default: `false`. Assign reads to a meta-feature/feature that has the largest number of overlapping bases.\n"
,
"default":false
}
,
"non_overlap": {
"type":
"integer",
"description": "Type: `integer`. Maximum number of non-overlapping bases in a read (or a read pair) that is allowed when being assigned to a feature",
"help_text": "Type: `integer`. Maximum number of non-overlapping bases in a read (or a read pair) that is allowed when being assigned to a feature. No limit is set by default.\n"
}
,
"non_overlap_feature": {
"type":
"integer",
"description": "Type: `integer`. Maximum number of non-overlapping bases in a feature that is allowed in read assignment",
"help_text": "Type: `integer`. Maximum number of non-overlapping bases in a feature that is allowed in read assignment. No limit is set by default.\n"
}
,
"read_extension5": {
"type":
"integer",
"description": "Type: `integer`. Reads are extended upstream by \u003cint\u003e bases from their 5\u0027 end",
"help_text": "Type: `integer`. Reads are extended upstream by \u003cint\u003e bases from their 5\u0027 end.\n"
}
,
"read_extension3": {
"type":
"integer",
"description": "Type: `integer`. Reads are extended upstream by \u003cint\u003e bases from their 3\u0027 end",
"help_text": "Type: `integer`. Reads are extended upstream by \u003cint\u003e bases from their 3\u0027 end.\n"
}
,
"read2pos": {
"type":
"integer",
"description": "Type: `integer`, choices: ``3`, `5``. Reduce reads to their 5\u0027 most base or 3\u0027 most base",
"help_text": "Type: `integer`, choices: ``3`, `5``. Reduce reads to their 5\u0027 most base or 3\u0027 most base. Read counting is then performed based on the single base the read is reduced to.\n",
"enum": [3, 5]
}
}
},
"multi-mapping reads" : {
"title": "Multi-mapping reads",
"type": "object",
"description": "No description",
"properties": {
"multi_mapping": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Multi-mapping reads will also be counted",
"help_text": "Type: `boolean_true`, default: `false`. Multi-mapping reads will also be counted. For a multi-mapping read, all its reported alignments will be counted. The \u0027NH\u0027 tag in BAM/SAM input is used to detect multi-mapping reads.\n"
,
"default":false
}
}
},
"fractional counting" : {
"title": "Fractional counting",
"type": "object",
"description": "No description",
"properties": {
"fraction": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Assign fractional counts to features",
"help_text": "Type: `boolean_true`, default: `false`. Assign fractional counts to features. This option must be used together with \u0027--multi_mapping\u0027 or \u0027--overlapping\u0027 or both. When \u0027--multi_mapping\u0027 is specified, each reported alignment from a multi-mapping read (identified via \u0027NH\u0027 tag) will carry a fractional count of 1/x, instead of 1 (one), where x is the total number of alignments reported for the same read. When \u0027--overlapping\u0027 is specified, each overlapping feature will receive a fractional count of 1/y, where y is the total number of features overlapping with the read. When both \u0027--multi_mapping\u0027 and \u0027--overlapping\u0027 are specified, each alignment will carry a fractional count of 1/(x*y).\n"
,
"default":false
}
}
},
"read filtering" : {
"title": "Read filtering",
"type": "object",
"description": "No description",
"properties": {
"min_map_quality": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. The minimum mapping quality score a read must satisfy in order to be counted",
"help_text": "Type: `integer`, example: `0`. The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 0 by default.\n"
}
,
"split_only": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Count split alignments only (ie",
"help_text": "Type: `boolean_true`, default: `false`. Count split alignments only (ie. alignments with CIGAR string containing \u0027N\u0027). An example of split alignments is exon-spanning reads in RNA-seq data.\n"
,
"default":false
}
,
"non_split_only": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. If specified, only non-split alignments (CIGAR strings do not contain letter \u0027N\u0027) will be counted",
"help_text": "Type: `boolean_true`, default: `false`. If specified, only non-split alignments (CIGAR strings do not contain letter \u0027N\u0027) will be counted. All the other alignments will be ignored.\n"
,
"default":false
}
,
"primary": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Count primary alignments only",
"help_text": "Type: `boolean_true`, default: `false`. Count primary alignments only. Primary alignments are identified using bit 0x100 in SAM/BAM FLAG field.\n"
,
"default":false
}
,
"ignore_dup": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Ignore duplicate reads in read counting",
"help_text": "Type: `boolean_true`, default: `false`. Ignore duplicate reads in read counting. Duplicate reads are identified using bit Ox400 in BAM/SAM FLAG field. The whole read pair is ignored if one of the reads is a duplicate read for paired end data.\n"
,
"default":false
}
}
},
"strandedness" : {
"title": "Strandedness",
"type": "object",
"description": "No description",
"properties": {
"strand": {
"type":
"integer",
"description": "Type: `integer`, example: `0`, choices: ``0`, `1`, `2``. Perform strand-specific read counting",
"help_text": "Type: `integer`, example: `0`, choices: ``0`, `1`, `2``. Perform strand-specific read counting. A single integer value (applied to all input files) should be provided. Possible values include: 0 (unstranded), 1 (stranded) and 2 (reversely stranded). Default value is 0 (ie. unstranded read counting carried out for all input files).\n",
"enum": [0, 1, 2]
}
}
},
"exon-exon junctions" : {
"title": "Exon-exon junctions",
"type": "object",
"description": "No description",
"properties": {
"ref_fasta": {
"type":
"string",
"description": "Type: `file`, example: `reference.fasta`. Provide the name of a FASTA-format file that contains the reference sequences used in read mapping that produced the provided SAM/BAM files",
"help_text": "Type: `file`, example: `reference.fasta`. Provide the name of a FASTA-format file that contains the reference sequences used in read mapping that produced the provided SAM/BAM files.\n"
}
}
},
"parameters specific to paired end reads" : {
"title": "Parameters specific to paired end reads",
"type": "object",
"description": "No description",
"properties": {
"paired": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Specify that input data contain paired-end reads",
"help_text": "Type: `boolean_true`, default: `false`. Specify that input data contain paired-end reads. To perform fragment counting (ie. counting read pairs), the \u0027--countReadPairs\u0027 parameter should also be specified in addition to this parameter.\n"
,
"default":false
}
,
"count_read_pairs": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Count read pairs (fragments) instead of reads",
"help_text": "Type: `boolean_true`, default: `false`. Count read pairs (fragments) instead of reads. This option is only applicable for paired-end reads.\n"
,
"default":false
}
,
"both_aligned": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Count read pairs (fragments) instead of reads",
"help_text": "Type: `boolean_true`, default: `false`. Count read pairs (fragments) instead of reads. This option is only applicable for paired-end reads.\n"
,
"default":false
}
,
"check_pe_dist": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Check validity of paired-end distance when counting read pairs",
"help_text": "Type: `boolean_true`, default: `false`. Check validity of paired-end distance when counting read pairs. Use \u0027--min_length\u0027 and \u0027--max_length\u0027 to set thresholds.\n"
,
"default":false
}
,
"min_length": {
"type":
"integer",
"description": "Type: `integer`, example: `50`. Minimum fragment/template length, 50 by default",
"help_text": "Type: `integer`, example: `50`. Minimum fragment/template length, 50 by default.\n"
}
,
"max_length": {
"type":
"integer",
"description": "Type: `integer`, example: `600`. Maximum fragment/template length, 600 by default",
"help_text": "Type: `integer`, example: `600`. Maximum fragment/template length, 600 by default.\n"
}
,
"same_strand": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Do not count read pairs that have their two ends mapping to different chromosomes or mapping to same chromosome but on different strands",
"help_text": "Type: `boolean_true`, default: `false`. Do not count read pairs that have their two ends mapping to different chromosomes or mapping to same chromosome but on different strands.\n"
,
"default":false
}
,
"donotsort": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Do not sort reads in BAM/SAM input",
"help_text": "Type: `boolean_true`, default: `false`. Do not sort reads in BAM/SAM input. Note that reads from the same pair are required to be located next to each other in the input.\n"
,
"default":false
}
}
},
"read groups" : {
"title": "Read groups",
"type": "object",
"description": "No description",
"properties": {
"by_read_group": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Assign reads by read group",
"help_text": "Type: `boolean_true`, default: `false`. Assign reads by read group. \"RG\" tag is required to be present in the input BAM/SAM files.\n"
,
"default":false
}
}
},
"long reads" : {
"title": "Long reads",
"type": "object",
"description": "No description",
"properties": {
"long_reads": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Count long reads such as Nanopore and PacBio reads",
"help_text": "Type: `boolean_true`, default: `false`. Count long reads such as Nanopore and PacBio reads. Long read counting can only run in one thread and only reads (not read-pairs) can be counted. There is no limitation on the number of \u0027M\u0027 operations allowed in a CIGAR string in long read counting.\n"
,
"default":false
}
}
},
"assignment results for each read" : {
"title": "Assignment results for each read",
"type": "object",
"description": "No description",
"properties": {
"detailed_results": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.detailed_results.detailed_results`, example: `detailed_results`. Directory to save the detailed assignment results",
"help_text": "Type: `file`, default: `$id.$key.detailed_results.detailed_results`, example: `detailed_results`. Directory to save the detailed assignment results. Use `--detailed_results_format` to determine the format of the detailed results.\n"
,
"default":"$id.$key.detailed_results.detailed_results"
}
,
"detailed_results_format": {
"type":
"string",
"description": "Type: `string`, choices: ``CORE`, `SAM`, `BAM``. Output detailed assignment results for each read or read-pair",
"help_text": "Type: `string`, choices: ``CORE`, `SAM`, `BAM``. Output detailed assignment results for each read or read-pair. Results are saved to a file that is in one of the following formats: CORE, SAM and BAM. See documentaiton for more info about these formats.\n",
"enum": ["CORE", "SAM", "BAM"]
}
}
},
"miscellaneous" : {
"title": "Miscellaneous",
"type": "object",
"description": "No description",
"properties": {
"max_M_op": {
"type":
"integer",
"description": "Type: `integer`, example: `10`. Maximum number of \u0027M\u0027 operations allowed in a CIGAR string",
"help_text": "Type: `integer`, example: `10`. Maximum number of \u0027M\u0027 operations allowed in a CIGAR string. 10 by default. Both \u0027X\u0027 and \u0027=\u0027 are treated as \u0027M\u0027 and adjacent \u0027M\u0027 operations are merged in the CIGAR string.\n"
}
,
"verbose": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Output verbose information for debugging, such as un-matched chromosome/contig names",
"help_text": "Type: `boolean_true`, default: `false`. Output verbose information for debugging, such as un-matched chromosome/contig names.\n"
,
"default":false
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/inputs"
},
{
"$ref": "#/definitions/outputs"
},
{
"$ref": "#/definitions/annotation"
},
{
"$ref": "#/definitions/level of summarization"
},
{
"$ref": "#/definitions/overlap between reads and features"
},
{
"$ref": "#/definitions/multi-mapping reads"
},
{
"$ref": "#/definitions/fractional counting"
},
{
"$ref": "#/definitions/read filtering"
},
{
"$ref": "#/definitions/strandedness"
},
{
"$ref": "#/definitions/exon-exon junctions"
},
{
"$ref": "#/definitions/parameters specific to paired end reads"
},
{
"$ref": "#/definitions/read groups"
},
{
"$ref": "#/definitions/long reads"
},
{
"$ref": "#/definitions/assignment results for each read"
},
{
"$ref": "#/definitions/miscellaneous"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}