Files
biobox/target/nextflow/fastp/nextflow_schema.json
CI 9c30676332 Build branch main with version main (d0c648f)
Build pipeline: vsh-ci-template-p9886

Source commit: d0c648fb7e

Source message: Delete src/bgzip directory (#64)

It was moved to toolbox
2024-06-24 08:40:34 +00:00

1132 lines
47 KiB
JSON

{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "fastp",
"description": "An ultra-fast all-in-one FASTQ preprocessor (QC/adapters/trimming/filtering/splitting/merging...).\n\nFeatures:\n\n - comprehensive quality profiling for both before and after filtering data (quality curves, base contents, KMER, Q20/Q30, GC Ratio, duplication, adapter contents...)\n - filter out bad reads (too low quality, too short, or too many N...)\n - cut low quality bases for per read in its 5\u0027 and 3\u0027 by evaluating the mean quality from a sliding window (like Trimmomatic but faster).\n - trim all reads in front and tail\n - cut adapters. Adapter sequences can be automatically detected, which means you don\u0027t have to input the adapter sequences to trim them.\n - correct mismatched base pairs in overlapped regions of paired end reads, if one base is with high quality while the other is with ultra low quality\n - trim polyG in 3\u0027 ends, which is commonly seen in NovaSeq/NextSeq data. Trim polyX in 3\u0027 ends to remove unwanted polyX tailing (i.e. polyA tailing for mRNA-Seq data)\n - preprocess unique molecular identifier (UMI) enabled data, shift UMI to sequence name.\n - report JSON format result for further interpreting.\n - visualize quality control and filtering results on a single HTML page (like FASTQC but faster and more informative).\n - split the output to multiple files (0001.R1.gz, 0002.R1.gz...) to support parallel processing. Two modes can be used, limiting the total split file number, or limitting the lines of each split file.\n - support long reads (data from PacBio / Nanopore devices).\n - support reading from STDIN and writing to STDOUT\n - support interleaved input\n - support ultra-fast FASTQ-level deduplication\n",
"type": "object",
"definitions": {
"inputs" : {
"title": "Inputs",
"type": "object",
"description": "`fastp` supports both single-end (SE) and paired-end (PE) input.\n\n- for SE data, you only have to specify read1 input by `-i` or `--in1`.\n- for PE data, you should also specify read2 input by `-I` or `--in2`.\n",
"properties": {
"in1": {
"type":
"string",
"description": "Type: `file`, required, example: `in.R1.fq.gz`. Input FastQ file",
"help_text": "Type: `file`, required, example: `in.R1.fq.gz`. Input FastQ file. Must be single-end or paired-end R1. Can be gzipped."
}
,
"in2": {
"type":
"string",
"description": "Type: `file`, example: `in.R2.fq.gz`. Input FastQ file",
"help_text": "Type: `file`, example: `in.R2.fq.gz`. Input FastQ file. Must be paired-end R2. Can be gzipped."
}
}
},
"outputs" : {
"title": "Outputs",
"type": "object",
"description": "\n- for SE data, you only have to specify read1 output by `-o` or `--out1`.\n- for PE data, you should also specify read2 output by `-O` or `--out2`.\n- if you don\u0027t specify the output file names, no output files will be written, but the QC will still be done for both data before and after filtering.\n- the output will be gzip-compressed if its file name ends with `.gz`\n",
"properties": {
"out1": {
"type":
"string",
"description": "Type: `file`, required, default: `$id.$key.out1.gz`, example: `out.R1.fq.gz`. The single-end or paired-end R1 reads that pass QC",
"help_text": "Type: `file`, required, default: `$id.$key.out1.gz`, example: `out.R1.fq.gz`. The single-end or paired-end R1 reads that pass QC. Will be gzipped if its file name ends with `.gz`."
,
"default": "$id.$key.out1.gz"
}
,
"out2": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.out2.gz`, example: `out.R2.fq.gz`. The paired-end R2 reads that pass QC",
"help_text": "Type: `file`, default: `$id.$key.out2.gz`, example: `out.R2.fq.gz`. The paired-end R2 reads that pass QC. Will be gzipped if its file name ends with `.gz`."
,
"default": "$id.$key.out2.gz"
}
,
"unpaired1": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.unpaired1.gz`, example: `unpaired.R1.fq.gz`. Store the reads that `read1` passes filters but its paired `read2` doesn\u0027t",
"help_text": "Type: `file`, default: `$id.$key.unpaired1.gz`, example: `unpaired.R1.fq.gz`. Store the reads that `read1` passes filters but its paired `read2` doesn\u0027t."
,
"default": "$id.$key.unpaired1.gz"
}
,
"unpaired2": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.unpaired2.gz`, example: `unpaired.R2.fq.gz`. Store the reads that `read2` passes filters but its paired `read1` doesn\u0027t",
"help_text": "Type: `file`, default: `$id.$key.unpaired2.gz`, example: `unpaired.R2.fq.gz`. Store the reads that `read2` passes filters but its paired `read1` doesn\u0027t."
,
"default": "$id.$key.unpaired2.gz"
}
,
"failed_out": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.failed_out.gz`, example: `failed.fq.gz`. Store the reads that fail filters",
"help_text": "Type: `file`, default: `$id.$key.failed_out.gz`, example: `failed.fq.gz`. Store the reads that fail filters.\n\nIf one read failed and is written to --failed_out, its failure reason will be appended to its read name. For example, failed_quality_filter, failed_too_short etc.\nFor PE data, if unpaired reads are not stored (by giving --unpaired1 or --unpaired2), the failed pair of reads will be put together. If one read passes the filters but its pair doesn\u0027t, the failure reason will be paired_read_is_failing.\n"
,
"default": "$id.$key.failed_out.gz"
}
,
"overlapped_out": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.overlapped_out.overlapped_out`. For each read pair, output the overlapped region if it has no any mismatched base",
"help_text": "Type: `file`, default: `$id.$key.overlapped_out.overlapped_out`. For each read pair, output the overlapped region if it has no any mismatched base.\n"
,
"default": "$id.$key.overlapped_out.overlapped_out"
}
}
},
"report output arguments" : {
"title": "Report output arguments",
"type": "object",
"description": "No description",
"properties": {
"json": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.json.json`, example: `out.json`. The json format report file name\n",
"help_text": "Type: `file`, default: `$id.$key.json.json`, example: `out.json`. The json format report file name\n"
,
"default": "$id.$key.json.json"
}
,
"html": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.html.html`, example: `out.html`. The html format report file name\n",
"help_text": "Type: `file`, default: `$id.$key.html.html`, example: `out.html`. The html format report file name\n"
,
"default": "$id.$key.html.html"
}
,
"report_title": {
"type":
"string",
"description": "Type: `string`, example: `fastp report`. The title of the html report, default is \"fastp report\"",
"help_text": "Type: `string`, example: `fastp report`. The title of the html report, default is \"fastp report\".\n"
}
}
},
"adapter trimming" : {
"title": "Adapter trimming",
"type": "object",
"description": "Adapter trimming is enabled by default, but you can disable it by `-A` or `--disable_adapter_trimming`. Adapter sequences can be automatically detected for both PE/SE data.\n\n- For SE data, the adapters are evaluated by analyzing the tails of first ~1M reads. This evaluation may be inacurrate, and you can specify the adapter sequence by `-a` or `--adapter_sequence` option. If adapter sequence is specified, the auto detection for SE data will be disabled.\n- For PE data, the adapters can be detected by per-read overlap analysis, which seeks for the overlap of each pair of reads. This method is robust and fast, so normally you don\u0027t have to input the adapter sequence even you know it. But you can still specify the adapter sequences for read1 by `--adapter_sequence`, and for read2 by `--adapter_sequence_r2`. If `fastp` fails to find an overlap (i.e. due to low quality bases), it will use these sequences to trim adapters for read1 and read2 respectively.\n- For PE data, the adapter sequence auto-detection is disabled by default since the adapters can be trimmed by overlap analysis. However, you can specify `--detect_adapter_for_pe` to enable it.\n- For PE data, `fastp` will run a little slower if you specify the sequence adapters or enable adapter auto-detection, but usually result in a slightly cleaner output, since the overlap analysis may fail due to sequencing errors or adapter dimers.\n- The most widely used adapter is the Illumina TruSeq adapters. If your data is from the TruSeq library, you can add `--adapter_sequence=AGATCGGAAGAGCACACGTCTGAACTCCAGTCA --adapter_sequence_r2=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT` to your command lines, or enable auto detection for PE data by specifing `detect_adapter_for_pe`.\n- `fastp` contains some built-in known adapter sequences for better auto-detection. If you want to make some adapters to be a part of the built-in adapters, please file an issue.\n\nYou can also specify --adapter_fasta to give a FASTA file to tell fastp to trim multiple adapters in this FASTA file. Here is a sample of such adapter FASTA file:\n\n```\n\u003eIllumina TruSeq Adapter Read 1\nAGATCGGAAGAGCACACGTCTGAACTCCAGTCA\n\u003eIllumina TruSeq Adapter Read 2\nAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT\n\u003epolyA\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n```\n\nThe adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. And you can give whatever you want to trim, rather than regular sequencing adapters (i.e. polyA).\n\n`fastp` first trims the auto-detected adapter or the adapter sequences given by `--adapter_sequence | --adapter_sequence_r2`, then trims the adapters given by `--adapter_fasta` one by one.\n\nThe sequence distribution of trimmed adapters can be found at the HTML/JSON reports.\n",
"properties": {
"disable_adapter_trimming": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Disable adapter trimming",
"help_text": "Type: `boolean_true`, default: `false`. Disable adapter trimming.\n"
,
"default": "False"
}
,
"detect_adapter_for_pe": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. By default, the auto-detection for adapter is for SE data input only, turn on this option to enable it for PE data",
"help_text": "Type: `boolean_true`, default: `false`. By default, the auto-detection for adapter is for SE data input only, turn on this option to enable it for PE data.\n"
,
"default": "False"
}
,
"adapter_sequence": {
"type":
"string",
"description": "Type: `string`. The adapter sequences to be trimmed",
"help_text": "Type: `string`. The adapter sequences to be trimmed. For SE data, if not specified, the adapters will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped\n"
}
,
"adapter_sequence_r2": {
"type":
"string",
"description": "Type: `string`. The adapter sequences to be trimmed for R2",
"help_text": "Type: `string`. The adapter sequences to be trimmed for R2. This is used for PE data if R1/R2 are found overlapped.\n"
}
,
"adapter_fasta": {
"type":
"string",
"description": "Type: `file`. A FASTA file containing all the adapter sequences to be trimmed",
"help_text": "Type: `file`. A FASTA file containing all the adapter sequences to be trimmed. For SE data, if not specified, the adapters will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped.\n"
}
}
},
"base trimming" : {
"title": "Base trimming",
"type": "object",
"description": "No description",
"properties": {
"trim_front1": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. Trimming how many bases in front for read1, default is 0",
"help_text": "Type: `integer`, example: `0`. Trimming how many bases in front for read1, default is 0.\n"
}
,
"trim_tail1": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. Trimming how many bases in tail for read1, default is 0",
"help_text": "Type: `integer`, example: `0`. Trimming how many bases in tail for read1, default is 0.\n"
}
,
"max_len1": {
"type":
"integer",
"description": "Type: `integer`. If read1 is longer than max_len1, then trim read1 at its tail to make it as long as max_len1",
"help_text": "Type: `integer`. If read1 is longer than max_len1, then trim read1 at its tail to make it as long as max_len1. Default 0 means no limitation.\n"
}
,
"trim_front2": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. Trimming how many bases in front for read2, default is 0",
"help_text": "Type: `integer`, example: `0`. Trimming how many bases in front for read2, default is 0.\n"
}
,
"trim_tail2": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. Trimming how many bases in tail for read2, default is 0",
"help_text": "Type: `integer`, example: `0`. Trimming how many bases in tail for read2, default is 0.\n"
}
,
"max_len2": {
"type":
"integer",
"description": "Type: `integer`. If read2 is longer than max_len2, then trim read2 at its tail to make it as long as max_len2",
"help_text": "Type: `integer`. If read2 is longer than max_len2, then trim read2 at its tail to make it as long as max_len2. Default 0 means no limitation.\n"
}
}
},
"merging mode" : {
"title": "Merging mode",
"type": "object",
"description": "Allows merging paired-end reads into a single longer read if they are overlapping.",
"properties": {
"merge": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. For paired-end input, merge each pair of reads into a single read if they are overlapped",
"help_text": "Type: `boolean_true`, default: `false`. For paired-end input, merge each pair of reads into a single read if they are overlapped. The merged reads will be written to the file given by --merged_out, the unmerged reads will be written to the files specified by --out1 and --out2. The merging mode is disabled by default.\n"
,
"default": "False"
}
,
"merged_out": {
"type":
"string",
"description": "Type: `file`, default: `$id.$key.merged_out.gz`, example: `merged.fq.gz`. In the merging mode, specify the file name to store merged output, or specify --stdout to stream the merged output",
"help_text": "Type: `file`, default: `$id.$key.merged_out.gz`, example: `merged.fq.gz`. In the merging mode, specify the file name to store merged output, or specify --stdout to stream the merged output.\n"
,
"default": "$id.$key.merged_out.gz"
}
,
"include_unmerged": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. In the merging mode, write the unmerged or unpaired reads to the file specified by --merge",
"help_text": "Type: `boolean_true`, default: `false`. In the merging mode, write the unmerged or unpaired reads to the file specified by --merge. Disabled by default.\n"
,
"default": "False"
}
}
},
"additional input arguments" : {
"title": "Additional input arguments",
"type": "object",
"description": "Affects how the input is read.",
"properties": {
"interleaved_in": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Indicate that \u003cin1\u003e is an interleaved FASTQ which contains both read1 and read2",
"help_text": "Type: `boolean_true`, default: `false`. Indicate that \u003cin1\u003e is an interleaved FASTQ which contains both read1 and read2. Disabled by default.\n"
,
"default": "False"
}
,
"fix_mgi_id": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. The MGI FASTQ ID format is not compatible with many BAM operation tools, enable this option to fix it",
"help_text": "Type: `boolean_true`, default: `false`. The MGI FASTQ ID format is not compatible with many BAM operation tools, enable this option to fix it.\n"
,
"default": "False"
}
,
"phred64": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Indicate the input is using phred64 scoring (it\u0027ll be converted to phred33, so the output will still be phred33)\n",
"help_text": "Type: `boolean_true`, default: `false`. Indicate the input is using phred64 scoring (it\u0027ll be converted to phred33, so the output will still be phred33)\n"
,
"default": "False"
}
}
},
"additional output arguments" : {
"title": "Additional output arguments",
"type": "object",
"description": "Affects how the output is written.",
"properties": {
"compression": {
"type":
"integer",
"description": "Type: `integer`, example: `4`. Compression level for gzip output (1 ~ 9)",
"help_text": "Type: `integer`, example: `4`. Compression level for gzip output (1 ~ 9). 1 is fastest, 9 is smallest, default is 4.\n"
}
,
"dont_overwrite": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Don\u0027t overwrite existing files",
"help_text": "Type: `boolean_true`, default: `false`. Don\u0027t overwrite existing files. Overwritting is allowed by default.\n"
,
"default": "False"
}
}
},
"logging arguments" : {
"title": "Logging arguments",
"type": "object",
"description": "No description",
"properties": {
"verbose": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Output verbose log information (i",
"help_text": "Type: `boolean_true`, default: `false`. Output verbose log information (i.e. when every 1M reads are processed)."
,
"default": "False"
}
}
},
"processing arguments" : {
"title": "Processing arguments",
"type": "object",
"description": "No description",
"properties": {
"reads_to_process": {
"type":
"string",
"description": "Type: `long`, example: `1000000`. Specify how many reads/pairs to be processed",
"help_text": "Type: `long`, example: `1000000`. Specify how many reads/pairs to be processed. Default 0 means process all reads.\n"
}
}
},
"deduplication arguments" : {
"title": "Deduplication arguments",
"type": "object",
"description": "No description",
"properties": {
"dedup": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Enable deduplication to drop the duplicated reads/pairs\n",
"help_text": "Type: `boolean_true`, default: `false`. Enable deduplication to drop the duplicated reads/pairs\n"
,
"default": "False"
}
,
"dup_calc_accuracy": {
"type":
"integer",
"description": "Type: `integer`, example: `3`. Accuracy level to calculate duplication (1~6)",
"help_text": "Type: `integer`, example: `3`. Accuracy level to calculate duplication (1~6). Higher level uses more memory (1G, 2G, 4G, 8G, 16G, 24G). Default 1 for no-dedup mode, and 3 for dedup mode.\n"
}
,
"dont_eval_duplication": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Don\u0027t evaluate duplication rate to save time and use less memory",
"help_text": "Type: `boolean_true`, default: `false`. Don\u0027t evaluate duplication rate to save time and use less memory.\n"
,
"default": "False"
}
}
},
"polyg tail trimming arguments" : {
"title": "PolyG tail trimming arguments",
"type": "object",
"description": "No description",
"properties": {
"trim_poly_g": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Force polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data\n",
"help_text": "Type: `boolean_true`, default: `false`. Force polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data\n"
,
"default": "False"
}
,
"poly_g_min_len": {
"type":
"integer",
"description": "Type: `integer`, example: `10`. The minimum length to detect polyG in the read tail",
"help_text": "Type: `integer`, example: `10`. The minimum length to detect polyG in the read tail. 10 by default.\n"
}
,
"disable_trim_poly_g": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Disable polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data\n",
"help_text": "Type: `boolean_true`, default: `false`. Disable polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data\n"
,
"default": "False"
}
}
},
"polyx tail trimming arguments" : {
"title": "PolyX tail trimming arguments",
"type": "object",
"description": "No description",
"properties": {
"trim_poly_x": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Enable polyX trimming in 3\u0027 ends",
"help_text": "Type: `boolean_true`, default: `false`. Enable polyX trimming in 3\u0027 ends.\n"
,
"default": "False"
}
,
"poly_x_min_len": {
"type":
"integer",
"description": "Type: `integer`, example: `10`. The minimum length to detect polyX in the read tail",
"help_text": "Type: `integer`, example: `10`. The minimum length to detect polyX in the read tail. 10 by default.\n"
}
}
},
"cut arguments" : {
"title": "Cut arguments",
"type": "object",
"description": "No description",
"properties": {
"cut_front": {
"type":
"integer",
"description": "Type: `integer`. Move a sliding window from front (5\u0027) to tail, drop the bases in the window if its mean quality \u003c threshold, stop otherwise",
"help_text": "Type: `integer`. Move a sliding window from front (5\u0027) to tail, drop the bases in the window if its mean quality \u003c threshold, stop otherwise.\n"
}
,
"cut_tail": {
"type":
"integer",
"description": "Type: `integer`. Move a sliding window from tail (3\u0027) to front, drop the bases in the window if its mean quality \u003c threshold, stop otherwise",
"help_text": "Type: `integer`. Move a sliding window from tail (3\u0027) to front, drop the bases in the window if its mean quality \u003c threshold, stop otherwise.\n"
}
,
"cut_right": {
"type":
"integer",
"description": "Type: `integer`. Move a sliding window from front to tail, if meet one window with mean quality \u003c threshold, drop the bases in the window and the right part, and then stop",
"help_text": "Type: `integer`. Move a sliding window from front to tail, if meet one window with mean quality \u003c threshold, drop the bases in the window and the right part, and then stop.\n"
}
,
"cut_window_size": {
"type":
"integer",
"description": "Type: `integer`, example: `4`. The window size option shared by cut_front, cut_tail or cut_sliding",
"help_text": "Type: `integer`, example: `4`. The window size option shared by cut_front, cut_tail or cut_sliding. Range: 1~1000, default: 4.\n"
}
,
"cut_mean_quality": {
"type":
"integer",
"description": "Type: `integer`, example: `20`. The mean quality requirement option shared by cut_front, cut_tail or cut_sliding",
"help_text": "Type: `integer`, example: `20`. The mean quality requirement option shared by cut_front, cut_tail or cut_sliding. Range: 1~36 default: 20 (Q20)\n"
}
,
"cut_front_window_size": {
"type":
"integer",
"description": "Type: `integer`, example: `4`. The window size option of cut_front, default to cut_window_size if not specified",
"help_text": "Type: `integer`, example: `4`. The window size option of cut_front, default to cut_window_size if not specified.\n"
}
,
"cut_front_mean_quality": {
"type":
"integer",
"description": "Type: `integer`, example: `20`. The mean quality requirement option of cut_front, default to cut_mean_quality if not specified",
"help_text": "Type: `integer`, example: `20`. The mean quality requirement option of cut_front, default to cut_mean_quality if not specified.\n"
}
,
"cut_tail_window_size": {
"type":
"integer",
"description": "Type: `integer`, example: `4`. The window size option of cut_tail, default to cut_window_size if not specified",
"help_text": "Type: `integer`, example: `4`. The window size option of cut_tail, default to cut_window_size if not specified.\n"
}
,
"cut_tail_mean_quality": {
"type":
"integer",
"description": "Type: `integer`, example: `20`. The mean quality requirement option of cut_tail, default to cut_mean_quality if not specified",
"help_text": "Type: `integer`, example: `20`. The mean quality requirement option of cut_tail, default to cut_mean_quality if not specified.\n"
}
,
"cut_right_window_size": {
"type":
"integer",
"description": "Type: `integer`, example: `4`. The window size option of cut_right, default to cut_window_size if not specified",
"help_text": "Type: `integer`, example: `4`. The window size option of cut_right, default to cut_window_size if not specified.\n"
}
,
"cut_right_mean_quality": {
"type":
"integer",
"description": "Type: `integer`, example: `20`. The mean quality requirement option of cut_right, default to cut_mean_quality if not specified",
"help_text": "Type: `integer`, example: `20`. The mean quality requirement option of cut_right, default to cut_mean_quality if not specified.\n"
}
}
},
"quality filtering arguments" : {
"title": "Quality filtering arguments",
"type": "object",
"description": "No description",
"properties": {
"disable_quality_filtering": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Quality filtering is enabled by default",
"help_text": "Type: `boolean_true`, default: `false`. Quality filtering is enabled by default. If this option is specified, quality filtering is disabled.\n"
,
"default": "False"
}
,
"qualified_quality_phred": {
"type":
"integer",
"description": "Type: `integer`, example: `15`. The quality value that a base is qualified",
"help_text": "Type: `integer`, example: `15`. The quality value that a base is qualified. Default 15 means phred quality \u003e=Q15 is qualified.\n"
}
,
"unqualified_percent_limit": {
"type":
"integer",
"description": "Type: `integer`, example: `40`. How many percents of bases are allowed to be unqualified (0~100)",
"help_text": "Type: `integer`, example: `40`. How many percents of bases are allowed to be unqualified (0~100). Default 40 means 40%.\n"
}
,
"n_base_limit": {
"type":
"integer",
"description": "Type: `integer`, example: `5`. If one read\u0027s number of N base is \u003en_base_limit, then this read/pair is discarded",
"help_text": "Type: `integer`, example: `5`. If one read\u0027s number of N base is \u003en_base_limit, then this read/pair is discarded. Default is 5.\n"
}
,
"average_qual": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. If one read\u0027s average quality score \u003cavg_qual, then this read/pair is discarded",
"help_text": "Type: `integer`, example: `0`. If one read\u0027s average quality score \u003cavg_qual, then this read/pair is discarded. Default 0 means no requirement.\n"
}
}
},
"length filtering arguments" : {
"title": "Length filtering arguments",
"type": "object",
"description": "No description",
"properties": {
"disable_length_filtering": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Length filtering is enabled by default",
"help_text": "Type: `boolean_true`, default: `false`. Length filtering is enabled by default. If this option is specified, length filtering is disabled.\n"
,
"default": "False"
}
,
"length_required": {
"type":
"integer",
"description": "Type: `integer`, example: `15`. Reads shorter than length_required will be discarded, default is 15",
"help_text": "Type: `integer`, example: `15`. Reads shorter than length_required will be discarded, default is 15.\n"
}
,
"length_limit": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. Reads longer than length_limit will be discarded, default 0 means no limitation",
"help_text": "Type: `integer`, example: `0`. Reads longer than length_limit will be discarded, default 0 means no limitation.\n"
}
}
},
"low complexity filtering arguments" : {
"title": "Low complexity filtering arguments",
"type": "object",
"description": "No description",
"properties": {
"low_complexity_filter": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Enable low complexity filter",
"help_text": "Type: `boolean_true`, default: `false`. Enable low complexity filter. The complexity is defined as the percentage of base that is different from its next base (base[i] != base[i+1]).\n"
,
"default": "False"
}
,
"complexity_threshold": {
"type":
"integer",
"description": "Type: `integer`, example: `30`. The threshold for low complexity filter (0~100)",
"help_text": "Type: `integer`, example: `30`. The threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required.\n"
}
}
},
"index filtering arguments" : {
"title": "Index filtering arguments",
"type": "object",
"description": "No description",
"properties": {
"filter_by_index1": {
"type":
"string",
"description": "Type: `file`. Specify a file contains a list of barcodes of index1 to be filtered out, one barcode per line",
"help_text": "Type: `file`. Specify a file contains a list of barcodes of index1 to be filtered out, one barcode per line.\n"
}
,
"filter_by_index2": {
"type":
"string",
"description": "Type: `file`. Specify a file contains a list of barcodes of index2 to be filtered out, one barcode per line",
"help_text": "Type: `file`. Specify a file contains a list of barcodes of index2 to be filtered out, one barcode per line.\n"
}
,
"filter_by_index_threshold": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. The allowed difference of index barcode for index filtering, default 0 means completely identical",
"help_text": "Type: `integer`, example: `0`. The allowed difference of index barcode for index filtering, default 0 means completely identical.\n"
}
}
},
"overlapped region correction" : {
"title": "Overlapped region correction",
"type": "object",
"description": "No description",
"properties": {
"correction": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Enable base correction in overlapped regions (only for PE data), default is disabled",
"help_text": "Type: `boolean_true`, default: `false`. Enable base correction in overlapped regions (only for PE data), default is disabled.\n"
,
"default": "False"
}
,
"overlap_len_require": {
"type":
"integer",
"description": "Type: `integer`, example: `30`. The minimum length to detect overlapped region of PE reads",
"help_text": "Type: `integer`, example: `30`. The minimum length to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. 30 by default.\n"
}
,
"overlap_diff_limit": {
"type":
"integer",
"description": "Type: `integer`, example: `5`. The maximum number of mismatched bases to detect overlapped region of PE reads",
"help_text": "Type: `integer`, example: `5`. The maximum number of mismatched bases to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. 5 by default.\n"
}
,
"overlap_diff_percent_limit": {
"type":
"integer",
"description": "Type: `integer`, example: `20`. The maximum percentage of mismatched bases to detect overlapped region of PE reads",
"help_text": "Type: `integer`, example: `20`. The maximum percentage of mismatched bases to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. Default 20 means 20%.\n"
}
}
},
"umi arguments" : {
"title": "UMI arguments",
"type": "object",
"description": "No description",
"properties": {
"umi": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Enable unique molecular identifier (UMI) preprocessing",
"help_text": "Type: `boolean_true`, default: `false`. Enable unique molecular identifier (UMI) preprocessing.\n"
,
"default": "False"
}
,
"umi_loc": {
"type":
"string",
"description": "Type: `string`, choices: ``index1`, `index2`, `read1`, `read2`, `per_index`, `per_read``. Specify the location of UMI, can be (index1/index2/read1/read2/per_index/per_read, default is none",
"help_text": "Type: `string`, choices: ``index1`, `index2`, `read1`, `read2`, `per_index`, `per_read``. Specify the location of UMI, can be (index1/index2/read1/read2/per_index/per_read, default is none.\n",
"enum": ["index1", "index2", "read1", "read2", "per_index", "per_read"]
}
,
"umi_len": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. If the UMI is in read1/read2, its length should be provided",
"help_text": "Type: `integer`, example: `0`. If the UMI is in read1/read2, its length should be provided.\n"
}
,
"umi_prefix": {
"type":
"string",
"description": "Type: `string`. If specified, an underline will be used to connect prefix and UMI (i",
"help_text": "Type: `string`. If specified, an underline will be used to connect prefix and UMI (i.e. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default.\n"
}
,
"umi_skip": {
"type":
"integer",
"description": "Type: `integer`, example: `0`. If the UMI is in read1/read2, fastp can skip several bases following UMI, default is 0",
"help_text": "Type: `integer`, example: `0`. If the UMI is in read1/read2, fastp can skip several bases following UMI, default is 0.\n"
}
,
"umi_delim": {
"type":
"string",
"description": "Type: `string`. If the UMI is in index1/index2, fastp can use a delimiter to separate UMI from the read sequence, default is none",
"help_text": "Type: `string`. If the UMI is in index1/index2, fastp can use a delimiter to separate UMI from the read sequence, default is none.\n"
}
}
},
"overrepresentation analysis arguments" : {
"title": "Overrepresentation analysis arguments",
"type": "object",
"description": "No description",
"properties": {
"overrepresentation_analysis": {
"type":
"boolean",
"description": "Type: `boolean_true`, default: `false`. Enable overrepresentation analysis",
"help_text": "Type: `boolean_true`, default: `false`. Enable overrepresentation analysis.\n"
,
"default": "False"
}
,
"overrepresentation_sampling": {
"type":
"integer",
"description": "Type: `integer`, example: `20`. One in (--overrepresentation_sampling) reads will be computed for overrepresentation analysis (1~10000), smaller is slower, default is 20",
"help_text": "Type: `integer`, example: `20`. One in (--overrepresentation_sampling) reads will be computed for overrepresentation analysis (1~10000), smaller is slower, default is 20.\n"
}
}
},
"nextflow input-output arguments" : {
"title": "Nextflow input-output arguments",
"type": "object",
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
"properties": {
"publish_dir": {
"type":
"string",
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
}
,
"param_list": {
"type":
"string",
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/inputs"
},
{
"$ref": "#/definitions/outputs"
},
{
"$ref": "#/definitions/report output arguments"
},
{
"$ref": "#/definitions/adapter trimming"
},
{
"$ref": "#/definitions/base trimming"
},
{
"$ref": "#/definitions/merging mode"
},
{
"$ref": "#/definitions/additional input arguments"
},
{
"$ref": "#/definitions/additional output arguments"
},
{
"$ref": "#/definitions/logging arguments"
},
{
"$ref": "#/definitions/processing arguments"
},
{
"$ref": "#/definitions/deduplication arguments"
},
{
"$ref": "#/definitions/polyg tail trimming arguments"
},
{
"$ref": "#/definitions/polyx tail trimming arguments"
},
{
"$ref": "#/definitions/cut arguments"
},
{
"$ref": "#/definitions/quality filtering arguments"
},
{
"$ref": "#/definitions/length filtering arguments"
},
{
"$ref": "#/definitions/low complexity filtering arguments"
},
{
"$ref": "#/definitions/index filtering arguments"
},
{
"$ref": "#/definitions/overlapped region correction"
},
{
"$ref": "#/definitions/umi arguments"
},
{
"$ref": "#/definitions/overrepresentation analysis arguments"
},
{
"$ref": "#/definitions/nextflow input-output arguments"
}
]
}