Build pipeline: viash-hub.biobox.main-bjgnf
Source commit: 2b29a47575
Source message: Bedtools GroupBY (#123)
* Initial Commit
* Update config.vsh.yaml
* config file
* script.sh
* adding some tests
* more test
* Update CHANGELOG.md
* deleted test_data
* bug fix
* Update config.vsh.yaml
* adding more links
* exit on error
* $TMPDIR
* Update script.sh
* Update config.vsh.yaml
* Suggested change on column option
---------
Co-authored-by: Jakub Majercik <57993790+jakubmajercik@users.noreply.github.com>
2315 lines
108 KiB
JSON
2315 lines
108 KiB
JSON
{
|
|
"$schema": "http://json-schema.org/draft-07/schema",
|
|
"title": "star_align_reads",
|
|
"description": "Aligns reads to a reference genome using STAR.\n",
|
|
"type": "object",
|
|
"definitions": {
|
|
|
|
|
|
|
|
"inputs" : {
|
|
"title": "Inputs",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"input": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz`, multiple_sep: `\";\"`. The single-end or paired-end R1 FastQ files to be processed",
|
|
"help_text": "Type: List of `file`, required, example: `mysample_S1_L001_R1_001.fastq.gz`, multiple_sep: `\";\"`. The single-end or paired-end R1 FastQ files to be processed."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"input_r2": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `file`, example: `mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The paired-end R2 FastQ files to be processed",
|
|
"help_text": "Type: List of `file`, example: `mysample_S1_L001_R2_001.fastq.gz`, multiple_sep: `\";\"`. The paired-end R2 FastQ files to be processed. Only required if --input is a paired-end R1 file."
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"outputs" : {
|
|
"title": "Outputs",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"aligned_reads": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required, default: `$id.$key.aligned_reads.bam`, example: `aligned_reads.bam`. The output file containing the aligned reads",
|
|
"help_text": "Type: `file`, required, default: `$id.$key.aligned_reads.bam`, example: `aligned_reads.bam`. The output file containing the aligned reads."
|
|
,
|
|
"default": "$id.$key.aligned_reads.bam"
|
|
}
|
|
|
|
|
|
,
|
|
"reads_per_gene": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.reads_per_gene.tsv`, example: `reads_per_gene.tsv`. The output file containing the number of reads per gene",
|
|
"help_text": "Type: `file`, default: `$id.$key.reads_per_gene.tsv`, example: `reads_per_gene.tsv`. The output file containing the number of reads per gene."
|
|
,
|
|
"default": "$id.$key.reads_per_gene.tsv"
|
|
}
|
|
|
|
|
|
,
|
|
"unmapped": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.unmapped.fastq`, example: `unmapped.fastq`. The output file containing the unmapped reads",
|
|
"help_text": "Type: `file`, default: `$id.$key.unmapped.fastq`, example: `unmapped.fastq`. The output file containing the unmapped reads."
|
|
,
|
|
"default": "$id.$key.unmapped.fastq"
|
|
}
|
|
|
|
|
|
,
|
|
"unmapped_r2": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.unmapped_r2.fastq`, example: `unmapped_r2.fastq`. The output file containing the unmapped R2 reads",
|
|
"help_text": "Type: `file`, default: `$id.$key.unmapped_r2.fastq`, example: `unmapped_r2.fastq`. The output file containing the unmapped R2 reads."
|
|
,
|
|
"default": "$id.$key.unmapped_r2.fastq"
|
|
}
|
|
|
|
|
|
,
|
|
"chimeric_junctions": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.chimeric_junctions.tsv`, example: `chimeric_junctions.tsv`. The output file containing the chimeric junctions",
|
|
"help_text": "Type: `file`, default: `$id.$key.chimeric_junctions.tsv`, example: `chimeric_junctions.tsv`. The output file containing the chimeric junctions."
|
|
,
|
|
"default": "$id.$key.chimeric_junctions.tsv"
|
|
}
|
|
|
|
|
|
,
|
|
"log": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.log.txt`, example: `log.txt`. The output file containing the log of the alignment process",
|
|
"help_text": "Type: `file`, default: `$id.$key.log.txt`, example: `log.txt`. The output file containing the log of the alignment process."
|
|
,
|
|
"default": "$id.$key.log.txt"
|
|
}
|
|
|
|
|
|
,
|
|
"splice_junctions": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.splice_junctions.tsv`, example: `splice_junctions.tsv`. The output file containing the splice junctions",
|
|
"help_text": "Type: `file`, default: `$id.$key.splice_junctions.tsv`, example: `splice_junctions.tsv`. The output file containing the splice junctions."
|
|
,
|
|
"default": "$id.$key.splice_junctions.tsv"
|
|
}
|
|
|
|
|
|
,
|
|
"reads_aligned_to_transcriptome": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, default: `$id.$key.reads_aligned_to_transcriptome.bam`, example: `transcriptome_aligned.bam`. The output file containing the alignments to transcriptome in BAM format",
|
|
"help_text": "Type: `file`, default: `$id.$key.reads_aligned_to_transcriptome.bam`, example: `transcriptome_aligned.bam`. The output file containing the alignments to transcriptome in BAM format. This file is generated when --quantMode is set to TranscriptomeSAM."
|
|
,
|
|
"default": "$id.$key.reads_aligned_to_transcriptome.bam"
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"run parameters" : {
|
|
"title": "Run Parameters",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"run_rng_seed": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `777`. random number generator seed",
|
|
"help_text": "Type: `integer`, example: `777`. random number generator seed."
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"genome parameters" : {
|
|
"title": "Genome Parameters",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"genome_dir": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required, example: `./GenomeDir`. path to the directory where genome files are stored (for --runMode alignReads) or will be generated (for --runMode genomeGenerate)",
|
|
"help_text": "Type: `file`, required, example: `./GenomeDir`. path to the directory where genome files are stored (for --runMode alignReads) or will be generated (for --runMode genomeGenerate)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"genome_load": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `NoSharedMemory`. mode of shared memory usage for the genome files",
|
|
"help_text": "Type: `string`, example: `NoSharedMemory`. mode of shared memory usage for the genome files. Only used with --runMode alignReads.\n\n- LoadAndKeep ... load genome into shared and keep it in memory after run\n- LoadAndRemove ... load genome into shared but remove it after run\n- LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs\n- Remove ... do not map anything, just remove loaded genome from memory\n- NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"genome_fasta_files": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `file`, multiple_sep: `\";\"`. path(s) to the fasta files with the genome sequences, separated by spaces",
|
|
"help_text": "Type: List of `file`, multiple_sep: `\";\"`. path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped.\n\nRequired for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins)."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"genome_file_sizes": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. genome files exact sizes in bytes",
|
|
"help_text": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. genome files exact sizes in bytes. Typically, this should not be defined by the user."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"genome_transform_output": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. which output to transform back to original genome",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. which output to transform back to original genome\n\n- SAM ... SAM/BAM alignments\n- SJ ... splice junctions (SJ.out.tab)\n- Quant ... quantifications (from --quant_mode option)\n- None ... no transformation of the output"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"genome_chr_set_mitochondrial": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `chrM;M;MT`, multiple_sep: `\";\"`. names of the mitochondrial chromosomes",
|
|
"help_text": "Type: List of `string`, example: `chrM;M;MT`, multiple_sep: `\";\"`. names of the mitochondrial chromosomes. Presently only used for STARsolo statistics output."
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"splice junctions database" : {
|
|
"title": "Splice Junctions Database",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"sjdb_file_chr_start_end": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. path to the files with genomic coordinates (chr \u003ctab\u003e start \u003ctab\u003e end \u003ctab\u003e strand) for the splice junction introns",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. path to the files with genomic coordinates (chr \u003ctab\u003e start \u003ctab\u003e end \u003ctab\u003e strand) for the splice junction introns. Multiple files can be supplied and will be concatenated."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"sjdb_gtf_file": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. path to the GTF file with annotations",
|
|
"help_text": "Type: `file`. path to the GTF file with annotations"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"sjdb_gtf_chr_prefix": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. prefix for chromosome names in a GTF file (e.g. \u0027chr\u0027 for using ENSEMBL annotations with UCSC genomes)",
|
|
"help_text": "Type: `string`. prefix for chromosome names in a GTF file (e.g. \u0027chr\u0027 for using ENSEMBL annotations with UCSC genomes)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"sjdb_gtf_feature_exon": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `exon`. feature type in GTF file to be used as exons for building transcripts",
|
|
"help_text": "Type: `string`, example: `exon`. feature type in GTF file to be used as exons for building transcripts"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"sjdb_gtf_tag_exon_parent_transcript": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `transcript_id`. GTF attribute name for parent transcript ID (default \"transcript_id\" works for GTF files)",
|
|
"help_text": "Type: `string`, example: `transcript_id`. GTF attribute name for parent transcript ID (default \"transcript_id\" works for GTF files)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"sjdb_gtf_tag_exon_parent_gene": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `gene_id`. GTF attribute name for parent gene ID (default \"gene_id\" works for GTF files)",
|
|
"help_text": "Type: `string`, example: `gene_id`. GTF attribute name for parent gene ID (default \"gene_id\" works for GTF files)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"sjdb_gtf_tag_exon_parent_gene_name": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `gene_name`, multiple_sep: `\";\"`. GTF attribute name for parent gene name",
|
|
"help_text": "Type: List of `string`, example: `gene_name`, multiple_sep: `\";\"`. GTF attribute name for parent gene name"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"sjdb_gtf_tag_exon_parent_gene_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `gene_type;gene_biotype`, multiple_sep: `\";\"`. GTF attribute name for parent gene type",
|
|
"help_text": "Type: List of `string`, example: `gene_type;gene_biotype`, multiple_sep: `\";\"`. GTF attribute name for parent gene type"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"sjdb_overhang": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `100`. length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)",
|
|
"help_text": "Type: `integer`, example: `100`. length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"sjdb_score": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `2`. extra alignment score for alignments that cross database junctions",
|
|
"help_text": "Type: `integer`, example: `2`. extra alignment score for alignments that cross database junctions"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"sjdb_insert_save": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Basic`. which files to save when sjdb junctions are inserted on the fly at the mapping step",
|
|
"help_text": "Type: `string`, example: `Basic`. which files to save when sjdb junctions are inserted on the fly at the mapping step\n\n- Basic ... only small junction / transcript files\n- All ... all files including big Genome, SA and SAindex - this will create a complete genome directory"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"variation parameters" : {
|
|
"title": "Variation parameters",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"var_vcf_file": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. path to the VCF file that contains variation data",
|
|
"help_text": "Type: `string`. path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 0/1"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"read parameters" : {
|
|
"title": "Read Parameters",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"read_files_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Fastx`. format of input read files",
|
|
"help_text": "Type: `string`, example: `Fastx`. format of input read files\n\n- Fastx ... FASTA or FASTQ\n- SAM SE ... SAM or BAM single-end reads; for BAM use --read_files_command samtools view\n- SAM PE ... SAM or BAM paired-end reads; for BAM use --read_files_command samtools view"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"read_files_sam_attr_keep": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `All`, multiple_sep: `\";\"`. for --read_files_type SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL",
|
|
"help_text": "Type: List of `string`, example: `All`, multiple_sep: `\";\"`. for --read_files_type SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL\n\n- All ... keep all tags\n- None ... do not keep any tags"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"read_files_manifest": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`. path to the \"manifest\" file with the names of read files",
|
|
"help_text": "Type: `file`. path to the \"manifest\" file with the names of read files. The manifest file should contain 3 tab-separated columns:\n\npaired-end reads: read1_file_name $tab$ read2_file_name $tab$ read_group_line.\nsingle-end reads: read1_file_name $tab$ - $tab$ read_group_line.\nSpaces, but not tabs are allowed in file names.\nIf read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it.\nIf read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be copied verbatim into SAM @RG header line."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"read_files_prefix": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn",
|
|
"help_text": "Type: `string`. prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"read_files_command": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. command line to execute for each of the input files",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. command line to execute for each of the input files. This command should generate FASTA or FASTQ text and send it to stdout\n\nFor example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"read_map_number": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-1`. number of reads to map from the beginning of the file\n\n-1: map all reads",
|
|
"help_text": "Type: `integer`, example: `-1`. number of reads to map from the beginning of the file\n\n-1: map all reads"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"read_mates_lengths_in": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `NotEqual`. Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same",
|
|
"help_text": "Type: `string`, example: `NotEqual`. Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"read_name_separator": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `/`, multiple_sep: `\";\"`. character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)",
|
|
"help_text": "Type: List of `string`, example: `/`, multiple_sep: `\";\"`. character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"read_quality_score_base": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `33`. number to be subtracted from the ASCII code to get Phred quality score",
|
|
"help_text": "Type: `integer`, example: `33`. number to be subtracted from the ASCII code to get Phred quality score"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"read clipping" : {
|
|
"title": "Read Clipping",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"clip_adapter_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Hamming`. adapter clipping type",
|
|
"help_text": "Type: `string`, example: `Hamming`. adapter clipping type\n\n- Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp\n- CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal\n- None ... no adapter clipping, all other clip* parameters are disregarded"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"clip3p_nbases": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number(s) of bases to clip from 3p of each mate",
|
|
"help_text": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"clip3p_adapter_seq": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. adapter sequences to clip from 3p of each mate",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.\n\n- polyA ... polyA sequence with the length equal to read length"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"clip3p_adapter_mm_p": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `double`, example: `0.1`, multiple_sep: `\";\"`. max proportion of mismatches for 3p adapter clipping for each mate",
|
|
"help_text": "Type: List of `double`, example: `0.1`, multiple_sep: `\";\"`. max proportion of mismatches for 3p adapter clipping for each mate. If one value is given, it will be assumed the same for both mates."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"clip3p_after_adapter_nbases": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number of bases to clip from 3p of each mate after the adapter clipping",
|
|
"help_text": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"clip5p_nbases": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number(s) of bases to clip from 5p of each mate",
|
|
"help_text": "Type: List of `integer`, example: `0`, multiple_sep: `\";\"`. number(s) of bases to clip from 5p of each mate. If one value is given, it will be assumed the same for both mates."
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"limits" : {
|
|
"title": "Limits",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"limit_genome_generate_ram": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `long`, example: `31000000000`. maximum available RAM (bytes) for genome generation",
|
|
"help_text": "Type: `long`, example: `31000000000`. maximum available RAM (bytes) for genome generation"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"limit_io_buffer_size": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `long`, example: `30000000;50000000`, multiple_sep: `\";\"`. max available buffers size (bytes) for input/output, per thread",
|
|
"help_text": "Type: List of `long`, example: `30000000;50000000`, multiple_sep: `\";\"`. max available buffers size (bytes) for input/output, per thread"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"limit_out_sam_one_read_bytes": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `long`, example: `100000`. max size of the SAM record (bytes) for one read",
|
|
"help_text": "Type: `long`, example: `100000`. max size of the SAM record (bytes) for one read. Recommended value: \u003e(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"limit_out_sj_one_read": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1000`. max number of junctions for one read (including all multi-mappers)",
|
|
"help_text": "Type: `integer`, example: `1000`. max number of junctions for one read (including all multi-mappers)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"limit_out_sj_collapsed": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1000000`. max number of collapsed junctions",
|
|
"help_text": "Type: `integer`, example: `1000000`. max number of collapsed junctions"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"limit_bam_sort_ram": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `long`, example: `0`. maximum available RAM (bytes) for sorting BAM",
|
|
"help_text": "Type: `long`, example: `0`. maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size. 0 value can only be used with --genome_load NoSharedMemory option."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"limit_sjdb_insert_nsj": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1000000`. maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run",
|
|
"help_text": "Type: `integer`, example: `1000000`. maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"limit_nreads_soft": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-1`. soft limit on the number of reads",
|
|
"help_text": "Type: `integer`, example: `-1`. soft limit on the number of reads"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"output: general" : {
|
|
"title": "Output: general",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"out_tmp_keep": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. whether to keep the temporary files after the STAR run is finished",
|
|
"help_text": "Type: `string`. whether to keep the temporary files after the STAR run is finished\n\n- None ... remove all temporary files\n- All ... keep all files"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_std": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Log`. which output will be directed to stdout (standard out)",
|
|
"help_text": "Type: `string`, example: `Log`. which output will be directed to stdout (standard out)\n\n- Log ... log messages\n- SAM ... alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out\n- BAM_Unsorted ... alignments in BAM format, unsorted. Requires --out_sam_type BAM Unsorted\n- BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --out_sam_type BAM SortedByCoordinate\n- BAM_Quant ... alignments to transcriptome in BAM format, unsorted. Requires --quant_mode TranscriptomeSAM"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_reads_unmapped": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s)",
|
|
"help_text": "Type: `string`. output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s).\n\n- None ... no output\n- Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_qs_conversion_add": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)",
|
|
"help_text": "Type: `integer`, example: `0`. add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_multimapper_order": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Old_2.4`. order of multimapping alignments in the output files",
|
|
"help_text": "Type: `string`, example: `Old_2.4`. order of multimapping alignments in the output files\n\n- Old_2.4 ... quasi-random order used before 2.5.0\n- Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. This option will become default in the future releases."
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"output: sam and bam" : {
|
|
"title": "Output: SAM and BAM",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"out_sam_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `SAM`, multiple_sep: `\";\"`. type of SAM/BAM output",
|
|
"help_text": "Type: List of `string`, example: `SAM`, multiple_sep: `\";\"`. type of SAM/BAM output\n\n1st word:\n- BAM ... output BAM without sorting\n- SAM ... output SAM without sorting\n- None ... no SAM/BAM output\n2nd, 3rd:\n- Unsorted ... standard unsorted\n- SortedByCoordinate ... sorted by coordinate. This option will allocate extra memory for sorting which can be specified by --limit_bam_sort_ram."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_mode": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Full`. mode of SAM output",
|
|
"help_text": "Type: `string`, example: `Full`. mode of SAM output\n\n- None ... no SAM output\n- Full ... full SAM output\n- NoQS ... full SAM but without quality scores"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_strand_field": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. Cufflinks-like strand field flag",
|
|
"help_text": "Type: `string`. Cufflinks-like strand field flag\n\n- None ... not used\n- intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_attributes": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `Standard`, multiple_sep: `\";\"`. a string of desired SAM attributes, in the order desired for the output SAM",
|
|
"help_text": "Type: List of `string`, example: `Standard`, multiple_sep: `\";\"`. a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order.\n\n***Presets:\n- None ... no attributes\n- Standard ... NH HI AS nM\n- All ... NH HI AS nM NM MD jM jI MC ch\n***Alignment:\n- NH ... number of loci the read maps to: =1 for unique mappers, \u003e1 for multimappers. Standard SAM tag.\n- HI ... multiple alignment index, starts with --out_sam_attr_ih_start (=1 by default). Standard SAM tag.\n- AS ... local alignment score, +1/-1 for matches/mismatches, score* penalties for indels and gaps. For PE reads, total score for two mates. Standard SAM tag.\n- nM ... number of mismatches. For PE reads, sum over two mates.\n- NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag.\n- MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). Standard SAM tag.\n- jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value.\n- jI ... start and end of introns for all junctions (1-based).\n- XS ... alignment strand according to --out_sam_strand_field.\n- MC ... mate\u0027s CIGAR string. Standard SAM tag.\n- ch ... marks all segments of all chimeric alignments for --chim_out_type WithinBAM output.\n- cN ... number of bases clipped from the read ends: 5\u0027 and 3\u0027\n***Variation:\n- vA ... variant allele\n- vG ... genomic coordinate of the variant overlapped by the read.\n- vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --wasp_output_mode SAMtag.\n- ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid .\n***STARsolo:\n- CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing.\n- GX GN ... gene ID and gene name for unique-gene reads.\n- gx gn ... gene IDs and gene names for unique- and multi-gene reads.\n- CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --out_sam_type BAM SortedByCoordinate.\n- sM ... assessment of CB and UMI.\n- sS ... sequence of the entire barcode (CB,UMI,adapter).\n- sQ ... quality of the entire barcode.\n- sF ... type of feature overlap and number of features for each alignment\n***Unsupported/undocumented:\n- rB ... alignment block read/genomic coordinates.\n- vR ... read coordinate of the variant."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_attr_ih_start": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1`. start value for the IH attribute",
|
|
"help_text": "Type: `integer`, example: `1`. start value for the IH attribute. 0 may be required by some downstream software, such as Cufflinks or StringTie."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_unmapped": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. output of unmapped reads in the SAM format",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. output of unmapped reads in the SAM format\n\n1st word:\n- None ... no output\n- Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam)\n2nd word:\n- KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_order": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Paired`. type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files",
|
|
"help_text": "Type: `string`, example: `Paired`. type of sorting for the SAM output\n\nPaired: one mate after the other for all paired alignments\nPairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_primary_flag": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `OneBestScore`. which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ",
|
|
"help_text": "Type: `string`, example: `OneBestScore`. which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG\n\n- OneBestScore ... only one alignment with the best score is primary\n- AllBestScore ... all alignments with the best score are primary"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_read_id": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Standard`. read ID record type\n\n- Standard ",
|
|
"help_text": "Type: `string`, example: `Standard`. read ID record type\n\n- Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end\n- Number ... read number (index) in the FASTx file"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_mapq_unique": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `255`. 0 to 255: the MAPQ value for unique mappers",
|
|
"help_text": "Type: `integer`, example: `255`. 0 to 255: the MAPQ value for unique mappers"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_flag_or": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. 0 to 65535: sam FLAG will be bitwise OR\u0027d with this value, i",
|
|
"help_text": "Type: `integer`, example: `0`. 0 to 65535: sam FLAG will be bitwise OR\u0027d with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_flag_and": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `65535`. 0 to 65535: sam FLAG will be bitwise AND\u0027d with this value, i",
|
|
"help_text": "Type: `integer`, example: `65535`. 0 to 65535: sam FLAG will be bitwise AND\u0027d with this value, i.e. FLAG=FLAG \u0026 outSAMflagOR. This is applied after all flags have been set by STAR, but before outSAMflagOR. Can be used to unset specific bits that are not set otherwise."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_attr_rg_line": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. SAM/BAM read group line",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. SAM/BAM read group line. The first word contains the read group identifier and must start with \"ID:\", e.g. --out_sam_attr_rg_line ID:xxx CN:yy \"DS:z z z\".\n\nxxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.\nComma separated RG lines correspons to different (comma separated) input files in --readFilesIn. Commas have to be surrounded by spaces, e.g.\n--out_sam_attr_rg_line ID:xxx , ID:zzz \"DS:z z\" , ID:yyy DS:yyyy"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_header_hd": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. @HD (header) line of the SAM header",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. @HD (header) line of the SAM header"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_header_pg": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. extra @PG (software) line of the SAM header (in addition to STAR)",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. extra @PG (software) line of the SAM header (in addition to STAR)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_header_comment_file": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. path to the file with @CO (comment) lines of the SAM header",
|
|
"help_text": "Type: `string`. path to the file with @CO (comment) lines of the SAM header"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_filter": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. filter the output into main SAM/BAM files\n\n- KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genome_fasta_files at the mapping stage.\n- KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genome_fasta_files at the mapping stage."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_mult_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-1`. max number of multiple alignments for a read that will be output to the SAM/BAM files",
|
|
"help_text": "Type: `integer`, example: `-1`. max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first\n\n- -1 ... all alignments (up to --out_filter_multimap_nmax) will be output"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sam_tlen": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1`. calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ",
|
|
"help_text": "Type: `integer`, example: `1`. calculation method for the TLEN field in the SAM/BAM files\n\n- 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate\n- 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_bam_compression": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1`. -1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression",
|
|
"help_text": "Type: `integer`, example: `1`. -1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_bam_sorting_thread_n": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. \u003e=0: number of threads for BAM sorting",
|
|
"help_text": "Type: `integer`, example: `0`. \u003e=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN)."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_bam_sorting_bins_n": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `50`. \u003e0: number of genome bins for coordinate-sorting",
|
|
"help_text": "Type: `integer`, example: `50`. \u003e0: number of genome bins for coordinate-sorting"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"bam processing" : {
|
|
"title": "BAM processing",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"bam_remove_duplicates_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ",
|
|
"help_text": "Type: `string`. mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only\n\n- - ... no duplicate removal/marking\n- UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical\n- UniqueIdenticalNotMulti ... mark duplicate unique mappers but not multimappers."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"bam_remove_duplicates_mate2bases_n": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. number of bases from the 5\u0027 of mate 2 to use in collapsing (e",
|
|
"help_text": "Type: `integer`, example: `0`. number of bases from the 5\u0027 of mate 2 to use in collapsing (e.g. for RAMPAGE)"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"output wiggle" : {
|
|
"title": "Output Wiggle",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"out_wig_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. type of signal output, e",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. type of signal output, e.g. \"bedGraph\" OR \"bedGraph read1_5p\". Requires sorted BAM: --out_sam_type BAM SortedByCoordinate .\n\n1st word:\n- None ... no signal output\n- bedGraph ... bedGraph format\n- wiggle ... wiggle format\n2nd word:\n- read1_5p ... signal from only 5\u0027 of the 1st read, useful for CAGE/RAMPAGE etc\n- read2 ... signal from only 2nd read"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_wig_strand": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Stranded`. strandedness of wiggle/bedGraph output\n\n- Stranded ",
|
|
"help_text": "Type: `string`, example: `Stranded`. strandedness of wiggle/bedGraph output\n\n- Stranded ... separate strands, str1 and str2\n- Unstranded ... collapsed strands"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_wig_references_prefix": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. prefix matching reference names to include in the output wiggle file, e",
|
|
"help_text": "Type: `string`. prefix matching reference names to include in the output wiggle file, e.g. \"chr\", default \"-\" - include all references"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_wig_norm": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `RPM`. type of normalization for the signal\n\n- RPM ",
|
|
"help_text": "Type: `string`, example: `RPM`. type of normalization for the signal\n\n- RPM ... reads per million of mapped reads\n- None ... no normalization, \"raw\" counts"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"output filtering" : {
|
|
"title": "Output Filtering",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"out_filter_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Normal`. type of filtering\n\n- Normal ",
|
|
"help_text": "Type: `string`, example: `Normal`. type of filtering\n\n- Normal ... standard filtering using only current alignment\n- BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_filter_multimap_score_range": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1`. the score range below the maximum score for multimapping alignments",
|
|
"help_text": "Type: `integer`, example: `1`. the score range below the maximum score for multimapping alignments"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_filter_multimap_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `10`. maximum number of loci the read is allowed to map to",
|
|
"help_text": "Type: `integer`, example: `10`. maximum number of loci the read is allowed to map to. Alignments (all of them) will be output only if the read maps to no more loci than this value.\n\nOtherwise no alignments will be output, and the read will be counted as \"mapped to too many loci\" in the Log.final.out ."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_filter_mismatch_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `10`. alignment will be output only if it has no more mismatches than this value",
|
|
"help_text": "Type: `integer`, example: `10`. alignment will be output only if it has no more mismatches than this value."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_filter_mismatch_nover_lmax": {
|
|
"type":
|
|
"number",
|
|
"description": "Type: `double`, example: `0.3`. alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value",
|
|
"help_text": "Type: `double`, example: `0.3`. alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_filter_mismatch_nover_read_lmax": {
|
|
"type":
|
|
"number",
|
|
"description": "Type: `double`, example: `1.0`. alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value",
|
|
"help_text": "Type: `double`, example: `1.0`. alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_filter_score_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. alignment will be output only if its score is higher than or equal to this value",
|
|
"help_text": "Type: `integer`, example: `0`. alignment will be output only if its score is higher than or equal to this value."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_filter_score_min_over_lread": {
|
|
"type":
|
|
"number",
|
|
"description": "Type: `double`, example: `0.66`. same as outFilterScoreMin, but normalized to read length (sum of mates\u0027 lengths for paired-end reads)",
|
|
"help_text": "Type: `double`, example: `0.66`. same as outFilterScoreMin, but normalized to read length (sum of mates\u0027 lengths for paired-end reads)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_filter_match_nmin": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. alignment will be output only if the number of matched bases is higher than or equal to this value",
|
|
"help_text": "Type: `integer`, example: `0`. alignment will be output only if the number of matched bases is higher than or equal to this value."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_filter_match_nmin_over_lread": {
|
|
"type":
|
|
"number",
|
|
"description": "Type: `double`, example: `0.66`. sam as outFilterMatchNmin, but normalized to the read length (sum of mates\u0027 lengths for paired-end reads)",
|
|
"help_text": "Type: `double`, example: `0.66`. sam as outFilterMatchNmin, but normalized to the read length (sum of mates\u0027 lengths for paired-end reads)."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_filter_intron_motifs": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. filter alignment using their motifs\n\n- None ",
|
|
"help_text": "Type: `string`. filter alignment using their motifs\n\n- None ... no filtering\n- RemoveNoncanonical ... filter out alignments that contain non-canonical junctions\n- RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_filter_intron_strands": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `RemoveInconsistentStrands`. filter alignments\n\n- RemoveInconsistentStrands ",
|
|
"help_text": "Type: `string`, example: `RemoveInconsistentStrands`. filter alignments\n\n- RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands\n- None ... no filtering"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"output splice junctions (sj.out.tab)" : {
|
|
"title": "Output splice junctions (SJ.out.tab)",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"out_sj_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Standard`. type of splice junction output\n\n- Standard ",
|
|
"help_text": "Type: `string`, example: `Standard`. type of splice junction output\n\n- Standard ... standard SJ.out.tab output\n- None ... no splice junction output"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"output filtering: splice junctions" : {
|
|
"title": "Output Filtering: Splice Junctions",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"out_sj_filter_reads": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `All`. which reads to consider for collapsed splice junctions output\n\n- All ",
|
|
"help_text": "Type: `string`, example: `All`. which reads to consider for collapsed splice junctions output\n\n- All ... all reads, unique- and multi-mappers\n- Unique ... uniquely mapping reads only"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sj_filter_overhang_min": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `integer`, example: `30;12;12;12`, multiple_sep: `\";\"`. minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif",
|
|
"help_text": "Type: List of `integer`, example: `30;12;12;12`, multiple_sep: `\";\"`. minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\ndoes not apply to annotated junctions"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sj_filter_count_unique_min": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `integer`, example: `3;1;1;1`, multiple_sep: `\";\"`. minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif",
|
|
"help_text": "Type: List of `integer`, example: `3;1;1;1`, multiple_sep: `\";\"`. minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sj_filter_count_total_min": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `integer`, example: `3;1;1;1`, multiple_sep: `\";\"`. minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif",
|
|
"help_text": "Type: List of `integer`, example: `3;1;1;1`, multiple_sep: `\";\"`. minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif\n\nJunctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied\ndoes not apply to annotated junctions"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sj_filter_dist_to_other_sj_min": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `integer`, example: `10;0;5;10`, multiple_sep: `\";\"`. minimum allowed distance to other junctions\u0027 donor/acceptor\n\ndoes not apply to annotated junctions",
|
|
"help_text": "Type: List of `integer`, example: `10;0;5;10`, multiple_sep: `\";\"`. minimum allowed distance to other junctions\u0027 donor/acceptor\n\ndoes not apply to annotated junctions"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"out_sj_filter_intron_max_vs_read_n": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `integer`, example: `50000;100000;200000`, multiple_sep: `\";\"`. maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni",
|
|
"help_text": "Type: List of `integer`, example: `50000;100000;200000`, multiple_sep: `\";\"`. maximum gap allowed for junctions supported by 1,2,3,,,N reads\n\ni.e. by default junctions supported by 1 read can have gaps \u003c=50000b, by 2 reads: \u003c=100000b, by 3 reads: \u003c=200000. by \u003e=4 reads any gap \u003c=alignIntronMax\ndoes not apply to annotated junctions"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"scoring" : {
|
|
"title": "Scoring",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"score_gap": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. splice junction penalty (independent on intron motif)",
|
|
"help_text": "Type: `integer`, example: `0`. splice junction penalty (independent on intron motif)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"score_gap_noncan": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-8`. non-canonical junction penalty (in addition to scoreGap)",
|
|
"help_text": "Type: `integer`, example: `-8`. non-canonical junction penalty (in addition to scoreGap)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"score_gap_gcag": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-4`. GC/AG and CT/GC junction penalty (in addition to scoreGap)",
|
|
"help_text": "Type: `integer`, example: `-4`. GC/AG and CT/GC junction penalty (in addition to scoreGap)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"score_gap_atac": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-8`. AT/AC and GT/AT junction penalty (in addition to scoreGap)",
|
|
"help_text": "Type: `integer`, example: `-8`. AT/AC and GT/AT junction penalty (in addition to scoreGap)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"score_genomic_length_log2scale": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)",
|
|
"help_text": "Type: `integer`, example: `0`. extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"score_del_open": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-2`. deletion open penalty",
|
|
"help_text": "Type: `integer`, example: `-2`. deletion open penalty"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"score_del_base": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-2`. deletion extension penalty per base (in addition to scoreDelOpen)",
|
|
"help_text": "Type: `integer`, example: `-2`. deletion extension penalty per base (in addition to scoreDelOpen)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"score_ins_open": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-2`. insertion open penalty",
|
|
"help_text": "Type: `integer`, example: `-2`. insertion open penalty"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"score_ins_base": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-2`. insertion extension penalty per base (in addition to scoreInsOpen)",
|
|
"help_text": "Type: `integer`, example: `-2`. insertion extension penalty per base (in addition to scoreInsOpen)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"score_stitch_sj_shift": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1`. maximum score reduction while searching for SJ boundaries in the stitching step",
|
|
"help_text": "Type: `integer`, example: `1`. maximum score reduction while searching for SJ boundaries in the stitching step"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"alignments and seeding" : {
|
|
"title": "Alignments and Seeding",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"seed_search_start_lmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `50`. defines the search start point through the read - the read is split into pieces no longer than this value",
|
|
"help_text": "Type: `integer`, example: `50`. defines the search start point through the read - the read is split into pieces no longer than this value"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"seed_search_start_lmax_over_lread": {
|
|
"type":
|
|
"number",
|
|
"description": "Type: `double`, example: `1.0`. seedSearchStartLmax normalized to read length (sum of mates\u0027 lengths for paired-end reads)",
|
|
"help_text": "Type: `double`, example: `1.0`. seedSearchStartLmax normalized to read length (sum of mates\u0027 lengths for paired-end reads)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"seed_search_lmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. defines the maximum length of the seeds, if =0 seed length is not limited",
|
|
"help_text": "Type: `integer`, example: `0`. defines the maximum length of the seeds, if =0 seed length is not limited"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"seed_multimap_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `10000`. only pieces that map fewer than this value are utilized in the stitching procedure",
|
|
"help_text": "Type: `integer`, example: `10000`. only pieces that map fewer than this value are utilized in the stitching procedure"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"seed_per_read_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1000`. max number of seeds per read",
|
|
"help_text": "Type: `integer`, example: `1000`. max number of seeds per read"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"seed_per_window_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `50`. max number of seeds per window",
|
|
"help_text": "Type: `integer`, example: `50`. max number of seeds per window"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"seed_none_loci_per_window": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `10`. max number of one seed loci per window",
|
|
"help_text": "Type: `integer`, example: `10`. max number of one seed loci per window"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"seed_split_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `12`. min length of the seed sequences split by Ns or mate gap",
|
|
"help_text": "Type: `integer`, example: `12`. min length of the seed sequences split by Ns or mate gap"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"seed_map_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `5`. min length of seeds to be mapped",
|
|
"help_text": "Type: `integer`, example: `5`. min length of seeds to be mapped"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_intron_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `21`. minimum intron size, genomic gap is considered intron if its length\u003e=alignIntronMin, otherwise it is considered Deletion",
|
|
"help_text": "Type: `integer`, example: `21`. minimum intron size, genomic gap is considered intron if its length\u003e=alignIntronMin, otherwise it is considered Deletion"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_intron_max": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins",
|
|
"help_text": "Type: `integer`, example: `0`. maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_mates_gap_max": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins",
|
|
"help_text": "Type: `integer`, example: `0`. maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_sj_overhang_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `5`. minimum overhang (i",
|
|
"help_text": "Type: `integer`, example: `5`. minimum overhang (i.e. block size) for spliced alignments"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_sj_stitch_mismatch_nmax": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `integer`, example: `0;-1;0;0`, multiple_sep: `\";\"`. maximum number of mismatches for stitching of the splice junctions (-1: no limit)",
|
|
"help_text": "Type: List of `integer`, example: `0;-1;0;0`, multiple_sep: `\";\"`. maximum number of mismatches for stitching of the splice junctions (-1: no limit).\n\n(1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_sjdb_overhang_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `3`. minimum overhang (i",
|
|
"help_text": "Type: `integer`, example: `3`. minimum overhang (i.e. block size) for annotated (sjdb) spliced alignments"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_spliced_mate_map_lmin": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. minimum mapped length for a read mate that is spliced",
|
|
"help_text": "Type: `integer`, example: `0`. minimum mapped length for a read mate that is spliced"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_spliced_mate_map_lmin_over_lmate": {
|
|
"type":
|
|
"number",
|
|
"description": "Type: `double`, example: `0.66`. alignSplicedMateMapLmin normalized to mate length",
|
|
"help_text": "Type: `double`, example: `0.66`. alignSplicedMateMapLmin normalized to mate length"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_windows_per_read_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `10000`. max number of windows per read",
|
|
"help_text": "Type: `integer`, example: `10000`. max number of windows per read"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_transcripts_per_window_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `100`. max number of transcripts per window",
|
|
"help_text": "Type: `integer`, example: `100`. max number of transcripts per window"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_transcripts_per_read_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `10000`. max number of different alignments per read to consider",
|
|
"help_text": "Type: `integer`, example: `10000`. max number of different alignments per read to consider"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_ends_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Local`. type of read ends alignment\n\n- Local ",
|
|
"help_text": "Type: `string`, example: `Local`. type of read ends alignment\n\n- Local ... standard local alignment with soft-clipping allowed\n- EndToEnd ... force end-to-end read alignment, do not soft-clip\n- Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment\n- Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_ends_protrude": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `0 ConcordantPair`. allow protrusion of alignment ends, i",
|
|
"help_text": "Type: `string`, example: `0 ConcordantPair`. allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate\n\n1st word: int: maximum number of protrusion bases allowed\n2nd word: string:\n- ConcordantPair ... report alignments with non-zero protrusion as concordant pairs\n- DiscordantPair ... report alignments with non-zero protrusion as discordant pairs"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_soft_clip_at_reference_ends": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Yes`. allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ",
|
|
"help_text": "Type: `string`, example: `Yes`. allow the soft-clipping of the alignments past the end of the chromosomes\n\n- Yes ... allow\n- No ... prohibit, useful for compatibility with Cufflinks"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"align_insertion_flush": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. how to flush ambiguous insertion positions\n\n- None ",
|
|
"help_text": "Type: `string`. how to flush ambiguous insertion positions\n\n- None ... insertions are not flushed\n- Right ... insertions are flushed to the right"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"paired-end reads" : {
|
|
"title": "Paired-End reads",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"pe_overlap_nbases_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. minimum number of overlapping bases to trigger mates merging and realignment",
|
|
"help_text": "Type: `integer`, example: `0`. minimum number of overlapping bases to trigger mates merging and realignment. Specify \u003e0 value to switch on the \"merginf of overlapping mates\" algorithm."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"pe_overlap_mm_p": {
|
|
"type":
|
|
"number",
|
|
"description": "Type: `double`, example: `0.01`. maximum proportion of mismatched bases in the overlap area",
|
|
"help_text": "Type: `double`, example: `0.01`. maximum proportion of mismatched bases in the overlap area"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"windows, anchors, binning" : {
|
|
"title": "Windows, Anchors, Binning",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"win_anchor_multimap_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `50`. max number of loci anchors are allowed to map to",
|
|
"help_text": "Type: `integer`, example: `50`. max number of loci anchors are allowed to map to"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"win_bin_nbits": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `16`. =log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins",
|
|
"help_text": "Type: `integer`, example: `16`. =log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"win_anchor_dist_nbins": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `9`. max number of bins between two anchors that allows aggregation of anchors into one window",
|
|
"help_text": "Type: `integer`, example: `9`. max number of bins between two anchors that allows aggregation of anchors into one window"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"win_flank_nbins": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `4`. log2(winFlank), where win Flank is the size of the left and right flanking regions for each window",
|
|
"help_text": "Type: `integer`, example: `4`. log2(winFlank), where win Flank is the size of the left and right flanking regions for each window"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"win_read_coverage_relative_min": {
|
|
"type":
|
|
"number",
|
|
"description": "Type: `double`, example: `0.5`. minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only",
|
|
"help_text": "Type: `double`, example: `0.5`. minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"win_read_coverage_bases_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. minimum number of bases covered by the seeds in a window , for STARlong algorithm only",
|
|
"help_text": "Type: `integer`, example: `0`. minimum number of bases covered by the seeds in a window , for STARlong algorithm only."
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"chimeric alignments" : {
|
|
"title": "Chimeric Alignments",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"chim_out_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `Junctions`, multiple_sep: `\";\"`. type of chimeric output\n\n- Junctions ",
|
|
"help_text": "Type: List of `string`, example: `Junctions`, multiple_sep: `\";\"`. type of chimeric output\n\n- Junctions ... Chimeric.out.junction\n- SeparateSAMold ... output old SAM into separate Chimeric.out.sam file\n- WithinBAM ... output into main aligned BAM files (Aligned.*.bam)\n- WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present)\n- WithinBAM SoftClip ... soft-clipping in the CIGAR for supplemental chimeric alignments"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_segment_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. minimum length of chimeric segment length, if ==0, no chimeric output",
|
|
"help_text": "Type: `integer`, example: `0`. minimum length of chimeric segment length, if ==0, no chimeric output"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_score_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. minimum total (summed) score of the chimeric segments",
|
|
"help_text": "Type: `integer`, example: `0`. minimum total (summed) score of the chimeric segments"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_score_drop_max": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `20`. max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length",
|
|
"help_text": "Type: `integer`, example: `20`. max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_score_separation": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `10`. minimum difference (separation) between the best chimeric score and the next one",
|
|
"help_text": "Type: `integer`, example: `10`. minimum difference (separation) between the best chimeric score and the next one"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_score_junction_non_gtag": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-1`. penalty for a non-GT/AG chimeric junction",
|
|
"help_text": "Type: `integer`, example: `-1`. penalty for a non-GT/AG chimeric junction"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_junction_overhang_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `20`. minimum overhang for a chimeric junction",
|
|
"help_text": "Type: `integer`, example: `20`. minimum overhang for a chimeric junction"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_segment_read_gap_max": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. maximum gap in the read sequence between chimeric segments",
|
|
"help_text": "Type: `integer`, example: `0`. maximum gap in the read sequence between chimeric segments"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_filter": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `banGenomicN`, multiple_sep: `\";\"`. different filters for chimeric alignments\n\n- None ",
|
|
"help_text": "Type: List of `string`, example: `banGenomicN`, multiple_sep: `\";\"`. different filters for chimeric alignments\n\n- None ... no filtering\n- banGenomicN ... Ns are not allowed in the genome sequence around the chimeric junction"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_main_segment_mult_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `10`. maximum number of multi-alignments for the main chimeric segment",
|
|
"help_text": "Type: `integer`, example: `10`. maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_multimap_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. maximum number of chimeric multi-alignments\n\n- 0 ",
|
|
"help_text": "Type: `integer`, example: `0`. maximum number of chimeric multi-alignments\n\n- 0 ... use the old scheme for chimeric detection which only considered unique alignments"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_multimap_score_range": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1`. the score range for multi-mapping chimeras below the best chimeric score",
|
|
"help_text": "Type: `integer`, example: `1`. the score range for multi-mapping chimeras below the best chimeric score. Only works with --chim_multimap_nmax \u003e 1"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_nonchim_score_drop_min": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `20`. to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value",
|
|
"help_text": "Type: `integer`, example: `20`. to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"chim_out_junction_format": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. formatting type for the Chimeric",
|
|
"help_text": "Type: `integer`, example: `0`. formatting type for the Chimeric.out.junction file\n\n- 0 ... no comment lines/headers\n- 1 ... comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"quantification of annotations" : {
|
|
"title": "Quantification of Annotations",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"quant_mode": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. types of quantification requested\n\n- - ",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. types of quantification requested\n\n- - ... none\n- TranscriptomeSAM ... output SAM/BAM alignments to transcriptome into a separate file\n- GeneCounts ... count reads per gene"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"quant_transcriptome_bam_compression": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1`. -2 to 10 transcriptome BAM compression level\n\n- -2 ",
|
|
"help_text": "Type: `integer`, example: `1`. -2 to 10 transcriptome BAM compression level\n\n- -2 ... no BAM output\n- -1 ... default compression (6?)\n- 0 ... no compression\n- 10 ... maximum compression"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"quant_transcriptome_sam_output": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `BanSingleEnd_BanIndels_ExtendSoftclip`. alignment filtering for TranscriptomeSAM output\n\n- BanSingleEnd_BanIndels_ExtendSoftclip ",
|
|
"help_text": "Type: `string`, example: `BanSingleEnd_BanIndels_ExtendSoftclip`. alignment filtering for TranscriptomeSAM output\n\n- BanSingleEnd_BanIndels_ExtendSoftclip ... prohibit indels and single-end alignments, extend softclips - compatible with RSEM\n- BanSingleEnd ... prohibit single-end alignments, allow indels and softclips\n- BanSingleEnd_ExtendSoftclip ... prohibit single-end alignments, extend softclips, allow indels"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"2-pass mapping" : {
|
|
"title": "2-pass Mapping",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"twopass_mode": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. 2-pass mapping mode",
|
|
"help_text": "Type: `string`. 2-pass mapping mode.\n\n- None ... 1-pass mapping\n- Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"twopass1reads_n": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `-1`. number of reads to process for the 1st step",
|
|
"help_text": "Type: `integer`, example: `-1`. number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step."
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"wasp parameters" : {
|
|
"title": "WASP parameters",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"wasp_output_mode": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. WASP allele-specific output type",
|
|
"help_text": "Type: `string`. WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad \u0026 Jonathan K Pritchard. Please cite the original WASP paper: Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 .\n\n- SAMtag ... add WASP tags to the alignments that pass WASP filtering"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"starsolo (single cell rna-seq) parameters" : {
|
|
"title": "STARsolo (single cell RNA-seq) parameters",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"solo_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. type of single-cell RNA-seq\n\n- CB_UMI_Simple ",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. type of single-cell RNA-seq\n\n- CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium.\n- CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq).\n- CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --out_sam_type BAM Unsorted [and/or SortedByCoordinate]\n- SmartSeq ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases)"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_cb_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Sequence`. cell barcode type\n\nSequence: cell barcode is a sequence (standard option)\nString: cell barcode is an arbitrary string",
|
|
"help_text": "Type: `string`, example: `Sequence`. cell barcode type\n\nSequence: cell barcode is a sequence (standard option)\nString: cell barcode is an arbitrary string"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_cb_whitelist": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. file(s) with whitelist(s) of cell barcodes",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. file(s) with whitelist(s) of cell barcodes. Only --solo_type CB_UMI_Complex allows more than one whitelist file.\n\n- None ... no whitelist: all cell barcodes are allowed"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_cb_start": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1`. cell barcode start base",
|
|
"help_text": "Type: `integer`, example: `1`. cell barcode start base"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_cb_len": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `16`. cell barcode length",
|
|
"help_text": "Type: `integer`, example: `16`. cell barcode length"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_umi_start": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `17`. UMI start base",
|
|
"help_text": "Type: `integer`, example: `17`. UMI start base"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_umi_len": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `10`. UMI length",
|
|
"help_text": "Type: `integer`, example: `10`. UMI length"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_barcode_read_length": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1`. length of the barcode read\n\n- 1 ",
|
|
"help_text": "Type: `integer`, example: `1`. length of the barcode read\n\n- 1 ... equal to sum of soloCBlen+soloUMIlen\n- 0 ... not defined, do not check"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_barcode_mate": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `0`. identifies which read mate contains the barcode (CB+UMI) sequence\n\n- 0 ",
|
|
"help_text": "Type: `integer`, example: `0`. identifies which read mate contains the barcode (CB+UMI) sequence\n\n- 0 ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed\n- 1 ... barcode sequence is a part of mate 1\n- 2 ... barcode sequence is a part of mate 2"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_cb_position": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. position of Cell Barcode(s) on the barcode read",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. position of Cell Barcode(s) on the barcode read.\n\nPresently only works with --solo_type CB_UMI_Complex, and barcodes are assumed to be on Read2.\nFormat for each barcode: startAnchor_startPosition_endAnchor_endPosition\nstart(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end\nstart(end)Position is the 0-based position with of the CB start(end) with respect to the Anchor Base\nString for different barcodes are separated by space.\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--solo_cb_position 0_0_2_-1 3_1_3_8"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_umi_position": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. position of the UMI on the barcode read, same as soloCBposition\n\nExample: inDrop (Zilionis et al, Nat",
|
|
"help_text": "Type: `string`. position of the UMI on the barcode read, same as soloCBposition\n\nExample: inDrop (Zilionis et al, Nat. Protocols, 2017):\n--solo_cb_position 3_9_3_14"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_adapter_sequence": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. adapter sequence to anchor barcodes",
|
|
"help_text": "Type: `string`. adapter sequence to anchor barcodes. Only one adapter sequence is allowed."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_adapter_mismatches_nmax": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, example: `1`. maximum number of mismatches allowed in adapter sequence",
|
|
"help_text": "Type: `integer`, example: `1`. maximum number of mismatches allowed in adapter sequence."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_cb_match_wl_type": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `1MM_multi`. matching the Cell Barcodes to the WhiteList\n\n- Exact ",
|
|
"help_text": "Type: `string`, example: `1MM_multi`. matching the Cell Barcodes to the WhiteList\n\n- Exact ... only exact matches allowed\n- 1MM ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match.\n- 1MM_multi ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches.\nAllowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0\n- 1MM_multi_pseudocounts ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes.\n- 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger \u003e= 3.0.0\n- EditDist_2 ... allow up to edit distance of 3 fpr each of the barcodes. May include one deletion + one insertion. Only works with --solo_type CB_UMI_Complex. Matches to multiple passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_input_sam_attr_barcode_seq": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order)",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_seq CR UR .\nThis parameter is required when running STARsolo with input from SAM."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_input_sam_attr_barcode_qual": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order)",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order).\n\nFor instance, for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_qual CY UY .\nIf this parameter is \u0027-\u0027 (default), the quality \u0027H\u0027 will be assigned to all bases."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_strand": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `Forward`. strandedness of the solo libraries:\n\n- Unstranded ",
|
|
"help_text": "Type: `string`, example: `Forward`. strandedness of the solo libraries:\n\n- Unstranded ... no strand information\n- Forward ... read strand same as the original RNA molecule\n- Reverse ... read strand opposite to the original RNA molecule"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_features": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `Gene`, multiple_sep: `\";\"`. genomic features for which the UMI counts per Cell Barcode are collected\n\n- Gene ",
|
|
"help_text": "Type: List of `string`, example: `Gene`, multiple_sep: `\";\"`. genomic features for which the UMI counts per Cell Barcode are collected\n\n- Gene ... genes: reads match the gene transcript\n- SJ ... splice junctions: reported in SJ.out.tab\n- GeneFull ... full gene (pre-mRNA): count all reads overlapping genes\u0027 exons and introns\n- GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes\u0027 exons and introns: prioritize 100% overlap with exons\n- GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads overlapping genes\u0027 exons and introns: prioritize \u003e50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_multi_mappers": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `Unique`, multiple_sep: `\";\"`. counting method for reads mapping to multiple genes\n\n- Unique ",
|
|
"help_text": "Type: List of `string`, example: `Unique`, multiple_sep: `\";\"`. counting method for reads mapping to multiple genes\n\n- Unique ... count only reads that map to unique genes\n- Uniform ... uniformly distribute multi-genic UMIs to all genes\n- Rescue ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM)\n- PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not.\n- EM ... multi-gene UMIs are distributed using Expectation Maximization algorithm"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_umi_dedup": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `1MM_All`, multiple_sep: `\";\"`. type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All ",
|
|
"help_text": "Type: List of `string`, example: `1MM_All`, multiple_sep: `\";\"`. type of UMI deduplication (collapsing) algorithm\n\n- 1MM_All ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once).\n- 1MM_Directional_UMItools ... follows the \"directional\" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017).\n- 1MM_Directional ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs\n- Exact ... only exactly matching UMIs are collapsed.\n- NoDedup ... no deduplication of UMIs, count all reads.\n- 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI collapsing."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_umi_filtering": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, multiple_sep: `\";\"`. type of UMI filtering (for reads uniquely mapping to genes)\n\n- - ",
|
|
"help_text": "Type: List of `string`, multiple_sep: `\";\"`. type of UMI filtering (for reads uniquely mapping to genes)\n\n- - ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0).\n- MultiGeneUMI ... basic + remove lower-count UMIs that map to more than one gene.\n- MultiGeneUMI_All ... basic + remove all UMIs that map to more than one gene.\n- MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to more than one gene, matching CellRanger \u003e 3.0.0 .\nOnly works with --solo_umi_dedup 1MM_CR"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_out_file_names": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `Solo.out/;features.tsv;barcodes.tsv;matrix.mtx`, multiple_sep: `\";\"`. file names for STARsolo output:\n\nfile_name_prefix gene_names barcode_sequences cell_feature_count_matrix",
|
|
"help_text": "Type: List of `string`, example: `Solo.out/;features.tsv;barcodes.tsv;matrix.mtx`, multiple_sep: `\";\"`. file names for STARsolo output:\n\nfile_name_prefix gene_names barcode_sequences cell_feature_count_matrix"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_cell_filter": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `CellRanger2.2;3000;0.99;10`, multiple_sep: `\";\"`. cell filtering type and parameters\n\n- None ",
|
|
"help_text": "Type: List of `string`, example: `CellRanger2.2;3000;0.99;10`, multiple_sep: `\";\"`. cell filtering type and parameters\n\n- None ... do not output filtered cells\n- TopCells ... only report top cells by UMI count, followed by the exact number of cells\n- CellRanger2.2 ... simple filtering of CellRanger 2.2.\nCan be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count\nThe harcoded values are from CellRanger: nExpectedCells=3000; maxPercentile=0.99; maxMinRatio=10\n- EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y\nCan be followed by 10 numeric parameters: nExpectedCells maxPercentile maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR simN\nThe harcoded values are from CellRanger: 3000 0.99 10 45000 90000 500 0.01 20000 0.01 10000"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_out_format_features_gene_field3": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: List of `string`, example: `Gene Expression`, multiple_sep: `\";\"`. field 3 in the Gene features",
|
|
"help_text": "Type: List of `string`, example: `Gene Expression`, multiple_sep: `\";\"`. field 3 in the Gene features.tsv file. If \"-\", then no 3rd field is output."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"solo_cell_read_stats": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. Output reads statistics for each CB\n\n- Standard ",
|
|
"help_text": "Type: `string`. Output reads statistics for each CB\n\n- Standard ... standard output"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"nextflow input-output arguments" : {
|
|
"title": "Nextflow input-output arguments",
|
|
"type": "object",
|
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
|
"properties": {
|
|
|
|
|
|
"publish_dir": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
|
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"param_list": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
|
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
|
"hidden": true
|
|
|
|
}
|
|
|
|
|
|
}
|
|
}
|
|
},
|
|
"allOf": [
|
|
|
|
{
|
|
"$ref": "#/definitions/inputs"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/outputs"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/run parameters"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/genome parameters"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/splice junctions database"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/variation parameters"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/read parameters"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/read clipping"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/limits"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/output: general"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/output: sam and bam"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/bam processing"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/output wiggle"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/output filtering"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/output splice junctions (sj.out.tab)"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/output filtering: splice junctions"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/scoring"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/alignments and seeding"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/paired-end reads"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/windows, anchors, binning"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/chimeric alignments"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/quantification of annotations"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/2-pass mapping"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/wasp parameters"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/starsolo (single cell rna-seq) parameters"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/nextflow input-output arguments"
|
|
}
|
|
]
|
|
}
|