Build pipeline: vsh-ci-dev-gckj5
Source commit: da62b4ffe3
Source message: Add labels to qc_test component
229 lines
8.3 KiB
JSON
229 lines
8.3 KiB
JSON
{
|
|
"$schema": "http://json-schema.org/draft-07/schema",
|
|
"title": "pad_tokenize",
|
|
"description": "Tokenize and pad a batch of data for scGPT integration zero-shot inference or fine-tuning.\n",
|
|
"type": "object",
|
|
"definitions": {
|
|
|
|
|
|
|
|
"inputs" : {
|
|
"title": "Inputs",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"input": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required, example: `input.h5mu`. The input h5mu file of pre-processed data",
|
|
"help_text": "Type: `file`, required, example: `input.h5mu`. The input h5mu file of pre-processed data.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"modality": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, default: `rna`. ",
|
|
"help_text": "Type: `string`, default: `rna`. "
|
|
,
|
|
"default": "rna"
|
|
}
|
|
|
|
|
|
,
|
|
"model_vocab": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required, example: `vocab.json`. Path to model vocabulary file",
|
|
"help_text": "Type: `file`, required, example: `vocab.json`. Path to model vocabulary file.\n"
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"input_layer": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, default: `binned`. The name of the layer to be padded and tokenized",
|
|
"help_text": "Type: `string`, default: `binned`. The name of the layer to be padded and tokenized.\n"
|
|
,
|
|
"default": "binned"
|
|
}
|
|
|
|
|
|
,
|
|
"var_gene_names": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`. The name of the .var column containing gene names",
|
|
"help_text": "Type: `string`. The name of the .var column containing gene names. When no var_gene_names is provided, the .var index will be used.\n"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"outputs" : {
|
|
"title": "Outputs",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"output": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. The output h5mu file containing obsm arrays for gene tokens, tokenized data and padding mask",
|
|
"help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. The output h5mu file containing obsm arrays for gene tokens, tokenized data and padding mask.\n"
|
|
,
|
|
"default": "$id.$key.output.h5mu"
|
|
}
|
|
|
|
|
|
,
|
|
"output_compression": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `gzip`, choices: `gzip`, `lzf`. The compression type for the output file",
|
|
"help_text": "Type: `string`, example: `gzip`, choices: `gzip`, `lzf`. The compression type for the output file.\n",
|
|
"enum": ["gzip", "lzf"]
|
|
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"obsm_gene_tokens": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, default: `gene_id_tokens`, example: `values.pt`. The key of the .obsm array containing the gene token ids",
|
|
"help_text": "Type: `string`, default: `gene_id_tokens`, example: `values.pt`. The key of the .obsm array containing the gene token ids\n"
|
|
,
|
|
"default": "gene_id_tokens"
|
|
}
|
|
|
|
|
|
,
|
|
"obsm_tokenized_values": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, default: `values_tokenized`. The key of the .obsm array containing the count values of the tokenized genes",
|
|
"help_text": "Type: `string`, default: `values_tokenized`. The key of the .obsm array containing the count values of the tokenized genes\n"
|
|
,
|
|
"default": "values_tokenized"
|
|
}
|
|
|
|
|
|
,
|
|
"obsm_padding_mask": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, default: `padding_mask`. The key of the .obsm array containing the padding mask",
|
|
"help_text": "Type: `string`, default: `padding_mask`. The key of the .obsm array containing the padding mask.\n"
|
|
,
|
|
"default": "padding_mask"
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"arguments" : {
|
|
"title": "Arguments",
|
|
"type": "object",
|
|
"description": "No description",
|
|
"properties": {
|
|
|
|
|
|
"pad_token": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, default: `\u003cpad\u003e`. Token used for padding",
|
|
"help_text": "Type: `string`, default: `\u003cpad\u003e`. Token used for padding.\n"
|
|
,
|
|
"default": "<pad>"
|
|
}
|
|
|
|
|
|
,
|
|
"pad_value": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`, default: `-2`. The value of the padding token",
|
|
"help_text": "Type: `integer`, default: `-2`. The value of the padding token.\n"
|
|
,
|
|
"default": -2
|
|
}
|
|
|
|
|
|
,
|
|
"max_seq_len": {
|
|
"type":
|
|
"integer",
|
|
"description": "Type: `integer`. The maximum sequence length of the tokenized data",
|
|
"help_text": "Type: `integer`. The maximum sequence length of the tokenized data.\n"
|
|
|
|
}
|
|
|
|
|
|
}
|
|
},
|
|
|
|
|
|
"nextflow input-output arguments" : {
|
|
"title": "Nextflow input-output arguments",
|
|
"type": "object",
|
|
"description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
|
|
"properties": {
|
|
|
|
|
|
"publish_dir": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, required, example: `output/`. Path to an output directory",
|
|
"help_text": "Type: `string`, required, example: `output/`. Path to an output directory."
|
|
|
|
}
|
|
|
|
|
|
,
|
|
"param_list": {
|
|
"type":
|
|
"string",
|
|
"description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
|
|
"help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map correspond to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativization is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
|
|
"hidden": true
|
|
|
|
}
|
|
|
|
|
|
}
|
|
}
|
|
},
|
|
"allOf": [
|
|
|
|
{
|
|
"$ref": "#/definitions/inputs"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/outputs"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/arguments"
|
|
},
|
|
|
|
{
|
|
"$ref": "#/definitions/nextflow input-output arguments"
|
|
}
|
|
]
|
|
}
|