openpipeline/target/nextflow/scgpt/pad_tokenize/nextflow_schema.json

{
"$schema": "http://json-schema.org/draft-07/schema",
"title": "pad_tokenize",
"description": "Tokenize and pad a batch of data for scGPT integration zero-shot inference or fine-tuning.\n",
"type": "object",
"definitions": {


    "inputs" : {
    "title": "Inputs",
    "type": "object",
    "description": "No description",
    "properties": {


                "input": {
                "type":
                "string",
                "description": "Type: `file`, required, example: `input.h5mu`. The input h5mu file of pre-processed data",
                "help_text": "Type: `file`, required, example: `input.h5mu`. The input h5mu file of pre-processed data.\n"

            }


        ,
                "modality": {
                "type":
                "string",
                "description": "Type: `string`, default: `rna`. ",
                "help_text": "Type: `string`, default: `rna`. "
            ,
                "default": "rna"
            }


        ,
                "model_vocab": {
                "type":
                "string",
                "description": "Type: `file`, required, example: `vocab.json`. Path to model vocabulary file",
                "help_text": "Type: `file`, required, example: `vocab.json`. Path to model vocabulary file.\n"

            }


        ,
                "input_layer": {
                "type":
                "string",
                "description": "Type: `string`, default: `binned`. The name of the layer to be padded and tokenized",
                "help_text": "Type: `string`, default: `binned`. The name of the layer to be padded and tokenized.\n"
            ,
                "default": "binned"
            }


        ,
                "var_gene_names": {
                "type":
                "string",
                "description": "Type: `string`. The name of the ",
                "help_text": "Type: `string`. The name of the .var column containing gene names. When no gene_name_layer is provided, the .var index will be used.\n"

            }


}
},


    "outputs" : {
    "title": "Outputs",
    "type": "object",
    "description": "No description",
    "properties": {


                "output": {
                "type":
                "string",
                "description": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. The output h5mu file containing obsm arrays for gene tokens, tokenized data and padding mask",
                "help_text": "Type: `file`, required, default: `$id.$key.output.h5mu`, example: `output.h5mu`. The output h5mu file containing obsm arrays for gene tokens, tokenized data and padding mask.\n"
            ,
                "default": "$id.$key.output.h5mu"
            }


        ,
                "output_compression": {
                "type":
                "string",
                "description": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression type for the output file",
                "help_text": "Type: `string`, example: `gzip`, choices: ``gzip`, `lzf``. The compression type for the output file.\n",
                "enum": ["gzip", "lzf"]


            }


        ,
                "obsm_gene_tokens": {
                "type":
                "string",
                "description": "Type: `string`, default: `gene_id_tokens`, example: `values.pt`. The key of the ",
                "help_text": "Type: `string`, default: `gene_id_tokens`, example: `values.pt`. The key of the .obsm array containing the gene token ids\n"
            ,
                "default": "gene_id_tokens"
            }


        ,
                "obsm_tokenized_values": {
                "type":
                "string",
                "description": "Type: `string`, default: `values_tokenized`. The key of the ",
                "help_text": "Type: `string`, default: `values_tokenized`. The key of the .obsm array containing the count values of the tokenized genes\n"
            ,
                "default": "values_tokenized"
            }


        ,
                "obsm_padding_mask": {
                "type":
                "string",
                "description": "Type: `string`, default: `padding_mask`. The key of the ",
                "help_text": "Type: `string`, default: `padding_mask`. The key of the .obsm array containing the padding mask.\n"
            ,
                "default": "padding_mask"
            }


}
},


    "arguments" : {
    "title": "Arguments",
    "type": "object",
    "description": "No description",
    "properties": {


                "pad_token": {
                "type":
                "string",
                "description": "Type: `string`, default: `\u003cpad\u003e`. Token used for padding",
                "help_text": "Type: `string`, default: `\u003cpad\u003e`. Token used for padding.\n"
            ,
                "default": "<pad>"
            }


        ,
                "pad_value": {
                "type":
                "integer",
                "description": "Type: `integer`, default: `-2`. The value of the padding token",
                "help_text": "Type: `integer`, default: `-2`. The value of the padding token.\n"
            ,
                "default": "-2"
            }


        ,
                "max_seq_len": {
                "type":
                "integer",
                "description": "Type: `integer`. The maximum sequence length of the tokenized data",
                "help_text": "Type: `integer`. The maximum sequence length of the tokenized data.\n"

            }


}
},


    "nextflow input-output arguments" : {
    "title": "Nextflow input-output arguments",
    "type": "object",
    "description": "Input/output parameters for Nextflow itself. Please note that both publishDir and publish_dir are supported but at least one has to be configured.",
    "properties": {


                "publish_dir": {
                "type":
                "string",
                "description": "Type: `string`, required, example: `output/`. Path to an output directory",
                "help_text": "Type: `string`, required, example: `output/`. Path to an output directory."

            }


        ,
                "param_list": {
                "type":
                "string",
                "description": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel",
                "help_text": "Type: `string`, example: `my_params.yaml`. Allows inputting multiple parameter sets to initialise a Nextflow channel. A `param_list` can either be a list of maps, a csv file, a json file, a yaml file, or simply a yaml blob.\n\n* A list of maps (as-is) where the keys of each map corresponds to the arguments of the pipeline. Example: in a `nextflow.config` file: `param_list: [ [\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027], [\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027] ]`.\n* A csv file should have column names which correspond to the different arguments of this pipeline. Example: `--param_list data.csv` with columns `id,input`.\n* A json or a yaml file should be a list of maps, each of which has keys corresponding to the arguments of the pipeline. Example: `--param_list data.json` with contents `[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]`.\n* A yaml blob can also be passed directly as a string. Example: `--param_list \"[ {\u0027id\u0027: \u0027foo\u0027, \u0027input\u0027: \u0027foo.txt\u0027}, {\u0027id\u0027: \u0027bar\u0027, \u0027input\u0027: \u0027bar.txt\u0027} ]\"`.\n\nWhen passing a csv, json or yaml file, relative path names are relativized to the location of the parameter file. No relativation is performed when `param_list` is a list of maps (as-is) or a yaml blob.",
                "hidden": true

            }


}
}
},
"allOf": [

    {
    "$ref": "#/definitions/inputs"
    },

    {
    "$ref": "#/definitions/outputs"
    },

    {
    "$ref": "#/definitions/arguments"
    },

    {
    "$ref": "#/definitions/nextflow input-output arguments"
    }
]
}