delete /execs
delete /execs/{execId}
post /execs/{execId}/deploy
get /execs/{execId}
get /execs/{execId}/log
get /execs/{execId}/result
post /execs/{execId}/stop
get /execs/frameworks
get /execs
post /execs
post /hypersearch/algorithm/debug
delete /hypersearch
delete /hypersearch/{hpoName}
delete /hypersearch/algorithm/{algoName}
get /hypersearch
get /hypersearch/algorithm
get /hypersearch/algorithm/{algoName}
get /hypersearch/{hpoName}
post /hypersearch/algorithm/install
put /hypersearch/{hpoName}/restart
post /hypersearch
put /hypersearch/{hpoName}
put /hypersearch/{hpoName}/force
post /resplans/resplan
delete /resplans/resplan/{resplan_name}
get /resplans/resplan/{resplan_name}
get /resplans/resplantree
put /resplans/resplan
get /scheduler/applications/{appid}/driver/logs/{type}/download
get /scheduler/applications/{appid}/executor/{executorid}/logs/{type}/download
get /scheduler/applicationStatistic
get /scheduler/applications
get /scheduler/applications/{appid}/driver/logs/{type}
get /scheduler/applications/{appid}/executor/{executorid}/logs/{type}
application/json
{
"example_key" : "example_key"
}
application/json
application/json
application/json
application/json
application/json
application/json
{
"uid" : "uid",
"href" : "href"
}
application/json
application/json
{
"args" : "args",
"creator" : "creator",
"submissionId" : "submissionId",
"appName" : "appName",
"schedulerUrl" : "schedulerUrl",
"appId" : "appId",
"id" : "id",
"workDir" : "workDir",
"state" : "state",
"events" : "events"
}
application/json
application/json
""
application/json
application/json
""
application/json
application/json
application/json
application/json
[ {
"distributeStrategy" : "MultiWorkerMirroredStrategy",
"frameworkVersion" : "frameworkVersion",
"name" : "name",
"description" : "description",
"numPs" : 0,
"desc" : [ "desc", "desc" ]
}, {
"distributeStrategy" : "MultiWorkerMirroredStrategy",
"frameworkVersion" : "frameworkVersion",
"name" : "name",
"description" : "description",
"numPs" : 0,
"desc" : [ "desc", "desc" ]
} ]
application/json
application/json
[ {
"args" : "args",
"creator" : "creator",
"submissionId" : "submissionId",
"appName" : "appName",
"schedulerUrl" : "schedulerUrl",
"appId" : "appId",
"id" : "id",
"workDir" : "workDir",
"state" : "state",
"events" : "events"
}, {
"args" : "args",
"creator" : "creator",
"submissionId" : "submissionId",
"appName" : "appName",
"schedulerUrl" : "schedulerUrl",
"appId" : "appId",
"id" : "id",
"workDir" : "workDir",
"state" : "state",
"events" : "events"
} ]
application/json
Starts a task through Execute.
It can have a data
parameter to specify the task arguments and data sources.
{
'args': 'Arguments to the task. It has the same format as the `args` in the request parameters, except that the `--cs-datastore-meta` options can be overridden by the `dataSource` configuration below.',
'projectId': 'project Id',
# hardwareSpec defines the hardware specification for worker and driver; if hardwareSpec is specified, the hardware specification in args will be ignored.
# if the id or name of hardwareSpec is defined, the existing hardware specification is used; otherwise, the hardware specification entity defined in nodes is used
'hardwareSpec': {
'id': 'id of hardware specification',
'name': 'name of hardware specification',
'nodes': {
'cpu': {
'units': 'number of worker cpu units',
},
'mem': {
'size': 'worker memory size',
},
'gpu': {
'num_gpu': 'number of worker gpu',
'gpu_profile': 'gpu type, one of: generic, full, slice',
'mig_profile': 'MIG profile for slice type gpu, e.g. 1g.5gb, 2g.10gb',
},
'num_nodes': 'number of workers',
'num_drivers': 'number of drivers',
'drivers': {
'cpu': {
'units': 'number of driver cpu units',
},
'mem': {
'size': 'driver memory size',
}
}
}
# override values of the hardware specification
'asset_params':[
{
'path' : 'path of params begin from /nodes, e.g. /nodes/num_nodes',
'value': 'new value of the params',
},
]
}
'dataSource': [
{
'type': 'Type of the data source, it can be `fs`, `connection` or `data_asset`',
'asset': {
'asset_id': 'CP4D asset id for `connection` or `data_asset` asset.',
'project_id': 'CP4D project id where the asset locates',
'catalog_id': 'CP4D catalog id where the asset locates',
'space_id': 'CP4D space id where the asset locates',
},
'location': {
# for `connection` or `data_asset` type data source, configure data connection interaction properties for the asset.
# for `fs` type data source, below configurations are allowed.
'paths': 'string, optional, relative data path in wmla data pvc.',
'volume': 'string, optional, CP4D storage volume name'
},
'parameters': {
'read_to_file': 'bool, optional, to indicate if read data source into memory or download it as a file, default is False',
'save_root_path': 'string, optional, only valid if read_to_file=True, to indicate where the file will be saved to. Check more about it after this.',
'asset_name': "string, optional, when read_to_file=True this asset_name will be used as the file name to save, when read_to_file=False, it will be used as the key in the dict output of WMLADataManager.create_from_data_source().read_pandas() to distinguish the result",
'batch_size': 'int, optional, flight service parameter to decide batch size in each chunk to read, default 10000 when read_to_file=False, 1000 when read_to_file=True.',
'num_partitions': 'int, optional, flight service parameter to decide how to partition the data source when reading it. Default 4.',
}
}]
}
application/json
application/json
Generate and download a fake task_attr.pb for local algorithm debugging.
Pass simulated hpo task submit request in the request body, which should be json format as below:
{
'hpoName': 'optional, string, name/id for the hpo task, will generate one if none specified here.',
'modelSpec':
{
'args': 'required, string, same as BYOF training'
},
'algoDef':
{
'algorithm': 'required, string, it can be a built-in algorithm such as Random, Tpe, Hyperband or ExperimentGridSearch, or a user-installed algorithm',
'maxRunTime': 'optional, int, max running time of the hpo task in minutes, default -1(unlimited)',
'maxJobNum': 'optional, int, max number of training jobs to be submitted for the hpo task, default -1(unlimited)',
'maxParalleJob': 'optional, int, max number of training jobs to run in parallel, default 1',
'objectiveMetric': 'required, string, name of the metric to be optimized, the same one as in val_dict_list.json',
'objective': 'required, string, optimize policy, one of minimize, maximize',
'algoParams': 'optional, list like [{'name':'', value:''}], additional algorithm parameters and it could be different for each algorithm which will be covered in later part'
},
'hyperParams':
[
{
'name': 'required, string, hyperparameter name, the same name will be used in the config.json so user model can load it',
'type': 'required, string, one of Range, Discrete',
'dataType': 'required, string, one of int, double, str',
'minDbVal': 'double, required if type=Range and datatype=double',
'maxDbVal': 'double, required if type=Range and datatype=double',
'minIntVal': 'int, required if type=Range and datatype=int',
'maxIntVal': 'int, required if type=Range and datatype=int',
'discreteDbVal': 'double, list like [0.1, 0.2], required if type=Discrete and dataType=double',
'discreteIntVal': 'int, list like [1, 2], required if type=Discrete and datatype=int',
'discreateStrVal': 'string, list like ['1', '2'], required if type=Discrete and datatype=str',
'power': 'a number value in string format, the base value for power calculation. ONLY valid when type is Range',
'step': 'a number value in string format, step size to split the Range space. ONLY valid when type is Range'
}
]
}
A new hpo task request should use `hyperParams` in the request body.
For Random, algoParams
can be provided as this:
'algoParams':
[
{
'name': 'RandomSeed',
'value': 'Optional, string, the random seed used to propose hyperparameter combinations.'
}
]
For Hyperband, algoParams
can be provided as this:
'algoParams':
[
{
'name': 'RandomSeed',
'value': 'Optional, string, the random seed used by Hyperband to propose hyperparameter combinations in the first rung of brackets.'
},
{
'name': 'eta',
'value': 'Optional, string, the reduction factor to control the proportion of configurations discarded in each Hyperband bracket. Default 3.'
},
{
'name': 'ResourceName',
'value': 'Required, string, the parameter name that will be taken as resource in Hyperband, normally training epochs or iterations. User can get this parameter from config.json just like other hyper-parameters.'
},
{
'name': 'ResourceValue',
'value': 'Required, int value in string format, it is the corresponding upper limit value for the ResourceName.'
}
]
For Tpe, algoParams
can be provided as this:
'algoParams':
[
{
'name': 'RandomSeed',
'value': 'Optional, string, the random seed used for the initial warm up hyperparameter combinations and the random generator of Gaussian Mixture Model.'
},
{
'name': 'WarmUp',
'value': 'Optional, string, the number of initial warm up hyperparameter combinations. It should be bigger than 2. If maxJobNum is smaller than this value, maxJobNum will be taken as the value. Default 20.'
},
{
'name': 'EICandidate',
'value': 'Optional, string, the number of hyperparameter combinations proposed each round as the candidates for Expected Improvement to propose the final one hyperparameter combination. It should be bigger than 1. Default 24.'
},
{
'name': 'GoodRatio',
'value': 'Optional, string, the fraction to use as good hyperparameter combinations from previous completed experiment training to build the good Gaussian Mixture Model. It should be bigger than 0. Default 0.25.'
},
{
'name': 'GoodMax',
'value': 'Optional, string, the max number of good hyperparameter combinations from previous completed experiment training to build the good Gaussian Mixture Model. It should be bigger than 1. Default 25.'
}
]
application/json
""
application/json
application/json
application/json
application/json
application/json
application/json
application/json
application/json
[ {
"running" : 0,
"duration" : "duration",
"creator" : "creator",
"createtime" : "createtime",
"hpoName" : "hpoName",
"experiments" : [ {
"maxiteration" : 3,
"appId" : "appId",
"metricVal" : 5.637376656633329,
"startTime" : "startTime",
"id" : 5,
"state" : "state",
"metrics" : [ {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
}, {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
} ],
"endTime" : "endTime",
"hyperParams" : [ {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
}, {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
} ]
}, {
"maxiteration" : 3,
"appId" : "appId",
"metricVal" : 5.637376656633329,
"startTime" : "startTime",
"id" : 5,
"state" : "state",
"metrics" : [ {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
}, {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
} ],
"endTime" : "endTime",
"hyperParams" : [ {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
}, {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
} ]
} ],
"progress" : "progress",
"best" : {
"maxiteration" : 3,
"appId" : "appId",
"metricVal" : 5.637376656633329,
"startTime" : "startTime",
"id" : 5,
"state" : "state",
"metrics" : [ {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
}, {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
} ],
"endTime" : "endTime",
"hyperParams" : [ {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
}, {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
} ]
},
"state" : "state",
"failed" : 1,
"complete" : 6
}, {
"running" : 0,
"duration" : "duration",
"creator" : "creator",
"createtime" : "createtime",
"hpoName" : "hpoName",
"experiments" : [ {
"maxiteration" : 3,
"appId" : "appId",
"metricVal" : 5.637376656633329,
"startTime" : "startTime",
"id" : 5,
"state" : "state",
"metrics" : [ {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
}, {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
} ],
"endTime" : "endTime",
"hyperParams" : [ {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
}, {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
} ]
}, {
"maxiteration" : 3,
"appId" : "appId",
"metricVal" : 5.637376656633329,
"startTime" : "startTime",
"id" : 5,
"state" : "state",
"metrics" : [ {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
}, {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
} ],
"endTime" : "endTime",
"hyperParams" : [ {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
}, {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
} ]
} ],
"progress" : "progress",
"best" : {
"maxiteration" : 3,
"appId" : "appId",
"metricVal" : 5.637376656633329,
"startTime" : "startTime",
"id" : 5,
"state" : "state",
"metrics" : [ {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
}, {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
} ],
"endTime" : "endTime",
"hyperParams" : [ {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
}, {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
} ]
},
"state" : "state",
"failed" : 1,
"complete" : 6
} ]
application/json
application/json
[ {
"path" : "path",
"creator" : "creator",
"createtime" : "createtime",
"logLevel" : "logLevel",
"condaEnv" : "condaEnv",
"name" : "name",
"condaHome" : "condaHome",
"type" : "type",
"remoteExec" : true
}, {
"path" : "path",
"creator" : "creator",
"createtime" : "createtime",
"logLevel" : "logLevel",
"condaEnv" : "condaEnv",
"name" : "name",
"condaHome" : "condaHome",
"type" : "type",
"remoteExec" : true
} ]
application/json
application/json
{
"path" : "path",
"creator" : "creator",
"createtime" : "createtime",
"logLevel" : "logLevel",
"condaEnv" : "condaEnv",
"name" : "name",
"condaHome" : "condaHome",
"type" : "type",
"remoteExec" : true
}
application/json
application/json
{
"running" : 0,
"duration" : "duration",
"creator" : "creator",
"createtime" : "createtime",
"hpoName" : "hpoName",
"experiments" : [ {
"maxiteration" : 3,
"appId" : "appId",
"metricVal" : 5.637376656633329,
"startTime" : "startTime",
"id" : 5,
"state" : "state",
"metrics" : [ {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
}, {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
} ],
"endTime" : "endTime",
"hyperParams" : [ {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
}, {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
} ]
}, {
"maxiteration" : 3,
"appId" : "appId",
"metricVal" : 5.637376656633329,
"startTime" : "startTime",
"id" : 5,
"state" : "state",
"metrics" : [ {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
}, {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
} ],
"endTime" : "endTime",
"hyperParams" : [ {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
}, {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
} ]
} ],
"progress" : "progress",
"best" : {
"maxiteration" : 3,
"appId" : "appId",
"metricVal" : 5.637376656633329,
"startTime" : "startTime",
"id" : 5,
"state" : "state",
"metrics" : [ {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
}, {
"min" : 2.3021358869347655,
"max" : 7.061401241503109,
"name" : "name",
"latest" : 9.301444243932576
} ],
"endTime" : "endTime",
"hyperParams" : [ {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
}, {
"maxIntVal" : 1,
"fixedVal" : "fixedVal",
"minIntVal" : 7,
"dataType" : "int",
"userDefined" : true,
"discreateStrVal" : [ "discreateStrVal", "discreateStrVal" ],
"type" : "range",
"discreteIntVal" : [ 1, 1 ],
"maxDbVal" : 4.145608029883936,
"name" : "name",
"step" : "step",
"power" : "power",
"discreteDbVal" : [ 1.0246457001441578, 1.0246457001441578 ],
"minDbVal" : 2.027123023002322
} ]
},
"state" : "state",
"failed" : 1,
"complete" : 6
}
application/json
Install a new hpo plugin algorithm by providing algorithm scripts as well as other required parameters.
To install a new hpo plugin algorithm, we need string format of input parameters, which is python dict or json format as below:
{
'name': 'required, string, name/id for the plugin algorithm, should be unique.',
'path': 'optional, string, the path for plugin algorithm scripts on server, required for local installation mode.',
'condaHome': 'optional, string, the CONDA_HOME to run the algorithm scripts, it will use the DLI_CONDA_HOME if not specified.',
'condaEnv': 'optional, string, the conda environment to run the algorithm scripts, it will use the DLI default conda environment if not specified.',
'remoteExec': 'optional, boolean, whether to deploy algorithm execution remotely, the default value is false.',
'logLevel': 'optional, string, the log level of the plugin algorithm, the default value is INFO.'
}
multipart/form-data
application/x-www-form-urlencoded
{
"uid" : "uid",
"href" : "href"
}
application/json
application/json
application/json
Start a new hpo task by providing required parameters.
To start a hpo task, we need string format of input parameters, which is python dict or json format as below:
{
'hpoName': 'optional, string, name/id for the hpo task, will generate one if none specified here.',
'modelSpec':
{
'args': 'required, string, same as BYOF training',
# hardwareSpec defines the hardware specification for worker and driver; if hardwareSpec is specified, the hardware specification in args will be ignored.
# if the id or name of hardwareSpec is defined, the existing hardware specification is used; otherwise, the hardware specification entity defined in nodes is used
'hardwareSpec': {
'id': 'id of hardware specification',
'name': 'name of hardware specification',
'nodes': {
'cpu': {
'units': 'number of worker cpu units',
},
'mem': {
'size': 'worker memory size',
},
'gpu': {
'num_gpu': 'number of worker gpu',
'gpu_profile': 'gpu type, one of: generic, full, slice',
'mig_profile': 'MIG profile for slice type gpu, e.g. 1g.5gb, 2g.10gb',
},
'num_nodes': 'number of workers',
'num_drivers': 'number of drivers',
'drivers': {
'cpu': {
'units': 'number of driver cpu units',
},
'mem': {
'size': 'driver memory size',
}
}
}
# override values of the hardware specification
'asset_params':[
{
'path' : 'path of params begin from /nodes, e.g. /nodes/num_nodes',
'value': 'new value of the params',
},
]
}
'dataSource': [
{
'type': 'Type of the data source, it can be `fs`, `connection` or `data_asset`',
'asset': {
'asset_id': 'CP4D asset id for `connection` or `data_asset` asset.',
'project_id': 'CP4D project id where the asset locates',
'catalog_id': 'CP4D catalog id where the asset locates',
'space_id': 'CP4D space id where the asset locates',
},
'location': {
# for `connection` or `data_asset` type data source, configure data connection interaction properties for the asset.
# for `fs` type data source, below configurations are allowed.
'paths': 'string, optional, relative data path in wmla data pvc.',
'volume': 'string, optional, CP4D storage volume name'
},
'parameters': {
'read_to_file': 'bool, optional, to indicate if read data source into memory or download it as a file, default is False',
'save_root_path': 'string, optional, only valid if read_to_file=True, to indicate where the file will be saved to. Check more about it after this.',
'asset_name': "string, optional, when read_to_file=True this asset_name will be used as the file name to save, when read_to_file=False, it will be used as the key in the dict output of WMLADataManager.create_from_data_source().read_pandas() to distinguish the result",
'batch_size': 'int, optional, flight service parameter to decide batch size in each chunk to read, default 10000 when read_to_file=False, 1000 when read_to_file=True.',
'num_partitions': 'int, optional, flight service parameter to decide how to partition the data source when reading it. Default 4.',
}
}]
},
'algoDef':
{
'algorithm': 'required, string, it can be a built-in algorithm such as Random, Tpe, Hyperband or ExperimentGridSearch, or a user-installed algorithm',
'maxRunTime': 'optional, int, max running time of the hpo task in minutes, default -1(unlimited)',
'maxJobNum': 'optional, int, max number of training jobs to be submitted for the hpo task, default -1(unlimited)',
'maxParalleJob': 'optional, int, max number of training jobs to run in parallel, default 1',
'objectiveMetric': 'required, string, name of the metric to be optimized, the same one as in val_dict_list.json',
'objective': 'required, string, optimize policy, one of minimize, maximize',
'additionalMetrics': 'optional, dict like {'metric_name': 'metric strategy'}, where metric strategy can be one of minimize, maximize, latest. latest will be used as the strategy if a name other than those three is specified.',
'algoParams': 'optional, list like [{'name':'', value:''}], additional algorithm parameters and it could be different for each algorithm which will be covered in later part'
},
'hyperParams':
[
{
'name': 'required, string, hyperparameter name, the same name will be used in the config.json so user model can load it',
'type': 'required, string, one of Range, Discrete',
'dataType': 'required, string, one of int, double, str',
'minDbVal': 'double, required if type=Range and datatype=double',
'maxDbVal': 'double, required if type=Range and datatype=double',
'minIntVal': 'int, required if type=Range and datatype=int',
'maxIntVal': 'int, required if type=Range and datatype=int',
'discreteDbVal': 'double, list like [0.1, 0.2], required if type=Discrete and dataType=double',
'discreteIntVal': 'int, list like [1, 2], required if type=Discrete and datatype=int',
'discreateStrVal': 'string, list like ['1', '2'], required if type=Discrete and datatype=str',
'power': 'a number value in string format, the base value for power calculation. ONLY valid when type is Range',
'step': 'a number value in string format, step size to split the Range space. ONLY valid when type is Range'
}
],
'experiments':
[
{
'id': 'required, int, hyperparameter experiment id',
'hyperParams':
[
{
'name': 'required, string, hyperparameter name, the same name will be used in the config.json so user model can load it',
'dataType': 'required, string, one of int, double, str',
'fixedVal': 'required, must have the same type as the specified dataType, e.g. if dataType=double, fixedVal must be a double'
}
]
}
]
}
Each new hpo task request can use only one of `hyperParams` and `experiments`: for the search algorithm ExperimentGridSearch, only `experiments` is supported; for all other algorithms, only `hyperParams` is supported.
For Random, algoParams
can be provided as this:
'algoParams':
[
{
'name': 'RandomSeed',
'value': 'Optional, string, the random seed used to propose hyperparameter combinations.'
}
]
For Hyperband, algoParams
can be provided as this:
'algoParams':
[
{
'name': 'RandomSeed',
'value': 'Optional, string, the random seed used by Hyperband to propose hyperparameter combinations in the first rung of brackets.'
},
{
'name': 'eta',
'value': 'Optional, string, the reduction factor to control the proportion of configurations discarded in each Hyperband bracket. Default 3.'
},
{
'name': 'ResourceName',
'value': 'Required, string, the parameter name that will be taken as resource in Hyperband, normally training epochs or iterations. User can get this parameter from config.json just like other hyper-parameters.'
},
{
'name': 'ResourceValue',
'value': 'Required, int value in string format, it is the corresponding upper limit value for the ResourceName.'
}
]
For Tpe, algoParams
can be provided as this:
'algoParams':
[
{
'name': 'RandomSeed',
'value': 'Optional, string, the random seed used for the initial warm up hyperparameter combinations and the random generator of Gaussian Mixture Model.'
},
{
'name': 'WarmUp',
'value': 'Optional, string, the number of initial warm up hyperparameter combinations. It should be bigger than 2. If maxJobNum is smaller than this value, maxJobNum will be taken as the value. Default 20.'
},
{
'name': 'EICandidate',
'value': 'Optional, string, the number of hyperparameter combinations proposed each round as the candidates for Expected Improvement to propose the final one hyperparameter combination. It should be bigger than 1. Default 24.'
},
{
'name': 'GoodRatio',
'value': 'Optional, string, the fraction to use as good hyperparameter combinations from previous completed experiment training to build the good Gaussian Mixture Model. It should be bigger than 0. Default 0.25.'
},
{
'name': 'GoodMax',
'value': 'Optional, string, the max number of good hyperparameter combinations from previous completed experiment training to build the good Gaussian Mixture Model. It should be bigger than 1. Default 25.'
}
]
multipart/form-data
application/x-www-form-urlencoded
{
"uid" : "uid",
"href" : "href"
}
application/json
application/json
application/json
application/json
application/json
application/json
application/json
application/json
application/json
application/json
{
"path" : "path",
"usedgpu" : "usedgpu",
"requestgpu" : "requestgpu",
"usedcpu" : "usedcpu",
"requestcpu" : "requestcpu"
}
application/json
application/json
{
"path" : "path",
"isParent" : "isParent",
"childTreeDto" : [ null, null ],
"V1Resplan" : {
"path" : "path",
"usedgpu" : "usedgpu",
"requestgpu" : "requestgpu",
"usedcpu" : "usedcpu",
"requestcpu" : "requestcpu"
},
"name" : "name",
"pid" : "pid",
"id" : "id"
}
application/json
application/json
application/json
application/json
application/octet-stream
application/json
application/octet-stream
application/json
[ {
"gpuUsed" : 5.962133916683182,
"gpuReq" : 1.4658129805029452,
"cpuUsed" : 6.027456183070403,
"cpuReq" : 0.8008281904610115,
"jobPending" : 5,
"jobRunning" : 2,
"username" : "username"
}, {
"gpuUsed" : 5.962133916683182,
"gpuReq" : 1.4658129805029452,
"cpuUsed" : 6.027456183070403,
"cpuReq" : 0.8008281904610115,
"jobPending" : 5,
"jobRunning" : 2,
"username" : "username"
} ]
application/json
application/json
[ {
"memused" : 5.637376656633329,
"hosts" : 1,
"schedulerUrl" : "schedulerUrl",
"endtime" : 3,
"starttime" : 9,
"type" : "BATCH",
"applicationname" : "applicationname",
"dltype" : "Caffe",
"tunningname" : "tunningname",
"slots" : 0,
"submittedtime" : 7,
"appReason" : "appReason",
"appFailureDetail" : "appFailureDetail",
"demandslots" : 6,
"coresused" : 5.962133916683182,
"apprunduration" : 2.027123023002322,
"model" : "model",
"state" : "state",
"applicationid" : "applicationid",
"dataset" : "dataset",
"username" : "username",
"timestamp" : 2
}, {
"memused" : 5.637376656633329,
"hosts" : 1,
"schedulerUrl" : "schedulerUrl",
"endtime" : 3,
"starttime" : 9,
"type" : "BATCH",
"applicationname" : "applicationname",
"dltype" : "Caffe",
"tunningname" : "tunningname",
"slots" : 0,
"submittedtime" : 7,
"appReason" : "appReason",
"appFailureDetail" : "appFailureDetail",
"demandslots" : 6,
"coresused" : 5.962133916683182,
"apprunduration" : 2.027123023002322,
"model" : "model",
"state" : "state",
"applicationid" : "applicationid",
"dataset" : "dataset",
"username" : "username",
"timestamp" : 2
} ]
application/json
application/json
text/plain
application/json
text/plain
ApplicationDetail
- ApplicationStatistic
- Attr
- Batch
- Children
- CreationResponse
- DLFramework
- EDIModelDescription
- Envs
- EventData
- EventDataCreateParam
- HpoAlgorithmDesc
- HpoExperiment
- HpoHyperParameter
- HpoMetric
- HpoTaskDetail
- HpoTaskInput
- HpoTaskState
- Labels_Map
- Resplan
- Spec
- StringMap
- TreeDto
- V1Resplan
- algoDef
- algoParams
- fixedHyperParam
- resDef
- searchExperiment
- searchGrid
- ApplicationDetail
- UpApplicationStatistic
- UpBatch
- UpDLFramework
- UpEDIModelDescription
- UpEventData
- UpEventDataCreateParam
- UpHpoAlgorithmDesc
- UpHpoExperiment
- UpHpoHyperParameter
- UpHpoTaskDetail
- UpHpoTaskInput
- UpHpoTaskState
- UpResplan
- UpSpec
- UpStringMap
- UpTreeDto
- UpalgoDef
- UpresDef
- UpsearchExperiment
- UpsearchGrid
- Up