CLI (v2) managed online endpoint YAML schema

Important

This feature is currently in public preview. This preview version is provided without a service-level agreement, and it's not recommended for production workloads. Certain features might not be supported or might have constrained capabilities. For more information, see Supplemental Terms of Use for Microsoft Azure Previews.

Note

A fully specified sample YAML for managed online endpoints is available for reference.

Schema

The source JSON schema can be found at https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json. The schema is provided below in JSON and YAML formats for convenience.

{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "definitions": {
    "ManagedOnlineDeploymentSchema": {
      "type": "object",
      "required": [
        "name"
      ],
      "properties": {
        "app_insights_enabled": {
          "title": "app_insights_enabled",
          "type": "boolean"
        },
        "code_configuration": {
          "type": "object",
          "$ref": "#/definitions/CodeConfigurationSchema"
        },
        "environment": {
          "anyOf": [
            {
              "type": "string",
              "pattern": "^azureml:.*",
              "arm_type": "environments",
              "title": "environment"
            },
            {
              "type": "object",
              "$ref": "#/definitions/EnvironmentSchema"
            },
            {
              "type": "string",
              "pattern": "^file:.*"
            }
          ]
        },
        "environment_variables": {
          "title": "environment_variables",
          "type": "object",
          "additionalProperties": {}
        },
        "id": {
          "title": "id",
          "type": "string"
        },
        "instance_type": {
          "title": "instance_type",
          "type": "string"
        },
        "liveness_probe": {
          "type": "object",
          "$ref": "#/definitions/LivenessProbeSchema"
        },
        "model": {
          "anyOf": [
            {
              "type": "string",
              "pattern": "^azureml:.*",
              "arm_type": "models",
              "title": "model"
            },
            {
              "type": "object",
              "$ref": "#/definitions/ModelSchema"
            }
          ]
        },
        "name": {
          "title": "name",
          "type": "string"
        },
        "properties": {
          "title": "properties",
          "type": "object",
          "additionalProperties": {}
        },
        "provisioning_state": {
          "title": "provisioning_state",
          "type": "string"
        },
        "readiness_probe": {
          "type": "object",
          "$ref": "#/definitions/LivenessProbeSchema"
        },
        "request_settings": {
          "type": "object",
          "$ref": "#/definitions/RequestSettingsSchema"
        },
        "scale_settings": {
          "anyOf": [
            {
              "type": "object",
              "$ref": "#/definitions/AutoScaleSettingsSchema"
            },
            {
              "type": "object",
              "$ref": "#/definitions/ManualScaleSettingsSchema"
            }
          ]
        },
        "$schema": {
          "title": "$schema",
          "type": "string",
          "readonly": true
        },
        "tags": {
          "title": "tags",
          "type": "object",
          "additionalProperties": {}
        },
        "type": {
          "title": "type",
          "type": "string"
        }
      },
      "additionalProperties": false
    },
    "CodeConfigurationSchema": {
      "type": "object",
      "properties": {
        "code": {
          "anyOf": [
            {
              "type": "string",
              "pattern": "^azureml:.*",
              "arm_type": "codes",
              "title": "code"
            },
            {
              "type": "object",
              "$ref": "#/definitions/CodeAssetSchema"
            },
            {
              "type": "string",
              "pattern": "^file:.*"
            }
          ]
        },
        "$schema": {
          "title": "$schema",
          "type": "string",
          "readonly": true
        },
        "scoring_script": {
          "title": "scoring_script",
          "type": "string"
        }
      },
      "additionalProperties": false
    },
    "CodeAssetSchema": {
      "type": "object",
      "properties": {
        "arm_id": {
          "type": "string",
          "pattern": "^azureml:.*",
          "arm_type": "data",
          "title": "arm_id",
          "readonly": true
        },
        "creation_context": {
          "type": "object",
          "$ref": "#/definitions/CreationContextSchema"
        },
        "datastore": {
          "type": "string",
          "pattern": "^azureml:.*",
          "arm_type": "datastores",
          "title": "datastore"
        },
        "description": {
          "title": "description",
          "type": "string"
        },
        "id": {
          "title": "id",
          "type": "string"
        },
        "local_path": {
          "title": "local_path",
          "type": "string",
          "description": "the path from which the data gets uploaded to the cloud"
        },
        "name": {
          "title": "name",
          "type": "string"
        },
        "path": {
          "title": "path",
          "type": "string",
          "description": "URI pointing to a file or directory."
        },
        "$schema": {
          "title": "$schema",
          "type": "string",
          "readonly": true
        },
        "tags": {
          "title": "tags",
          "type": "object",
          "additionalProperties": {}
        },
        "version": {
          "title": "version",
          "type": "number",
          "format": "integer"
        }
      },
      "additionalProperties": false
    },
    "CreationContextSchema": {
      "type": "object",
      "properties": {
        "created_at": {
          "title": "created_at",
          "type": "string",
          "format": "date-time"
        },
        "created_by": {
          "title": "created_by",
          "type": "string"
        },
        "created_by_type": {
          "title": "created_by_type",
          "type": "string"
        },
        "last_modified_at": {
          "title": "last_modified_at",
          "type": "string",
          "format": "date-time"
        },
        "last_modified_by": {
          "title": "last_modified_by",
          "type": "string"
        },
        "last_modified_by_type": {
          "title": "last_modified_by_type",
          "type": "string"
        }
      },
      "additionalProperties": false
    },
    "EnvironmentSchema": {
      "type": "object",
      "properties": {
        "conda_file": {
          "anyOf": [
            {
              "type": "string",
              "title": "conda_file"
            },
            {
              "title": "conda_file",
              "type": "string"
            }
          ]
        },
        "creation_context": {
          "type": "object",
          "$ref": "#/definitions/CreationContextSchema"
        },
        "description": {
          "title": "description",
          "type": "string"
        },
        "docker": {
          "type": "object",
          "$ref": "#/definitions/DockerSchema"
        },
        "id": {
          "type": "string",
          "pattern": "^azureml:.*",
          "arm_type": "environments",
          "title": "id",
          "readonly": true
        },
        "inference_config": {
          "type": "object",
          "$ref": "#/definitions/InferenceConfigSchema"
        },
        "name": {
          "title": "name",
          "type": "string"
        },
        "os_type": {
          "type": "string",
          "enum": [
            "linux",
            "windows"
          ],
          "title": "os_type"
        },
        "path": {
          "title": "path",
          "type": "string"
        },
        "$schema": {
          "title": "$schema",
          "type": "string",
          "readonly": true
        },
        "tags": {
          "title": "tags",
          "type": "object",
          "additionalProperties": {
            "title": "tags",
            "type": "string"
          }
        },
        "version": {
          "title": "version",
          "type": "number",
          "format": "integer"
        }
      },
      "additionalProperties": false
    },
    "DockerSchema": {
      "type": "object",
      "properties": {
        "build": {
          "type": "object",
          "$ref": "#/definitions/DockerBuildSchema"
        },
        "image": {
          "title": "image",
          "type": "string"
        }
      },
      "additionalProperties": false
    },
    "DockerBuildSchema": {
      "type": "object",
      "properties": {
        "dockerfile": {
          "anyOf": [
            {
              "type": "string",
              "title": "dockerfile"
            },
            {
              "title": "dockerfile",
              "type": "string"
            }
          ]
        }
      },
      "additionalProperties": false
    },
    "InferenceConfigSchema": {
      "type": "object",
      "required": [
        "liveness_route",
        "readiness_route",
        "scoring_route"
      ],
      "properties": {
        "liveness_route": {
          "type": "object",
          "$ref": "#/definitions/RouteSchema"
        },
        "readiness_route": {
          "type": "object",
          "$ref": "#/definitions/RouteSchema"
        },
        "scoring_route": {
          "type": "object",
          "$ref": "#/definitions/RouteSchema"
        }
      },
      "additionalProperties": false
    },
    "RouteSchema": {
      "type": "object",
      "required": [
        "path",
        "port"
      ],
      "properties": {
        "path": {
          "title": "path",
          "type": "string"
        },
        "port": {
          "title": "port",
          "type": "number",
          "format": "integer"
        }
      },
      "additionalProperties": false
    },
    "LivenessProbeSchema": {
      "type": "object",
      "properties": {
        "failure_threshold": {
          "title": "failure_threshold",
          "type": "number",
          "format": "integer"
        },
        "initial_delay": {
          "title": "initial_delay",
          "type": "number",
          "format": "integer"
        },
        "period": {
          "title": "period",
          "type": "number",
          "format": "integer"
        },
        "success_threshold": {
          "title": "success_threshold",
          "type": "number",
          "format": "integer"
        },
        "timeout": {
          "title": "timeout",
          "type": "number",
          "format": "integer"
        }
      },
      "additionalProperties": false
    },
    "ModelSchema": {
      "type": "object",
      "required": [
        "name",
        "version"
      ],
      "properties": {
        "creation_context": {
          "type": "object",
          "$ref": "#/definitions/CreationContextSchema"
        },
        "datastore": {
          "type": "string",
          "pattern": "^azureml:.*",
          "arm_type": "datastores",
          "title": "datastore"
        },
        "description": {
          "title": "description",
          "type": "string"
        },
        "flavors": {
          "title": "flavors",
          "type": "object",
          "additionalProperties": {}
        },
        "id": {
          "type": "string",
          "pattern": "^azureml:.*",
          "arm_type": "models",
          "title": "id",
          "readonly": true
        },
        "local_path": {
          "title": "local_path",
          "type": "string"
        },
        "name": {
          "title": "name",
          "type": "string"
        },
        "path": {
          "title": "path",
          "type": "string"
        },
        "properties": {
          "title": "properties",
          "type": "object",
          "additionalProperties": {}
        },
        "$schema": {
          "title": "$schema",
          "type": "string",
          "readonly": true
        },
        "tags": {
          "title": "tags",
          "type": "object",
          "additionalProperties": {}
        },
        "utc_time_created": {
          "title": "utc_time_created",
          "type": "string",
          "format": "date-time"
        },
        "version": {
          "title": "version",
          "type": "number",
          "format": "integer"
        }
      },
      "additionalProperties": false
    },
    "RequestSettingsSchema": {
      "type": "object",
      "properties": {
        "max_concurrent_requests_per_instance": {
          "title": "max_concurrent_requests_per_instance",
          "type": "number",
          "format": "integer"
        },
        "max_queue_wait_ms": {
          "title": "max_queue_wait_ms",
          "type": "number",
          "format": "integer"
        },
        "request_timeout_ms": {
          "title": "request_timeout_ms",
          "type": "number",
          "format": "integer"
        }
      },
      "additionalProperties": false
    },
    "AutoScaleSettingsSchema": {
      "type": "object",
      "required": [
        "scale_type"
      ],
      "properties": {
        "max_instances": {
          "title": "max_instances",
          "type": "number",
          "format": "integer"
        },
        "min_instances": {
          "title": "min_instances",
          "type": "number",
          "format": "integer"
        },
        "polling_interval": {
          "title": "polling_interval",
          "type": "number",
          "format": "integer"
        },
        "scale_type": {
          "type": "string",
          "enum": [
            "Auto"
          ],
          "title": "scale_type"
        },
        "target_utilization_percentage": {
          "title": "target_utilization_percentage",
          "type": "number",
          "format": "integer"
        }
      },
      "additionalProperties": false
    },
    "ManualScaleSettingsSchema": {
      "type": "object",
      "required": [
        "instance_count",
        "scale_type"
      ],
      "properties": {
        "instance_count": {
          "title": "instance_count",
          "type": "number",
          "format": "integer"
        },
        "max_instances": {
          "title": "max_instances",
          "type": "number",
          "format": "integer"
        },
        "min_instances": {
          "title": "min_instances",
          "type": "number",
          "format": "integer"
        },
        "scale_type": {
          "type": "string",
          "enum": [
            "Manual"
          ],
          "title": "scale_type"
        }
      },
      "additionalProperties": false
    },
    "IdentitySchema": {
      "type": "object",
      "properties": {
        "principal_id": {
          "title": "principal_id",
          "type": "string"
        },
        "tenant_id": {
          "title": "tenant_id",
          "type": "string"
        },
        "type": {
          "type": "string",
          "enum": [
            "system_assigned",
            "user_assigned",
            "none"
          ],
          "title": "type"
        },
        "user_assigned_identities": {
          "title": "user_assigned_identities",
          "type": "array",
          "items": {
            "title": "user_assigned_identities",
            "type": "object",
            "additionalProperties": {
              "title": "user_assigned_identities",
              "type": "string"
            }
          }
        }
      },
      "additionalProperties": false
    },
    "ManagedOnlineEndpointSchema": {
      "type": "object",
      "required": [
        "name"
      ],
      "properties": {
        "auth_mode": {
          "type": "string",
          "enum": [
            "aml_token",
            "key",
            "aad_token"
          ],
          "title": "auth_mode"
        },
        "deployments": {
          "title": "deployments",
          "type": "array",
          "items": {
            "type": "object",
            "$ref": "#/definitions/ManagedOnlineDeploymentSchema"
          }
        },
        "description": {
          "title": "description",
          "type": "string"
        },
        "id": {
          "title": "id",
          "type": "string"
        },
        "identity": {
          "type": "object",
          "$ref": "#/definitions/IdentitySchema"
        },
        "location": {
          "title": "location",
          "type": "string"
        },
        "name": {
          "title": "name",
          "type": "string"
        },
        "properties": {
          "title": "properties",
          "type": "object",
          "additionalProperties": {}
        },
        "provisioning_state": {
          "title": "provisioning_state",
          "type": "string"
        },
        "$schema": {
          "title": "$schema",
          "type": "string",
          "readonly": true
        },
        "scoring_uri": {
          "title": "scoring_uri",
          "type": "string",
          "description": "the endpoint uri that can be used for scoring"
        },
        "swagger_uri": {
          "title": "swagger_uri",
          "type": "string"
        },
        "tags": {
          "title": "tags",
          "type": "object",
          "additionalProperties": {}
        },
        "traffic": {
          "title": "traffic",
          "type": "object",
          "additionalProperties": {
            "title": "traffic",
            "type": "number",
            "format": "integer"
          }
        },
        "type": {
          "title": "type",
          "type": "string"
        }
      },
      "additionalProperties": false
    }
  },
  "$ref": "#/definitions/ManagedOnlineEndpointSchema"
}

Remarks

Key Description
$schema [Optional] The YAML schema. You can view the schema in the above example in a browser to see all available options in the YAML file.
name Name of the endpoint. Needs to be unique at the Azure region level.
traffic Percentage of traffic from endpoint to divert to each deployment. Traffic values need to sum to 100.
auth_mode Use key for key-based authentication and aml_token for Azure Machine Learning token-based authentication. key doesn't expire, but aml_token does. (Get the most recent token with the az ml endpoint list-keys command.) The schema also lists aad_token as an allowed value.
identity Used to configure system-assigned and user-assigned managed identities.
app_insights_enabled True to enable integration with Azure AppInsights associated with your Azure Machine Learning workspace. False by default.
tags Dictionary of Azure Tags to be associated with the Endpoint.
description Description of the endpoint.
target If this key isn't defined, the endpoint will be deployed as a managed online endpoint. To use AKS, set the value of this key to the name of the registered compute target, such as target: azureml:my-aks.
deployments Contains a list of deployments to be created in the endpoint. In the sample YAML, there is only one deployment, named blue.

Attributes of the deployments key

Key Description
name The name of the deployment.
model The name of the registered model version in the form model: azureml:my-model:1. You can also specify model properties inline: name, version, and local_path. The model files will be uploaded and registered automatically. A downside of inline specification is that you must increment the version manually if you want to update the model files.
code_configuration.code.local_path The directory that contains all the Python source code for scoring the model. Nested directories/packages are supported.
code_configuration.scoring_script The Python file in the above scoring directory. This Python code must have an init() function and a run() function. The function init() will be called after the model is created or updated (you can use it to cache the model in memory, and so forth). The run() function is called at every invocation of the endpoint to do the actual scoring/prediction.
environment Contains the details of the Azure Machine Learning environment to host the model and code. As a best practice for production, you should separately register the model and environment and specify the registered name and version in the YAML. For example, environment: azureml:my-env:1.
instance_type The VM SKU to host your deployment instances. For more information, see Managed online endpoints supported VM SKUs.
scale_settings.scale_type Currently, this value must be manual. To scale up or scale down after the endpoint and deployment are created, update the instance_count in the YAML and run the command az ml endpoint update -n $ENDPOINT_NAME --file <yaml filepath>.
scale_settings.instance_count The number of instances in the deployment. Base the value on the workload you expect. For high availability, Microsoft recommends you set it to at least 3.
scale_settings.min_instances The minimum number of instances to always be present.
scale_settings.max_instances The maximum number of instances that the deployment can scale to. The quota will be reserved for max_instances.
request_settings.request_timeout_ms The scoring timeout in milliseconds. The default value is 5000 for managed online endpoints.
request_settings.max_concurrent_requests_per_instance The maximum number of concurrent requests per node allowed per deployment. Defaults to 1. Do not change this setting from the default value of 1 unless instructed by Microsoft Technical Support or a member of the Azure Machine Learning team.
request_settings.max_queue_wait_ms The maximum amount of time a request will stay in the queue (in milliseconds). Defaults to 500.
liveness_probe Liveness probe monitors the health of the container regularly.
liveness_probe.period How often (in seconds) to perform the liveness probe. Defaults to 10 seconds. Minimum value is 1.
liveness_probe.initial_delay The number of seconds after the container has started before liveness probes are initiated. Defaults to 10.
liveness_probe.timeout The number of seconds after which the liveness probe times out. Defaults to 2 seconds. Minimum value is 1.
liveness_probe.failure_threshold The system will try failure_threshold times before giving up. Defaults to 30. Minimum value is 1.
liveness_probe.success_threshold The minimum consecutive successes for the liveness probe to be considered successful after having failed. Defaults to 1. Minimum value is 1.
readiness_probe Readiness probe validates if the container is ready to serve traffic. The properties and defaults are the same as liveness probe.
tags A dictionary of Azure Tags you want associated with the deployment.
description A description of the deployment.

Next steps