华为云用户手册

  • URI PUT /apis/kubeflow.org/v1/namespaces/{namespace}/mxjobs/{name} 表1 Path参数 参数 是否必选 描述 name Yes name of the MXJob namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 pretty No If 'true', then the output is pretty printed.
  • 响应消息 响应参数: 响应参数的详细描述请参见表162。 响应示例: { "apiVersion": "kubeflow.org/v1", "kind": "MXJob", "metadata": { "creationTimestamp": "2019-07-29T03:34:33Z", "generation": 2, "name": "mxnet-job", "namespace": "kube-test", "resourceVersion": "75615427", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/mxjobs/mxnet-job", "uid": "c82e664f-b1b1-11e9-b310-b44326d0c915" }, "spec": { "cleanPodPolicy": "Running", "jobMode": "MXTrain", "mxReplicaSpecs": { "Scheduler": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Server": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Worker": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py --benchmark 1 --network resnet --batch-size 1 --num-epochs 1 --kv-store dist_sync --num-examples 500" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } } }, "ttlSecondsAfterFinished": 10000 }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-29T03:34:33Z", "lastUpdateTime": "2019-07-29T03:34:33Z", 
"message": "MXJob mxnet-job is created.", "reason": "MXJobCreated", "status": "True", "type": "Created" }, { "lastTransitionTime": "2019-07-29T03:34:33Z", "lastUpdateTime": "2019-07-29T03:35:22Z", "message": "MXJob mxnet-job is running.", "reason": "MXJobRunning", "status": "True", "type": "Running" } ], "mxReplicaStatuses": { "Scheduler": { "active": 1 }, "Server": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-29T03:34:33Z" } }
  • 请求消息 请求参数: 请求参数的详细描述请参见表162。 请求示例: 更改MXJob的结束存活时间ttlSecondsAfterFinished: { "apiVersion": "kubeflow.org/v1", "kind": "MXJob", "metadata": { "creationTimestamp": "2019-07-29T03:34:33Z", "generation": 2, "name": "mxnet-job", "namespace": "kube-test", "resourceVersion": "75615427", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/mxjobs/mxnet-job", "uid": "c82e664f-b1b1-11e9-b310-b44326d0c915" }, "spec": { "cleanPodPolicy": "Running", "jobMode": "MXTrain", "mxReplicaSpecs": { "Scheduler": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Server": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Worker": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py --benchmark 1 --network resnet --batch-size 1 --num-epochs 1 --kv-store dist_sync --num-examples 500" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } } }, "ttlSecondsAfterFinished": 10000 }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-29T03:34:33Z", 
"lastUpdateTime": "2019-07-29T03:34:33Z", "message": "MXJob mxnet-job is created.", "reason": "MXJobCreated", "status": "True", "type": "Created" }, { "lastTransitionTime": "2019-07-29T03:34:33Z", "lastUpdateTime": "2019-07-29T03:35:22Z", "message": "MXJob mxnet-job is running.", "reason": "MXJobRunning", "status": "True", "type": "Running" } ], "mxReplicaStatuses": { "Scheduler": { "active": 1 }, "Server": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-29T03:34:33Z" } }
  • 响应消息 响应参数: 响应参数的详细描述请参见表162。 响应示例: { "apiVersion": "kubeflow.org/v1", "kind": "MXJob", "metadata": { "creationTimestamp": "2019-07-24T08:58:47Z", "generation": 1, "labels": { "app": "test" }, "name": "mxnet-job", "namespace": "kube-test", "resourceVersion": "72482855", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/mxjobs/mxnet-job", "uid": "3f304b53-adf1-11e9-8041-340a9837e2a7" }, "spec": { "cleanPodPolicy": "Running", "jobMode": "MXTrain", "mxReplicaSpecs": { "Scheduler": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Server": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Worker": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py --benchmark 1 --network resnet --batch-size 1 --num-epochs 1 --kv-store dist_sync --num-examples 500" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } } } }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-24T08:58:47Z", "lastUpdateTime": "2019-07-24T08:58:47Z", "message": "MXJob mxnet-job is created.", "reason": 
"MXJobCreated", "status": "True", "type": "Created" }, { "lastTransitionTime": "2019-07-24T08:58:47Z", "lastUpdateTime": "2019-07-24T08:58:52Z", "message": "MXJob mxnet-job is running.", "reason": "MXJobRunning", "status": "True", "type": "Running" } ], "mxReplicaStatuses": { "Scheduler": { "active": 1 }, "Server": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-24T08:58:47Z" } }
  • URI PATCH /apis/kubeflow.org/v1/namespaces/{namespace}/mxjobs/{name} 表1 Path参数 参数 是否必选 描述 name Yes name of the MXJob namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 pretty No If 'true', then the output is pretty printed.
  • 请求消息 请求参数: “Content-Type”消息头说明请参见PATCH请求方法操作说明。 目前只支持“Merge Patch”。 请求示例: Content-Type: application/merge-patch+json { "metadata": { "labels": { "app": "test" } } }
  • URI DELETE /apis/kubeflow.org/v1/namespaces/{namespace}/mxjobs/{name} 表1 Path参数 参数 是否必选 描述 name Yes name of the MXJob namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 dryRun No When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed gracePeriodSeconds No The duration in seconds before the object should be deleted. Value must be non-negative integer. The value zero indicates delete immediately. If this value is nil, the default grace period for the specified type will be used. Defaults to a per object value if not specified. zero means delete immediately. orphanDependents No Deprecated: please use the PropagationPolicy, this field will be deprecated in 1.7. Should the dependent objects be orphaned. If true/false, the “orphan” finalizer will be added to/removed from the object’s finalizers list. Either this field or PropagationPolicy may be set, but not both. propagationPolicy No Whether and how garbage collection will be performed. Either this field or OrphanDependents may be set, but not both. The default policy is decided by the existing finalizer set in the metadata.finalizers and the resource-specific default policy. Acceptable values are: ‘Orphan’ - orphan the dependents; ‘Background’ - allow the garbage collector to delete the dependents in the background; ‘Foreground’ - a cascading policy that deletes all dependents in the foreground. pretty No If 'true', then the output is pretty printed.
  • 响应消息 响应参数: 响应参数的详细描述请参见表72。 响应示例: { "kind": "Status", "apiVersion": "v1", "metadata": {}, "status": "Success", "details": { "name": "mxnet-job", "group": "kubeflow.org", "kind": "mxjobs", "uid": "fac6dcd2-adee-11e9-8041-340a9837e2a7" } }
  • URI DELETE /apis/kubeflow.org/v1/namespaces/{namespace}/mxjobs 表1 Path参数 参数 是否必选 描述 namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 fieldSelector No A selector to restrict the list of returned objects by their fields. Defaults to everything. labelSelector No A selector to restrict the list of returned objects by their labels. Defaults to everything. limit No limit is a maximum number of responses to return for a list call. If more items exist, the server will set the continue field on the list metadata to a value that can be used with the same initial query to retrieve the next set of results. Setting a limit may return fewer than the requested amount of items (up to zero items) in the event all requested objects are filtered out and clients should only use the presence of the continue field to determine whether more results are available. Servers may choose not to support the limit argument and will return all of the available results. If limit is specified and the continue field is empty, clients may assume that no more results are available. This field is not supported if watch is true. The server guarantees that the objects returned when using continue will be identical to issuing a single list call without a limit - that is, no objects created, modified, or deleted after the first request is issued will be included in any subsequent continued requests. This is sometimes referred to as a consistent snapshot, and ensures that a client that is using limit to receive smaller chunks of a very large result can ensure they see all possible objects. If objects are updated during a chunked list the version of the object that was present at the time the first list result was calculated is returned. resourceVersion No When specified with a watch call, shows changes that occur after that particular version of a resource. Defaults to changes from the beginning of history. 
When specified for list: - if unset, then the result is returned from remote storage based on quorum-read flag; - if it’s 0, then we simply return what we currently have in cache, no guarantee; - if set to non zero, then the result is at least as fresh as given rv. timeoutSeconds No Timeout for the list/watch call. This limits the duration of the call, regardless of any activity or inactivity. watch No Watch for changes to the described resources and return them as a stream of add, update, and remove notifications. Specify resourceVersion.
  • 响应消息 响应参数: 响应参数的详细描述请参见表72。 响应示例: { "apiVersion": "kubeflow.org/v1", "items": [ { "apiVersion": "kubeflow.org/v1", "kind": "MXJob", "metadata": { "creationTimestamp": "2019-07-24T08:57:01Z", "generation": 1, "name": "mxnet-job", "namespace": "kube-test", "resourceVersion": "72481787", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/mxjobs/mxnet-job", "uid": "001b5d2a-adf1-11e9-ba3a-b44326d0c915" }, "spec": { "cleanPodPolicy": "Running", "jobMode": "MXTrain", "mxReplicaSpecs": { "Scheduler": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Server": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Worker": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py --benchmark 1 --network resnet --batch-size 1 --num-epochs 1 --kv-store dist_sync --num-examples 500" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } } } }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-24T08:57:01Z", "lastUpdateTime": "2019-07-24T08:57:01Z", "message": "MXJob mxnet-job is created.", 
"reason": "MXJobCreated", "status": "True", "type": "Created" }, { "lastTransitionTime": "2019-07-24T08:57:01Z", "lastUpdateTime": "2019-07-24T08:57:06Z", "message": "MXJob mxnet-job is running.", "reason": "MXJobRunning", "status": "True", "type": "Running" } ], "mxReplicaStatuses": { "Scheduler": { "active": 1 }, "Server": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-24T08:57:01Z" } } ], "kind": "MXJobList", "metadata": { "continue": "", "resourceVersion": "72482111", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/mxjobs" } }
  • URI GET /apis/kubeflow.org/v1/namespaces/{namespace}/mxjobs 表1 Path参数 参数 是否必选 描述 namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 fieldSelector No A selector to restrict the list of returned objects by their fields. Defaults to everything. labelSelector No A selector to restrict the list of returned objects by their labels. Defaults to everything. limit No limit is a maximum number of responses to return for a list call. If more items exist, the server will set the continue field on the list metadata to a value that can be used with the same initial query to retrieve the next set of results. Setting a limit may return fewer than the requested amount of items (up to zero items) in the event all requested objects are filtered out and clients should only use the presence of the continue field to determine whether more results are available. Servers may choose not to support the limit argument and will return all of the available results. If limit is specified and the continue field is empty, clients may assume that no more results are available. This field is not supported if watch is true. The server guarantees that the objects returned when using continue will be identical to issuing a single list call without a limit - that is, no objects created, modified, or deleted after the first request is issued will be included in any subsequent continued requests. This is sometimes referred to as a consistent snapshot, and ensures that a client that is using limit to receive smaller chunks of a very large result can ensure they see all possible objects. If objects are updated during a chunked list the version of the object that was present at the time the first list result was calculated is returned. resourceVersion No When specified with a watch call, shows changes that occur after that particular version of a resource. Defaults to changes from the beginning of history. 
When specified for list: - if unset, then the result is returned from remote storage based on quorum-read flag; - if it’s 0, then we simply return what we currently have in cache, no guarantee; - if set to non zero, then the result is at least as fresh as given rv. timeoutSeconds No Timeout for the list/watch call. This limits the duration of the call, regardless of any activity or inactivity. watch No Watch for changes to the described resources and return them as a stream of add, update, and remove notifications. Specify resourceVersion.
  • 响应消息 响应参数: 响应参数的详细描述请参见表169。 响应示例: { "apiVersion": "kubeflow.org/v1", "items": [ { "apiVersion": "kubeflow.org/v1", "kind": "MXJob", "metadata": { "creationTimestamp": "2019-07-24T08:42:33Z", "generation": 1, "name": "mxnet-job", "namespace": "kube-test", "resourceVersion": "72476154", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/mxjobs/mxnet-job", "uid": "fac6dcd2-adee-11e9-8041-340a9837e2a7" }, "spec": { "cleanPodPolicy": "Running", "jobMode": "MXTrain", "mxReplicaSpecs": { "Scheduler": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Server": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Worker": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py --benchmark 1 --network resnet --batch-size 1 --num-epochs 1 --kv-store dist_sync --num-examples 500" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } } } }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-24T08:42:33Z", "lastUpdateTime": "2019-07-24T08:42:33Z", "message": "MXJob mxnet-job is created.", 
"reason": "MXJobCreated", "status": "True", "type": "Created" }, { "lastTransitionTime": "2019-07-24T08:42:33Z", "lastUpdateTime": "2019-07-24T08:42:52Z", "message": "MXJob mxnet-job is running.", "reason": "MXJobRunning", "status": "True", "type": "Running" } ], "mxReplicaStatuses": { "Scheduler": { "active": 1 }, "Server": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-24T08:42:33Z" } } ], "kind": "MXJobList", "metadata": { "continue": "", "resourceVersion": "72478902", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/mxjobs" } }
  • URI GET /apis/kubeflow.org/v1/namespaces/{namespace}/mxjobs/{name} 表1 Path参数 参数 是否必选 描述 name Yes name of the MXJob namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 pretty No If 'true', then the output is pretty printed.
  • 响应消息 响应参数: 响应参数的详细描述请参考表162。 响应示例: { "apiVersion": "kubeflow.org/v1", "kind": "MXJob", "metadata": { "creationTimestamp": "2019-07-24T08:42:33Z", "generation": 1, "name": "mxnet-job", "namespace": "kube-test", "resourceVersion": "72476154", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/mxjobs/mxnet-job", "uid": "fac6dcd2-adee-11e9-8041-340a9837e2a7" }, "spec": { "cleanPodPolicy": "Running", "jobMode": "MXTrain", "mxReplicaSpecs": { "Scheduler": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Server": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Worker": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py --benchmark 1 --network resnet --batch-size 1 --num-epochs 1 --kv-store dist_sync --num-examples 500" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } } } }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-24T08:42:33Z", "lastUpdateTime": "2019-07-24T08:42:33Z", "message": "MXJob mxnet-job is created.", "reason": "MXJobCreated", "status": "True", 
"type": "Created" }, { "lastTransitionTime": "2019-07-24T08:42:33Z", "lastUpdateTime": "2019-07-24T08:42:52Z", "message": "MXJob mxnet-job is running.", "reason": "MXJobRunning", "status": "True", "type": "Running" } ], "mxReplicaStatuses": { "Scheduler": { "active": 1 }, "Server": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-24T08:42:33Z" } }
  • 请求消息 请求参数: 请求参数的详细描述请参考表162。 请求示例: { "apiVersion": "kubeflow.org/v1", "kind": "MXJob", "metadata": { "name": "mxnet-job" }, "spec": { "cleanPodPolicy": "Running", "jobMode": "MXTrain", "mxReplicaSpecs": { "Scheduler": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "imagePullSecrets": [ { "name": "imagepull-secret" } ], "containers": [ { "name": "mxnet", "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "command": [ "/bin/bash" ], "args": [ "-c", "python train_imagenet.py" ], "resources": { "requests": { "cpu": "1000m", "memory": "2Gi" }, "limits": { "cpu": "1000m", "memory": "2Gi" } } } ] } } }, "Server": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "imagePullSecrets": [ { "name": "imagepull-secret" } ], "containers": [ { "name": "mxnet", "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "command": [ "/bin/bash" ], "args": [ "-c", "python train_imagenet.py" ], "resources": { "requests": { "cpu": "1000m", "memory": "2Gi" }, "limits": { "cpu": "1000m", "memory": "2Gi" } } } ] } } }, "Worker": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "imagePullSecrets": [ { "name": "imagepull-secret" } ], "containers": [ { "name": "mxnet", "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "command": [ "/bin/bash" ], "args": [ "-c", "python train_imagenet.py --benchmark 1 --network resnet --batch-size 1 --num-epochs 1 --kv-store dist_sync --num-examples 500" ], "resources": { "requests": { "cpu": "1000m", "memory": "2Gi" }, "limits": { "cpu": "1000m", "memory": "2Gi" } } } ] } } } } } }
  • URI POST /apis/kubeflow.org/v1/namespaces/{namespace}/mxjobs 表1 Path参数 参数 是否必选 描述 namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 pretty No If 'true', then the output is pretty printed.
  • 响应消息 响应参数: 响应参数的详细描述请参考表162。 响应示例: { "apiVersion": "kubeflow.org/v1", "kind": "MXJob", "metadata": { "creationTimestamp": "2019-07-24T08:42:33Z", "generation": 1, "name": "mxnet-job", "namespace": "kube-test", "resourceVersion": "72476154", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/mxjobs/mxnet-job", "uid": "fac6dcd2-adee-11e9-8041-340a9837e2a7" }, "spec": { "cleanPodPolicy": "Running", "jobMode": "MXTrain", "mxReplicaSpecs": { "Scheduler": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Server": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } }, "Worker": { "replicas": 1, "restartPolicy": "Never", "template": { "spec": { "containers": [ { "args": [ "-c", "python train_imagenet.py --benchmark 1 --network resnet --batch-size 1 --num-epochs 1 --kv-store dist_sync --num-examples 500" ], "command": [ "/bin/bash" ], "image": "*.*.*.215:20202/cci/mxnet:xsw-dis", "name": "mxnet", "resources": { "limits": { "cpu": "1000m", "memory": "2Gi" }, "requests": { "cpu": "1000m", "memory": "2Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ] } } } } }, "status": { } }
  • URI PUT /apis/kubeflow.org/v1/namespaces/{namespace}/tfjobs/{name} 表1 Path参数 参数 是否必选 描述 name Yes name of the TFJob namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 pretty No If 'true', then the output is pretty printed.
  • 响应消息 响应参数: 响应参数的详细描述请参考表154。 响应示例: { "apiVersion": "kubeflow.org/v1", "kind": "TFJob", "metadata": { "creationTimestamp": "2019-07-24T07:17:01Z", "generation": 2, "labels": { "app": "test" }, "name": "tfjob-test", "namespace": "kube-test", "resourceVersion": "72447176", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/tfjobs/tfjob-test", "uid": "083cc6df-ade3-11e9-aaa4-340a9837e413" }, "spec": { "backoffLimit": 6, "tfReplicaSpecs": { "Ps": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--num_gpus=1", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } }, "Worker": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } } }, "ttlSecondsAfterFinished": 1000 }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-24T07:16:13Z", "lastUpdateTime": "2019-07-24T07:16:13Z", "message": "TFJob tfjob-test is created.", 
"reason": "TFJobCreated", "status": "True", "type": "Created" }, { "lastTransitionTime": "2019-07-24T07:16:18Z", "lastUpdateTime": "2019-07-24T07:16:18Z", "message": "TFJob tfjob-test is running.", "reason": "TFJobRunning", "status": "True", "type": "Running" } ], "replicaStatuses": { "PS": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-24T07:16:13Z" } }
  • 请求消息 请求参数: 请求参数的详细描述请参见表154。 请求示例: 更改TFJob的结束存活时间ttlSecondsAfterFinished: { "apiVersion": "kubeflow.org/v1", "kind": "TFJob", "metadata": { "creationTimestamp": "2019-07-24T07:17:01Z", "generation": 2, "labels": { "app": "test" }, "name": "tfjob-test", "namespace": "kube-test", "resourceVersion": "72447176", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/tfjobs/tfjob-test", "uid": "083cc6df-ade3-11e9-aaa4-340a9837e413" }, "spec": { "backoffLimit": 6, "tfReplicaSpecs": { "Ps": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--num_gpus=1", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } }, "Worker": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } } }, "ttlSecondsAfterFinished": 1000 }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-24T07:16:13Z", "lastUpdateTime": "2019-07-24T07:16:13Z", "message": 
"TFJob tfjob-test is created.", "reason": "TFJobCreated", "status": "True", "type": "Created" }, { "lastTransitionTime": "2019-07-24T07:16:18Z", "lastUpdateTime": "2019-07-24T07:16:18Z", "message": "TFJob tfjob-test is running.", "reason": "TFJobRunning", "status": "True", "type": "Running" } ], "replicaStatuses": { "PS": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-24T07:16:13Z" } }
  • 请求消息 请求参数: “Content-Type”消息头说明请参见PATCH请求方法操作说明。 目前只支持“Merge Patch”。 请求示例: Content-Type: application/merge-patch+json { "metadata": { "labels": { "app": "test" } } }
  • 响应消息 响应参数: 响应参数的详细描述请参见表154。 响应示例: { "apiVersion": "kubeflow.org/v1", "kind": "TFJob", "metadata": { "creationTimestamp": "2019-07-24T07:17:01.000Z", "generation": 1, "labels": { "app": "test" }, "name": "tfjob-test", "namespace": "kube-test", "resourceVersion": "72444814", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/tfjobs/tfjob-test", "uid": "083cc6df-ade3-11e9-aaa4-340a9837e413" }, "spec": { "backoffLimit": 6, "tfReplicaSpecs": { "Ps": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--num_gpus=1", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } }, "Worker": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } } } }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-24T07:16:13.000Z", "lastUpdateTime": "2019-07-24T07:16:13.000Z", "message": "TFJob tfjob-test is created.", "reason": "TFJobCreated", 
"status": "True", "type": "Created" }, { "lastTransitionTime": "2019-07-24T07:16:18.000Z", "lastUpdateTime": "2019-07-24T07:16:18.000Z", "message": "TFJob tfjob-test is running.", "reason": "TFJobRunning", "status": "True", "type": "Running" } ], "replicaStatuses": { "PS": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-24T07:16:13.000Z" } }
  • URI PATCH /apis/kubeflow.org/v1/namespaces/{namespace}/tfjobs/{name} 表1 Path参数 参数 是否必选 描述 name Yes name of the TFJob namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 pretty No If 'true', then the output is pretty printed.
  • 响应消息 响应参数: 响应参数的详细描述请参见表72。 响应示例: { "kind": "Status", "apiVersion": "v1", "metadata": {}, "status": "Success", "details": { "name": "tfjob-test", "group": "kubeflow.org", "kind": "tfjobs", "uid": "f461f966-ad46-11e9-aaa4-340a9837e413" } }
  • URI DELETE /apis/kubeflow.org/v1/namespaces/{namespace}/tfjobs/{name} 表1 Path参数 参数 是否必选 描述 name Yes name of the TFJob namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 dryRun No When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed gracePeriodSeconds No The duration in seconds before the object should be deleted. Value must be non-negative integer. The value zero indicates delete immediately. If this value is nil, the default grace period for the specified type will be used. Defaults to a per object value if not specified. Zero means delete immediately. orphanDependents No Deprecated: please use the PropagationPolicy, this field will be deprecated in 1.7. Should the dependent objects be orphaned. If true/false, the “orphan” finalizer will be added to/removed from the object’s finalizers list. Either this field or PropagationPolicy may be set, but not both. propagationPolicy No Whether and how garbage collection will be performed. Either this field or OrphanDependents may be set, but not both. The default policy is decided by the existing finalizer set in the metadata.finalizers and the resource-specific default policy. Acceptable values are: ‘Orphan’ - orphan the dependents; ‘Background’ - allow the garbage collector to delete the dependents in the background; ‘Foreground’ - a cascading policy that deletes all dependents in the foreground. pretty No If 'true', then the output is pretty printed.
  • 响应消息 响应参数: 响应参数的详细描述请参考表168。 响应示例: { "apiVersion": "kubeflow.org/v1", "items": [ { "apiVersion": "kubeflow.org/v1", "kind": "TFJob", "metadata": { "creationTimestamp": "2019-07-23T12:39:47Z", "generation": 1, "name": "tfjob-test", "namespace": "kube-test", "resourceVersion": "72050567", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/tfjobs/tfjob-test", "uid": "f461f966-ad46-11e9-aaa4-340a9837e413" }, "spec": { "backoffLimit": 6, "tfReplicaSpecs": { "Ps": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--num_gpus=1", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } }, "Worker": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } } } }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-23T12:38:58Z", "lastUpdateTime": "2019-07-23T12:38:58Z", "message": "TFJob tfjob-test is created.", "reason": 
"TFJobCreated", "status": "True", "type": "Created" }, { "lastTransitionTime": "2019-07-23T12:39:30Z", "lastUpdateTime": "2019-07-23T12:39:30Z", "message": "TFJob tfjob-test is running.", "reason": "TFJobRunning", "status": "True", "type": "Running" } ], "replicaStatuses": { "PS": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-23T12:38:58Z" } } ], "kind": "TFJobList", "metadata": { "continue": "", "resourceVersion": "72353810", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/tfjobs" } }
  • URI DELETE /apis/kubeflow.org/v1/namespaces/{namespace}/tfjobs 表1 Path参数 参数 是否必选 描述 namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 fieldSelector No A selector to restrict the list of returned objects by their fields. Defaults to everything. labelSelector No A selector to restrict the list of returned objects by their labels. Defaults to everything. limit No limit is a maximum number of responses to return for a list call. If more items exist, the server will set the continue field on the list metadata to a value that can be used with the same initial query to retrieve the next set of results. Setting a limit may return fewer than the requested amount of items (up to zero items) in the event all requested objects are filtered out and clients should only use the presence of the continue field to determine whether more results are available. Servers may choose not to support the limit argument and will return all of the available results. If limit is specified and the continue field is empty, clients may assume that no more results are available. This field is not supported if watch is true. The server guarantees that the objects returned when using continue will be identical to issuing a single list call without a limit - that is, no objects created, modified, or deleted after the first request is issued will be included in any subsequent continued requests. This is sometimes referred to as a consistent snapshot, and ensures that a client that is using limit to receive smaller chunks of a very large result can ensure they see all possible objects. If objects are updated during a chunked list the version of the object that was present at the time the first list result was calculated is returned. resourceVersion No When specified with a watch call, shows changes that occur after that particular version of a resource. Defaults to changes from the beginning of history. 
When specified for list: - if unset, then the result is returned from remote storage based on quorum-read flag; - if it’s 0, then we simply return what we currently have in cache, no guarantee; - if set to non-zero, then the result is at least as fresh as the given resourceVersion. timeoutSeconds No Timeout for the list/watch call. This limits the duration of the call, regardless of any activity or inactivity. watch No Watch for changes to the described resources and return them as a stream of add, update, and remove notifications. Specify resourceVersion.
  • URI GET /apis/kubeflow.org/v1/namespaces/{namespace}/tfjobs 表1 Path参数 参数 是否必选 描述 namespace Yes object name and auth scope, such as for teams and projects 表2 Query参数 参数 是否必选 描述 fieldSelector No A selector to restrict the list of returned objects by their fields. Defaults to everything. labelSelector No A selector to restrict the list of returned objects by their labels. Defaults to everything. limit No limit is a maximum number of responses to return for a list call. If more items exist, the server will set the continue field on the list metadata to a value that can be used with the same initial query to retrieve the next set of results. Setting a limit may return fewer than the requested amount of items (up to zero items) in the event all requested objects are filtered out and clients should only use the presence of the continue field to determine whether more results are available. Servers may choose not to support the limit argument and will return all of the available results. If limit is specified and the continue field is empty, clients may assume that no more results are available. This field is not supported if watch is true. The server guarantees that the objects returned when using continue will be identical to issuing a single list call without a limit - that is, no objects created, modified, or deleted after the first request is issued will be included in any subsequent continued requests. This is sometimes referred to as a consistent snapshot, and ensures that a client that is using limit to receive smaller chunks of a very large result can ensure they see all possible objects. If objects are updated during a chunked list the version of the object that was present at the time the first list result was calculated is returned. resourceVersion No When specified with a watch call, shows changes that occur after that particular version of a resource. Defaults to changes from the beginning of history. 
When specified for list: - if unset, then the result is returned from remote storage based on quorum-read flag; - if it’s 0, then we simply return what we currently have in cache, no guarantee; - if set to non-zero, then the result is at least as fresh as the given resourceVersion. timeoutSeconds No Timeout for the list/watch call. This limits the duration of the call, regardless of any activity or inactivity. watch No Watch for changes to the described resources and return them as a stream of add, update, and remove notifications. Specify resourceVersion.
  • 响应消息 响应参数: 响应参数的详细描述请参见表168。 响应示例: { "apiVersion": "kubeflow.org/v1", "items": [ { "apiVersion": "kubeflow.org/v1", "kind": "TFJob", "metadata": { "creationTimestamp": "2019-07-23T12:39:47Z", "generation": 1, "name": "tfjob-test", "namespace": "kube-test", "resourceVersion": "72050567", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/tfjobs/tfjob-test", "uid": "f461f966-ad46-11e9-aaa4-340a9837e413" }, "spec": { "backoffLimit": 6, "tfReplicaSpecs": { "Ps": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--num_gpus=1", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } }, "Worker": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } } } }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-23T12:38:58Z", "lastUpdateTime": "2019-07-23T12:38:58Z", "message": "TFJob tfjob-test is created.", "reason": 
"TFJobCreated", "status": "True", "type": "Created" }, { "lastTransitionTime": "2019-07-23T12:39:30Z", "lastUpdateTime": "2019-07-23T12:39:30Z", "message": "TFJob tfjob-test is running.", "reason": "TFJobRunning", "status": "True", "type": "Running" } ], "replicaStatuses": { "PS": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-23T12:38:58Z" } } ], "kind": "TFJobList", "metadata": { "continue": "", "resourceVersion": "72353810", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/tfjobs" } }
  • 响应消息 响应参数: 响应参数的详细描述请参考表154。 响应示例: { "apiVersion": "kubeflow.org/v1", "kind": "TFJob", "metadata": { "creationTimestamp": "2019-07-23T12:39:47Z", "generation": 1, "name": "tfjob-test", "namespace": "kube-test", "resourceVersion": "72050567", "selfLink": "/apis/kubeflow.org/v1/namespaces/kube-test/tfjobs/tfjob-test", "uid": "f461f966-ad46-11e9-aaa4-340a9837e413" }, "spec": { "backoffLimit": 6, "tfReplicaSpecs": { "Ps": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--num_gpus=1", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } }, "Worker": { "replicas": 1, "template": { "spec": { "containers": [ { "args": [ "python", "/opt/tf-benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py", "--batch_size=1", "--model=resnet50", "--variable_update=parameter_server", "--flush_stdout=true", "--local_parameter_device=cpu", "--device=cpu", "--data_format=NHWC" ], "image": "*.*.*.215:20202/cci/tf-benchmarks-cpu:v1", "name": "tensorflow", "ports": [ { "containerPort": 2222, "name": "tfjob-port" } ], "resources": { "limits": { "cpu": "2", "memory": "4Gi" }, "requests": { "cpu": "2", "memory": "4Gi" } } } ], "imagePullSecrets": [ { "name": "imagepull-secret" } ], "restartPolicy": "OnFailure" } } } } }, "status": { "conditions": [ { "lastTransitionTime": "2019-07-23T12:38:58Z", "lastUpdateTime": "2019-07-23T12:38:58Z", "message": "TFJob tfjob-test is created.", "reason": "TFJobCreated", "status": "True", "type": "Created" }, { 
"lastTransitionTime": "2019-07-23T12:39:30Z", "lastUpdateTime": "2019-07-23T12:39:30Z", "message": "TFJob tfjob-test is running.", "reason": "TFJobRunning", "status": "True", "type": "Running" } ], "replicaStatuses": { "PS": { "active": 1 }, "Worker": { "active": 1 } }, "startTime": "2019-07-23T12:38:58Z" } }
共100000条