From 669cc76911d27945b3a26607d0ae0db6cdfa2a96 Mon Sep 17 00:00:00 2001
From: rpancham
Date: Sat, 21 Sep 2024 00:04:23 +0530
Subject: [PATCH 1/2] add automation tests for keras grpc

---
Reviewer notes (this area is ignored by `git am`):
  * Adds a Triton ServingRuntime template named triton-keras-grpc, a gRPC
    request fixture, an expected-response fixture and the UI test
    "Test KERAS Model Grpc Inference Via UI (Triton on Kserve)".
  * Raises the retry interval of three pod-wait keywords in OCP.resource
    from 3s to 15s.
  * Drops ${KERAS_MODEL_NAME} and uses ${PYTORCH_MODEL_NAME} in the Keras
    tests. ${PYTORCH_MODEL_NAME} is defined outside this patch -- TODO
    confirm it resolves to "resnet50", the model_name in the gRPC fixture.
  * ServingRuntime: `volumes` now lives under `spec` and the tmpfs settings
    are nested under `emptyDir`, so the `shm` volumeMount resolves.
  * NOTE(review): the expected-output fixture contains the key
    "raw_output_contentsparameters", which looks like two keys fused
    together, and the input fixture sends an empty raw_input_contents --
    verify both against a real ModelInfer exchange.
  * NOTE(review): the result of `Inference Comparison` is only logged in the
    new test; confirm whether that keyword fails on mismatch by itself.

 .../kserve-triton-keras-gRPC-input.json       |  1 +
 .../kserve-triton-keras-gRPC-output.json      |  1 +
 .../triton_keras_gRPC_servingruntime.yaml     | 63 +++++++++++++++++++
 ods_ci/tests/Resources/OCP.resource           |  6 +-
 ...1009__model_serving_triton_on_kserve.robot | 60 ++++++++++++++++--
 5 files changed, 122 insertions(+), 9 deletions(-)
 create mode 100644 ods_ci/tests/Resources/Files/triton/kserve-triton-keras-gRPC-input.json
 create mode 100644 ods_ci/tests/Resources/Files/triton/kserve-triton-keras-gRPC-output.json
 create mode 100644 ods_ci/tests/Resources/Files/triton/triton_keras_gRPC_servingruntime.yaml

diff --git a/ods_ci/tests/Resources/Files/triton/kserve-triton-keras-gRPC-input.json b/ods_ci/tests/Resources/Files/triton/kserve-triton-keras-gRPC-input.json
new file mode 100644
index 000000000..8fdd88be7
--- /dev/null
+++ b/ods_ci/tests/Resources/Files/triton/kserve-triton-keras-gRPC-input.json
@@ -0,0 +1 @@
+{"id":"test1","model_name":"resnet50","model_version":"1","inputs":[{"name":"keras_tensor","datatype":"FP32","shape":[1,224,224,3]}],"outputs":[{"name":"output_0"}],"raw_input_contents":[""]}
\ No newline at end of file
diff --git a/ods_ci/tests/Resources/Files/triton/kserve-triton-keras-gRPC-output.json b/ods_ci/tests/Resources/Files/triton/kserve-triton-keras-gRPC-output.json
new file mode 100644
index 000000000..04338013c
--- /dev/null
+++ b/ods_ci/tests/Resources/Files/triton/kserve-triton-keras-gRPC-output.json
@@ -0,0 +1 @@
+{"outputs":[{"shape":["1","1000"],"parameters":{},"name":"output_0","datatype":"FP32","contents":null}],"raw_output_contentsparameters":{},"model_name":"resnet50","model_version":"1","id":"test1"}
\ No newline at end of file
diff --git a/ods_ci/tests/Resources/Files/triton/triton_keras_gRPC_servingruntime.yaml b/ods_ci/tests/Resources/Files/triton/triton_keras_gRPC_servingruntime.yaml
new file mode 100644
index 000000000..9709ee579
--- /dev/null
+++ b/ods_ci/tests/Resources/Files/triton/triton_keras_gRPC_servingruntime.yaml
@@ -0,0 +1,63 @@
+apiVersion: serving.kserve.io/v1alpha1
+kind: ServingRuntime
+metadata:
+  name: triton-keras-grpc
+spec:
+  annotations:
+    prometheus.kserve.io/path: /metrics
+    prometheus.kserve.io/port: "8002"
+  containers:
+    - args:
+        - tritonserver
+        - --model-store=/mnt/models
+        - --grpc-port=9000
+        - --http-port=8080
+        - --allow-grpc=true
+        - --allow-http=true
+      image: nvcr.io/nvidia/tritonserver:23.05-py3
+      name: kserve-container
+      ports:
+        - containerPort: 9000
+          name: h2c
+          protocol: TCP
+      volumeMounts:
+        - mountPath: /dev/shm
+          name: shm
+      resources:
+        limits:
+          cpu: "1"
+          memory: 2Gi
+        requests:
+          cpu: "1"
+          memory: 2Gi
+  protocolVersions:
+    - v2
+    - grpc-v2
+  supportedModelFormats:
+    - autoSelect: true
+      name: tensorrt
+      priority: 1
+      version: "8"
+    - autoSelect: true
+      name: tensorflow
+      priority: 1
+      version: "1"
+    - autoSelect: true
+      name: tensorflow
+      priority: 1
+      version: "2"
+    - autoSelect: true
+      name: onnx
+      priority: 1
+      version: "1"
+    - name: pytorch
+      version: "1"
+    - autoSelect: true
+      name: triton
+      priority: 1
+      version: "2"
+  volumes:
+    - emptyDir:
+        medium: Memory
+        sizeLimit: 2Gi
+      name: shm
\ No newline at end of file
diff --git a/ods_ci/tests/Resources/OCP.resource b/ods_ci/tests/Resources/OCP.resource
index f2bcc1b88..c1382ebab 100644
--- a/ods_ci/tests/Resources/OCP.resource
+++ b/ods_ci/tests/Resources/OCP.resource
@@ -111,7 +111,7 @@ Wait For Pods To Be Ready
     ...    This keyword can also check for the expected number of the pod replicas using `${exp_replicas}`
     ...    argument.
     [Arguments]    ${label_selector}    ${namespace}    ${timeout}=600s    ${exp_replicas}=${NONE}
-    Wait Until Keyword Succeeds    ${timeout}    3s
+    Wait Until Keyword Succeeds    ${timeout}    15s
     ...    Check If Pod Exists    namespace=${namespace}    label_selector=${label_selector}
     ...    status_only=${FALSE}
     ${rc}    ${out}=    Run And Return Rc And Output
@@ -147,7 +147,7 @@ Wait For Deployment Replica To Be Ready
 
 Wait For Pods To Succeed
     [Arguments]    ${label_selector}    ${namespace}    ${timeout}=300s    ${exp_replicas}=${NONE}
-    Wait Until Keyword Succeeds    ${timeout}    3s
+    Wait Until Keyword Succeeds    ${timeout}    15s
     ...    Check If Pod Exists    namespace=${namespace}    label_selector=${label_selector}
     ...    status_only=${FALSE}
     ${rc}    ${out}=    Run And Return Rc And Output
@@ -164,7 +164,7 @@ Wait For Pods To Succeed
 
 Wait For Pods To Be Terminated
     [Arguments]    ${label_selector}    ${namespace}    ${timeout}=180s
-    Wait Until Keyword Succeeds    ${timeout}    3s
+    Wait Until Keyword Succeeds    ${timeout}    15s
     ...    Check If Pod Exists    namespace=${namespace}    label_selector=${label_selector}
     ...    status_only=${FALSE}
     ${rc}    ${out}=    Run And Return Rc And Output
diff --git a/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve.robot b/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve.robot
index e449146f6..4aeb19230 100644
--- a/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve.robot
+++ b/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve.robot
@@ -46,11 +46,13 @@ ${TENSORFLOW_GRPC_RUNTIME_NAME}=    triton-tensorflow-grpc
 ${TENSORFLOW_RUNTIME_FILEPATH}=    ${RESOURCES_DIRPATH}/triton_tensorflow_gRPC_servingruntime.yaml
 ${EXPECTED_INFERENCE_GRPC_OUTPUT_FILE_TENSORFLOW}=    tests/Resources/Files/triton/kserve-triton-inception_graphdef-gRPC-output.json
 ${KERAS_RUNTIME_NAME}=    triton-keras-rest
-${KERAS_MODEL_NAME}=    resnet50
 ${KERAS_RUNTIME_FILEPATH}=    ${RESOURCES_DIRPATH}/triton_keras_rest_servingruntime.yaml
 ${INFERENCE_REST_INPUT_KERAS}=    @tests/Resources/Files/triton/kserve-triton-keras-rest-input.json
 ${EXPECTED_INFERENCE_REST_OUTPUT_FILE_KERAS}=    tests/Resources/Files/triton/kserve-triton-keras-rest-output.json
-
+${KERAS_RUNTIME_NAME_GRPC}=    triton-keras-grpc
+${KERAS_GRPC_RUNTIME_FILEPATH}=    ${RESOURCES_DIRPATH}/triton_keras_gRPC_servingruntime.yaml
+${INFERENCE_GRPC_INPUT_KERAS}=    @tests/Resources/Files/triton/kserve-triton-keras-gRPC-input.json
+${EXPECTED_INFERENCE_GRPC_OUTPUT_FILE_KERAS}=    tests/Resources/Files/triton/kserve-triton-keras-gRPC-output.json
 
 
 *** Test Cases ***
@@ -208,6 +210,52 @@ Test Tensorflow Model Grpc Inference Via UI (Triton on Kserve)    # robocop: off
     ...    AND
     ...    Delete Serving Runtime Template From CLI    displayed_name=triton-tensorflow-grpc
 
+Test KERAS Model Grpc Inference Via UI (Triton on Kserve)    # robocop: off=too-long-test-case
+    [Documentation]    Test the deployment of a Keras model in Kserve using Triton (gRPC)
+    [Tags]    Sanity    RHOAIENG-10327
+    Open Data Science Projects Home Page
+    Create Data Science Project    title=${PRJ_TITLE}    description=${PRJ_DESCRIPTION}
+    ...    existing_project=${FALSE}
+    Open Dashboard Settings    settings_page=Serving runtimes
+    Upload Serving Runtime Template    runtime_filepath=${KERAS_GRPC_RUNTIME_FILEPATH}
+    ...    serving_platform=single    runtime_protocol=gRPC
+    Serving Runtime Template Should Be Listed    displayed_name=${KERAS_RUNTIME_NAME_GRPC}
+    ...    serving_platform=single
+    Recreate S3 Data Connection    project_title=${PRJ_TITLE}    dc_name=model-serving-connection
+    ...    aws_access_key=${S3.AWS_ACCESS_KEY_ID}    aws_secret_access=${S3.AWS_SECRET_ACCESS_KEY}
+    ...    aws_bucket_name=ods-ci-s3
+    Deploy Kserve Model Via UI    model_name=${PYTORCH_MODEL_NAME}    serving_runtime=${KERAS_RUNTIME_NAME_GRPC}
+    ...    data_connection=model-serving-connection    path=tritonkeras/model_repository/    model_framework=tensorflow - 2
+    ...    token=${TRUE}
+    Wait For Pods To Be Ready    label_selector=serving.kserve.io/inferenceservice=${PYTORCH_MODEL_NAME}
+    ...    namespace=${PRJ_TITLE}
+    ${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS}=    Load Json File    file_path=${EXPECTED_INFERENCE_GRPC_OUTPUT_FILE_KERAS}
+    ...    as_string=${TRUE}
+    ${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS}=    Load Json String    ${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS}
+    ${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS}=    Evaluate    json.dumps(${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS})
+    Log    ${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS}
+    Open Model Serving Home Page
+    ${host_url}=    Get Model Route Via UI    model_name=${PYTORCH_MODEL_NAME}
+    ${host}=    Evaluate    re.search(r"${PATTERN}", r"${host_url}").group(1)    re
+    Log    ${host}
+    ${token}=    Get Access Token Via UI    single_model=${TRUE}    model_name=${PYTORCH_MODEL_NAME}    project_name=${PRJ_TITLE}
+    ${inference_output}=    Query Model With GRPCURL    host=${host}    port=443
+    ...    endpoint=inference.GRPCInferenceService/ModelInfer
+    ...    json_body=@    input_filepath=${INFERENCE_GRPC_INPUT_KERAS}
+    ...    insecure=${True}    protobuf_file=${PROTOBUFF_FILE}    json_header="Authorization: Bearer ${token}"
+    Log    ${inference_output}
+    ${inference_output}=    Evaluate    json.dumps(${inference_output})
+    Log    ${inference_output}
+    ${result}    ${list}=    Inference Comparison    ${EXPECTED_INFERENCE_GRPC_OUTPUT_KERAS}    ${inference_output}
+    Log    ${result}
+    Log    ${list}
+    [Teardown]    Run Keywords    Get Kserve Events And Logs    model_name=${PYTORCH_MODEL_NAME}
+    ...    project_title=${PRJ_TITLE}
+    ...    AND
+    ...    Clean All Models Of Current User
+    ...    AND
+    ...    Delete Serving Runtime Template From CLI    displayed_name=${KERAS_RUNTIME_NAME_GRPC}
+
 Test KERAS Model Inference Via UI(Triton on Kserve)
     [Documentation]    Test the deployment of an keras model in Kserve using Triton
     [Tags]    Sanity    RHOAIENG-10328
@@ -223,17 +271,17 @@ Test KERAS Model Inference Via UI(Triton on Kserve)
     Recreate S3 Data Connection    project_title=${PRJ_TITLE}    dc_name=model-serving-connection
     ...    aws_access_key=${S3.AWS_ACCESS_KEY_ID}    aws_secret_access=${S3.AWS_SECRET_ACCESS_KEY}
     ...    aws_bucket_name=ods-ci-s3
-    Deploy Kserve Model Via UI    model_name=${KERAS_MODEL_NAME}    serving_runtime=triton-keras-rest
+    Deploy Kserve Model Via UI    model_name=${PYTORCH_MODEL_NAME}    serving_runtime=triton-keras-rest
     ...    data_connection=model-serving-connection    path=tritonkeras/model_repository/    model_framework=tensorflow - 2
-    Wait For Pods To Be Ready    label_selector=serving.kserve.io/inferenceservice=${KERAS_MODEL_NAME}
+    Wait For Pods To Be Ready    label_selector=serving.kserve.io/inferenceservice=${PYTORCH_MODEL_NAME}
     ...    namespace=${PRJ_TITLE}    timeout=180s
     ${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}=    Load Json File
     ...    file_path=${EXPECTED_INFERENCE_REST_OUTPUT_FILE_KERAS}    as_string=${TRUE}
     Log    ${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}
     Run Keyword And Continue On Failure    Verify Model Inference With Retries
-    ...    ${KERAS_MODEL_NAME}    ${INFERENCE_REST_INPUT_KERAS}    ${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}
+    ...    ${PYTORCH_MODEL_NAME}    ${INFERENCE_REST_INPUT_KERAS}    ${EXPECTED_INFERENCE_REST_OUTPUT_KERAS}
     ...    token_auth=${FALSE}    project_title=${PRJ_TITLE}
-    [Teardown]    Run Keywords    Get Kserve Events And Logs    model_name=${KERAS_MODEL_NAME}
+    [Teardown]    Run Keywords    Get Kserve Events And Logs    model_name=${PYTORCH_MODEL_NAME}
     ...    project_title=${PRJ_TITLE}
     ...    AND
     ...    Clean All Models Of Current User

From d1144aaff65d9d3c5448b8876ad8b3e32b58c041 Mon Sep 17 00:00:00 2001
From: rpancham
Date: Thu, 26 Sep 2024 20:12:53 +0530
Subject: [PATCH 2/2] remove @ from file path

---
 .../1009__model_serving_triton_on_kserve.robot                  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve.robot b/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve.robot
index 4aeb19230..e84727d5b 100644
--- a/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve.robot
+++ b/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve.robot
@@ -51,7 +51,7 @@ ${INFERENCE_REST_INPUT_KERAS}=    @tests/Resources/Files/triton/kserve-triton-ke
 ${EXPECTED_INFERENCE_REST_OUTPUT_FILE_KERAS}=    tests/Resources/Files/triton/kserve-triton-keras-rest-output.json
 ${KERAS_RUNTIME_NAME_GRPC}=    triton-keras-grpc
 ${KERAS_GRPC_RUNTIME_FILEPATH}=    ${RESOURCES_DIRPATH}/triton_keras_gRPC_servingruntime.yaml
-${INFERENCE_GRPC_INPUT_KERAS}=    @tests/Resources/Files/triton/kserve-triton-keras-gRPC-input.json
+${INFERENCE_GRPC_INPUT_KERAS}=    tests/Resources/Files/triton/kserve-triton-keras-gRPC-input.json
 ${EXPECTED_INFERENCE_GRPC_OUTPUT_FILE_KERAS}=    tests/Resources/Files/triton/kserve-triton-keras-gRPC-output.json
 
 