Add new sample manifests for GKE AI workloads

These manifests will be embedded in new documentation at https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/introduction that explains how to instrument and observe these workloads.
GoogleCloudPlatform · Jan 26, 2025 · 349bd4e · 349bd4e
1 parent 118f0ef
commit 349bd4e
Show file tree

Hide file tree

Showing 6 changed files with 57 additions and 0 deletions.
diff --git a/examples/nvidia-triton/README.md b/examples/nvidia-triton/README.md
@@ -0,0 +1,3 @@
+# NVIDIA Triton sample manifests
+
+Please refer to the [Google Cloud documentation](https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/nvidia-triton) for how to use these manifests.
diff --git a/examples/nvidia-triton/pod-monitoring.yaml b/examples/nvidia-triton/pod-monitoring.yaml
@@ -0,0 +1,16 @@
+apiVersion: monitoring.googleapis.com/v1
+kind: PodMonitoring  # metrics scrape config for the NVIDIA Triton sample
+metadata:
+  labels:
+    app.kubernetes.io/part-of: google-cloud-managed-prometheus
+    app.kubernetes.io/name: triton
+  name: triton
+spec:
+  selector:
+    matchLabels:
+      app: triton  # targets pods carrying the label app=triton
+  endpoints:
+  - path: /metrics  # single endpoint: /metrics on port 8002, every 30s
+    port: 8002
+    scheme: http
+    interval: 30s
diff --git a/examples/tgi/README.md b/examples/tgi/README.md
@@ -0,0 +1,3 @@
+# Text Generation Inference (TGI) sample manifests
+
+Please refer to the [Google Cloud documentation](https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/tgi) for how to use these manifests.
diff --git a/examples/tgi/pod-monitoring.yaml b/examples/tgi/pod-monitoring.yaml
@@ -0,0 +1,16 @@
+apiVersion: monitoring.googleapis.com/v1
+kind: PodMonitoring  # metrics scrape config for the TGI sample
+metadata:
+  labels:
+    app.kubernetes.io/part-of: google-cloud-managed-prometheus
+    app.kubernetes.io/name: tgi
+  name: tgi
+spec:
+  selector:
+    matchLabels:
+      app: tgi-gemma-server  # targets pods carrying this app label
+  endpoints:
+  - path: /metrics  # single endpoint: /metrics on port 8080, every 30s
+    port: 8080
+    scheme: http
+    interval: 30s
diff --git a/examples/vllm/README.md b/examples/vllm/README.md
@@ -0,0 +1,3 @@
+# vLLM sample manifests
+
+Please refer to the [Google Cloud documentation](https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/vllm) for how to use these manifests.
diff --git a/examples/vllm/pod-monitoring.yaml b/examples/vllm/pod-monitoring.yaml
@@ -0,0 +1,16 @@
+apiVersion: monitoring.googleapis.com/v1
+kind: PodMonitoring  # metrics scrape config for the vLLM sample
+metadata:
+  labels:
+    app.kubernetes.io/part-of: google-cloud-managed-prometheus
+    app.kubernetes.io/name: vllm
+  name: vllm
+spec:
+  selector:
+    matchLabels:
+      app: vllm-gemma-server  # targets pods carrying this app label
+  endpoints:
+  - path: /metrics  # single endpoint: /metrics on port 8000, every 30s
+    port: 8000
+    scheme: http
+    interval: 30s
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# NVIDIA Triton sample manifests

		Please refer to the [Google Cloud documentation](https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/nvidia-triton) for how to use these manifests.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# Text Generation Inference (TGI) sample manifests

		Please refer to the [Google Cloud documentation](https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/tgi) for how to use these manifests.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# vLLM sample manifests

		Please refer to the [Google Cloud documentation](https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/vllm) for how to use these manifests.