Skip to content

Commit

Permalink
Add support for GPU instance groups (#6)
Browse files Browse the repository at this point in the history
* Add support for GPU instance groups

* Install nvidia drivers

* Fix error handling in wait-for-kops. Fix patching so that it overwrites.

* add nvidia-docker-installer.sh script

* Upgrade geodesic. Fix wait-for-kops formatting.

* Upgrade docker-ce for docker-nvidia compatibility

* verbose bash execution

* reduce default size back to zero
  • Loading branch information
osterman authored Sep 6, 2018
1 parent a0e740d commit 3878dc6
Show file tree
Hide file tree
Showing 12 changed files with 582 additions and 5 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FROM cloudposse/build-harness:0.9.0 as build-harness

FROM cloudposse/geodesic:0.16.7
FROM cloudposse/geodesic:0.18.1

RUN apk add --update dialog

Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ It's 100% Open Source and licensed under the [APACHE2](LICENSE).
4. Install wrapper script: `make install`
5. Start the kiosk. `make run`

## References
- [Cluster Autoscaler for AWS](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/cloudprovider/aws)
- [Cluster Autoscaler for Kops](https://github.com/kubernetes/kops/blob/master/addons/cluster-autoscaler/)
- [Running GPU Instances on Kops](https://github.com/brunsgaard/kops-nvidia-docker-installer)

## Copyright

Copyright © 2018 [The Van Valen Lab](http://www.vanvalen.caltech.edu/)
Expand Down
237 changes: 237 additions & 0 deletions conf/addons/cluster-autoscaler.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
---
# ServiceAccount the cluster-autoscaler Deployment runs as; bound to the
# ClusterRole/Role below via the two bindings in this file.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    app: cluster-autoscaler
  name: cluster-autoscaler
  namespace: kube-system
---
# Cluster-wide permissions the autoscaler needs: watch nodes/pods to decide
# scaling, evict pods on scale-down, and record events for observability.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
  name: cluster-autoscaler
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    app: cluster-autoscaler
rules:
- apiGroups:
  - ""
  resources:
  - events
  - endpoints
  verbs:
  - create
  - patch
- apiGroups:
  - ""
  resources:
  - pods/eviction
  verbs:
  - create
- apiGroups:
  - ""
  resources:
  - pods/status
  verbs:
  - update
# Leader-election lock: only the named "cluster-autoscaler" endpoint.
- apiGroups:
  - ""
  resources:
  - endpoints
  resourceNames:
  - cluster-autoscaler
  verbs:
  - get
  - update
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - watch
  - list
  - get
  - update
- apiGroups:
  - ""
  resources:
  - pods
  - services
  - replicationcontrollers
  - persistentvolumeclaims
  - persistentvolumes
  verbs:
  - watch
  - list
  - get
- apiGroups:
  - extensions
  resources:
  - replicasets
  - daemonsets
  verbs:
  - watch
  - list
  - get
- apiGroups:
  - policy
  resources:
  - poddisruptionbudgets
  verbs:
  - watch
  - list
- apiGroups:
  - apps
  resources:
  - statefulsets
  verbs:
  - watch
  - list
  - get
- apiGroups:
  - storage.k8s.io
  resources:
  - storageclasses
  verbs:
  - watch
  - list
  - get

---

# Namespaced permissions: the autoscaler persists its status in the
# "cluster-autoscaler-status" ConfigMap in kube-system.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    app: cluster-autoscaler
rules:
- apiGroups:
  - ""
  resources:
  - configmaps
  verbs:
  - create
- apiGroups:
  - ""
  resources:
  - configmaps
  resourceNames:
  - cluster-autoscaler-status
  verbs:
  - delete
  - get
  - update

---

# Grant the ClusterRole above to the cluster-autoscaler ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: cluster-autoscaler
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    app: cluster-autoscaler
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-autoscaler
subjects:
- kind: ServiceAccount
  name: cluster-autoscaler
  namespace: kube-system

---

# Grant the namespaced Role above to the cluster-autoscaler ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    app: cluster-autoscaler
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: cluster-autoscaler
subjects:
- kind: ServiceAccount
  name: cluster-autoscaler
  namespace: kube-system

---

# The autoscaler itself. Runs on a master node and scales the GPU instance
# group between GPU_MIN_NODES and GPU_MAX_NODES. The '{{ getenv ... }}'
# expressions are gomplate templates rendered before kubectl apply.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    app: cluster-autoscaler
spec:
  replicas: 1
  selector:
    matchLabels:
      app: cluster-autoscaler
  template:
    metadata:
      labels:
        k8s-addon: cluster-autoscaler.addons.k8s.io
        app: cluster-autoscaler
      annotations:
        # For 1.6, we keep the old tolerations in case of a downgrade to 1.5
        scheduler.alpha.kubernetes.io/tolerations: '[{"key":"dedicated", "value":"master"}]'
        prometheus.io/scrape: 'true'
        prometheus.io/port: '8085'
    spec:
      # Pin to masters so the autoscaler survives scaling of worker groups.
      tolerations:
      - effect: NoSchedule
        key: node-role.kubernetes.io/master
      nodeSelector:
        kubernetes.io/role: master
      serviceAccountName: cluster-autoscaler
      containers:
      - name: cluster-autoscaler
        image: '{{ getenv "IMAGE" | default "k8s.gcr.io/cluster-autoscaler:v1.2.0" }}'
        livenessProbe:
          httpGet:
            path: /health-check
            port: 8085
        readinessProbe:
          httpGet:
            path: /health-check
            port: 8085
        resources:
          limits:
            cpu: 100m
            memory: 300Mi
          requests:
            cpu: 100m
            memory: 300Mi
        command:
        - ./cluster-autoscaler
        - --v=4
        - --stderrthreshold=info
        - --cloud-provider={{ getenv "CLOUD_PROVIDER" | default "aws" }}
        - --scale-down-delay-after-add={{ getenv "GPU_SCALE_DOWN_DELAY_AFTER_ADD" | default "30m" }}
        - --scale-down-delay-after-delete={{ getenv "GPU_SCALE_DOWN_DELAY_AFTER_DELETE" | default "10m" }}
        - --skip-nodes-with-local-storage=false
        # min:max:ASG-name — the ASG name kops generates is <group>.<cluster>.
        - --nodes={{ getenv "GPU_MIN_NODES" | default "0" }}:{{ getenv "GPU_MAX_NODES" | default "2" }}:{{ getenv "GPU_GROUP_NAME" | default "gpu-nodes" }}.{{getenv "KOPS_CLUSTER_NAME"}}
        env:
        - name: AWS_REGION
          # Quoted so the rendered value is a YAML string (an unquoted
          # '{{ ... }}' starts a flow mapping, and an empty expansion → null).
          value: '{{ getenv "AWS_REGION" }}'
        volumeMounts:
        - name: ssl-certs
          mountPath: "/etc/ssl/certs/ca-certificates.crt"
          readOnly: true
      volumes:
      - name: ssl-certs
        hostPath:
          path: "/etc/ssl/certs/ca-certificates.crt"
      dnsPolicy: "Default"
41 changes: 41 additions & 0 deletions conf/addons/nvidia-device-plugin.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Based on https://github.com/NVIDIA/k8s-device-plugin
# DaemonSet that advertises nvidia.com/gpu resources to the kubelet on every
# node of the "gpu-nodes" instance group.
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin
  namespace: kube-system
spec:
  template:
    metadata:
      # Mark this pod as a critical add-on; when enabled, the critical add-on scheduler
      # reserves resources for critical add-on pods so that they can be rescheduled after
      # a failure. This annotation works in tandem with the toleration below.
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        name: nvidia-device-plugin
    spec:
      # Only run on nodes belonging to the GPU instance group.
      nodeSelector:
        kops.k8s.io/instancegroup: gpu-nodes
      tolerations:
      # Allow this pod to be rescheduled while the node is in "critical add-ons only" mode.
      # This, along with the annotation above marks this pod as a critical add-on.
      - key: CriticalAddonsOnly
        operator: Exists
      # Tolerate the nvidia.com/gpu taint applied to the GPU instance group.
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule
      containers:
      - image: nvidia/k8s-device-plugin:1.9
        name: nvidia-device-plugin-ctr
        securityContext:
          allowPrivilegeEscalation: false
          capabilities:
            drop: ["ALL"]
        volumeMounts:
        - name: device-plugin
          mountPath: /var/lib/kubelet/device-plugins
      volumes:
      # The kubelet's device-plugin socket directory on the host.
      - name: device-plugin
        hostPath:
          path: /var/lib/kubelet/device-plugins
18 changes: 18 additions & 0 deletions conf/addons/nvidia-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Smoke-test pod: runs a CUDA vector-add once on a p2.xlarge GPU node to
# verify the drivers and device plugin are working.
apiVersion: v1
kind: Pod
metadata:
  name: "nvidia-test"
spec:
  restartPolicy: "OnFailure"
  nodeSelector:
    beta.kubernetes.io/instance-type: "p2.xlarge"
  tolerations:
  # Tolerate the (empty-valued) nvidia.com/gpu taint on GPU nodes.
  - key: "nvidia.com/gpu"
    effect: "NoSchedule"
  containers:
  - name: "cuda-vector-add"
    # https://github.com/kubernetes/kubernetes/blob/v1.7.11/test/images/nvidia-cuda/Dockerfile
    image: "k8s.gcr.io/cuda-vector-add:v0.1"
    resources:
      limits:
        nvidia.com/gpu: 1
29 changes: 29 additions & 0 deletions conf/patches/aws.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# kops cluster-spec patch: pin the Docker version (nvidia-docker compatibility)
# and grant node IAM permissions required by the cluster-autoscaler.
spec:
  docker:
    logDriver: json-file
    version: 17.03.2
  additionalPolicies:
    # Inline IAM policy (JSON) attached to the nodes' instance role.
    nodes: |
      [
        {
          "Sid": "assumeClusterRole",
          "Action": [
            "sts:AssumeRole"
          ],
          "Effect": "Allow",
          "Resource": ["*"]
        },
        {
          "Effect": "Allow",
          "Action": [
            "autoscaling:DescribeAutoScalingGroups",
            "autoscaling:DescribeAutoScalingInstances",
            "autoscaling:DescribeTags",
            "autoscaling:DescribeLaunchConfigurations",
            "autoscaling:SetDesiredCapacity",
            "autoscaling:TerminateInstanceInAutoScalingGroup",
            "ec2:DescribeLaunchTemplateVersions"
          ],
          "Resource": "*"
        }
      ]
19 changes: 19 additions & 0 deletions conf/patches/gpu-nodes.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# kops instance-group patch for the GPU node group: GPU machine type, a taint
# so only GPU workloads schedule here, and a boot hook that installs
# nvidia-docker before Docker starts.
spec:
  machineType: p2.xlarge
  taints:
  # Empty-valued taint; pods must tolerate nvidia.com/gpu to land here.
  - nvidia.com/gpu=:NoSchedule
  hooks:
  # Before is just advisory; `docker-healthcheck.service` appears to get started by `kops-configuration.service`
  - before:
    - docker.service
    - docker-healthcheck.service
    # systemd unit body (literal block keeps newlines).
    manifest: |
      Type=oneshot
      ExecStart=/bin/bash -c '/usr/bin/curl -L -S -f https://raw.githubusercontent.com/vanvalenlab/kiosk/gpus/scripts/nvidia-docker-installer.sh | /bin/bash -x'
    name: nvidia-docker-install.service
  minSize: 0
  maxSize: 2
  # https://github.com/kubernetes/autoscaler/issues/903#issuecomment-392885606
  kubelet:
    featureGates:
      DevicePlugins: "true"
Loading

0 comments on commit 3878dc6

Please sign in to comment.