diff --git a/.github/workflows/kind-e2e.yml b/.github/workflows/kind-e2e.yml
index 72539edda..ebe92e1bb 100644
--- a/.github/workflows/kind-e2e.yml
+++ b/.github/workflows/kind-e2e.yml
@@ -85,6 +85,10 @@ jobs:
         working-directory: ./e2e
         run: ./test-default-route1.sh
 
+      - name: Test DRA integration
+        working-directory: ./e2e
+        run: ./test-dra-integration.sh
+
       - name: Export kind logs
         if: always()
         run: |
diff --git a/.gitignore b/.gitignore
index 186b9dfa0..af074ab8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 bin/
 e2e/bin/
 e2e/yamls/
+e2e/repos/
 
 # GOPATH created by the build script
 gopath/
diff --git a/docs/how-to-use.md b/docs/how-to-use.md
index 566275c4b..78ffabc9a 100644
--- a/docs/how-to-use.md
+++ b/docs/how-to-use.md
@@ -511,7 +511,7 @@ spec:
 EOF
 ```
 
-We can then create a pod which uses the `default-route` key in the JSON formatted `k8s.v1.cni.cncf.io/networks` annotation. 
+We can then create a pod which uses the `default-route` key in the JSON formatted `k8s.v1.cni.cncf.io/networks` annotation.
 
 ```
 cat <<EOF | kubectl create -f -
diff --git a/docs/dra-integration.md b/docs/dra-integration.md
+> :warning: Dynamic Resource Allocation (DRA) is [currently an alpha feature](https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/)
+> and is subject to change. Please consider this functionality as a preview. The architecture and usage of DRA in
+> Multus CNI may change in the future as this technology matures.
+
+Dynamic Resource Allocation (DRA) is an alternative to the device plugin mechanism for requesting pod and container
+resources.
+
+The following sections describe how to use DRA with Multus and the NVIDIA DRA driver for networking. Other DRA
+networking driver vendors should be able to follow similar concepts to make use of Multus DRA support.
+
+#### Prerequisites
+
+1. Kubernetes 1.27
+2. Container runtime with CDI support enabled
+3. Kubernetes `runtime-config=resource.k8s.io/v1alpha2`
+4. Kubernetes `feature-gates=DynamicResourceAllocation=true,KubeletPodResourcesDynamicResources=true`
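+
+How these switches are turned on depends on how the cluster is deployed. As a rough illustration only (the
+topology and settings below are examples, not requirements of this integration), a [kind](https://kind.sigs.k8s.io/)
+cluster configuration that enables the DRA API group, the feature gates, and CDI support in containerd (1.7 or
+newer) might look like the following:
+
+```
+# Illustrative kind cluster config; adjust to your environment.
+kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+featureGates:
+  DynamicResourceAllocation: true
+  KubeletPodResourcesDynamicResources: true
+runtimeConfig:
+  "resource.k8s.io/v1alpha2": "true"
+containerdConfigPatches:
+# Enable CDI device injection in the containerd CRI plugin.
+- |-
+  [plugins."io.containerd.grpc.v1.cri"]
+    enable_cdi = true
+nodes:
+- role: control-plane
+- role: worker
+```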
+
+#### Install DRA driver
+
+The example below uses the NVIDIA DRA driver for networking. This DRA driver is not publicly available; a publicly
+available alternative is the [dra-example-driver](https://github.com/kubernetes-sigs/dra-example-driver).
+
+#### Create dynamic resource class with NVIDIA network DRA driver
+
+The `ResourceClass` defines the resource pool named `sf-pool-1`.
+
+```
+# Execute the following command on the Kubernetes master
+cat <<EOF | kubectl create -f -
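+# NOTE: illustrative manifest only. The driverName below is a placeholder; it must match
+# the name published by the DRA network driver that is actually installed in the cluster.
+apiVersion: resource.k8s.io/v1alpha2
+kind: ResourceClass
+metadata:
+  name: sf-pool-1
+driverName: net.resource.example.com
+EOF
+```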
diff --git a/e2e/test-dra-integration.sh b/e2e/test-dra-integration.sh
+cat <<EOF >> ${overriden_values_path}
+controller:
+  nodeSelector: null
+  tolerations: null
+EOF
+
+helm install \
+  -n dra-example-driver \
+  --create-namespace \
+  -f ${overriden_values_path} \
+  dra-example-driver \
+  ${chart_path}
+
+echo "installing testing pods"
+kubectl create -f yamls/dra-integration.yml
+kubectl wait --for=condition=ready -l app=dra-integration --timeout=300s pod
+
+echo "check dra-integration pod for DRA injected environment variable"
+
+# We can validate that the resource is correctly injected by checking an environment variable that this DRA driver
+# injects into the Pod.
+# https://github.com/kubernetes-sigs/dra-example-driver/blob/be2b8b1db47b8c757440e955ce5ced88c23bfe86/cmd/dra-example-kubeletplugin/cdi.go#L71C20-L71C44
+env_variable=$(kubectl exec dra-integration -- bash -c "echo \$DRA_RESOURCE_DRIVER_NAME | grep gpu.resource.example.com")
+if [ $? -eq 0 ]; then
+  echo "dra-integration pod has DRA injected environment variable"
+else
+  echo "dra-integration pod doesn't have DRA injected environment variable"
+  exit 1
+fi
+
+echo "cleanup resources"
+kubectl delete -f yamls/dra-integration.yml
+helm uninstall -n dra-example-driver dra-example-driver
diff --git a/pkg/kubeletclient/kubeletclient.go b/pkg/kubeletclient/kubeletclient.go
index d0438f012..60876403f 100644
--- a/pkg/kubeletclient/kubeletclient.go
+++ b/pkg/kubeletclient/kubeletclient.go
@@ -21,6 +21,7 @@ import (
 	"net/url"
 	"os"
 	"path/filepath"
+	"strings"
 	"time"
 
 	"golang.org/x/net/context"
@@ -137,19 +138,45 @@ func (rc *kubeletClient) GetPodResourceMap(pod *v1.Pod) (map[string]*types.Resou
 	for _, pr := range rc.resources {
 		if pr.Name == name && pr.Namespace == ns {
 			for _, cnt := range pr.Containers {
-				for _, dev := range cnt.Devices {
-					if rInfo, ok := resourceMap[dev.ResourceName]; ok {
-						rInfo.DeviceIDs = append(rInfo.DeviceIDs, dev.DeviceIds...)
-					} else {
-						resourceMap[dev.ResourceName] = &types.ResourceInfo{DeviceIDs: dev.DeviceIds}
-					}
-				}
+				rc.getDevicePluginResources(cnt.Devices, resourceMap)
+				rc.getDRAResources(cnt.DynamicResources, resourceMap)
 			}
 		}
 	}
 	return resourceMap, nil
 }
 
+func (rc *kubeletClient) getDevicePluginResources(devices []*podresourcesapi.ContainerDevices, resourceMap map[string]*types.ResourceInfo) {
+	for _, dev := range devices {
+		if rInfo, ok := resourceMap[dev.ResourceName]; ok {
+			rInfo.DeviceIDs = append(rInfo.DeviceIDs, dev.DeviceIds...)
+		} else {
+			resourceMap[dev.ResourceName] = &types.ResourceInfo{DeviceIDs: dev.DeviceIds}
+		}
+	}
+}
+
+func (rc *kubeletClient) getDRAResources(dynamicResources []*podresourcesapi.DynamicResource, resourceMap map[string]*types.ResourceInfo) {
+	for _, dynamicResource := range dynamicResources {
+		var deviceIDs []string
+		for _, claimResource := range dynamicResource.ClaimResources {
+			for _, cdiDevice := range claimResource.CDIDevices {
+				res := strings.Split(cdiDevice.Name, "=")
+				if len(res) == 2 {
+					deviceIDs = append(deviceIDs, res[1])
+				} else {
+					logging.Errorf("GetPodResourceMap: Invalid CDI format")
+				}
+			}
+		}
+		if rInfo, ok := resourceMap[dynamicResource.ClassName]; ok {
+			rInfo.DeviceIDs = append(rInfo.DeviceIDs, deviceIDs...)
+		} else {
+			resourceMap[dynamicResource.ClassName] = &types.ResourceInfo{DeviceIDs: deviceIDs}
+		}
+	}
+}
+
 func hasKubeletAPIEndpoint(url *url.URL) bool {
 	// Check for kubelet resource API socket file
 	if _, err := os.Stat(url.Path); err != nil {
diff --git a/pkg/kubeletclient/kubeletclient_test.go b/pkg/kubeletclient/kubeletclient_test.go
index 668055176..f8c493eee 100644
--- a/pkg/kubeletclient/kubeletclient_test.go
+++ b/pkg/kubeletclient/kubeletclient_test.go
@@ -60,10 +60,6 @@ func (m *fakeResourceServer) Get(_ context.Context, _ *podresourcesapi.GetPodRes
 }
 
 func (m *fakeResourceServer) List(_ context.Context, _ *podresourcesapi.ListPodResourcesRequest) (*podresourcesapi.ListPodResourcesResponse, error) {
-	podName := "pod-name"
-	podNamespace := "pod-namespace"
-	containerName := "container-name"
-
 	devs := []*podresourcesapi.ContainerDevices{
 		{
 			ResourceName: "resource",
@@ -71,18 +67,49 @@ func (m *fakeResourceServer) List(_ context.Context, _ *podresourcesapi.ListPodR
 		},
 	}
 
+	cdiDevices := []*podresourcesapi.CDIDevice{
+		{
+			Name: "cdi-kind=cdi-resource",
+		},
+	}
+
+	claimsResource := []*podresourcesapi.ClaimResource{
+		{
+			CDIDevices: cdiDevices,
+		},
+	}
+
+	dynamicResources := []*podresourcesapi.DynamicResource{
+		{
+			ClassName:      "resource-class",
+			ClaimName:      "resource-claim",
+			ClaimNamespace: "dynamic-resource-pod-namespace",
+			ClaimResources: claimsResource,
+		},
+	}
+
 	resp := &podresourcesapi.ListPodResourcesResponse{
 		PodResources: []*podresourcesapi.PodResources{
 			{
-				Name:      podName,
-				Namespace: podNamespace,
+				Name:      "pod-name",
+				Namespace: "pod-namespace",
 				Containers: []*podresourcesapi.ContainerResources{
 					{
-						Name:    containerName,
+						Name:    "container-name",
 						Devices: devs,
 					},
 				},
 			},
+			{
+				Name:      "dynamic-resource-pod-name",
+				Namespace: "dynamic-resource-pod-namespace",
+				Containers: []*podresourcesapi.ContainerResources{
+					{
+						Name:             "dynamic-resource-container-name",
+						DynamicResources: dynamicResources,
+					},
+				},
+			},
 		},
 	}
 	return resp, nil
@@ -188,7 +215,7 @@ var _ = Describe("Kubelet resource endpoint data read operations", func() {
 			})
 		})
 		Context("GetPodResourceMap() with valid pod name and namespace", func() {
-			It("should return no error", func() {
+			It("should return no error with device plugin resource", func() {
 				podUID := k8sTypes.UID("970a395d-bb3b-11e8-89df-408d5c537d23")
 				fakePod := &v1.Pod{
 					ObjectMeta: metav1.ObjectMeta{
@@ -216,6 +243,34 @@
 				Expect(resourceMap).To(Equal(outputRMap))
 			})
 
+			It("should return no error with dynamic resource", func() {
+				podUID := k8sTypes.UID("9f94e27b-4233-43d6-bd10-f73b4de6f456")
+				fakePod := &v1.Pod{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "dynamic-resource-pod-name",
+						Namespace: "dynamic-resource-pod-namespace",
+						UID:       podUID,
+					},
+					Spec: v1.PodSpec{
+						Containers: []v1.Container{
+							{
+								Name: "dynamic-resource-container-name",
+							},
+						},
+					},
+				}
+				client, err := getKubeletClient(testKubeletSocket)
+				Expect(err).NotTo(HaveOccurred())
+
+				outputRMap := map[string]*mtypes.ResourceInfo{
+					"resource-class": {DeviceIDs: []string{"cdi-resource"}},
+				}
+				resourceMap, err := client.GetPodResourceMap(fakePod)
+				Expect(err).NotTo(HaveOccurred())
+				Expect(resourceMap).ShouldNot(BeNil())
+				Expect(resourceMap).To(Equal(outputRMap))
+			})
+
 			It("should return an error with garbage socket value", func() {
 				u, err := url.Parse("/badfilepath!?//")
 				Expect(err).NotTo(HaveOccurred())