Skip to content

Commit

Permalink
feat: ListNodeImageVersions + shared image gallery support (#526)
Browse files Browse the repository at this point in the history
* feat: support shared image galleries inside of karpenter

* chore: populating image stubs for shared image galleries

* fix: progress

* fix: PopulateResourceStub accessing the wrong index

* test: properly testing ListNodeImageVersions

* refactor: rename symbol for SIG Subscription id

* test: conditional use of sig dependent on the managed karpenter flag

* refactor: removing panics used in testing

* test: adding RBAC and helm values to the template for SIG Gallery logic

* fix: bug in azure linux sig image resolution

* chore: update cleanupenv to handle inflate too ratehr than just job pods

* test: fix randomized test order flake

* ci: shadow declaration

* refactor: comment wording

* test: validate all image ids are resolved correctly

* fix: adding filtering for duplicate sku + os combinations and filtering out unsupported galleries

* refactor: renaming var

* refactor: rename the managedKarpenter reference to UseSIG

* refactor: spelling

* fix: v1 migration for test

* ci: lint

* fix: lint

* ci: fix

* ci: license

* ci: fix

* fix: test pollution

* fix: resetting options before each test run

* fix: accounting for versions of the shape 'yy.mm.dd'

* test: removing unused cleanup funcs

* fix: making the key for shared image gallery smaller

* refactor: removing windows types leaving them to be added back later

* refactor: reducing key even more

* fix: extending key

* fix: removing log line

* refactor: not nesting options as deep, leaving refactor of USESIG to provider for later

* fix: comment about pulling out the variable

* refactor: decoupling the cache reads and cache writes from the image gallery id retrivial functions

* test: adding some coverage to FilteredNodeImages

* refactor: removing community galleries fake method for GET since this implementation doesn't use it

* test: test that we drift nodes when switching from community gallery to shared image gallery

* test: SIG_SUBSCRIPTION_ID

* refactor: using sig from context

* fix: remove extra space

---------

Co-authored-by: Alex Leites <[email protected]>
  • Loading branch information
Bryce-Soghigian and tallaxes authored Dec 21, 2024
1 parent 746ec9a commit 3a9cbc0
Show file tree
Hide file tree
Showing 21 changed files with 967 additions and 161 deletions.
9 changes: 8 additions & 1 deletion Makefile-az.mk
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ else
AZURE_ACR_NAME ?= $(COMMON_NAME)
endif

AZURE_SIG_SUBSCRIPTION_ID ?= $(AZURE_SUBSCRIPTION_ID)
AZURE_CLUSTER_NAME ?= $(COMMON_NAME)
AZURE_RESOURCE_GROUP_MC = MC_$(AZURE_RESOURCE_GROUP)_$(AZURE_CLUSTER_NAME)_$(AZURE_LOCATION)

Expand Down Expand Up @@ -46,7 +47,8 @@ az-mkacr: az-mkrg ## Create test ACR
az-acrimport: ## Imports an image to an acr registry
az acr import --name $(AZURE_ACR_NAME) --source "mcr.microsoft.com/oss/kubernetes/pause:3.6" --image "pause:3.6"

az-cleanenv: az-rmnodeclaims-fin ## Deletes a few common karpenter testing resources(pods, nodepools, nodeclaims, aksnodeclasses)
az-cleanenv: az-rmnodeclaims-fin ## Deletes a few common karpenter testing resources(pods, nodepools, nodeclaims, aksnodeclasses)
kubectl delete deployments -n default --all
kubectl delete pods -n default --all
kubectl delete nodeclaims --all
kubectl delete nodepools --all
Expand Down Expand Up @@ -136,6 +138,11 @@ az-perm: ## Create role assignments to let Karpenter manage VMs and Network
az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --scope /subscriptions/$(AZURE_SUBSCRIPTION_ID)/resourceGroups/$(AZURE_RESOURCE_GROUP) --role "Network Contributor" # in some case we create vnet here
@echo Consider "make az-configure-values"!

az-perm-sig: ## Create role assignments when testing with SIG images
$(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv))
az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --role "Reader" --scope /subscriptions/$(AZURE_SIG_SUBSCRIPTION_ID)/resourceGroups/AKS-Ubuntu/providers/Microsoft.Compute/galleries/AKSUbuntu
az role assignment create --assignee $(KARPENTER_USER_ASSIGNED_CLIENT_ID) --role "Reader" --scope /subscriptions/$(AZURE_SIG_SUBSCRIPTION_ID)/resourceGroups/AKS-AzureLinux/providers/Microsoft.Compute/galleries/AKSAzureLinux

az-perm-subnet-custom: az-perm ## Create role assignments to let Karpenter manage VMs and Network (custom VNet)
$(eval VNET_SUBNET_ID=$(shell az aks show --name $(AZURE_CLUSTER_NAME) --resource-group $(AZURE_RESOURCE_GROUP) | jq -r ".agentPoolProfiles[0].vnetSubnetId"))
$(eval KARPENTER_USER_ASSIGNED_CLIENT_ID=$(shell az identity show --resource-group "${AZURE_RESOURCE_GROUP}" --name "${AZURE_KARPENTER_USER_ASSIGNED_IDENTITY_NAME}" --query 'principalId' -otsv))
Expand Down
6 changes: 6 additions & 0 deletions karpenter-values-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ controller:
value: ""
- name: AZURE_NODE_RESOURCE_GROUP
value: ${AZURE_RESOURCE_GROUP_MC}

# managed karpenter settings
- name: USE_SIG
value: "false"
- name: SIG_SUBSCRIPTION_ID
value: ""
serviceAccount:
name: ${KARPENTER_SERVICE_ACCOUNT_NAME}
annotations:
Expand Down
18 changes: 7 additions & 11 deletions pkg/cloudprovider/drift.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,21 +145,17 @@ func (c *CloudProvider) isImageVersionDrifted(

if vm.Properties == nil ||
vm.Properties.StorageProfile == nil ||
vm.Properties.StorageProfile.ImageReference == nil ||
vm.Properties.StorageProfile.ImageReference.CommunityGalleryImageID == nil ||
*vm.Properties.StorageProfile.ImageReference.CommunityGalleryImageID == "" {
logger.Debug("not using a CommunityGalleryImageID for nodeClaim %s", nodeClaim.Name)
vm.Properties.StorageProfile.ImageReference == nil {
return "", nil
}
CIGID := lo.FromPtr(vm.Properties.StorageProfile.ImageReference.CommunityGalleryImageID)
SIGID := lo.FromPtr(vm.Properties.StorageProfile.ImageReference.ID)
vmImageID := lo.Ternary(SIGID != "", SIGID, CIGID)

vmImageID := *vm.Properties.StorageProfile.ImageReference.CommunityGalleryImageID
var imageStub imagefamily.DefaultImageOutput
imageStub.PopulateImageTraitsFromID(vmImageID)

publicGalleryURL, communityImageName, _, err := imagefamily.ParseCommunityImageIDInfo(vmImageID)
if err != nil {
return "", err
}

expectedImageID, err := c.imageProvider.GetImageID(ctx, communityImageName, publicGalleryURL)
expectedImageID, err := c.imageProvider.GetLatestImageID(ctx, imageStub)
if err != nil {
return "", err
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/cloudprovider/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/awslabs/operatorpkg/object"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/samber/lo"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

Expand Down Expand Up @@ -223,5 +224,14 @@ var _ = Describe("CloudProvider", func() {
Expect(err).To(HaveOccurred())
Expect(drifted).To(BeEmpty())
})
It("should trigger drift when the image gallery changes to SIG", func() {
options := test.Options(test.OptionsFields{
UseSIG: lo.ToPtr(true),
})
ctx = options.ToContext(ctx)
drifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).ToNot(HaveOccurred())
Expect(string(drifted)).To(Equal("ImageVersionDrift"))
})
})
})
Loading

0 comments on commit 3a9cbc0

Please sign in to comment.