Add multiple concurrent node reboot feature (#660)
* Add ability to have multiple nodes get a lock

Currently in kured, only a single node can acquire the lock via Acquire. There are situations where multiple nodes should be able to hold the lock at once, for example when a cluster can tolerate several nodes rebooting at the same time. This adds a side-by-side implementation for a multi-node lock.
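
As a minimal sketch of the idea (not part of this commit, and with hypothetical type and field names), assuming the shared lock is stored as a JSON-serialized annotation on the kured DaemonSet, a multi-owner lock could look like this:

package daemonsetlock

import "time"

// lockOwner records one node currently holding the shared lock.
// Field names are illustrative, not the ones kured uses.
type lockOwner struct {
	NodeID   string        `json:"nodeID"`
	Metadata interface{}   `json:"metadata"`
	Created  time.Time     `json:"created"`
	TTL      time.Duration `json:"TTL"`
}

// multiLock is a hypothetical annotation value allowing up to
// MaxOwners nodes to hold the lock at the same time.
type multiLock struct {
	MaxOwners int         `json:"maxOwners"`
	Owners    []lockOwner `json:"owners"`
}

// canAcquire reports whether another node may join the lock:
// expired owners are ignored and the remaining active owners
// must number fewer than MaxOwners.
func (m multiLock) canAcquire(now time.Time) bool {
	active := 0
	for _, o := range m.Owners {
		if o.TTL == 0 || now.Before(o.Created.Add(o.TTL)) {
			active++
		}
	}
	return active < m.MaxOwners
}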

Signed-off-by: Thomas Stringer <[email protected]>

* Refactor to use the same code path for a single lock and a multilock

Signed-off-by: Thomas Stringer <[email protected]>

* test: force rebuild

Signed-off-by: Christian Kotzbauer <[email protected]>

* build: log pod-logs

Signed-off-by: Christian Kotzbauer <[email protected]>

* fix: change condition

Signed-off-by: Christian Kotzbauer <[email protected]>

* build: fix test-script

Signed-off-by: Christian Kotzbauer <[email protected]>

* build: add concurrent test

Signed-off-by: Christian Kotzbauer <[email protected]>

* fix: final changes

Signed-off-by: Christian Kotzbauer <[email protected]>

---------

Signed-off-by: Thomas Stringer <[email protected]>
Signed-off-by: Christian Kotzbauer <[email protected]>
Co-authored-by: Christian Kotzbauer <[email protected]>
trstringer and ckotzbauer authored Aug 14, 2023
1 parent f22b1ab commit 3b9b190
Showing 6 changed files with 474 additions and 10 deletions.
87 changes: 87 additions & 0 deletions .github/workflows/on-pr.yaml
@@ -179,3 +179,90 @@ jobs:
DEBUG: true
run: |
./tests/kind/follow-coordinated-reboot.sh
# This ensures the latest code works with the manifests built from tree.
# It is useful for two things:
# - Test manifest changes (obviously), ensuring they don't break existing clusters
# - Ensure manifests work with the latest versions even with no manifest change
# (compared to helm charts, manifests cannot easily template changes based on versions)
# Helm charts are _trailing_ releases, while manifests are done during development.
# Concurrency = 2
e2e-manifests-concurent:
name: End-to-End test with kured with code and manifests from HEAD (concurrent)
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
kubernetes:
- "1.25"
- "1.26"
- "1.27"
steps:
- uses: actions/checkout@v3
- name: Ensure go version
uses: actions/setup-go@v4
with:
go-version-file: 'go.mod'
check-latest: true
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Setup GoReleaser
run: make bootstrap-tools
- name: Find current tag version
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
id: tags
- name: Build artifacts
run: |
VERSION="${{ steps.tags.outputs.sha_short }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" make manifest
- name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
run: |
sudo bash << EOF
cp /etc/docker/daemon.json /etc/docker/daemon.json.old
echo '{}' > /etc/docker/daemon.json
systemctl restart docker || journalctl --no-pager -n 500
systemctl status docker
EOF
# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create kind cluster with 5 nodes
uses: helm/[email protected]
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
version: v0.14.0

- name: Preload previously built images onto kind cluster
run: kind load docker-image ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }} --name chart-testing

- name: Do not wait for an hour before detecting the rebootSentinel
run: |
sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds.yaml
sed -i 's/#\(.*\)--concurrency=1/\1--concurrency=2/g' kured-ds.yaml
- name: Install kured with kubectl
run: |
kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds.yaml
- name: Ensure kured is ready
uses: nick-invision/[email protected]
with:
timeout_minutes: 10
max_attempts: 10
retry_wait_seconds: 60
# DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be equal to cluster_size
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"

- name: Create reboot sentinel files
run: |
./tests/kind/create-reboot-sentinels.sh
- name: Follow reboot until success
env:
DEBUG: true
run: |
./tests/kind/follow-coordinated-reboot.sh
46 changes: 36 additions & 10 deletions cmd/kured/main.go
@@ -76,6 +76,7 @@ var (
preRebootNodeLabels []string
postRebootNodeLabels []string
nodeID string
concurrency int

rebootDays []string
rebootStart string
@@ -167,6 +168,8 @@ func NewRootCommand() *cobra.Command {
"command for which a zero return code will trigger a reboot command")
rootCmd.PersistentFlags().StringVar(&rebootCommand, "reboot-command", "/bin/systemctl reboot",
"command to run when a reboot is required")
rootCmd.PersistentFlags().IntVar(&concurrency, "concurrency", 1,
"amount of nodes to concurrently reboot. Defaults to 1")

rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
"slack hook URL for reboot notifications [deprecated in favor of --notify-url]")
@@ -421,8 +424,14 @@ func rebootBlocked(blockers ...RebootBlocker) bool {
return false
}

func holding(lock *daemonsetlock.DaemonSetLock, metadata interface{}) bool {
holding, err := lock.Test(metadata)
func holding(lock *daemonsetlock.DaemonSetLock, metadata interface{}, isMultiLock bool) bool {
var holding bool
var err error
if isMultiLock {
holding, err = lock.TestMultiple()
} else {
holding, err = lock.Test(metadata)
}
if err != nil {
log.Fatalf("Error testing lock: %v", err)
}
@@ -432,8 +441,17 @@ func holding(lock *daemonsetlock.DaemonSetLock, metadata interface{}) bool {
return holding
}

func acquire(lock *daemonsetlock.DaemonSetLock, metadata interface{}, TTL time.Duration) bool {
holding, holder, err := lock.Acquire(metadata, TTL)
func acquire(lock *daemonsetlock.DaemonSetLock, metadata interface{}, TTL time.Duration, maxOwners int) bool {
var holding bool
var holder string
var err error
if maxOwners > 1 {
var holders []string
holding, holders, err = lock.AcquireMultiple(metadata, TTL, maxOwners)
holder = strings.Join(holders, ",")
} else {
holding, holder, err = lock.Acquire(metadata, TTL)
}
switch {
case err != nil:
log.Fatalf("Error acquiring lock: %v", err)
Expand All @@ -454,9 +472,16 @@ func throttle(releaseDelay time.Duration) {
}
}

func release(lock *daemonsetlock.DaemonSetLock) {
func release(lock *daemonsetlock.DaemonSetLock, isMultiLock bool) {
log.Infof("Releasing lock")
if err := lock.Release(); err != nil {

var err error
if isMultiLock {
err = lock.ReleaseMultiple()
} else {
err = lock.Release()
}
if err != nil {
log.Fatalf("Error releasing lock: %v", err)
}
}
@@ -641,7 +666,7 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
source := rand.NewSource(time.Now().UnixNano())
tick := delaytick.New(source, 1*time.Minute)
for range tick {
if holding(lock, &nodeMeta) {
if holding(lock, &nodeMeta, concurrency > 1) {
node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
if err != nil {
log.Errorf("Error retrieving node object via k8s API: %v", err)
@@ -674,7 +699,7 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
}
}
throttle(releaseDelay)
release(lock)
release(lock, concurrency > 1)
break
} else {
break
@@ -732,7 +757,7 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
}
}

if !holding(lock, &nodeMeta) && !acquire(lock, &nodeMeta, TTL) {
if !holding(lock, &nodeMeta, concurrency > 1) && !acquire(lock, &nodeMeta, TTL, concurrency) {
// Prefer to not schedule pods onto this node to avoid draining the same pod multiple times.
preferNoScheduleTaint.Enable()
continue
@@ -754,7 +779,7 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
if err != nil {
if !forceReboot {
log.Errorf("Unable to cordon or drain %s: %v, will release lock and retry cordon and drain before rebooting when lock is next acquired", node.GetName(), err)
release(lock)
release(lock, concurrency > 1)
log.Infof("Performing a best-effort uncordon after failed cordon and drain")
uncordon(client, node)
continue
@@ -832,6 +857,7 @@ func root(cmd *cobra.Command, args []string) {
log.Infof("Blocking Pod Selectors: %v", podSelectors)
log.Infof("Reboot schedule: %v", window)
log.Infof("Reboot check command: %s every %v", sentinelCommand, period)
log.Infof("Concurrency: %v", concurrency)
log.Infof("Reboot command: %s", restartCommand)
if annotateNodes {
log.Infof("Will annotate nodes during kured reboot operations")
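Taken together, the main.go changes route every lock operation through the same call sites and pick the single- or multi-lock variant based on the new --concurrency flag. The condensed sketch below shows that control flow; it reuses the helper signatures from the diff (holding, acquire, release) but is illustrative only and assumes the surrounding rebootAsRequired context (lock, metadata, TTL):

// Condensed sketch of the lock handling after this change (not the
// literal kured code). concurrency > 1 switches every call to the
// multi-lock variant; concurrency == 1 keeps the original behaviour.
func lockFlowSketch(lock *daemonsetlock.DaemonSetLock, metadata interface{}, TTL time.Duration, concurrency int) {
	multi := concurrency > 1

	// Before rebooting: join the set of lock holders, or wait for a free slot.
	if !holding(lock, metadata, multi) && !acquire(lock, metadata, TTL, concurrency) {
		return // all `concurrency` slots are taken; retry on the next tick
	}

	// ... cordon, drain and trigger the reboot here; the kured process
	// does not survive the reboot itself ...

	// On the next startup the node still holds its slot, so after the
	// uncordon (and the optional release delay) it releases the slot,
	// letting another node start its reboot.
	release(lock, multi)
}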
1 change: 1 addition & 0 deletions kured-ds.yaml
@@ -85,3 +85,4 @@ spec:
# - --log-format=text
# - --metrics-host=""
# - --metrics-port=8080
# - --concurrency=1
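
Usage note: as shipped, the flag stays commented out, so the default of a single concurrent reboot is preserved; to allow several nodes to reboot at once, uncomment the line and raise the value (the concurrent e2e job above does exactly that via sed, setting --concurrency=2 before applying the manifest).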
