Skip to content

Commit

Permalink
Decouple automated K8s deployments (TraceMachina#1531)
Browse files Browse the repository at this point in the history
This change allows deploying nativelink in various new configurations,
including cache-only and multi-toolchain setups.

The new setup is significantly more modular and lets us add and remove
toolchains at runtime.

Includes a reordering of the deployment logic to ensure that Alerts get
deployed before the GitRepository that triggers them. For demo and CI
this comes at the cost of an additional dummy GitRepository. This
tradeoff seems well worth it as the previous ordering was entirely wrong
and unreliable.

Fixes the flakiness of the LRE workflow where it often gets stuck in the
`Waiting for Tekton pipelines` loop.
  • Loading branch information
aaronmondal authored Dec 10, 2024
1 parent 8782c0b commit a0ca341
Show file tree
Hide file tree
Showing 41 changed files with 532 additions and 299 deletions.
72 changes: 37 additions & 35 deletions .github/workflows/lre.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,9 @@ jobs:
nix develop --impure --command bash -c 'cat > kustomization.yaml << EOF
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
components:
- kubernetes/components/operator
resources:
- deploy/kubernetes-example
patches:
- patch: |-
- op: replace
path: /spec/path
value: ./kubernetes/overlays/lre
target:
kind: Kustomization
name: nativelink
- patch: |-
- op: replace
path: /spec/url
Expand All @@ -119,27 +112,6 @@ jobs:
target:
kind: GitRepository
name: nativelink
- patch: |-
- op: replace
path: /spec/eventMetadata/flakeOutput
value: ./src_root#image
target:
kind: Alert
name: nativelink-image-alert
- patch: |-
- op: replace
path: /spec/eventMetadata/flakeOutput
value: ./src_root#nativelink-worker-init
target:
kind: Alert
name: nativelink-worker-init-alert
- patch: |-
- op: replace
path: /spec/eventMetadata/flakeOutput
value: ./src_root#nativelink-worker-lre-cc
target:
kind: Alert
name: nativelink-worker-alert
EOF
kubectl apply -k . &&
rm kustomization.yaml'
Expand All @@ -151,6 +123,36 @@ jobs:
--timeout=15m \
nativelink-tekton-resources"
- name: Wait for alerts
run: >
nix develop --impure --command
bash -c "flux reconcile kustomization -n default \
--timeout=15m \
nativelink-alert-core && \
flux reconcile kustomization -n default \
--timeout=15m \
nativelink-alert-worker-init && \
flux reconcile kustomization -n default \
--timeout=15m \
nativelink-alert-lre-cc"
- name: Trigger pipelines
run: |
nix develop --impure --command bash -c 'cat > dummy-repo.yaml << EOF
apiVersion: source.toolkit.fluxcd.io/v1
kind: GitRepository
metadata:
name: dummy-repository
namespace: default
spec:
interval: 2m
url: https://github.com/TraceMachina/nativelink
ref:
branch: main
EOF
kubectl apply -f dummy-repo.yaml &&
rm dummy-repo.yaml'
- name: Wait for Tekton pipelines
run: >
nix develop --impure --command bash << 'EOF'
Expand All @@ -171,19 +173,19 @@ jobs:
-l tekton.dev/pipeline=rebuild-nativelink
EOF
- name: Wait for Configmaps
- name: Wait for NativeLink Kustomization
run: >
nix develop --impure --command
bash -c "flux reconcile kustomization -n default \
--timeout=15m \
nativelink-configmaps"
nativelink-core"
- name: Wait for NativeLink Kustomization
- name: Wait for Worker Kustomization
run: >
nix develop --impure --command
bash -c "flux reconcile kustomization -n default \
--timeout=15m \
nativelink"
nativelink-lre-cc"
- name: Wait for NativeLink
run: >
Expand All @@ -193,7 +195,7 @@ jobs:
- name: Wait for worker
run: >
nix develop --impure --command
bash -c "kubectl rollout status deploy/nativelink-worker"
bash -c "kubectl rollout status deploy/nativelink-worker-lre-cc"
- name: Get gateway IPs
id: gateway-ips
Expand Down
24 changes: 6 additions & 18 deletions deploy/chromium-example/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,21 +1,9 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

components:
- ../../kubernetes/components/operator

patches:
- patch: |-
- op: replace
path: /spec/path
value: ./kubernetes/overlays/chromium
target:
kind: Kustomization
name: nativelink
- patch: |-
- op: replace
path: /spec/eventMetadata/flakeOutput
value: github:TraceMachina/nativelink#nativelink-worker-siso-chromium
target:
kind: Alert
name: nativelink-worker-alert
resources:
- ../../kubernetes/resources/flux
- ../../kubernetes/resources/nativelink-core
- ../../kubernetes/resources/gateway-routes
- ../../kubernetes/workers/resources/worker-init
- ../../kubernetes/workers/resources/siso-chromium
42 changes: 16 additions & 26 deletions deploy/dev/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,33 +1,23 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

components:
- ../../kubernetes/components/operator
resources:
- ../kubernetes-example

# Change this value to deploy custom overlays.
patches:
- patch: |-
- op: replace
path: /spec/path
value: ./kubernetes/overlays/lre
target:
kind: Kustomization
name: nativelink

# Modify this value to change the URL of the repository with deployment files.
# Modify this value to change the repository containing the deployment files.
#
# This is usually only necessary if you change deployment YAML files or
# NativeLink config files. If you only intend to change the Rust sources, you
# can leave this as is, but you still need to ensure that the Alerts below are
# patched to build your local sources.
patches:
- patch: |-
- op: replace
path: /spec/url
value: https://github.com/TraceMachina/nativelink
# Optionally, change the tracked branch.
# - op: replace
# path: /spec/ref/branch
# value: somecustombranch
- op: replace
path: /spec/ref/branch
value: main
target:
kind: GitRepository
name: nativelink
Expand All @@ -43,22 +33,22 @@ patches:
# outputs from a Pull request.
- patch: |-
- op: replace
path: /spec/eventMetadata/flakeOutput
path: /spec/postBuild/substitute/PLACEHOLDER_FLAKE_OUTPUT
value: ./src_root#image
target:
kind: Alert
name: nativelink-image-alert
kind: Kustomization
name: nativelink-alert-core
- patch: |-
- op: replace
path: /spec/eventMetadata/flakeOutput
path: /spec/postBuild/substitute/PLACEHOLDER_FLAKE_OUTPUT
value: ./src_root#nativelink-worker-init
target:
kind: Alert
name: nativelink-worker-init-alert
kind: Kustomization
name: nativelink-alert-worker-init
- patch: |-
- op: replace
path: /spec/eventMetadata/flakeOutput
path: /spec/postBuild/substitute/PLACEHOLDER_FLAKE_OUTPUT
value: ./src_root#nativelink-worker-lre-cc
target:
kind: Alert
name: nativelink-worker-alert
kind: Kustomization
name: nativelink-alert-lre-cc
24 changes: 6 additions & 18 deletions deploy/kubernetes-example/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,21 +1,9 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

components:
- ../../kubernetes/components/operator

patches:
- patch: |-
- op: replace
path: /spec/path
value: ./kubernetes/overlays/lre
target:
kind: Kustomization
name: nativelink
- patch: |-
- op: replace
path: /spec/eventMetadata/flakeOutput
value: github:TraceMachina/nativelink#nativelink-worker-lre-cc
target:
kind: Alert
name: nativelink-worker-alert
resources:
- ../../kubernetes/resources/flux
- ../../kubernetes/resources/nativelink-core
- ../../kubernetes/resources/gateway-routes
- ../../kubernetes/workers/resources/worker-init
- ../../kubernetes/workers/resources/lre-cc
29 changes: 29 additions & 0 deletions deploy/lre-manual/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
- ../../kubernetes/nativelink
- ../../kubernetes/workers/lre-cc
- ../../kubernetes/resources/gateway-routes

# Note: This deployment is mainly intended for documentation purposes. If you
# intend to use it you'll need to provide image tags here.
#
# NativeLink doesn't support a "latest" tag or similar for reproducibility
# reasons. For the latest tags see:
# https://github.com/orgs/TraceMachina/packages?repo_name=nativelink
#
# See the lre overlay for a setup that autodetects these tags and automatically
# stays in sync with main. See the `../dev` directory for development
# setups.
images:
- name: nativelink
newName: ghcr.io/TraceMachina/nativelink
newTag: TODO
- name: nativelink-worker-init
newName: ghcr.io/TraceMachina/nativelink-worker-init
newTag: TODO
- name: nativelink-worker-lre-cc
newName: ghcr.io/TraceMachina/nativelink-worker-lre-cc
newTag: TODO
12 changes: 0 additions & 12 deletions kubernetes/base/kustomization.yaml

This file was deleted.

17 changes: 17 additions & 0 deletions kubernetes/components/alerts/alert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
apiVersion: notification.toolkit.fluxcd.io/v1beta3
kind: Alert
metadata:
name: nativelink-alert
namespace: default
spec:
eventSeverity: info
eventSources:
- kind: GitRepository
name: '*'
namespace: default
providerRef:
name: nativelink-webhook
eventMetadata:
flakeOutput: ${PLACEHOLDER_FLAKE_OUTPUT}
configMapName: ${PLACEHOLDER_CONFIG_MAP_NAME}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component

resources:
- routes.yaml
- alert.yaml
22 changes: 22 additions & 0 deletions kubernetes/components/kustomization/flux-kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: nativelink
namespace: default
spec:
interval: 2m
path: PLACEHOLDER_NATIVELINK_KUSTOMIZATION
prune: true
force: true
retryInterval: 20s
targetNamespace: default
wait: true
sourceRef:
kind: GitRepository
name: nativelink
namespace: default
postBuild:
substituteFrom:
- kind: ConfigMap
name: nativelink-image-tags
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
---
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component

resources:
- flux-config.yaml
- flux-kustomization.yaml
Loading

0 comments on commit a0ca341

Please sign in to comment.