Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add rolling restart #231

Merged
merged 6 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ansible-lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ jobs:

steps:
- name: Checkout code
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Ansible Lint
uses: ansible/ansible-lint[email protected]
uses: ansible/ansible-lint@main
41 changes: 28 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ role:
# monitor: run the ansible/deploy-monitor.yml playbook against $(ANSIBLE_INVENTORY).
# Requires the ansible-prereqs target; creates $(ARTIFACT_DIR)/logs first.
.PHONY: monitor
monitor: ansible-prereqs
	@mkdir -p $(ARTIFACT_DIR)/logs
	@# Every recipe line runs in its own shell, so a standalone `export` line is
	@# gone before ansible-playbook starts. Set the variable on the command itself.
	@# NOTE(review): presumably the macOS fork-safety workaround for Ansible - confirm.
	@OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES ansible-playbook ansible/deploy-monitor.yml --private-key $(PRIVATE_KEY) --inventory $(ANSIBLE_INVENTORY) --extra-vars is_using_unstable=$(IS_USING_UNSTABLE)

.PHONY: monitor-tls
Expand Down Expand Up @@ -300,6 +301,8 @@ cluster: ansible-prereqs
@mkdir -p $(ARTIFACT_DIR)/logs
@ansible-playbook ansible/provision-cluster.yml --private-key $(PRIVATE_KEY) --inventory $(ANSIBLE_INVENTORY) --extra-vars is_using_unstable=$(IS_USING_UNSTABLE)

TEST_TOPIC_NAME ?= testtopic
PARTITION_COUNT ?= 3
.PHONY: test-cluster
test-cluster:
@# Assemble the redpanda brokers by chopping up the hosts file
Expand All @@ -309,16 +312,17 @@ test-cluster:

@echo $(REDPANDA_BROKERS)
@echo "checking cluster status"
@echo rpk cluster status -X brokers=$(REDPANDA_BROKERS) -v || exit 1
@$(RPK_PATH) cluster status -X brokers=$(REDPANDA_BROKERS) -v || exit 1

@echo "creating topic"
@$(RPK_PATH) topic create testtopic -X brokers=$(REDPANDA_BROKERS) -v || exit 1
@$(RPK_PATH) topic create $(TEST_TOPIC_NAME) -p $(PARTITION_COUNT) -X brokers=$(REDPANDA_BROKERS) -v || exit 1

@echo "producing to topic"
@echo squirrel | $(RPK_PATH) topic produce testtopic -X brokers=$(REDPANDA_BROKERS) -v || exit 1
@echo squirrel | $(RPK_PATH) topic produce $(TEST_TOPIC_NAME) -X brokers=$(REDPANDA_BROKERS) -v || exit 1

@echo "consuming from topic"
@$(RPK_PATH) topic consume testtopic -X brokers=$(REDPANDA_BROKERS) -v -o :end | grep squirrel || exit 1
@$(RPK_PATH) topic consume $(TEST_TOPIC_NAME) -X brokers=$(REDPANDA_BROKERS) -v -o :end | grep squirrel || exit 1

.PHONY: test-schema
test-schema:
Expand Down Expand Up @@ -350,13 +354,13 @@ test-cluster-tls:
@$(ARTIFACT_DIR)/bin/rpk cluster status -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(CA_CRT)" -v || exit 1

@echo "creating topic"
@$(ARTIFACT_DIR)/bin/rpk topic create testtopic -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(CA_CRT)" -v || exit 1
@$(ARTIFACT_DIR)/bin/rpk topic create $(TEST_TOPIC_NAME) -p $(PARTITION_COUNT) -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(CA_CRT)" -v || exit 1

@echo "producing to topic"
@echo squirrels | $(ARTIFACT_DIR)/bin/rpk topic produce testtopic -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(CA_CRT)" -v || exit 1
@echo squirrels | $(ARTIFACT_DIR)/bin/rpk topic produce $(TEST_TOPIC_NAME) -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(CA_CRT)" -v || exit 1

@echo "consuming from topic"
@$(ARTIFACT_DIR)/bin/rpk topic consume testtopic -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(CA_CRT)" -v -o :end | grep squirrels || exit 1
@$(ARTIFACT_DIR)/bin/rpk topic consume $(TEST_TOPIC_NAME) -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(CA_CRT)" -v -o :end | grep squirrels || exit 1

.PHONY: test-schema-tls
test-schema-tls:
Expand Down Expand Up @@ -422,15 +426,15 @@ test-cluster-proxy:
@ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -i "$(SSHKEY)" "$(CLIENT_SSH_USER)@$(CLIENT_PUBLIC_IP)" 'rpk cluster status -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(PATH_TO_CA_CRT)" -v' || exit 1

@echo "creating topic"
@ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -i "$(SSHKEY)" "$(CLIENT_SSH_USER)@$(CLIENT_PUBLIC_IP)" 'rpk topic create testtopic -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(PATH_TO_CA_CRT)" -v' || exit 1
@ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -i "$(SSHKEY)" "$(CLIENT_SSH_USER)@$(CLIENT_PUBLIC_IP)" 'rpk topic create $(TEST_TOPIC_NAME) -p $(PARTITION_COUNT) -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(PATH_TO_CA_CRT)" -v' || exit 1

@echo "producing to topic"
@ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -i "$(SSHKEY)" "$(CLIENT_SSH_USER)@$(CLIENT_PUBLIC_IP)" 'echo squirrels | rpk topic produce testtopic -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(PATH_TO_CA_CRT)" -v' || exit 1
@ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -i "$(SSHKEY)" "$(CLIENT_SSH_USER)@$(CLIENT_PUBLIC_IP)" 'echo squirrels | rpk topic produce $(TEST_TOPIC_NAME) -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(PATH_TO_CA_CRT)" -v' || exit 1

@sleep 30

@echo "consuming from topic"
$(eval testoutput := $(shell ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -i "$(SSHKEY)" "$(CLIENT_SSH_USER)@$(CLIENT_PUBLIC_IP)" 'rpk topic consume testtopic -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(PATH_TO_CA_CRT)" -v -o :end'))
$(eval testoutput := $(shell ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -i "$(SSHKEY)" "$(CLIENT_SSH_USER)@$(CLIENT_PUBLIC_IP)" 'rpk topic consume $(TEST_TOPIC_NAME) -X brokers="$(REDPANDA_BROKERS)" -X tls.ca="$(PATH_TO_CA_CRT)" -v -o :end'))
@echo "$(testoutput)" | grep squirrels || exit 1

CLIENT_NAME ?= client
Expand Down Expand Up @@ -575,6 +579,7 @@ extra-copy-rpm:
done

# spam messages at an existing topic
SPAM_MESSAGE_COUNT ?= 10
.PHONY: test-cluster-spam-messages
test-cluster-spam-messages:
@# Assemble the redpanda brokers by chopping up the hosts file
Expand All @@ -583,8 +588,8 @@ test-cluster-spam-messages:
$(eval REDPANDA_BROKERS := $(shell awk '/^\[redpanda\]/{f=1; next} /^$$/{f=0} f{print $$1}' "$(HOSTS_FILE)" | paste -sd ',' - | awk '{gsub(/,/,":9092,"); sub(/,$$/,":9092")}1'))

@echo "producing to topic"
$(foreach i,$(shell seq 1 10), \
echo "squirrel$i" | $(RPK_PATH) topic produce testtopic -X brokers=$(REDPANDA_BROKERS) -v || exit 1; \
$(foreach i,$(shell seq 1 $(SPAM_MESSAGE_COUNT)), \
echo "squirrel$i" | $(RPK_PATH) topic produce $(TEST_TOPIC_NAME) -X brokers=$(REDPANDA_BROKERS) -v || exit 1; \
)

# spam messages at an existing topic
Expand All @@ -596,8 +601,8 @@ test-cluster-spam-messages-tls:
$(eval REDPANDA_BROKERS := $(shell awk '/^\[redpanda\]/{f=1; next} /^$$/{f=0} f{print $$1}' "$(HOSTS_FILE)" | paste -sd ',' - | awk '{gsub(/,/,":9092,"); sub(/,$$/,":9092")}1'))

@echo "producing to topic"
$(foreach i,$(shell seq 1 10), \
echo "squirrel$i" | $(RPK_PATH) topic produce testtopic -X brokers=$(REDPANDA_BROKERS) -X tls.ca="$(CA_CRT)" -v || exit 1; \
$(foreach i,$(shell seq 1 $(SPAM_MESSAGE_COUNT)), \
echo "squirrel$i" | $(RPK_PATH) topic produce $(TEST_TOPIC_NAME) -X brokers=$(REDPANDA_BROKERS) -X tls.ca="$(CA_CRT)" -v || exit 1; \
)


Expand All @@ -616,3 +621,13 @@ create-connector-tls:
$(eval CONNECT_IP := $(shell awk '/^\[connect\]/{f=1; next} f{print $$1; exit}' $(HOSTS_FILE)))

curl -X POST -H 'Content-Type: application/json' -H 'accept: application/json' --key $(CLIENT_KEY) --cacert $(CLIENT_CERT) https://$(CONNECT_IP):8083/connectors -d '{"name": "mirror-source-connector", "config": {"connector.class": "org.apache.kafka.connect.mirror.MirrorSourceConnector", "topics": "testtopic", "replication.factor": "1", "source.cluster.bootstrap.servers": "$(REDPANDA_BROKERS)", "source.cluster.security.protocol": "SSL", "source.cluster.ssl.truststore.type": "PKCS12", "source.cluster.ssl.keystore.type": "PKCS12", "target.cluster.bootstrap.servers": "$(EXTRA_BROKERS)", "target.cluster.security.protocol": "SSL", "source.cluster.alias": "source", "target.cluster.ssl.truststore.type": "PKCS12", "target.cluster.ssl.keystore.type": "PKCS12"}}'

# lint: run ansible-lint using the .ansible-lint configuration file.
# No prerequisites; ansible-lint must already be available on PATH.
.PHONY: lint
lint:
@echo "Running ansible-lint"
@ansible-lint -c .ansible-lint

# dev-tiered-storage: run ansible/provision-cluster-tiered-storage.yml with
# redpanda_broker_no_log=false and development_build=true, passing
# $(SEGMENT_UPLOAD_INTERVAL) and $(CLOUD_STORAGE_CREDENTIALS_SOURCE) through as extra vars.
# Requires the ansible-prereqs target; creates $(ARTIFACT_DIR)/logs first.
# NOTE(review): unlike the monitor/cluster targets, no --inventory is passed here -
# confirm the inventory is picked up from ansible.cfg or the environment.
.PHONY: dev-tiered-storage
dev-tiered-storage: ansible-prereqs
@mkdir -p $(ARTIFACT_DIR)/logs
ansible-playbook ansible/provision-cluster-tiered-storage.yml --private-key $(PRIVATE_KEY) --extra-vars redpanda_broker_no_log=false --extra-vars development_build=true --extra-vars segment_upload_interval=$(SEGMENT_UPLOAD_INTERVAL) --extra-vars cloud_storage_credentials_source=$(CLOUD_STORAGE_CREDENTIALS_SOURCE)
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,12 @@ terraform init
terraform apply --auto-approve -var='public_key_path=~/.ssh/id_rsa.pub' -var='deployment_prefix=go-rp'
cd ..


# Install collections and roles
ansible-galaxy install -r ./requirements.yml
export ANSIBLE_COLLECTIONS_PATH=$PWD/artifacts/collections
export ANSIBLE_ROLES_PATH=$PWD/artifacts/roles
ansible-galaxy collection install -r $PWD/requirements.yml --force -p $ANSIBLE_COLLECTIONS_PATH
ansible-galaxy role install -r $PWD/requirements.yml --force -p $ANSIBLE_ROLES_PATH

# Run a Playbook
# You need to pick the correct playbook for you, in this case we picked provision-cluster
Expand Down
2 changes: 1 addition & 1 deletion ansible/airgap/bundle-deb-for-airgap.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
- name: bundle debs
- name: Bundle debs
hosts: client
vars:
basearch: "amd64" # must match the output of `uname -m`. however on ubuntu you need to use the debian version of the output. So for example x86_64 is amd64
Expand Down
2 changes: 1 addition & 1 deletion ansible/airgap/bundle-rpm-for-airgap.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
- name: bundle rpms
- name: Bundle rpms
hosts: client
vars:
basearch: "x86_64" # must match the output of `uname -m`
Expand Down
5 changes: 3 additions & 2 deletions ansible/deploy-connect-tls.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# This is a limited access Alpha playbook. Please contact Redpanda for more information.

# create_demo_certs: when false, disables all activity related to our demo CA
# handle_cert_install: when false, disables all activity related to installing certificates with our role
---
- name: Provision tls cluster
hosts: connect
Expand All @@ -24,6 +25,6 @@
- name: Install and configure CA certs for running tls
ansible.builtin.include_role:
name: redpanda.cluster.demo_certs
- name: install connect
- name: Install connect
ansible.builtin.include_role:
name: redpanda.cluster.redpanda_connect
5 changes: 3 additions & 2 deletions ansible/deploy-connect.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# This is a limited access Alpha playbook. Please contact Redpanda for more information.

---
- hosts: connect
- name: Provision Redpanda Connect
hosts: connect
tasks:
- name: install connect
- name: Install connect
ansible.builtin.include_role:
name: redpanda.cluster.redpanda_connect
2 changes: 2 additions & 0 deletions ansible/deploy-console-tls.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# create_demo_certs: when false, disables all activity related to our demo CA
# handle_cert_install: when false, disables all activity related to installing certificates with our role
- name: Provision redpanda console tls
hosts: client
vars:
Expand Down
19 changes: 12 additions & 7 deletions ansible/deploy-monitor-tls.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# jmespath is a required dependency for this playbook. Install it with `pip install jmespath`.
- hosts: redpanda
# jmespath is a required dependency for this playbook. Install it with `pip install jmespath`.
# create_demo_certs: when false, disables all activity related to our demo CA
# handle_cert_install: when false, disables all activity related to installing certificates with our role
- name: Set node_exporter_arch based on host architecture
hosts: redpanda
tasks:
- name: Set node_exporter_arch based on host architecture
ansible.builtin.set_fact:
Expand All @@ -10,9 +13,10 @@
vars:
node_exporter_arch: "{{ rp_arch }}"

- hosts: monitor
- name: Install jmespath
hosts: monitor
tasks:
- name: install deps
- name: Install deps
ansible.builtin.package:
name: python3-jmespath
state: present
Expand All @@ -33,11 +37,11 @@
tags:
- install_certs

- hosts: monitor
- name: Install prometheus
hosts: monitor
roles:
- prometheus.prometheus.prometheus
vars:
tls: true
prometheus_scrape_configs:
- job_name: "redpanda"
scheme: "https"
Expand All @@ -59,7 +63,8 @@
tls_config:
ca_file: "/etc/redpanda/certs/ca.crt"

- hosts: monitor
- name: Install grafana
hosts: monitor
roles:
- grafana.grafana.grafana
vars:
Expand Down
14 changes: 9 additions & 5 deletions ansible/deploy-monitor.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# jmespath is a required dependency for this playbook. Install it with `pip install jmespath`.
- hosts: redpanda
- name: Set node_exporter_arch based on host architecture
hosts: redpanda
tasks:
- name: Set node_exporter_arch based on host architecture
ansible.builtin.set_fact:
Expand All @@ -10,14 +11,16 @@
vars:
node_exporter_arch: "{{ rp_arch }}"

- hosts: monitor
- name: Install jmespath
hosts: monitor
tasks:
- name: install deps
- name: Install deps
ansible.builtin.package:
name: python3-jmespath
state: present

- hosts: monitor
- name: Install prometheus
hosts: monitor
roles:
- prometheus.prometheus.prometheus
vars:
Expand All @@ -35,7 +38,8 @@
static_configs:
- targets: "{{ groups['connect'] | default([]) | map('extract', hostvars, 'inventory_hostname') | map('regex_replace', '^(.*)$','\\1:9404') | list }}"

- hosts: monitor
- name: Install grafana
hosts: monitor
roles:
- grafana.grafana.grafana
vars:
Expand Down
84 changes: 84 additions & 0 deletions ansible/operation-rolling-restart.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
---
# Rolling restart of the brokers in the `redpanda` group, one host at a time
# (serial: 1). For each host the play:
#   1. requires `rpk cluster health` to report healthy,
#   2. looks up the broker's node ID and enables maintenance mode for it,
#   3. stops and starts the redpanda systemd unit,
#   4. disables maintenance mode and waits for the cluster to report healthy.
#
# Variables:
#   rpk_bin: rpk executable to invoke (defaults to `rpk`).
- name: Redpanda Rolling Restart
  hosts: redpanda
  become: true
  serial: 1
  vars:
    rpk_bin: rpk

  tasks:
    # Extract the value after "Healthy:" and lower-case it; anything other
    # than "true" aborts the play before the broker is touched.
    - name: Check cluster health
      ansible.builtin.shell: |
        {{ rpk_bin }} cluster health | grep -i 'healthy:' | tr -d '[:space:]' | awk -F ':' '{print tolower($2)}'
      register: health_check
      failed_when: "health_check.stdout != 'true'"
      changed_when: false

    # Match ansible_host against the second column of `rpk cluster info` and
    # print the first column with any `*` removed.
    - name: Get node ID
      ansible.builtin.shell: |
        {{ rpk_bin }} cluster info | awk '$2 == "{{ ansible_host }}" {gsub("\\*", "", $1); print $1}'
      register: node_id
      # The pipeline's exit status is awk's, so an rpk failure or an unmatched
      # host only shows up as empty output. Fail here rather than passing an
      # empty node ID to the maintenance commands below.
      failed_when: node_id.rc != 0 or (node_id.stdout | trim | length) == 0
      changed_when: false

    # Fails only when neither message is present (list entries are AND-ed).
    - name: Enable maintenance mode
      ansible.builtin.command: "{{ rpk_bin }} cluster maintenance enable {{ node_id.stdout }} --wait"
      register: maintenance_result
      failed_when:
        - "'Successfully enabled maintenance mode' not in maintenance_result.stdout"
        - "'Maintenance mode is already enabled for node' not in maintenance_result.stdout"
      changed_when: "'Successfully enabled maintenance mode' in maintenance_result.stdout"

    # Select this node's row by exact node ID and read its second column.
    # A bare grep for the ID would match any row containing those digits and
    # would not look at the maintenance flag at all.
    # NOTE(review): assumes column 2 of `rpk cluster maintenance status` is the
    # enabled/draining boolean - confirm against the rpk version in use.
    - name: Verify maintenance mode status
      ansible.builtin.shell: |
        {{ rpk_bin }} cluster maintenance status | awk -v id="{{ node_id.stdout }}" '$1 == id {print $2}'
      register: maintenance_status
      failed_when: (maintenance_status.stdout | trim) != 'true'
      changed_when: false

    # `until` makes retries/delay effective on ansible-core releases that
    # ignore `retries` when no `until` condition is given.
    - name: Check cluster health after enabling maintenance mode
      ansible.builtin.shell: |
        {{ rpk_bin }} cluster health --watch --exit-when-healthy | grep -i 'healthy:' | tr -d '[:space:]' | awk -F ':' '{print tolower($2)}'
      register: health_check_maintenance
      until: health_check_maintenance.stdout == 'true'
      failed_when: "health_check_maintenance.stdout != 'true'"
      retries: 10
      delay: 30
      changed_when: false

    - name: Stop Redpanda service
      ansible.builtin.systemd:
        name: redpanda
        state: stopped

    - name: Start Redpanda service
      ansible.builtin.systemd:
        name: redpanda
        state: started

    - name: Disable maintenance mode
      ansible.builtin.command: "{{ rpk_bin }} cluster maintenance disable {{ node_id.stdout }}"
      register: disable_maintenance_result
      changed_when: "'Successfully disabled maintenance mode' in disable_maintenance_result.stdout"
      failed_when: "'Successfully disabled maintenance mode' not in disable_maintenance_result.stdout"

    # `grep -qv <id>` succeeds as soon as any line (for example the header)
    # lacks the ID, so it could never fail. Read this node's flag instead.
    # NOTE(review): same column-2 assumption as the enable check above.
    - name: Verify maintenance mode is disabled
      ansible.builtin.shell: |
        {{ rpk_bin }} cluster maintenance status | awk -v id="{{ node_id.stdout }}" '$1 == id {print $2}'
      register: maintenance_status_after
      failed_when: (maintenance_status_after.stdout | trim) != 'false'
      changed_when: false

    - name: Check cluster health after disabling maintenance mode
      ansible.builtin.shell: |
        {{ rpk_bin }} cluster health --watch --exit-when-healthy | grep -i 'healthy:' | tr -d '[:space:]' | awk -F ':' '{print tolower($2)}'
      register: health_check_maintenance
      until: health_check_maintenance.stdout == 'true'
      failed_when: "health_check_maintenance.stdout != 'true'"
      retries: 10
      delay: 30
      changed_when: false

    # Passes when at least one line of the broker list matches 'active.*true'.
    - name: Check broker status
      ansible.builtin.shell: |
        {{ rpk_bin }} redpanda admin brokers list | grep -q 'active.*true'
      register: broker_status
      changed_when: false
      failed_when: broker_status.rc != 0
4 changes: 2 additions & 2 deletions ansible/provision-cluster-tiered-storage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
# Redpanda encourages you to integrate with your corporate certificate management setup.
# variables
# root_ca_dir: location of our demo cert authority
# handle_certs: when false, disables all activity related to our demo CA
# enable_tls: when true, adds the tls template to the /etc/redpanda/redpanda.yml file
# create_demo_certs: when false, disables all activity related to our demo CA
# handle_cert_install: when false, disables all activity related to installing certificates with our role
# enable_tls: when true, adds the tls template to the /etc/redpanda/redpanda.yml file
# advertise_public_ips : causes the cluster to set a public ip on advertised_kafka_api, allowing access to the cluster from the public internet
---
- name: Provision tiered storage cluster
Expand Down
6 changes: 2 additions & 4 deletions ansible/provision-cluster-tls.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
# Redpanda encourages you to integrate with your corporate certificate management setup.
# variables
# root_ca_dir: location of our demo cert authority
# handle_certs: when false, disables all activity related to our demo CA
# create_demo_certs: when false, disables all activity related to our demo CA
# handle_cert_install: when false, disables all activity related to installing certificates with our role
# enable_tls: when true, adds the tls template to the /etc/redpanda/redpanda.yml file
# advertise_public_ips : causes the cluster to set a public ip on advertised_kafka_api, allowing access to the cluster from the public internet
---
Expand All @@ -24,9 +25,6 @@
- name: Install and configure CA certs for running tls
ansible.builtin.include_role:
name: redpanda.cluster.demo_certs
- name: Install system prereqs
ansible.builtin.include_role:
name: redpanda.cluster.sysctl_setup
- name: Install system prereqs
ansible.builtin.include_role:
name: redpanda.cluster.system_setup
Expand Down
Loading