-
Notifications
You must be signed in to change notification settings - Fork 19
/
setup_server.tftpl.sh
179 lines (146 loc) · 5.05 KB
/
setup_server.tftpl.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env bash
# Script to configure and bootstrap Nomad server nodes in an AWS Auto Scaling group.
#
# This script performs the following steps:
# - Configures the Nomad agent as a server on the EC2 instances.
# - Bootstraps the Nomad ACL system with a pre-configured token, on whichever server is the elected leader (only when ACLs are enabled).
# - Joins the Nomad server nodes to form a cluster.
# - Starts the Nomad agent service.
#
# This script should be run on each Nomad server node as part of the EC2 instance launch process.
#
# Strict mode: -e exits on an unhandled command failure, -u treats unset
# variables as errors, -o pipefail makes a pipeline fail if any stage fails,
# and -E lets an ERR trap (if one is set) be inherited by functions.
set -Eeuo pipefail
# Name of this script, used as a field in every line written by log().
declare -r SCRIPT_NAME="$(basename "$0")"
# Global indexed array; store_tags fills it with one line of IMDS output per element.
declare -ag AWS_TAGS=()
# Send the log output from this script to user-data.log, syslog, and the console.
# stdout is piped through tee (file copy) into logger (tag "user-data"); logger's
# -s output is redirected to /dev/console. stderr is then merged into stdout.
exec > >(tee /var/log/user-data.log | logger -t user-data -s 2>/dev/console) 2>&1
# Emit one timestamped log line on stderr.
# Globals:
#   SCRIPT_NAME (read)
# Arguments:
#   $1 - severity label, e.g. INFO, WARN or ERROR
#   $2 - message text; backslash escapes in it are interpreted (echo -e)
# Outputs:
#   "<YYYY-mm-dd HH:MM:SS> [<level>] [<script>] <message>" on stderr
log() {
  local -r level="$1"
  local -r message="$2"
  local -r stamp=$(date +"%Y-%m-%d %H:%M:%S")
  echo -e "$stamp [$level] [$SCRIPT_NAME] $message" >&2
}
# Fetches this instance's tag listing from the EC2 instance metadata service
# (IMDSv2) and stores it, one line per element, in AWS_TAGS.
# The listing must contain `nomad-cluster`; until it does, the lookup is
# retried up to three times, one second apart.
# Globals:
#   TOKEN    (written) - IMDSv2 session token, reused later by set_hostname
#   AWS_TAGS (written) - lines returned by the tags/instance endpoint
# Returns:
#   0 once the tags are stored; exits the script with status 1 when every
#   attempt failed.
store_tags() {
  local -r max_attempts=3
  local -r imds="http://169.254.169.254/latest"
  local attempt=0
  local tags=""

  while true; do
    # A failing curl must count as a failed attempt. Without the `||` fallback
    # errexit would abort the script here, skipping both the retries and the
    # error message below.
    TOKEN=$(curl -s --connect-timeout 1 --retry 3 --retry-delay 3 \
      -X PUT "$imds/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") || TOKEN=""
    tags=$(curl -s --connect-timeout 1 --retry 3 --retry-delay 3 \
      -H "X-aws-ec2-metadata-token: $TOKEN" "$imds/meta-data/tags/instance") || tags=""

    if [[ "$tags" == *"nomad-cluster"* ]]; then
      readarray -t AWS_TAGS <<<"$tags"
      return 0
    fi

    attempt=$((attempt + 1))
    # If max retries still didn't get the data, fail.
    if [[ $attempt -ge $max_attempts ]]; then
      log "ERROR" "aborting as max attempts reached"
      exit 1
    fi
    sleep 1
  done
}
# Sets the system hostname from the instance's `Name` tag.
# The static hostname is expected to look like `ip-10-x-y-z`; only that leading
# `ip` is replaced with the tag value, so a host that was already renamed (or a
# tag value that itself contains "ip") is not mangled on a second run.
# Globals:
#   AWS_TAGS (read) - tag keys collected by store_tags
#   TOKEN    (read) - IMDSv2 session token set by store_tags
# Returns:
#   0 when the hostname was set, or when there is no usable `Name` tag.
set_hostname() {
  local tag_key name_tag current_hostname new_hostname

  for tag_key in "$${AWS_TAGS[@]}"; do
    # For servers we'll use the NAME tag of the EC2 instance.
    [[ "$tag_key" == "Name" ]] || continue

    name_tag=$(curl -s --retry 3 --retry-delay 3 --connect-timeout 3 \
      -H "Accept: application/json" -H "X-aws-ec2-metadata-token: $TOKEN" \
      "http://169.254.169.254/latest/meta-data/tags/instance/$tag_key")

    # An empty value would produce a hostname starting with "-".
    if [[ -z "$name_tag" ]]; then
      log "WARN" "Name tag has no value, leaving hostname unchanged"
      return 0
    fi

    # The original hostname is like `ip-10-x-y-z`
    current_hostname=$(sudo hostnamectl --static)
    # Replace only the leading `ip` with the tag value.
    new_hostname="$${current_hostname/#ip/$name_tag}"
    log "INFO" "setting hostname as $new_hostname"
    sudo hostnamectl set-hostname "$new_hostname"
    return 0
  done
}
# Raises LimitNOFILE in the packaged nomad.service unit when the
# nomad_file_limit template variable is greater than 65536; otherwise the unit
# file is left untouched.
# The comparison must be numeric (-gt): a bare `>` inside a test is an output
# redirection, which is always true and creates a stray file named 65536.
modify_nomad_systemd_config() {
  if [[ "${nomad_file_limit}" -gt 65536 ]]; then
    sudo sed -i '/^LimitNOFILE/s/=.*$/=${nomad_file_limit}/' /lib/systemd/system/nomad.service
  fi
}
# Enables nomad systemd service
# Reloads systemd's unit definitions first (the main sequence calls
# modify_nomad_systemd_config, which may edit nomad.service, before this),
# then enables the nomad unit and starts it in the same step (--now).
start_nomad() {
sudo systemctl daemon-reload
sudo systemctl enable --now nomad
}
# Restarts nomad systemd service
# Called as the last provisioning step of this script.
restart_nomad() {
sudo systemctl restart nomad
}
# Writes the rendered Nomad server configuration to /etc/nomad.d/nomad.hcl,
# replacing any existing file.
# The heredoc delimiter is quoted so the shell copies the rendered text
# verbatim: a `$`, backtick or backslash inside the configuration is neither
# expanded nor able to abort the script under `set -u`.
# NOTE(review): assumes /etc/nomad.d already exists and is writable by the
# user running this script - confirm against the image build.
prepare_nomad_server_config() {
  cat <<'EOF' >/etc/nomad.d/nomad.hcl
${nomad_server_cfg}
EOF
}
# Wait for the Nomad leader to be elected.
# Polls `nomad operator api /v1/status/leader` up to 10 times, 5 seconds apart,
# and treats the first successful exit status as "leader elected".
# Returns:
#   0 as soon as the query succeeds, 1 when every attempt failed. The main
#   sequence runs under `set -e`, so a return of 1 there stops the script.
wait_for_leader() {
  # Counters are local so they do not leak into (or clash with) global state.
  local -r max_retries=10
  local retry=0

  log "INFO" "Waiting for leader node to be elected"
  while [[ $retry -lt $max_retries ]]; do
    if nomad operator api /v1/status/leader >/dev/null; then
      log "INFO" "Leader node elected"
      return 0
    fi
    log "WARN" "Leader not yet elected. Retrying in 5 seconds..."
    sleep 5
    retry=$((retry + 1))
  done

  log "WARN" "Leader not elected after $max_retries attempts."
  return 1
}
# Bootstraps the Nomad ACL system with the pre-configured token, but only on
# the node whose private IPv4 address matches the current Nomad leader.
# - The local address is read from the instance metadata service using an
#   IMDSv2 session token, like the other metadata calls in this script.
# - The bootstrap token is piped to `nomad acl bootstrap -` on stdin, so the
#   secret is never written to a file in /tmp.
# Returns:
#   0 in all handled cases; a failed bootstrap is logged as ERROR but does not
#   stop the script (same best-effort behaviour as before).
bootstrap_acl() {
  local -r imds="http://169.254.169.254/latest"
  local imds_token ip_address nomad_leader_ip

  # Get the IP address of this node.
  imds_token=$(curl -s --connect-timeout 1 --retry 3 --retry-delay 3 \
    -X PUT "$imds/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 60")
  ip_address=$(curl -s --connect-timeout 1 --retry 3 --retry-delay 3 \
    -H "X-aws-ec2-metadata-token: $imds_token" "$imds/meta-data/local-ipv4")

  # Get the IP address of the Nomad leader: strip the JSON quotes and the port.
  nomad_leader_ip=$(nomad operator api /v1/status/leader | tr -d '"' | cut -d':' -f1)

  log "INFO" "Checking if this node is the Nomad leader"
  # The non-empty check stops two failed lookups ("" == "") from being
  # mistaken for "this node is the leader".
  if [[ -n "$ip_address" && "$ip_address" == "$nomad_leader_ip" ]]; then
    log "INFO" "This node is the Nomad leader. Proceeding with bootstrap process."
    if printf '%s\n' "${nomad_acl_bootstrap_token}" | nomad acl bootstrap - >/dev/null 2>&1; then
      log "INFO" "Nomad ACL bootstrap succeeded."
    else
      log "ERROR" "Nomad ACL bootstrap failed."
    fi
  else
    log "WARN" "This node is not the Nomad leader. Skipping bootstrap."
  fi
}
# Main provisioning sequence. The script runs under `set -e`, so a step that
# fails (for example wait_for_leader giving up) stops the script before the
# later steps run.
log "INFO" "Fetching EC2 Tags from AWS"
store_tags
log "INFO" "Setting hostname of machine"
set_hostname
log "INFO" "Rendering server config for nomad"
prepare_nomad_server_config
log "INFO" "Modify Nomad systemd config"
modify_nomad_systemd_config
log "INFO" "Starting Nomad service"
start_nomad
log "INFO" "Waiting for Nomad to be ready"
wait_for_leader
# Exactly one of the two branches below is kept when the template is rendered,
# depending on the nomad_acl_enable template variable.
%{ if nomad_acl_enable }
log "INFO" "Bootstrapping ACL for Nomad"
bootstrap_acl
%{else}
log "INFO" "Skipping ACL Bootstrap for Nomad as 'nomad_acl_enable' is not set to true"
%{ endif }
# NOTE(review): the reason for this final restart is not evident from the
# script itself - confirm it is still required.
log "INFO" "Restarting services"
restart_nomad
log "INFO" "Finished server initializing process! Enjoy Nomad!"