-
Notifications
You must be signed in to change notification settings - Fork 101
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Clear job locks on generic workers after grace period is exceeded (#477)
Introduces a configurable grace period after which the generic worker processes will be killed. This allows workers to finish their current job during, e.g., an update, instead of being killed after 15 seconds (the bpm default). After the worker processes are stopped or killed, pending locks are cleared, which allows other workers to pick up pending jobs. Previously, the locks were only cleared after the job timeout had been exceeded (default: 4 hours). This is based on the assumption that jobs processed on the generic workers are idempotent.
- Loading branch information
Showing
9 changed files
with
105 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,16 @@ | ||
#!/usr/bin/env bash | ||
|
||
# Drain script template for the cloud_controller_worker job; ERB tags are filled in at render time. | ||
# NOTE(review): this is a diff hunk (-1,8 +1,16) whose +/- markers were lost. By row count the | ||
# `bpm stop` loop directly below is the removed pre-change code and the rows from `source` through | ||
# `popd` are the added replacement - confirm against the commit before reading this as one script. | ||
for i in {1..<%=p("cc.jobs.generic.number_of_workers")%>}; do | ||
/var/vcap/jobs/bpm/bin/bpm stop cloud_controller_worker -p "worker_${i}" 1>&2 | ||
done | ||
# Set up the environment for the commands below (the contents of ruby_version.sh are not shown here). | ||
source /var/vcap/jobs/cloud_controller_worker/bin/ruby_version.sh | ||
export CLOUD_CONTROLLER_NG_CONFIG=/var/vcap/jobs/cloud_controller_worker/config/cloud_controller_ng.yml | ||
|
||
# Run the worker shutdown helper; its stdout is redirected to stderr, | ||
# presumably so that only the final `echo 0` is written to stdout. | ||
/var/vcap/jobs/cloud_controller_worker/bin/shutdown_drain 1>&2 | ||
|
||
# From the cloud_controller_ng package directory, run the clear_pending_locks rake task once per worker. | ||
# Each worker is addressed as cc_global_worker.<spec.job.name>.<spec.index>.<worker number>. | ||
# The template spec matches the rendered `for` and `bundle exec rake` lines literally, so keep their spelling. | ||
pushd /var/vcap/packages/cloud_controller_ng/cloud_controller_ng > /dev/null || exit 1 | ||
for i in {1..<%=p("cc.jobs.generic.number_of_workers")%>}; do | ||
# shellcheck disable=SC2093 | ||
bundle exec rake jobs:clear_pending_locks[cc_global_worker.<%= spec.job.name %>.<%= spec.index %>."${i}"] 1>&2 | ||
done | ||
popd > /dev/null || exit 1 | ||
|
||
echo 0 # tell bosh not to wait for anything | ||
exit 0 |
16 changes: 16 additions & 0 deletions
16
jobs/cloud_controller_worker/templates/shutdown_drain.rb.erb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/var/vcap/packages/ruby-3.2/bin/ruby --disable-all | ||
|
||
# Shuts down the generic worker processes concurrently: one thread per worker calls | ||
# Drain#shutdown_delayed_worker with that worker's bpm pid file and the grace period, | ||
# and the script finishes once every thread has been joined. | ||
# NOTE(review): Drain is loaded from cloud_controller/drain and is not shown here; per the | ||
# commit description a worker may finish its current job within the grace period - confirm. | ||
$LOAD_PATH.unshift('/var/vcap/packages/cloud_controller_ng/cloud_controller_ng/app') | ||
$LOAD_PATH.unshift('/var/vcap/packages/cloud_controller_ng/cloud_controller_ng/lib') | ||
|
||
require 'cloud_controller/drain' | ||
|
||
# The template spec matches the rendered `@grace_period = N` and `(1..N).each do |i|` lines | ||
# literally, so keep their exact spelling when editing this template. | ||
@threads = [] | ||
@grace_period = <%= p("cc.jobs.generic.worker_grace_period_seconds") %> | ||
@drain = VCAP::CloudController::Drain.new('/var/vcap/sys/log/cloud_controller_worker') | ||
|
||
# Start one shutdown thread per worker; the grace period is coerced to an Integer before it is passed on. | ||
(1..<%= p("cc.jobs.generic.number_of_workers") %>).each do |i| | ||
@threads << Thread.new { @drain.shutdown_delayed_worker("/var/vcap/sys/run/bpm/cloud_controller_worker/worker_#{i}.pid", @grace_period.to_i) } | ||
end | ||
|
||
# Wait for all shutdown threads before the script exits. | ||
@threads.each(&:join) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# frozen_string_literal: true | ||
|
||
require 'rspec' | ||
require 'bosh/template/test' | ||
|
||
# Rendering specs for the cloud_controller_worker job's bin/shutdown_drain and bin/drain templates. | ||
# Every example renders a template and checks that a literal line of text appears in the output, | ||
# so the examples are sensitive to the exact spelling of those template lines. | ||
module Bosh | ||
module Template | ||
module Test | ||
describe 'drain template rendering' do | ||
# The release root is resolved two directories above this spec file. | ||
let(:release_path) { File.join(File.dirname(__FILE__), '../..') } | ||
let(:release) { ReleaseDir.new(release_path) } | ||
let(:job) { release.job('cloud_controller_worker') } | ||
|
||
describe 'bin/shutdown_drain' do | ||
let(:template) { job.template('bin/shutdown_drain') } | ||
|
||
# With no properties supplied the rendered grace period is expected to be 15 | ||
# (presumably the default declared in the job spec, which is not shown here). | ||
it 'renders the default value' do | ||
rendered_file = template.render({}, consumes: {}) | ||
expect(rendered_file).to include('@grace_period = 15') | ||
end | ||
|
||
context "when 'worker_grace_period_seconds' is provided" do | ||
it 'renders the provided value' do | ||
rendered_file = template.render({ 'cc' => { 'jobs' => { 'generic' => { 'worker_grace_period_seconds' => 60 } } } }, consumes: {}) | ||
expect(rendered_file).to include('@grace_period = 60') | ||
end | ||
end | ||
|
||
# With no properties supplied a single worker is expected (range 1..1). | ||
it 'renders the default number of workers' do | ||
rendered_file = template.render({}, consumes: {}) | ||
expect(rendered_file).to include('(1..1).each do |i|') | ||
end | ||
|
||
context "when 'number_of_workers' is provided" do | ||
it 'renders the provided number of workers' do | ||
rendered_file = template.render({ 'cc' => { 'jobs' => { 'generic' => { 'number_of_workers' => 5 } } } }, consumes: {}) | ||
expect(rendered_file).to include('(1..5).each do |i|') | ||
end | ||
end | ||
end | ||
|
||
describe 'bin/drain' do | ||
let(:template) { job.template('bin/drain') } | ||
|
||
# With no properties supplied the shell loop is expected to cover a single worker. | ||
it 'renders the default number of workers' do | ||
rendered_file = template.render({}, consumes: {}) | ||
expect(rendered_file).to include('for i in {1..1}; do') | ||
end | ||
|
||
context "when 'number_of_workers' is provided" do | ||
it 'renders the provided number of workers' do | ||
rendered_file = template.render({ 'cc' => { 'jobs' => { 'generic' => { 'number_of_workers' => 5 } } } }, consumes: {}) | ||
expect(rendered_file).to include('for i in {1..5}; do') | ||
end | ||
end | ||
|
||
# NOTE(review): the 'job_name' property passed here does not appear to influence the output; | ||
# the expectation below still matches the default name 'me' and index 0 - confirm whether the | ||
# property can be dropped or the name should be set through the instance spec instead. | ||
it 'renders the job name and index' do | ||
rendered_file = template.render({ 'job_name' => 'cc-worker' }, consumes: {}) | ||
# Default job name is 'me' in tests (bosh-template) | ||
expect(rendered_file).to include('bundle exec rake jobs:clear_pending_locks[cc_global_worker.me.0."${i}"]') | ||
end | ||
end | ||
end | ||
end | ||
end | ||
end |