From af083be0fbd800eaa9df7705a43324bcb81c09fe Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Tue, 19 Sep 2023 10:28:32 +0200 Subject: [PATCH 1/2] param: New startup_panic debug flag --- bin/varnishd/cache/cache_main.c | 3 +++ include/tbl/debug_bits.h | 1 + 2 files changed, 4 insertions(+) diff --git a/bin/varnishd/cache/cache_main.c b/bin/varnishd/cache/cache_main.c index 3e093c50b2b..26e789cbfe7 100644 --- a/bin/varnishd/cache/cache_main.c +++ b/bin/varnishd/cache/cache_main.c @@ -462,6 +462,9 @@ child_main(int sigmagic, size_t altstksz) SMP_Ready(); #endif + if (DO_DEBUG(DBG_STARTUP_PANIC)) + WRONG("startup_panic"); + CLI_Run(); if (shutdown_delay > 0) diff --git a/include/tbl/debug_bits.h b/include/tbl/debug_bits.h index 0e60d254702..8d8c3067361 100644 --- a/include/tbl/debug_bits.h +++ b/include/tbl/debug_bits.h @@ -52,6 +52,7 @@ DEBUG_BIT(PROCESSORS, processors, "Fetch/Deliver processors") DEBUG_BIT(PROTOCOL, protocol, "Protocol debugging") DEBUG_BIT(VCL_KEEP, vcl_keep, "Keep VCL C and so files") DEBUG_BIT(LCK, lck, "Additional lock statistics") +DEBUG_BIT(STARTUP_PANIC, startup_panic, "Panic early during cache startup") #undef DEBUG_BIT /*lint -restore */ From 926ec10e58addfc0496025705c8773de9afa84e2 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Tue, 19 Sep 2023 10:20:30 +0200 Subject: [PATCH 2/2] mgt_main: Preserve VSM after startup panics When a panic occurs early during the cache process startup, before it starts listening to its CLI, we make sure to bypass MGT exit handlers to preserve the VSM. This restores our ability to perform post-mortem inspections. --- bin/varnishd/mgt/mgt_child.c | 2 +- bin/varnishd/mgt/mgt_main.c | 2 +- bin/varnishtest/tests/u00020.vtc | 35 ++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 bin/varnishtest/tests/u00020.vtc diff --git a/bin/varnishd/mgt/mgt_child.c b/bin/varnishd/mgt/mgt_child.c index b4fd329bbd9..05511226416 100644 --- a/bin/varnishd/mgt/mgt_child.c +++ b/bin/varnishd/mgt/mgt_child.c @@ -347,7 +347,7 @@ mgt_launch_child(struct cli *cli) if ((pid = fork()) < 0) { VJ_master(JAIL_MASTER_LOW); perror("Could not fork child"); - exit(1); // XXX Harsh ? + _exit(1); // XXX Harsh ? } if (pid == 0) { diff --git a/bin/varnishd/mgt/mgt_main.c b/bin/varnishd/mgt/mgt_main.c index c1bdaf812c3..fd5bf87bd9e 100644 --- a/bin/varnishd/mgt/mgt_main.c +++ b/bin/varnishd/mgt/mgt_main.c @@ -964,7 +964,7 @@ main(int argc, char * const *argv) mgt_eric_im_done(eric_fd, u); if (u) - exit(u); + _exit(u); /* Failure is no longer an option */ diff --git a/bin/varnishtest/tests/u00020.vtc b/bin/varnishtest/tests/u00020.vtc new file mode 100644 index 00000000000..e0219770d2b --- /dev/null +++ b/bin/varnishtest/tests/u00020.vtc @@ -0,0 +1,35 @@ +varnishtest "panic during startup" + +varnish v1 -cliok "param.set feature +no_coredump" + +# late startup panic +varnish v1 -vcl { + import vtc; + + backend be none; + + sub vcl_init { + vtc.panic("vcl_init"); + } +} + +varnish v1 -clierr 400 start +varnish v1 -cliexpect vcl_init panic.show +varnish v1 -cliok panic.clear +varnish v1 -expectexit 0x40 + +# early startup panic +process p1 { + exec varnishd -F -n "${tmpdir}/p1/workdir" \ + -a ${localhost}:0 -b ${localhost}:0 \ + -p feature=+no_coredump -p debug=+startup_panic +} -expect-exit 2 -run + +# post-mortem inspection +shell -match "MGT.child_start( +)1" { + varnishstat -n "${tmpdir}/p1/workdir" -1 +} + +shell -match "MGT.child_panic( +)1" { + varnishstat -n "${tmpdir}/p1/workdir" -1 +}