From d70581e6894c0a9c6bdbb5e4180791ad34694590 Mon Sep 17 00:00:00 2001 From: Andrey Lebedev Date: Tue, 7 Nov 2023 21:12:33 +0100 Subject: [PATCH] OCPBUGS-14914: Set tunnel and server timeouts at backend level The middle backends (`be_sni`, `be_no_sni`) are updated with the server timeout which is set to the maximum value among all routes from the configuration. This prevents a warning message during config reloads. A benchmark test is added for the template helper function which finds the maximum timeout value among all routes (`maxTimeoutFirstMatchedAndClipped`). A new make target is introduced to run all benchmark tests (`bench`). --- Makefile | 12 + .../haproxy/conf/haproxy-config.template | 26 ++- pkg/router/template/template_helper.go | 35 +++ pkg/router/template/template_helper_test.go | 210 ++++++++++++++++++ 4 files changed, 273 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 466487a64..fe8a2b298 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,13 @@ GO_LDFLAGS ?=-ldflags "-s -w $(call version-ldflags,$(PACKAGE)/pkg/version) $(GO GO=GO111MODULE=on GOFLAGS=-mod=vendor go GO_BUILD_RECIPE=CGO_ENABLED=1 $(GO) build -o $(BIN) $(GO_GCFLAGS) $(GO_LDFLAGS) $(MAIN_PACKAGE) +# Get all the packages with benchmark tests defined to prevent +# "go test" from entering all existing packages when "./.." is used. +BENCH_PKGS ?= $(shell \grep -lR '^func Benchmark' | xargs -I {} dirname {} | sed 's|^|./|' | paste -sd ' ') +BENCH_TEST ?= . +BENCH_TIME ?= 2s +BENCH_COUNT ?= 5 + all: build build: @@ -39,6 +46,11 @@ images/router/*/Dockerfile.rhel: images/router/base/Dockerfile.rhel check: CGO_ENABLED=1 $(GO) test -race ./... +.PHONY: bench +bench: + @# -run flag is added to avoid running unit tests + CGO_ENABLED=1 $(GO) test -bench=$(BENCH_TEST) -run='^#' -benchtime=$(BENCH_TIME) -count=$(BENCH_COUNT) -benchmem $(BENCH_PKGS) + .PHONY: verify verify: hack/verify-gofmt.sh diff --git a/images/router/haproxy/conf/haproxy-config.template b/images/router/haproxy/conf/haproxy-config.template index 35f00c3f9..94df3bf6a 100644 --- a/images/router/haproxy/conf/haproxy-config.template +++ b/images/router/haproxy/conf/haproxy-config.template @@ -9,6 +9,8 @@ {{- $dynamicConfigManager := .DynamicConfigManager }} {{- $router_ip_v4_v6_mode := env "ROUTER_IP_V4_V6_MODE" "v4" }} {{- $router_disable_http2 := env "ROUTER_DISABLE_HTTP2" "false" }} +{{- $routerDefaultServerTimeout := env "ROUTER_DEFAULT_SERVER_TIMEOUT" "30s" }} +{{- $routerDefaultTunnelTimeout := env "ROUTER_DEFAULT_TUNNEL_TIMEOUT" "1h" }} {{- $haveClientCA := .HaveClientCA }} {{- $haveCRLs := .HaveCRLs }} @@ -42,6 +44,9 @@ {{- /* pathRewriteTargetPattern: Match path rewrite-Target */}} {{- $pathRewriteTargetPattern := `^/.*$` -}} +{{- /* Maximum timeout among all the routes, required to be set on the middle backends to avoid warning message about missing server timeout. */}} +{{- $routerMaxServerTimeout := maxTimeoutFirstMatchedAndClipped .State "haproxy.router.openshift.io/timeout" $timeSpecPattern $routerDefaultServerTimeout }} + global # Drop resource limit checks to mitigate https://issues.redhat.com/browse/OCPBUGS-21803 in HAProxy 2.6. no strict-limits @@ -158,14 +163,10 @@ defaults timeout connect {{ firstMatch $timeSpecPattern (env "ROUTER_DEFAULT_CONNECT_TIMEOUT") "5s" }} timeout client {{ firstMatch $timeSpecPattern (env "ROUTER_DEFAULT_CLIENT_TIMEOUT") "30s" }} timeout client-fin {{ firstMatch $timeSpecPattern (env "ROUTER_CLIENT_FIN_TIMEOUT") "1s" }} - timeout server {{ firstMatch $timeSpecPattern (env "ROUTER_DEFAULT_SERVER_TIMEOUT") "30s" }} timeout server-fin {{ firstMatch $timeSpecPattern (env "ROUTER_DEFAULT_SERVER_FIN_TIMEOUT") "1s" }} timeout http-request {{ firstMatch $timeSpecPattern (env "ROUTER_SLOWLORIS_TIMEOUT") "10s" }} timeout http-keep-alive {{ firstMatch $timeSpecPattern (env "ROUTER_SLOWLORIS_HTTP_KEEPALIVE") "300s" }} - # Long timeout for WebSocket connections. - timeout tunnel {{ firstMatch $timeSpecPattern (env "ROUTER_DEFAULT_TUNNEL_TIMEOUT") "1h" }} - {{- if isTrue (env "ROUTER_ENABLE_COMPRESSION") }} compression algo gzip compression type {{ env "ROUTER_COMPRESSION_MIME" "text/html text/plain text/css" }} @@ -318,6 +319,7 @@ frontend public_ssl # traffic ########################################################################## backend be_sni + timeout server {{ $routerMaxServerTimeout }} server fe_sni unix@/var/lib/haproxy/run/haproxy-sni.sock weight 1 send-proxy frontend fe_sni @@ -434,6 +436,7 @@ frontend fe_sni ########################################################################## # backend for when sni does not exist, or ssl term needs to happen on the edge backend be_no_sni + timeout server {{ $routerMaxServerTimeout }} server fe_no_sni unix@/var/lib/haproxy/run/haproxy-no-sni.sock weight 1 send-proxy frontend fe_no_sni @@ -593,11 +596,11 @@ backend {{ genBackendNamePrefix $cfg.TLSTermination }}:{{ $cfgIdx }} {{- end }} tcp-request content reject if !whitelist {{- end }} - {{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout")) }} - timeout server {{ $value }} + {{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout") $routerDefaultServerTimeout) }} + timeout server {{ $value }} {{- end }} - {{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout-tunnel")) }} - timeout tunnel {{ $value }} + {{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout-tunnel") $routerDefaultTunnelTimeout) }} + timeout tunnel {{ $value }} {{- end }} {{- if isTrue (index $cfg.Annotations "haproxy.router.openshift.io/rate-limit-connections") }} @@ -797,8 +800,11 @@ backend {{ genBackendNamePrefix $cfg.TLSTermination }}:{{ $cfgIdx }} {{- end }} tcp-request content reject if !whitelist {{- end }} - {{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout-tunnel") (index $cfg.Annotations "haproxy.router.openshift.io/timeout")) }} - timeout tunnel {{ $value }} + {{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout") $routerDefaultServerTimeout) }} + timeout server {{ $value }} + {{- end }} + {{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout-tunnel") (index $cfg.Annotations "haproxy.router.openshift.io/timeout") $routerDefaultTunnelTimeout) }} + timeout tunnel {{ $value }} {{- end }} {{- if isTrue (index $cfg.Annotations "haproxy.router.openshift.io/rate-limit-connections") }} diff --git a/pkg/router/template/template_helper.go b/pkg/router/template/template_helper.go index 3bf0c821b..68ecf00c0 100644 --- a/pkg/router/template/template_helper.go +++ b/pkg/router/template/template_helper.go @@ -13,6 +13,7 @@ import ( "strings" "sync" "text/template" + "time" routev1 "github.com/openshift/api/route/v1" "github.com/openshift/router/pkg/router/routeapihelpers" @@ -389,6 +390,38 @@ func parseIPList(list string) string { return list } +// maxTimeoutFirstMatchedAndClipped finds the maximum timeout managed by a given annotation among all the routes, matches it against a given pattern, and clips. +// The goal is to get the maximum timeout among the ones set on the lowest layer backends, rather than the maximum of all values provided to the function. +// For instance, if a route has a timeout annotation set, it will take precedence over the default timeout, even if the default timeout is greater. +func maxTimeoutFirstMatchedAndClipped(aliases map[ServiceAliasConfigKey]ServiceAliasConfig, annotation, pattern string, values ...string) string { + var ( + max string + maxDuration time.Duration + ) + // find max timeout in route annotations + for _, cfg := range aliases { + timeout := clipHAProxyTimeoutValue(firstMatch(pattern, cfg.Annotations[annotation])) + if timeout != "" { + // No error handling because clipHAProxyTimeoutValue returns + // a valid timeout or an empty string. The latter is already handled. + timeoutDuration, _ := time.ParseDuration(timeout) + if timeoutDuration > maxDuration { + max = timeout + maxDuration = timeoutDuration + } + } + } + // use values if no max was found in routes + if max == "" { + max = clipHAProxyTimeoutValue(firstMatch(pattern, values...)) + } + // use max haproxy timeout if no max was found + if max == "" { + max = templateutil.HaproxyMaxTimeout + } + return max +} + var helperFunctions = template.FuncMap{ "endpointsForAlias": endpointsForAlias, //returns the list of valid endpoints "processEndpointsForAlias": processEndpointsForAlias, //returns the list of valid endpoints after processing them @@ -416,4 +449,6 @@ var helperFunctions = template.FuncMap{ "parseIPList": parseIPList, //parses the list of IPs/CIDRs (IPv4/IPv6) "processRewriteTarget": rewritetarget.SanitizeInput, //sanitizes `haproxy.router.openshift.io/rewrite-target` annotation + + "maxTimeoutFirstMatchedAndClipped": maxTimeoutFirstMatchedAndClipped, //finds the maximum timeout managed by a given annotation among all the routes, matches it against a given pattern, and clips } diff --git a/pkg/router/template/template_helper_test.go b/pkg/router/template/template_helper_test.go index 03b1177bc..0847670be 100644 --- a/pkg/router/template/template_helper_test.go +++ b/pkg/router/template/template_helper_test.go @@ -1067,3 +1067,213 @@ func TestParseIPList(t *testing.T) { }) } } + +func Test_maxTimeoutFirstMatchedAndClipped(t *testing.T) { + testCases := []struct { + name string + state map[ServiceAliasConfigKey]ServiceAliasConfig + annotation string + pattern string + values []string + expected string + }{ + { + name: "Route timeout is maximum", + state: map[ServiceAliasConfigKey]ServiceAliasConfig{ + "test:route1": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout": "5m", + }, + }, + "test:route2": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout": "35s", + }, + }, + "test:route3": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout-tunnel": "10m", + }, + }, + "test:route4": {}, + }, + annotation: "haproxy.router.openshift.io/timeout", + pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, + values: []string{"30s"}, + expected: "5m", + }, + { + name: "Default timeout is maximum", + state: map[ServiceAliasConfigKey]ServiceAliasConfig{ + "test:route1": {}, + "test:route2": {}, + "test:route3": {}, + }, + annotation: "haproxy.router.openshift.io/timeout", + pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, + values: []string{"30s"}, + expected: "30s", + }, + { + name: "First default timeout is choosen", + state: map[ServiceAliasConfigKey]ServiceAliasConfig{ + "test:route1": {}, + "test:route2": {}, + "test:route3": {}, + }, + annotation: "haproxy.router.openshift.io/timeout", + pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, + values: []string{"30s", "40s"}, + expected: "30s", + }, + { + name: "One route timeout doesn't match pattern", + state: map[ServiceAliasConfigKey]ServiceAliasConfig{ + "test:route1": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout": "5minutes", + }, + }, + "test:route2": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout": "35s", + }, + }, + }, + annotation: "haproxy.router.openshift.io/timeout", + pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, + values: []string{"30s"}, + expected: "35s", + }, + { + name: "No route timeout matches pattern", + state: map[ServiceAliasConfigKey]ServiceAliasConfig{ + "test:route1": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout": "5minutes", + }, + }, + "test:route2": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout": "35seconds", + }, + }, + }, + annotation: "haproxy.router.openshift.io/timeout", + pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, + values: []string{"30s"}, + expected: "30s", + }, + { + name: "Route timeout clipped", + state: map[ServiceAliasConfigKey]ServiceAliasConfig{ + "test:route1": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout": "999999999s", + }, + }, + }, + annotation: "haproxy.router.openshift.io/timeout", + pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, + values: []string{"30s"}, + expected: "2147483647ms", + }, + { + name: "Default timeout overflows but route takes precedence", + state: map[ServiceAliasConfigKey]ServiceAliasConfig{ + "test:route1": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout": "30s", + }, + }, + }, + annotation: "haproxy.router.openshift.io/timeout", + pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, + values: []string{"999999999s"}, + expected: "30s", + }, + { + name: "Empty state", + annotation: "haproxy.router.openshift.io/timeout", + pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, + values: []string{"30s"}, + expected: "30s", + }, + { + name: "Empty state and values", + annotation: "haproxy.router.openshift.io/timeout", + pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, + expected: "2147483647ms", + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got := maxTimeoutFirstMatchedAndClipped(tc.state, tc.annotation, tc.pattern, tc.values...) + if got != tc.expected { + t.Errorf("Failure: expected %q, got %q", tc.expected, got) + } + }) + } +} + +func Benchmark_maxTimeoutFirstMatchedAndClipped(b *testing.B) { + testCases := []struct { + name string + state map[ServiceAliasConfigKey]ServiceAliasConfig + annotation string + pattern string + values []string + }{ + { + name: "Input1", + state: map[ServiceAliasConfigKey]ServiceAliasConfig{ + "test:route1": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout": "5m", + }, + }, + }, + annotation: "haproxy.router.openshift.io/timeout", + pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, + values: []string{"30s"}, + }, + { + name: "Input5", + state: map[ServiceAliasConfigKey]ServiceAliasConfig{ + "test:route1": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout": "5m", + }, + }, + "test:route2": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout": "35s", + }, + }, + "test:route3": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout-tunnel": "35s", + }, + }, + "test:route4": { + Annotations: map[string]string{ + "haproxy.router.openshift.io/timeout-tunnel": "10m", + }, + }, + "test:route5": {}, + }, + annotation: "haproxy.router.openshift.io/timeout", + pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, + // valid value is the last one to force all the iterations + values: []string{"", "", "", "", "30s"}, + }, + } + for _, tc := range testCases { + b.ResetTimer() + b.Run(tc.name, func(b *testing.B) { + for n := 0; n < b.N; n++ { + maxTimeoutFirstMatchedAndClipped(tc.state, tc.annotation, tc.pattern, tc.values...) + } + }) + } +}