-
Notifications
You must be signed in to change notification settings - Fork 116
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
OCPBUGS-14914: Set tunnel and server timeouts at backend level #536
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,8 @@ | |
{{- $dynamicConfigManager := .DynamicConfigManager }} | ||
{{- $router_ip_v4_v6_mode := env "ROUTER_IP_V4_V6_MODE" "v4" }} | ||
{{- $router_disable_http2 := env "ROUTER_DISABLE_HTTP2" "false" }} | ||
{{- $routerDefaultServerTimeout := env "ROUTER_DEFAULT_SERVER_TIMEOUT" "30s" }} | ||
{{- $routerDefaultTunnelTimeout := env "ROUTER_DEFAULT_TUNNEL_TIMEOUT" "1h" }} | ||
{{- $haveClientCA := .HaveClientCA }} | ||
{{- $haveCRLs := .HaveCRLs }} | ||
|
||
|
@@ -42,6 +44,9 @@ | |
{{- /* pathRewriteTargetPattern: Match path rewrite-Target */}} | ||
{{- $pathRewriteTargetPattern := `^/.*$` -}} | ||
|
||
{{- /* Maximum timeout among all the routes, required to be set on the middle backends to avoid warning message about missing server timeout. */}} | ||
{{- $routerMaxServerTimeout := maxTimeoutFirstMatchedAndClipped .State "haproxy.router.openshift.io/timeout" $timeSpecPattern $routerDefaultServerTimeout }} | ||
|
||
global | ||
# Drop resource limit checks to mitigate https://issues.redhat.com/browse/OCPBUGS-21803 in HAProxy 2.6. | ||
no strict-limits | ||
|
@@ -158,14 +163,10 @@ defaults | |
timeout connect {{ firstMatch $timeSpecPattern (env "ROUTER_DEFAULT_CONNECT_TIMEOUT") "5s" }} | ||
timeout client {{ firstMatch $timeSpecPattern (env "ROUTER_DEFAULT_CLIENT_TIMEOUT") "30s" }} | ||
timeout client-fin {{ firstMatch $timeSpecPattern (env "ROUTER_CLIENT_FIN_TIMEOUT") "1s" }} | ||
timeout server {{ firstMatch $timeSpecPattern (env "ROUTER_DEFAULT_SERVER_TIMEOUT") "30s" }} | ||
timeout server-fin {{ firstMatch $timeSpecPattern (env "ROUTER_DEFAULT_SERVER_FIN_TIMEOUT") "1s" }} | ||
timeout http-request {{ firstMatch $timeSpecPattern (env "ROUTER_SLOWLORIS_TIMEOUT") "10s" }} | ||
timeout http-keep-alive {{ firstMatch $timeSpecPattern (env "ROUTER_SLOWLORIS_HTTP_KEEPALIVE") "300s" }} | ||
|
||
# Long timeout for WebSocket connections. | ||
timeout tunnel {{ firstMatch $timeSpecPattern (env "ROUTER_DEFAULT_TUNNEL_TIMEOUT") "1h" }} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why don't you need to specify Does omitting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes. Only the missing server timeout gives a warning message. I'm not sure the warning reports a real problem though taking into account that the bottom backends will still have the server timeout set.
Regarding the link between server and tunnel timeouts. I discovered that the tunnel timeout supersedes server timeout in the TCP proxies. Note that the middle backends in the router configuration are TCP proxies. So setting tunnel timeout in |
||
|
||
{{- if isTrue (env "ROUTER_ENABLE_COMPRESSION") }} | ||
compression algo gzip | ||
compression type {{ env "ROUTER_COMPRESSION_MIME" "text/html text/plain text/css" }} | ||
|
@@ -318,6 +319,7 @@ frontend public_ssl | |
# traffic | ||
########################################################################## | ||
backend be_sni | ||
timeout server {{ $routerMaxServerTimeout }} | ||
server fe_sni unix@/var/lib/haproxy/run/haproxy-sni.sock weight 1 send-proxy | ||
|
||
frontend fe_sni | ||
|
@@ -434,6 +436,7 @@ frontend fe_sni | |
########################################################################## | ||
# backend for when sni does not exist, or ssl term needs to happen on the edge | ||
backend be_no_sni | ||
timeout server {{ $routerMaxServerTimeout }} | ||
server fe_no_sni unix@/var/lib/haproxy/run/haproxy-no-sni.sock weight 1 send-proxy | ||
|
||
frontend fe_no_sni | ||
|
@@ -593,11 +596,11 @@ backend {{ genBackendNamePrefix $cfg.TLSTermination }}:{{ $cfgIdx }} | |
{{- end }} | ||
tcp-request content reject if !whitelist | ||
{{- end }} | ||
{{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout")) }} | ||
timeout server {{ $value }} | ||
{{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout") $routerDefaultServerTimeout) }} | ||
timeout server {{ $value }} | ||
{{- end }} | ||
{{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout-tunnel")) }} | ||
timeout tunnel {{ $value }} | ||
{{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout-tunnel") $routerDefaultTunnelTimeout) }} | ||
timeout tunnel {{ $value }} | ||
{{- end }} | ||
|
||
{{- if isTrue (index $cfg.Annotations "haproxy.router.openshift.io/rate-limit-connections") }} | ||
|
@@ -797,8 +800,11 @@ backend {{ genBackendNamePrefix $cfg.TLSTermination }}:{{ $cfgIdx }} | |
{{- end }} | ||
tcp-request content reject if !whitelist | ||
{{- end }} | ||
{{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout-tunnel") (index $cfg.Annotations "haproxy.router.openshift.io/timeout")) }} | ||
timeout tunnel {{ $value }} | ||
{{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout") $routerDefaultServerTimeout) }} | ||
timeout server {{ $value }} | ||
{{- end }} | ||
{{- with $value := clipHAProxyTimeoutValue (firstMatch $timeSpecPattern (index $cfg.Annotations "haproxy.router.openshift.io/timeout-tunnel") (index $cfg.Annotations "haproxy.router.openshift.io/timeout") $routerDefaultTunnelTimeout) }} | ||
timeout tunnel {{ $value }} | ||
{{- end }} | ||
|
||
{{- if isTrue (index $cfg.Annotations "haproxy.router.openshift.io/rate-limit-connections") }} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ import ( | |
"strings" | ||
"sync" | ||
"text/template" | ||
"time" | ||
|
||
routev1 "github.com/openshift/api/route/v1" | ||
"github.com/openshift/router/pkg/router/routeapihelpers" | ||
|
@@ -389,6 +390,38 @@ func parseIPList(list string) string { | |
return list | ||
} | ||
|
||
// maxTimeoutFirstMatchedAndClipped finds the maximum timeout managed by a given annotation among all the routes, matches it against a given pattern, and clips. | ||
// The goal is to get the maximum timeout among the ones set on the lowest layer backends, rather than the maximum of all values provided to the function. | ||
// For instance, if a route has a timeout annotation set, it will take precedence over the default timeout, even if the default timeout is greater. | ||
func maxTimeoutFirstMatchedAndClipped(aliases map[ServiceAliasConfigKey]ServiceAliasConfig, annotation, pattern string, values ...string) string { | ||
var ( | ||
max string | ||
maxDuration time.Duration | ||
) | ||
// find max timeout in route annotations | ||
for _, cfg := range aliases { | ||
timeout := clipHAProxyTimeoutValue(firstMatch(pattern, cfg.Annotations[annotation])) | ||
if timeout != "" { | ||
// No error handling because clipHAProxyTimeoutValue returns | ||
// a valid timeout or an empty string. The latter is already handled. | ||
timeoutDuration, _ := time.ParseDuration(timeout) | ||
if timeoutDuration > maxDuration { | ||
max = timeout | ||
maxDuration = timeoutDuration | ||
} | ||
} | ||
} | ||
// use values if no max was found in routes | ||
if max == "" { | ||
max = clipHAProxyTimeoutValue(firstMatch(pattern, values...)) | ||
} | ||
// use max haproxy timeout if no max was found | ||
if max == "" { | ||
max = templateutil.HaproxyMaxTimeout | ||
} | ||
return max | ||
} | ||
|
||
var helperFunctions = template.FuncMap{ | ||
"endpointsForAlias": endpointsForAlias, //returns the list of valid endpoints | ||
"processEndpointsForAlias": processEndpointsForAlias, //returns the list of valid endpoints after processing them | ||
|
@@ -416,4 +449,6 @@ var helperFunctions = template.FuncMap{ | |
"parseIPList": parseIPList, //parses the list of IPs/CIDRs (IPv4/IPv6) | ||
|
||
"processRewriteTarget": rewritetarget.SanitizeInput, //sanitizes `haproxy.router.openshift.io/rewrite-target` annotation | ||
|
||
"maxTimeoutFirstMatchedAndClipped": maxTimeoutFirstMatchedAndClipped, //finds the maximum timeout managed by a given annotation among all the routes, matches it against a given pattern, and clips | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keep in mind that this list of helper functions is effectively an API. We cannot rename or remove a helper function without potentially breaking third-party templates, so we tend to prefer defining general and composable helpers. That said, I think it is worth putting this functionality in a single specialized helper to avoid excessive performance impact. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm not sure I understand how putting the function into a different helper supposed to remove the API contract. Taking into account that the function will still be used in the config template. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My point was that there is a tradeoff here, and we should be aware of it.
However, it is probably worth making that tradeoff here. I'm not asking you to change anything; I'm just trying to be deliberate and explicit about the design choice. |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1067,3 +1067,213 @@ func TestParseIPList(t *testing.T) { | |
}) | ||
} | ||
} | ||
|
||
func Test_maxTimeoutFirstMatchedAndClipped(t *testing.T) { | ||
testCases := []struct { | ||
name string | ||
state map[ServiceAliasConfigKey]ServiceAliasConfig | ||
annotation string | ||
pattern string | ||
Comment on lines
+1075
to
+1076
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add test cases with different values for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had a test case with 2 routes one of which didn't match the pattern ( |
||
values []string | ||
expected string | ||
}{ | ||
{ | ||
name: "Route timeout is maximum", | ||
state: map[ServiceAliasConfigKey]ServiceAliasConfig{ | ||
"test:route1": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout": "5m", | ||
}, | ||
}, | ||
"test:route2": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout": "35s", | ||
}, | ||
}, | ||
"test:route3": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout-tunnel": "10m", | ||
}, | ||
}, | ||
"test:route4": {}, | ||
}, | ||
annotation: "haproxy.router.openshift.io/timeout", | ||
pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, | ||
values: []string{"30s"}, | ||
expected: "5m", | ||
}, | ||
{ | ||
name: "Default timeout is maximum", | ||
state: map[ServiceAliasConfigKey]ServiceAliasConfig{ | ||
"test:route1": {}, | ||
"test:route2": {}, | ||
"test:route3": {}, | ||
}, | ||
annotation: "haproxy.router.openshift.io/timeout", | ||
pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, | ||
values: []string{"30s"}, | ||
expected: "30s", | ||
}, | ||
Comment on lines
+1106
to
+1116
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should at least one of the routes specify a value? It makes sense to me to have a test case for "some routes specify timeouts, but the default timeout is maximum" . There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By the way, what happens if you have 0 routes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The goal of maxTimeoutFirstMatchedAndClipped is to get the maximum timeout among the ones set on the lowest layer backends, rather than the maximum of all values provided to the function. For instance, if a route has a timeout annotation set, it will take precedence over the default timeout, even if the default timeout is greater. I updated the godoc for the function to highlight this.
Unit test case added to cover this case ( |
||
{ | ||
name: "First default timeout is choosen", | ||
state: map[ServiceAliasConfigKey]ServiceAliasConfig{ | ||
"test:route1": {}, | ||
"test:route2": {}, | ||
"test:route3": {}, | ||
}, | ||
annotation: "haproxy.router.openshift.io/timeout", | ||
pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, | ||
values: []string{"30s", "40s"}, | ||
expected: "30s", | ||
}, | ||
{ | ||
name: "One route timeout doesn't match pattern", | ||
state: map[ServiceAliasConfigKey]ServiceAliasConfig{ | ||
"test:route1": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout": "5minutes", | ||
}, | ||
}, | ||
"test:route2": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout": "35s", | ||
}, | ||
}, | ||
}, | ||
annotation: "haproxy.router.openshift.io/timeout", | ||
pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, | ||
values: []string{"30s"}, | ||
expected: "35s", | ||
}, | ||
{ | ||
name: "No route timeout matches pattern", | ||
state: map[ServiceAliasConfigKey]ServiceAliasConfig{ | ||
"test:route1": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout": "5minutes", | ||
}, | ||
}, | ||
"test:route2": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout": "35seconds", | ||
}, | ||
}, | ||
}, | ||
annotation: "haproxy.router.openshift.io/timeout", | ||
pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, | ||
values: []string{"30s"}, | ||
expected: "30s", | ||
}, | ||
{ | ||
name: "Route timeout clipped", | ||
state: map[ServiceAliasConfigKey]ServiceAliasConfig{ | ||
"test:route1": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout": "999999999s", | ||
}, | ||
}, | ||
}, | ||
annotation: "haproxy.router.openshift.io/timeout", | ||
pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, | ||
values: []string{"30s"}, | ||
expected: "2147483647ms", | ||
}, | ||
alebedev87 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
name: "Default timeout overflows but route takes precedence", | ||
state: map[ServiceAliasConfigKey]ServiceAliasConfig{ | ||
"test:route1": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout": "30s", | ||
}, | ||
}, | ||
}, | ||
annotation: "haproxy.router.openshift.io/timeout", | ||
pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, | ||
values: []string{"999999999s"}, | ||
expected: "30s", | ||
}, | ||
{ | ||
name: "Empty state", | ||
annotation: "haproxy.router.openshift.io/timeout", | ||
pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, | ||
values: []string{"30s"}, | ||
expected: "30s", | ||
}, | ||
{ | ||
name: "Empty state and values", | ||
annotation: "haproxy.router.openshift.io/timeout", | ||
pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, | ||
expected: "2147483647ms", | ||
}, | ||
} | ||
for _, tc := range testCases { | ||
t.Run(tc.name, func(t *testing.T) { | ||
got := maxTimeoutFirstMatchedAndClipped(tc.state, tc.annotation, tc.pattern, tc.values...) | ||
if got != tc.expected { | ||
t.Errorf("Failure: expected %q, got %q", tc.expected, got) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
func Benchmark_maxTimeoutFirstMatchedAndClipped(b *testing.B) { | ||
testCases := []struct { | ||
name string | ||
state map[ServiceAliasConfigKey]ServiceAliasConfig | ||
annotation string | ||
pattern string | ||
values []string | ||
}{ | ||
{ | ||
name: "Input1", | ||
state: map[ServiceAliasConfigKey]ServiceAliasConfig{ | ||
"test:route1": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout": "5m", | ||
}, | ||
}, | ||
}, | ||
annotation: "haproxy.router.openshift.io/timeout", | ||
pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, | ||
values: []string{"30s"}, | ||
}, | ||
{ | ||
name: "Input5", | ||
state: map[ServiceAliasConfigKey]ServiceAliasConfig{ | ||
"test:route1": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout": "5m", | ||
}, | ||
}, | ||
"test:route2": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout": "35s", | ||
}, | ||
}, | ||
"test:route3": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout-tunnel": "35s", | ||
}, | ||
}, | ||
"test:route4": { | ||
Annotations: map[string]string{ | ||
"haproxy.router.openshift.io/timeout-tunnel": "10m", | ||
}, | ||
}, | ||
"test:route5": {}, | ||
}, | ||
annotation: "haproxy.router.openshift.io/timeout", | ||
pattern: `[1-9][0-9]*(us|ms|s|m|h|d)?`, | ||
// valid value is the last one to force all the iterations | ||
values: []string{"", "", "", "", "30s"}, | ||
}, | ||
} | ||
for _, tc := range testCases { | ||
b.ResetTimer() | ||
b.Run(tc.name, func(b *testing.B) { | ||
for n := 0; n < b.N; n++ { | ||
maxTimeoutFirstMatchedAndClipped(tc.state, tc.annotation, tc.pattern, tc.values...) | ||
} | ||
}) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would you mind adding a couple of comments about what these are for? I see below that
BENCH_PKGS
is a value for flag-benchmem
, so it could use a clarification.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.