From acf9c84fd9185dcfbb1a78318e38955f730a6d92 Mon Sep 17 00:00:00 2001 From: Christophe Papazian <114495376+christophe-papazian@users.noreply.github.com> Date: Wed, 10 Apr 2024 17:55:44 +0200 Subject: [PATCH] chore(asm): add waf integration headers to be reported always with asm (#8925) Implementation of [RFC] WAF Integration: Identify Requests for the Python tracer. APPSEC-52393 (no system tests yet) ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. 
## Reviewer Checklist - [ ] Title is accurate - [ ] All changes are related to the pull request's stated goal - [ ] Description motivates each change - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [ ] Testing strategy adequately addresses listed risks - [ ] Change is maintainable (easy to change, telemetry, documentation) - [ ] Release note makes sense to a user of the library - [ ] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/appsec/_processor.py | 23 ++++++++++++------- tests/appsec/contrib_appsec/utils.py | 34 ++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/ddtrace/appsec/_processor.py b/ddtrace/appsec/_processor.py index 15b9568c205..0e16f309dfc 100644 --- a/ddtrace/appsec/_processor.py +++ b/ddtrace/appsec/_processor.py @@ -77,8 +77,21 @@ def get_appsec_obfuscation_parameter_value_regexp() -> bytes: ) -_COLLECTED_REQUEST_HEADERS = { +_COLLECTED_REQUEST_HEADERS_ASM_ENABLED = { "accept", + "content-type", + "user-agent", + "x-amzn-trace-id", + "cloudfront-viewer-ja3-fingerprint", + "cf-ray", + "x-cloud-trace-context", + "x-appgw-trace-id", + "akamai-user-risk", + "x-sigsci-requestid", + "x-sigsci-tags", +} + +_COLLECTED_REQUEST_HEADERS = { "accept-encoding", "accept-language", "cf-connecting-ip", @@ -86,13 +99,11 @@ def get_appsec_obfuscation_parameter_value_regexp() -> bytes: "content-encoding", "content-language", "content-length", - "content-type", "fastly-client-ip", "forwarded", "forwarded-for", "host", "true-client-ip", - "user-agent", "via", "x-client-ip", "x-cluster-client-ip", @@ -101,11 +112,7 @@ def get_appsec_obfuscation_parameter_value_regexp() -> bytes: "x-real-ip", } 
-_COLLECTED_REQUEST_HEADERS_ASM_ENABLED = { - "accept", - "content-type", - "user-agent", -} +_COLLECTED_REQUEST_HEADERS.update(_COLLECTED_REQUEST_HEADERS_ASM_ENABLED) def _set_headers(span: Span, headers: Any, kind: str, only_asm_enabled: bool = False) -> None: diff --git a/tests/appsec/contrib_appsec/utils.py b/tests/appsec/contrib_appsec/utils.py index 209436eaab1..cdcb8b1bc97 100644 --- a/tests/appsec/contrib_appsec/utils.py +++ b/tests/appsec/contrib_appsec/utils.py @@ -1068,6 +1068,40 @@ def test_asm_enabled_headers(self, asm_enabled, interface, get_tag, root_span): assert get_tag("http.request.headers.user-agent") is None assert get_tag("http.request.headers.content-type") is None + @pytest.mark.parametrize( + "header", + [ + "X-Amzn-Trace-Id", + "Cloudfront-Viewer-Ja3-Fingerprint", + "Cf-Ray", + "X-Cloud-Trace-Context", + "X-Appgw-Trace-id", + "Akamai-User-Risk", + "X-SigSci-RequestID", + "X-SigSci-Tags", + ], + ) + @pytest.mark.parametrize("asm_enabled", [True, False]) + # RFC: https://docs.google.com/document/d/1xf-s6PtSr6heZxmO_QLUtcFzY_X_rT94lRXNq6-Ghws/edit + def test_asm_waf_integration_identify_requests(self, asm_enabled, header, interface, get_tag, root_span): + import random + import string + + with override_global_config(dict(_asm_enabled=asm_enabled)): + self.update_tracer(interface) + random_value = "".join(random.choices(string.ascii_letters + string.digits, k=random.randint(6, 128))) + response = interface.client.get( + "/", + headers={header: random_value}, + ) + assert response.status_code == 200 + assert self.status(response) == 200 + meta_tagname = "http.request.headers." + header.lower() + if asm_enabled: + assert get_tag(meta_tagname) == random_value + else: + assert get_tag(meta_tagname) is None + def test_global_callback_list_length(self, interface): from ddtrace.appsec import _asm_request_context