Skip to content

Commit

Permalink
fix(tracing): only extract distributed headers if a trace is not alre…
Browse files Browse the repository at this point in the history
…ady started (#9456)

Today there are some integrations which always check for distributed
tracing headers and if found will activate them as the current/parent
context. This blind approach does not take into account if a parent
trace has already been started.

This means if you have nested Flask apps (for example), then the child
Flask app may make it's parent the upstream remote service, and not the
first/initial Flask app in the process. This can result in a broken
trace.

The scenario which "works" today which will be broken with this change
is if there is "broken" instrumentation as the first span in a trace.
This could either be custom instrumentation that did not check for
distributed headers, or a built-in integration which is not properly
parsing/activating distributed tracing headers.

This change would prioritize keeping the local trace accurate over
ensuring a connection with the upstream calling service.


## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.

## Reviewer Checklist

- [ ] Title is accurate
- [ ] All changes are related to the pull request's stated goal
- [ ] Description motivates each change
- [ ] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [ ] Testing strategy adequately addresses listed risks
- [ ] Change is maintainable (easy to change, telemetry, documentation)
- [ ] Release note makes sense to a user of the library
- [ ] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [ ] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: Federico Mon <[email protected]>
Co-authored-by: Munir Abdinur <[email protected]>
Co-authored-by: Andrew Lock <[email protected]>
Co-authored-by: Zachary Groves <[email protected]>
Co-authored-by: Yun Kim <[email protected]>
Co-authored-by: Alberto Vara <[email protected]>
Co-authored-by: Christophe Papazian <[email protected]>
  • Loading branch information
8 people authored Jan 17, 2025
1 parent 4084e8a commit 6141812
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 0 deletions.
12 changes: 12 additions & 0 deletions ddtrace/contrib/trace_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,18 @@ def activate_distributed_headers(tracer, int_config=None, request_headers=None,
if override is False:
return None

# Only extract and activate if we don't already have an activate span
# DEV: Only do this if there is an active Span, an active Context is fine to override
# DEV: Use _DD_TRACE_EXTRACT_IGNORE_ACTIVE_SPAN env var to override the default behavior
current_span = tracer.current_span()
if current_span and not config._extract_ignore_active_span:
log.debug(
"will not extract distributed headers, a Span(trace_id%d, span_id=%d) is already active",
current_span.trace_id,
current_span.span_id,
)
return

if override or (int_config and distributed_tracing_enabled(int_config)):
context = HTTPPropagator.extract(request_headers)

Expand Down
3 changes: 3 additions & 0 deletions ddtrace/settings/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,9 @@ def __init__(self):

self._propagation_extract_first = _get_config("DD_TRACE_PROPAGATION_EXTRACT_FIRST", False, asbool)

# When True any active span is ignored when extracting trace context from headers
self._extract_ignore_active_span = asbool(os.getenv("_DD_TRACE_EXTRACT_IGNORE_ACTIVE_SPAN", False))

# Datadog tracer tags propagation
x_datadog_tags_max_length = _get_config("DD_TRACE_X_DATADOG_TAGS_MAX_LENGTH", 512, int)
if x_datadog_tags_max_length < 0:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
tracing: Fix scenarios when distributed tracing headers would be extracted and possibly activated when a trace was already started.
75 changes: 75 additions & 0 deletions tests/contrib/wsgi/test_wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from ddtrace.contrib.internal.wsgi.wsgi import _DDWSGIMiddlewareBase
from ddtrace.contrib.internal.wsgi.wsgi import get_request_headers
from tests.utils import override_config
from tests.utils import override_global_config
from tests.utils import override_http_config
from tests.utils import snapshot

Expand Down Expand Up @@ -411,3 +412,77 @@ def test_schematization(ddtrace_run_python_code_in_subprocess, service_name, sch
env["DD_SERVICE"] = service_name
_, stderr, status, _ = ddtrace_run_python_code_in_subprocess(code, env=env)
assert status == 0, stderr


def test_distributed_tracing_existing_parent(tracer, test_spans):
"""We should not parse and activate distributed context if there is already an active span"""

# Middleware that starts a root trace, but doesn't parse headers
def broken_middleware(app, tracer):
def middleware(environ, start_response):
with tracer.trace("broken_middleware"):
return app(environ, start_response)

return middleware

app = TestApp(broken_middleware(DDWSGIMiddleware(application, tracer=tracer), tracer))
resp = app.get("/", headers={"X-Datadog-Parent-Id": "1234", "X-Datadog-Trace-Id": "4321"})

assert config.wsgi.distributed_tracing is True
assert resp.status == "200 OK"
assert resp.status_int == 200

spans = test_spans.pop()
assert len(spans) == 5

# The root should NOT inherit from distributed headers
root = spans[0]
assert root.name == "broken_middleware"
assert root.trace_id != 4321
assert root.parent_id != 1234

# The rest of the spans should inherit from the root
for span in spans[1:]:
assert span.trace_id == root.trace_id
if span.name == "wsgi.request":
assert span.parent_id == root.span_id


def test_distributed_tracing_existing_parent_ff_enabled(tracer, test_spans):
"""We should parse and activate distributed context even if there is already an active span"""

# Middleware that starts a root trace, but doesn't parse headers
def broken_middleware(app, tracer):
def middleware(environ, start_response):
with tracer.trace("broken_middleware"):
return app(environ, start_response)

return middleware

# DEV: Default is False (ignore distributed headers when there is an active span)
with override_global_config(dict(_extract_ignore_active_span=True)):
app = TestApp(broken_middleware(DDWSGIMiddleware(application, tracer=tracer), tracer))
resp = app.get("/", headers={"X-Datadog-Parent-Id": "1234", "X-Datadog-Trace-Id": "4321"})

assert config.wsgi.distributed_tracing is True
assert resp.status == "200 OK"
assert resp.status_int == 200

spans = test_spans.pop()
assert len(spans) == 5

# The root should NOT inherit from distributed headers
root = spans[0]
assert root.name == "broken_middleware"
assert root.trace_id != 4321
assert root.parent_id != 1234

# The rest of the spans should inherit from distributed tracing headers
wsgi_request = spans[1]
assert wsgi_request.name == "wsgi.request"
assert wsgi_request.trace_id == 4321
assert wsgi_request.parent_id == 1234

for span in spans[3:]:
assert span.trace_id == wsgi_request.trace_id
assert span.parent_id != root.parent_id
1 change: 1 addition & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def override_global_config(values):
"_llmobs_enabled",
"_llmobs_sample_rate",
"_llmobs_ml_app",
"_extract_ignore_active_span",
"_llmobs_agentless_enabled",
"_data_streams_enabled",
]
Expand Down

0 comments on commit 6141812

Please sign in to comment.