From 698021b1be1b0e5cd37261b17c6b4f1e202154fa Mon Sep 17 00:00:00 2001
From: Yun Kim <35776586+Yun-Kim@users.noreply.github.com>
Date: Mon, 8 Apr 2024 14:22:35 -0400
Subject: [PATCH] fix(langchain): safely check if instance is openai llm/chat
 (#8896)

Fixes #8889.

This PR adds a safe check for whether a traced LLM/chat model instance is an
OpenAI instance, by wrapping the type checks in try/except and reversing
their order. Previously, we checked directly against the installed base
langchain package:

```python
if isinstance(instance, BASE_LANGCHAIN_MODULE.chat_models.ChatOpenAI) or (
    langchain_openai and isinstance(instance, langchain_openai.ChatOpenAI)
):  # BASE_LANGCHAIN_MODULE can be either `langchain` or `langchain_community`
```

But `langchain_community` does not allow automatically accessing its
submodules: `langchain_community.chat_models.ChatOpenAI` raises an
`AttributeError` unless `from langchain_community import chat_models` has
already been executed (a minimal repro sketch follows the checklist below).

With this fix, there are three scenarios for
`langchain_community`/`langchain_openai` users:

1. They use `langchain_openai` --> we perform the type check using
   `langchain_openai` first, which will always be available, so we never hit
   the `BASE_LANGCHAIN_MODULE` type check.
2. They use `langchain_community` --> since users are using
   `langchain_community.chat_models`, they must have already imported it in
   their code, so the check should not raise. Regardless, we safely attempt
   the type check against the submodule.
3. They use `langchain` --> `langchain` allows accessing submodules without
   importing them directly, so this should also not raise.

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included in the PR
- [x] Risks are described (performance impact, potential for breakage, maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`.
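## Repro sketch

A minimal, hypothetical snippet illustrating the failure mode described above
(it is not part of this patch); it assumes `langchain_community` is installed
and none of its submodules have been imported yet in the process:

```python
# Hypothetical repro sketch, not part of this patch. Assumes
# langchain_community is installed and its submodules have not yet been
# imported anywhere in this process.
import langchain_community

try:
    # The old check effectively performed this attribute access first.
    # langchain_community does not eagerly import its submodules, so this
    # raises AttributeError instead of returning the ChatOpenAI class.
    langchain_community.chat_models.ChatOpenAI
except AttributeError as exc:
    print("old-style isinstance check would crash:", exc)

# With this fix, the equivalent attribute access is attempted only as a
# fallback (langchain_openai is preferred) and is wrapped in try/except, so
# a missing submodule degrades to "not an OpenAI instance" instead of an
# uncaught error.
```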
## Reviewer Checklist

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

Co-authored-by: Alberto Vara
---
 ddtrace/contrib/langchain/patch.py            | 42 ++++++++++++-------
 ...enai-submodule-check-ea81685e31cb9720.yaml |  5 +++
 2 files changed, 33 insertions(+), 14 deletions(-)
 create mode 100644 releasenotes/notes/fix-langchain-openai-submodule-check-ea81685e31cb9720.yaml

diff --git a/ddtrace/contrib/langchain/patch.py b/ddtrace/contrib/langchain/patch.py
index 5ed7e028722..e903b90d279 100644
--- a/ddtrace/contrib/langchain/patch.py
+++ b/ddtrace/contrib/langchain/patch.py
@@ -145,6 +145,30 @@ def _tag_openai_token_usage(
         _tag_openai_token_usage(span._parent, llm_output, propagated_cost=propagated_cost + total_cost, propagate=True)
 
 
+def _is_openai_llm_instance(instance):
+    """Safely check if a traced instance is an OpenAI LLM.
+    langchain_community does not automatically import submodules which may result in AttributeErrors.
+    """
+    try:
+        if langchain_openai:
+            return isinstance(instance, langchain_openai.OpenAI)
+        return isinstance(instance, BASE_LANGCHAIN_MODULE.llms.OpenAI)
+    except (AttributeError, ModuleNotFoundError, ImportError):
+        return False
+
+
+def _is_openai_chat_instance(instance):
+    """Safely check if a traced instance is an OpenAI Chat Model.
+    langchain_community does not automatically import submodules which may result in AttributeErrors.
+ """ + try: + if langchain_openai: + return isinstance(instance, langchain_openai.ChatOpenAI) + return isinstance(instance, BASE_LANGCHAIN_MODULE.chat_models.ChatOpenAI) + except (AttributeError, ModuleNotFoundError, ImportError): + return False + + @with_traced_module def traced_llm_generate(langchain, pin, func, instance, args, kwargs): llm_provider = instance._llm_type @@ -173,9 +197,7 @@ def traced_llm_generate(langchain, pin, func, instance, args, kwargs): span.set_tag_str("langchain.request.%s.parameters.%s" % (llm_provider, param), str(val)) completions = func(*args, **kwargs) - if isinstance(instance, BASE_LANGCHAIN_MODULE.llms.OpenAI) or ( - langchain_openai and isinstance(instance, langchain_openai.OpenAI) - ): + if _is_openai_llm_instance(instance): _tag_openai_token_usage(span, completions.llm_output) integration.record_usage(span, completions.llm_output) @@ -253,9 +275,7 @@ async def traced_llm_agenerate(langchain, pin, func, instance, args, kwargs): span.set_tag_str("langchain.request.%s.parameters.%s" % (llm_provider, param), str(val)) completions = await func(*args, **kwargs) - if isinstance(instance, BASE_LANGCHAIN_MODULE.llms.OpenAI) or ( - langchain_openai and isinstance(instance, langchain_openai.OpenAI) - ): + if _is_openai_llm_instance(instance): _tag_openai_token_usage(span, completions.llm_output) integration.record_usage(span, completions.llm_output) @@ -346,9 +366,7 @@ def traced_chat_model_generate(langchain, pin, func, instance, args, kwargs): span.set_tag_str("langchain.request.%s.parameters.%s" % (llm_provider, param), str(val)) chat_completions = func(*args, **kwargs) - if isinstance(instance, BASE_LANGCHAIN_MODULE.chat_models.ChatOpenAI) or ( - langchain_openai and isinstance(instance, langchain_openai.ChatOpenAI) - ): + if _is_openai_chat_instance(instance): _tag_openai_token_usage(span, chat_completions.llm_output) integration.record_usage(span, chat_completions.llm_output) @@ -453,9 +471,7 @@ async def traced_chat_model_agenerate(langchain, pin, func, instance, args, kwar span.set_tag_str("langchain.request.%s.parameters.%s" % (llm_provider, param), str(val)) chat_completions = await func(*args, **kwargs) - if isinstance(instance, BASE_LANGCHAIN_MODULE.chat_models.ChatOpenAI) or ( - langchain_openai and isinstance(instance, langchain_openai.ChatOpenAI) - ): + if _is_openai_chat_instance(instance): _tag_openai_token_usage(span, chat_completions.llm_output) integration.record_usage(span, chat_completions.llm_output) @@ -842,9 +858,7 @@ def patch(): # ref: https://github.com/DataDog/dd-trace-py/issues/7123 if SHOULD_PATCH_LANGCHAIN_COMMUNITY: from langchain.chains.base import Chain # noqa:F401 - from langchain_community import chat_models # noqa:F401 from langchain_community import embeddings # noqa:F401 - from langchain_community import llms # noqa:F401 from langchain_community import vectorstores # noqa:F401 wrap("langchain_core", "language_models.llms.BaseLLM.generate", traced_llm_generate(langchain)) diff --git a/releasenotes/notes/fix-langchain-openai-submodule-check-ea81685e31cb9720.yaml b/releasenotes/notes/fix-langchain-openai-submodule-check-ea81685e31cb9720.yaml new file mode 100644 index 00000000000..361f1aef79a --- /dev/null +++ b/releasenotes/notes/fix-langchain-openai-submodule-check-ea81685e31cb9720.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + langchain: This fix adds error handling for checking if a traced LLM or chat model is an OpenAI instance, as the + langchain_community package does not allow automatic submodule importing.