From d8491c8b8cc593ff3d0569403bbfc942b1a40ba4 Mon Sep 17 00:00:00 2001
From: Sam Brenner <106700075+sabrenner@users.noreply.github.com>
Date: Thu, 11 Apr 2024 10:17:20 -0400
Subject: [PATCH] chore(langchain): mark additional tests as flaky (#8941)

The following tests have been flagged as flaky in the nightly runs in
CircleCI:

- `test_global_tags`
- `test_openai_llm_sync_multiple_prompts`
- `test_openai_chat_model_sync_call_langchain_openai`
- `test_openai_chat_model_sync_generate`
- `test_chat_model_metrics`
- `test_openai_math_chain_sync`
- `test_chain_invoke_dict_input`
- `test_chain_invoke_str_input`
- `test_openai_sequential_chain_with_multiple_llm_sync`
- `test_chain_logs`
- `test_llm_logs_when_response_not_completed`
- `test_lcel_chain_simple`
- `test_lcel_chain_batch_311`
- `test_lcel_chain_nested`
- `test_llmobs_openai_llm`
- `test_llmobs_openai_chat_model`
- `test_llmobs_openai_chat_model_custom_role`

These tests are marked as flaky until we can narrow down exactly why they
fail intermittently; the failures appear to be specific to Python 3.11.
(A sketch of how the `@flaky` deadline works follows the checklist below.)

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included in the PR
- [x] Risks are described (performance impact, potential for breakage, maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`.
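For reviewers unfamiliar with the marker: `@flaky` is a helper from this repository's test utilities, and its integer argument (`1735812000`, a Unix timestamp in early January 2025) acts as a deadline on the exemption. The helper's implementation is not part of this patch; the snippet below is only a minimal sketch of a timestamp-gated decorator, assuming it skips the test until the deadline passes (the real helper may retry or report failures differently):

```python
import time
from typing import Callable, Optional

import pytest


def flaky(until: int, reason: Optional[str] = None) -> Callable:
    """Hypothetical sketch: mark a test as flaky until a Unix-epoch deadline.

    Before `until`, the test is skipped so a known flake cannot fail the
    build; after `until`, the decorator is a no-op and the test runs again.
    """

    def decorator(test_fn: Callable) -> Callable:
        if time.time() >= until:
            # Deadline has passed: stop masking the flake.
            return test_fn
        return pytest.mark.skip(reason=reason or "known flaky test")(test_fn)

    return decorator
```

Whatever the exact mechanism, the useful property is that the exemption is self-expiring: once the deadline passes, the marker stops masking the test, so the flake resurfaces and has to be investigated rather than being silenced indefinitely.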
## Reviewer Checklist

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
---
 tests/contrib/langchain/test_langchain_community.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/contrib/langchain/test_langchain_community.py b/tests/contrib/langchain/test_langchain_community.py
index 202c197ed05..72645d68333 100644
--- a/tests/contrib/langchain/test_langchain_community.py
+++ b/tests/contrib/langchain/test_langchain_community.py
@@ -30,6 +30,7 @@ def request_vcr():
     yield get_request_vcr(subdirectory_name="langchain_community")
 
 
+@flaky(1735812000)
 @pytest.mark.parametrize("ddtrace_config_langchain", [dict(logs_enabled=True, log_prompt_completion_sample_rate=1.0)])
 def test_global_tags(
     ddtrace_config_langchain, langchain, langchain_openai, request_vcr, mock_metrics, mock_logs, mock_tracer
@@ -88,6 +89,7 @@ def test_openai_llm_sync(langchain, langchain_openai, request_vcr):
     llm.invoke("Can you explain what Descartes meant by 'I think, therefore I am'?")
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_openai_llm_sync_multiple_prompts(langchain, langchain_openai, request_vcr):
     llm = langchain_openai.OpenAI()
@@ -218,6 +220,7 @@ def test_llm_logs(
     mock_metrics.count.assert_not_called()
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_openai_chat_model_sync_call_langchain_openai(langchain, langchain_openai, request_vcr):
     chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
@@ -225,6 +228,7 @@ def test_openai_chat_model_sync_call_langchain_openai(langchain, langchain_opena
     chat.invoke(input=[langchain.schema.HumanMessage(content="When do you use 'whom' instead of 'who'?")])
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_openai_chat_model_sync_generate(langchain, langchain_openai, request_vcr):
     chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
@@ -318,6 +322,7 @@ async def test_openai_chat_model_async_stream(langchain, langchain_openai, reque
     await chat.agenerate([[langchain.schema.HumanMessage(content="What is the secret Krabby Patty recipe?")]])
 
 
+@flaky(1735812000)
 def test_chat_model_metrics(langchain, langchain_openai, request_vcr, mock_metrics, mock_logs, snapshot_tracer):
     chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
     with request_vcr.use_cassette("openai_chat_completion_sync_call.yaml"):
@@ -475,6 +480,7 @@ def test_embedding_logs(langchain_openai, ddtrace_config_langchain, request_vcr,
     mock_metrics.count.assert_not_called()
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_openai_math_chain_sync(langchain, langchain_openai, request_vcr):
     """
@@ -486,6 +492,7 @@ def test_openai_math_chain_sync(langchain, langchain_openai, request_vcr):
     chain.invoke("what is two raised to the fifty-fourth power?")
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot(token="tests.contrib.langchain.test_langchain_community.test_chain_invoke")
 def test_chain_invoke_dict_input(langchain, langchain_openai, request_vcr):
     prompt_template = "what is {base} raised to the fifty-fourth power?"
@@ -495,6 +502,7 @@ def test_chain_invoke_dict_input(langchain, langchain_openai, request_vcr):
     chain.invoke(input={"base": "two"})
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot(token="tests.contrib.langchain.test_langchain_community.test_chain_invoke")
 def test_chain_invoke_str_input(langchain, langchain_openai, request_vcr):
     prompt_template = "what is {base} raised to the fifty-fourth power?"
@@ -582,6 +590,7 @@ def _transform_func(inputs):
     sequential_chain.invoke({"text": input_text, "style": "a 90s rapper"})
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_openai_sequential_chain_with_multiple_llm_sync(langchain, langchain_openai, request_vcr):
     template = """Paraphrase this text:
@@ -661,6 +670,7 @@ async def test_openai_sequential_chain_with_multiple_llm_async(langchain, langch
     await sequential_chain.ainvoke({"input_text": input_text})
 
 
+@flaky(1735812000)
 @pytest.mark.parametrize(
     "ddtrace_config_langchain",
     [dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -984,6 +994,7 @@ def test_openai_service_name(
     assert err == b""
 
 
+@flaky(1735812000)
 @pytest.mark.parametrize(
     "ddtrace_config_langchain",
     [dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -1102,6 +1113,7 @@ def test_embedding_logs_when_response_not_completed(
     )
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_lcel_chain_simple(langchain_core, langchain_openai, request_vcr):
     prompt = langchain_core.prompts.ChatPromptTemplate.from_messages(
@@ -1189,6 +1201,7 @@ def test_lcel_chain_batch_311(langchain_core, langchain_openai, request_vcr):
     chain.batch(["chickens", "cows", "pigs"])
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_lcel_chain_nested(langchain_core, langchain_openai, request_vcr):
     """
@@ -1303,6 +1316,7 @@ def _test_llmobs_invoke(
         assert mock_llmobs_writer.enqueue.call_count == 1
         mock_llmobs_writer.assert_has_calls(expected_llmobs_writer_calls)
 
+    @flaky(1735812000)
     def test_llmobs_openai_llm(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
         llm = langchain_openai.OpenAI()
 
@@ -1339,6 +1353,7 @@ def test_llmobs_ai21_llm(self, langchain_community, mock_llmobs_writer, mock_tra
             cassette_name="ai21_completion_sync.yaml",
         )
 
+    @flaky(1735812000)
     def test_llmobs_openai_chat_model(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
         chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
 
@@ -1353,6 +1368,7 @@ def test_llmobs_openai_chat_model(self, langchain_openai, mock_llmobs_writer, mo
             output_role="assistant",
         )
 
+    @flaky(1735812000)
    def test_llmobs_openai_chat_model_custom_role(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
         chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)