chore(langchain): mark additional tests as flaky (#8941)
The following tests have been flagged as flaky in the nightly CircleCI runs:

- `test_global_tags`
- `test_openai_llm_sync_multiple_prompts`
- `test_openai_chat_model_sync_call_langchain_openai`
- `test_openai_chat_model_sync_generate`
- `test_chat_model_metrics`
- `test_openai_math_chain_sync`
- `test_chain_invoke_dict_input`
- `test_chain_invoke_str_input`
- `test_openai_sequential_chain_with_multiple_llm_sync`
- `test_chain_logs`
- `test_llm_logs_when_response_not_completed`
- `test_lcel_chain_simple`
- `test_lcel_chain_batch_311`
- `test_lcel_chain_nested`
- `test_llmobs_openai_llm`
- `test_llmobs_openai_chat_model`
- `test_llmobs_openai_chat_model_custom_role`

Marking these as flaky as well until we can narrow down exactly why they are
failing (the failures appear to occur on Python 3.11 specifically).
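
For context, each affected test simply gains the repository's `@flaky` decorator on top of its existing markers, mirroring the diff below. A minimal sketch of the pattern; the `tests.utils` import path and the reading of the argument as a Unix-epoch deadline (1735812000 falls in early January 2025 UTC) are assumptions, not something this diff confirms:

```python
import pytest

# Assumed import path for the ddtrace test helper; the argument is treated here
# as a Unix-epoch deadline after which the flaky marking should be revisited --
# both the path and that interpretation are assumptions for illustration only.
from tests.utils import flaky


@flaky(1735812000)  # 1735812000 ~ early January 2025 UTC
@pytest.mark.snapshot
def test_openai_llm_sync_multiple_prompts(langchain, langchain_openai, request_vcr):
    llm = langchain_openai.OpenAI()
    ...
```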

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.

## Reviewer Checklist

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
sabrenner authored Apr 11, 2024
1 parent 9a40868 commit d8491c8
Showing 1 changed file with 16 additions and 0 deletions.
tests/contrib/langchain/test_langchain_community.py (16 additions, 0 deletions)
@@ -30,6 +30,7 @@ def request_vcr():
yield get_request_vcr(subdirectory_name="langchain_community")


@flaky(1735812000)
@pytest.mark.parametrize("ddtrace_config_langchain", [dict(logs_enabled=True, log_prompt_completion_sample_rate=1.0)])
def test_global_tags(
ddtrace_config_langchain, langchain, langchain_openai, request_vcr, mock_metrics, mock_logs, mock_tracer
@@ -88,6 +89,7 @@ def test_openai_llm_sync(langchain, langchain_openai, request_vcr):
llm.invoke("Can you explain what Descartes meant by 'I think, therefore I am'?")


@flaky(1735812000)
@pytest.mark.snapshot
def test_openai_llm_sync_multiple_prompts(langchain, langchain_openai, request_vcr):
llm = langchain_openai.OpenAI()
@@ -218,13 +220,15 @@ def test_llm_logs(
mock_metrics.count.assert_not_called()


@flaky(1735812000)
@pytest.mark.snapshot
def test_openai_chat_model_sync_call_langchain_openai(langchain, langchain_openai, request_vcr):
chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
with request_vcr.use_cassette("openai_chat_completion_sync_call.yaml"):
chat.invoke(input=[langchain.schema.HumanMessage(content="When do you use 'whom' instead of 'who'?")])


@flaky(1735812000)
@pytest.mark.snapshot
def test_openai_chat_model_sync_generate(langchain, langchain_openai, request_vcr):
chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
@@ -318,6 +322,7 @@ async def test_openai_chat_model_async_stream(langchain, langchain_openai, reque
await chat.agenerate([[langchain.schema.HumanMessage(content="What is the secret Krabby Patty recipe?")]])


@flaky(1735812000)
def test_chat_model_metrics(langchain, langchain_openai, request_vcr, mock_metrics, mock_logs, snapshot_tracer):
chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
with request_vcr.use_cassette("openai_chat_completion_sync_call.yaml"):
@@ -475,6 +480,7 @@ def test_embedding_logs(langchain_openai, ddtrace_config_langchain, request_vcr,
mock_metrics.count.assert_not_called()


@flaky(1735812000)
@pytest.mark.snapshot
def test_openai_math_chain_sync(langchain, langchain_openai, request_vcr):
"""
@@ -486,6 +492,7 @@ def test_openai_math_chain_sync(langchain, langchain_openai, request_vcr):
chain.invoke("what is two raised to the fifty-fourth power?")


@flaky(1735812000)
@pytest.mark.snapshot(token="tests.contrib.langchain.test_langchain_community.test_chain_invoke")
def test_chain_invoke_dict_input(langchain, langchain_openai, request_vcr):
prompt_template = "what is {base} raised to the fifty-fourth power?"
@@ -495,6 +502,7 @@ def test_chain_invoke_dict_input(langchain, langchain_openai, request_vcr):
chain.invoke(input={"base": "two"})


@flaky(1735812000)
@pytest.mark.snapshot(token="tests.contrib.langchain.test_langchain_community.test_chain_invoke")
def test_chain_invoke_str_input(langchain, langchain_openai, request_vcr):
prompt_template = "what is {base} raised to the fifty-fourth power?"
@@ -582,6 +590,7 @@ def _transform_func(inputs):
sequential_chain.invoke({"text": input_text, "style": "a 90s rapper"})


@flaky(1735812000)
@pytest.mark.snapshot
def test_openai_sequential_chain_with_multiple_llm_sync(langchain, langchain_openai, request_vcr):
template = """Paraphrase this text:
@@ -661,6 +670,7 @@ async def test_openai_sequential_chain_with_multiple_llm_async(langchain, langch
await sequential_chain.ainvoke({"input_text": input_text})


@flaky(1735812000)
@pytest.mark.parametrize(
"ddtrace_config_langchain",
[dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -984,6 +994,7 @@ def test_openai_service_name(
assert err == b""


@flaky(1735812000)
@pytest.mark.parametrize(
"ddtrace_config_langchain",
[dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -1102,6 +1113,7 @@ def test_embedding_logs_when_response_not_completed(
)


@flaky(1735812000)
@pytest.mark.snapshot
def test_lcel_chain_simple(langchain_core, langchain_openai, request_vcr):
prompt = langchain_core.prompts.ChatPromptTemplate.from_messages(
@@ -1189,6 +1201,7 @@ def test_lcel_chain_batch_311(langchain_core, langchain_openai, request_vcr):
chain.batch(["chickens", "cows", "pigs"])


@flaky(1735812000)
@pytest.mark.snapshot
def test_lcel_chain_nested(langchain_core, langchain_openai, request_vcr):
"""
@@ -1303,6 +1316,7 @@ def _test_llmobs_invoke(
assert mock_llmobs_writer.enqueue.call_count == 1
mock_llmobs_writer.assert_has_calls(expected_llmobs_writer_calls)

@flaky(1735812000)
def test_llmobs_openai_llm(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
llm = langchain_openai.OpenAI()

@@ -1339,6 +1353,7 @@ def test_llmobs_ai21_llm(self, langchain_community, mock_llmobs_writer, mock_tra
cassette_name="ai21_completion_sync.yaml",
)

@flaky(1735812000)
def test_llmobs_openai_chat_model(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)

@@ -1353,6 +1368,7 @@ def test_llmobs_openai_chat_model(self, langchain_openai, mock_llmobs_writer, mo
output_role="assistant",
)

@flaky(1735812000)
def test_llmobs_openai_chat_model_custom_role(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
