From d8491c8b8cc593ff3d0569403bbfc942b1a40ba4 Mon Sep 17 00:00:00 2001
From: Sam Brenner <106700075+sabrenner@users.noreply.github.com>
Date: Thu, 11 Apr 2024 10:17:20 -0400
Subject: [PATCH] chore(langchain): mark additional tests as flaky (#8941)

The following tests have been flagged as flaky in the nightly runs in
CircleCI:

- `test_global_tags`
- `test_openai_llm_sync_multiple_prompts`
- `test_openai_chat_model_sync_call_langchain_openai`
- `test_openai_chat_model_sync_generate`
- `test_chat_model_metrics`
- `test_openai_math_chain_sync`
- `test_chain_invoke_dict_input`
- `test_chain_invoke_str_input`
- `test_openai_sequential_chain_with_multiple_llm_sync`
- `test_chain_logs`
- `test_llm_logs_when_response_not_completed`
- `test_lcel_chain_simple`
- `test_lcel_chain_batch_311`
- `test_lcel_chain_nested`
- `test_llmobs_openai_llm`
- `test_llmobs_openai_chat_model`
- `test_llmobs_openai_chat_model_custom_role`

These tests are marked as flaky until we can narrow down exactly why they
fail intermittently; the failures appear to be specific to Python 3.11.
(A sketch of how the `@flaky` deadline works follows the checklist below.)

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included in the PR
- [x] Risks are described (performance impact, potential for breakage, maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`.
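For reviewers unfamiliar with the marker: `@flaky` is a helper from this repository's test utilities, and its integer argument (`1735812000`, a Unix timestamp in early January 2025) acts as a deadline on the exemption. The helper's implementation is not part of this patch; the snippet below is only a minimal sketch of a timestamp-gated decorator, assuming it skips the test until the deadline passes (the real helper may retry or report failures differently):

```python
import time
from typing import Callable, Optional

import pytest


def flaky(until: int, reason: Optional[str] = None) -> Callable:
    """Hypothetical sketch: mark a test as flaky until a Unix-epoch deadline.

    Before `until`, the test is skipped so a known flake cannot fail the
    build; after `until`, the decorator is a no-op and the test runs again.
    """

    def decorator(test_fn: Callable) -> Callable:
        if time.time() >= until:
            # Deadline has passed: stop masking the flake.
            return test_fn
        return pytest.mark.skip(reason=reason or "known flaky test")(test_fn)

    return decorator
```

Whatever the exact mechanism, the useful property is that the exemption is self-expiring: once the deadline passes, the marker stops masking the test, so the flake resurfaces and has to be investigated rather than being silenced indefinitely.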
## Reviewer Checklist

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
---
 tests/contrib/langchain/test_langchain_community.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/contrib/langchain/test_langchain_community.py b/tests/contrib/langchain/test_langchain_community.py
index 202c197ed05..72645d68333 100644
--- a/tests/contrib/langchain/test_langchain_community.py
+++ b/tests/contrib/langchain/test_langchain_community.py
@@ -30,6 +30,7 @@ def request_vcr():
     yield get_request_vcr(subdirectory_name="langchain_community")
 
 
+@flaky(1735812000)
 @pytest.mark.parametrize("ddtrace_config_langchain", [dict(logs_enabled=True, log_prompt_completion_sample_rate=1.0)])
 def test_global_tags(
     ddtrace_config_langchain, langchain, langchain_openai, request_vcr, mock_metrics, mock_logs, mock_tracer
@@ -88,6 +89,7 @@ def test_openai_llm_sync(langchain, langchain_openai, request_vcr):
     llm.invoke("Can you explain what Descartes meant by 'I think, therefore I am'?")
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_openai_llm_sync_multiple_prompts(langchain, langchain_openai, request_vcr):
     llm = langchain_openai.OpenAI()
@@ -218,6 +220,7 @@ def test_llm_logs(
     mock_metrics.count.assert_not_called()
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_openai_chat_model_sync_call_langchain_openai(langchain, langchain_openai, request_vcr):
     chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
@@ -225,6 +228,7 @@ def test_openai_chat_model_sync_call_langchain_openai(langchain, langchain_opena
     chat.invoke(input=[langchain.schema.HumanMessage(content="When do you use 'whom' instead of 'who'?")])
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_openai_chat_model_sync_generate(langchain, langchain_openai, request_vcr):
     chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
@@ -318,6 +322,7 @@ async def test_openai_chat_model_async_stream(langchain, langchain_openai, reque
     await chat.agenerate([[langchain.schema.HumanMessage(content="What is the secret Krabby Patty recipe?")]])
 
 
+@flaky(1735812000)
 def test_chat_model_metrics(langchain, langchain_openai, request_vcr, mock_metrics, mock_logs, snapshot_tracer):
     chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
     with request_vcr.use_cassette("openai_chat_completion_sync_call.yaml"):
@@ -475,6 +480,7 @@ def test_embedding_logs(langchain_openai, ddtrace_config_langchain, request_vcr,
     mock_metrics.count.assert_not_called()
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_openai_math_chain_sync(langchain, langchain_openai, request_vcr):
     """
@@ -486,6 +492,7 @@ def test_openai_math_chain_sync(langchain, langchain_openai, request_vcr):
     chain.invoke("what is two raised to the fifty-fourth power?")
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot(token="tests.contrib.langchain.test_langchain_community.test_chain_invoke")
 def test_chain_invoke_dict_input(langchain, langchain_openai, request_vcr):
     prompt_template = "what is {base} raised to the fifty-fourth power?"
@@ -495,6 +502,7 @@ def test_chain_invoke_dict_input(langchain, langchain_openai, request_vcr):
     chain.invoke(input={"base": "two"})
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot(token="tests.contrib.langchain.test_langchain_community.test_chain_invoke")
 def test_chain_invoke_str_input(langchain, langchain_openai, request_vcr):
     prompt_template = "what is {base} raised to the fifty-fourth power?"
@@ -582,6 +590,7 @@ def _transform_func(inputs):
     sequential_chain.invoke({"text": input_text, "style": "a 90s rapper"})
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_openai_sequential_chain_with_multiple_llm_sync(langchain, langchain_openai, request_vcr):
     template = """Paraphrase this text:
@@ -661,6 +670,7 @@ async def test_openai_sequential_chain_with_multiple_llm_async(langchain, langch
     await sequential_chain.ainvoke({"input_text": input_text})
 
 
+@flaky(1735812000)
 @pytest.mark.parametrize(
     "ddtrace_config_langchain",
     [dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -984,6 +994,7 @@ def test_openai_service_name(
     assert err == b""
 
 
+@flaky(1735812000)
 @pytest.mark.parametrize(
     "ddtrace_config_langchain",
     [dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -1102,6 +1113,7 @@ def test_embedding_logs_when_response_not_completed(
     )
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_lcel_chain_simple(langchain_core, langchain_openai, request_vcr):
     prompt = langchain_core.prompts.ChatPromptTemplate.from_messages(
@@ -1189,6 +1201,7 @@ def test_lcel_chain_batch_311(langchain_core, langchain_openai, request_vcr):
     chain.batch(["chickens", "cows", "pigs"])
 
 
+@flaky(1735812000)
 @pytest.mark.snapshot
 def test_lcel_chain_nested(langchain_core, langchain_openai, request_vcr):
     """
@@ -1303,6 +1316,7 @@ def _test_llmobs_invoke(
         assert mock_llmobs_writer.enqueue.call_count == 1
         mock_llmobs_writer.assert_has_calls(expected_llmobs_writer_calls)
 
+    @flaky(1735812000)
     def test_llmobs_openai_llm(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
         llm = langchain_openai.OpenAI()
 
@@ -1339,6 +1353,7 @@ def test_llmobs_ai21_llm(self, langchain_community, mock_llmobs_writer, mock_tra
             cassette_name="ai21_completion_sync.yaml",
         )
 
+    @flaky(1735812000)
     def test_llmobs_openai_chat_model(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
         chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
 
@@ -1353,6 +1368,7 @@ def test_llmobs_openai_chat_model(self, langchain_openai, mock_llmobs_writer, mo
             output_role="assistant",
         )
 
+    @flaky(1735812000)
    def test_llmobs_openai_chat_model_custom_role(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
         chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)