chore(langchain): mark additional tests as flaky (#8941)
The following tests have been flagged as flaky in the nightly CircleCI runs:

- `test_global_tags`
- `test_openai_llm_sync_multiple_prompts`
- `test_openai_chat_model_sync_call_langchain_openai`
- `test_openai_chat_model_sync_generate`
- `test_chat_model_metrics`
- `test_openai_math_chain_sync`
- `test_chain_invoke_dict_input`
- `test_chain_invoke_str_input`
- `test_openai_sequential_chain_with_multiple_llm_sync`
- `test_chain_logs`
- `test_llm_logs_when_response_not_completed`
- `test_lcel_chain_simple`
- `test_lcel_chain_batch_311`
- `test_lcel_chain_nested`
- `test_llmobs_openai_llm`
- `test_llmobs_openai_chat_model`
- `test_llmobs_openai_chat_model_custom_role`

Marking these as flaky as well until we can narrow down exactly why they are
failing (the failures appear to occur on Python 3.11 specifically).
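
For context, each affected test simply gains the repository's `@flaky` decorator on top of its existing markers, mirroring the diff below. A minimal sketch of the pattern; the `tests.utils` import path and the reading of the argument as a Unix-epoch deadline (1735812000 falls in early January 2025 UTC) are assumptions, not something this diff confirms:

```python
import pytest

# Assumed import path for the ddtrace test helper; the argument is treated here
# as a Unix-epoch deadline after which the flaky marking should be revisited --
# both the path and that interpretation are assumptions for illustration only.
from tests.utils import flaky


@flaky(1735812000)  # 1735812000 ~ early January 2025 UTC
@pytest.mark.snapshot
def test_openai_llm_sync_multiple_prompts(langchain, langchain_openai, request_vcr):
    llm = langchain_openai.OpenAI()
    ...
```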

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.

## Reviewer Checklist

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
sabrenner authored Apr 11, 2024
1 parent 9a40868 commit d8491c8
Showing 1 changed file with 16 additions and 0 deletions.
tests/contrib/langchain/test_langchain_community.py (16 additions, 0 deletions)
@@ -30,6 +30,7 @@ def request_vcr():
yield get_request_vcr(subdirectory_name="langchain_community")


@flaky(1735812000)
@pytest.mark.parametrize("ddtrace_config_langchain", [dict(logs_enabled=True, log_prompt_completion_sample_rate=1.0)])
def test_global_tags(
ddtrace_config_langchain, langchain, langchain_openai, request_vcr, mock_metrics, mock_logs, mock_tracer
@@ -88,6 +89,7 @@ def test_openai_llm_sync(langchain, langchain_openai, request_vcr):
llm.invoke("Can you explain what Descartes meant by 'I think, therefore I am'?")


@flaky(1735812000)
@pytest.mark.snapshot
def test_openai_llm_sync_multiple_prompts(langchain, langchain_openai, request_vcr):
llm = langchain_openai.OpenAI()
@@ -218,13 +220,15 @@ def test_llm_logs(
mock_metrics.count.assert_not_called()


@flaky(1735812000)
@pytest.mark.snapshot
def test_openai_chat_model_sync_call_langchain_openai(langchain, langchain_openai, request_vcr):
chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
with request_vcr.use_cassette("openai_chat_completion_sync_call.yaml"):
chat.invoke(input=[langchain.schema.HumanMessage(content="When do you use 'whom' instead of 'who'?")])


@flaky(1735812000)
@pytest.mark.snapshot
def test_openai_chat_model_sync_generate(langchain, langchain_openai, request_vcr):
chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
@@ -318,6 +322,7 @@ async def test_openai_chat_model_async_stream(langchain, langchain_openai, reque
await chat.agenerate([[langchain.schema.HumanMessage(content="What is the secret Krabby Patty recipe?")]])


@flaky(1735812000)
def test_chat_model_metrics(langchain, langchain_openai, request_vcr, mock_metrics, mock_logs, snapshot_tracer):
chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
with request_vcr.use_cassette("openai_chat_completion_sync_call.yaml"):
@@ -475,6 +480,7 @@ def test_embedding_logs(langchain_openai, ddtrace_config_langchain, request_vcr,
mock_metrics.count.assert_not_called()


@flaky(1735812000)
@pytest.mark.snapshot
def test_openai_math_chain_sync(langchain, langchain_openai, request_vcr):
"""
@@ -486,6 +492,7 @@ def test_openai_math_chain_sync(langchain, langchain_openai, request_vcr):
chain.invoke("what is two raised to the fifty-fourth power?")


@flaky(1735812000)
@pytest.mark.snapshot(token="tests.contrib.langchain.test_langchain_community.test_chain_invoke")
def test_chain_invoke_dict_input(langchain, langchain_openai, request_vcr):
prompt_template = "what is {base} raised to the fifty-fourth power?"
@@ -495,6 +502,7 @@ def test_chain_invoke_dict_input(langchain, langchain_openai, request_vcr):
chain.invoke(input={"base": "two"})


@flaky(1735812000)
@pytest.mark.snapshot(token="tests.contrib.langchain.test_langchain_community.test_chain_invoke")
def test_chain_invoke_str_input(langchain, langchain_openai, request_vcr):
prompt_template = "what is {base} raised to the fifty-fourth power?"
@@ -582,6 +590,7 @@ def _transform_func(inputs):
sequential_chain.invoke({"text": input_text, "style": "a 90s rapper"})


@flaky(1735812000)
@pytest.mark.snapshot
def test_openai_sequential_chain_with_multiple_llm_sync(langchain, langchain_openai, request_vcr):
template = """Paraphrase this text:
@@ -661,6 +670,7 @@ async def test_openai_sequential_chain_with_multiple_llm_async(langchain, langch
await sequential_chain.ainvoke({"input_text": input_text})


@flaky(1735812000)
@pytest.mark.parametrize(
"ddtrace_config_langchain",
[dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -984,6 +994,7 @@ def test_openai_service_name(
assert err == b""


@flaky(1735812000)
@pytest.mark.parametrize(
"ddtrace_config_langchain",
[dict(metrics_enabled=False, logs_enabled=True, log_prompt_completion_sample_rate=1.0)],
@@ -1102,6 +1113,7 @@ def test_embedding_logs_when_response_not_completed(
)


@flaky(1735812000)
@pytest.mark.snapshot
def test_lcel_chain_simple(langchain_core, langchain_openai, request_vcr):
prompt = langchain_core.prompts.ChatPromptTemplate.from_messages(
@@ -1189,6 +1201,7 @@ def test_lcel_chain_batch_311(langchain_core, langchain_openai, request_vcr):
chain.batch(["chickens", "cows", "pigs"])


@flaky(1735812000)
@pytest.mark.snapshot
def test_lcel_chain_nested(langchain_core, langchain_openai, request_vcr):
"""
@@ -1303,6 +1316,7 @@ def _test_llmobs_invoke(
assert mock_llmobs_writer.enqueue.call_count == 1
mock_llmobs_writer.assert_has_calls(expected_llmobs_writer_calls)

@flaky(1735812000)
def test_llmobs_openai_llm(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
llm = langchain_openai.OpenAI()

@@ -1339,6 +1353,7 @@ def test_llmobs_ai21_llm(self, langchain_community, mock_llmobs_writer, mock_tra
cassette_name="ai21_completion_sync.yaml",
)

@flaky(1735812000)
def test_llmobs_openai_chat_model(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)

@@ -1353,6 +1368,7 @@ def test_llmobs_openai_chat_model(self, langchain_openai, mock_llmobs_writer, mo
output_role="assistant",
)

@flaky(1735812000)
def test_llmobs_openai_chat_model_custom_role(self, langchain_openai, mock_llmobs_writer, mock_tracer, request_vcr):
chat = langchain_openai.ChatOpenAI(temperature=0, max_tokens=256)
