From 2df6407c85ddce2d014f8cbc2f3441bfe8b08b99 Mon Sep 17 00:00:00 2001
From: jakerachleff <jake@langchain.dev>
Date: Thu, 14 Nov 2024 16:17:24 -0800
Subject: [PATCH] perf: cut down cpu time of aevaluate by 30% on 1-4MB examples
 with this one trick (#1217)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A huge percentage of the CPU time in our `aevaluate` calls was spent
serializing part of the example to send in the evaluator trace (where we
used to show the first 10k characters). Let's just not do that: this patch
adds a compact `__repr__` (id, dataset_id, link) instead.

## Experiment view before and after
<img width="1457" alt="Screenshot 2024-11-14 at 3 22 21 PM"
src="https://github.com/user-attachments/assets/5003ffe4-58ce-4e60-8854-a6616d2b8d36">
<img width="1366" alt="Screenshot 2024-11-14 at 3 23 06 PM"
src="https://github.com/user-attachments/assets/65fc0ef3-2c77-4f5e-b4df-ca543408a03e">


## Flame graphs before and after
<img width="1509" alt="Screenshot 2024-11-14 at 2 43 36 PM"
src="https://github.com/user-attachments/assets/01eb0a2b-fb14-49ba-ae0c-90f5eb516fd9">

<img width="1510" alt="Screenshot 2024-11-14 at 3 32 50 PM"
src="https://github.com/user-attachments/assets/dea47939-ac99-4cdc-bd18-e698511f8bad">


## Benchmarking code
```
async def run_abenchmark(
    n_examples=200,
    min_size=1000000,
    max_size=4000000,
    n_evaluators=5,
    min_llm_time=0.2,
    max_llm_time=1.2,
    n_concurrency=None,
):
    """Run `aevaluate` against a freshly created dataset of large examples.

    Args:
        n_examples: Number of examples to generate and upload.
        min_size: Lower bound on the combined character count of one
            example's inputs and outputs.
        max_size: Upper bound on the combined character count of one
            example's inputs and outputs.
        n_evaluators: Number of evaluators built with `create_aevaluator`.
        min_llm_time: Lower bound, in seconds, of the random sleep in the
            target; also the lower bound of the value handed to
            `create_aevaluator` (presumably its simulated latency -- confirm
            against that helper).
        max_llm_time: Upper bound counterpart of `min_llm_time`.
        n_concurrency: Forwarded to `aevaluate` as `max_concurrency`.
    """
    # setup dataset
    # Inputs and outputs each take half of the size budget, so their sum
    # lands in [min_size, max_size]. Floor division keeps the bounds integral:
    # `randint` raises TypeError for float arguments on Python 3.12+.
    half_min = min_size // 2
    half_max = max_size // 2
    inputs = [
        {"key": "a" * randint(half_min, half_max)}
        for _ in range(n_examples)
    ]
    outputs = [
        {"key": "b" * randint(half_min, half_max)}
        for _ in range(n_examples)
    ]

    # Start from a clean dataset so examples from earlier runs do not pile up.
    if ls_client.has_dataset(dataset_name="jake_benchmarking"):
        ls_client.delete_dataset(dataset_name="jake_benchmarking")

    print("Creating dataset...")
    dataset = ls_client.create_dataset("jake_benchmarking")

    print("Uploading examples...")
    # Upload in slices of UPLOAD_BATCH_SIZE rather than in a single request.
    for start in range(0, n_examples, UPLOAD_BATCH_SIZE):
        stop = start + UPLOAD_BATCH_SIZE
        ls_client.create_examples(
            dataset_id=dataset.id,
            inputs=inputs[start:stop],
            outputs=outputs[start:stop],
        )

    # setup evaluators
    evaluators = [
        create_aevaluator(
            f"jake_benchmarking_{i}", uniform(min_llm_time, max_llm_time)
        )
        for i in range(n_evaluators)
    ]

    async def target(input):
        # Sleep for a random duration, then return a payload as long as the
        # input so the target's output is as large as the example itself.
        await asyncio.sleep(uniform(min_llm_time, max_llm_time))
        return {"value": "b" * len(input["key"])}

    print("Running evaluation...")
    await aevaluate(
        target,
        data=dataset.id,
        evaluators=evaluators,
        max_concurrency=n_concurrency,
        client=ls_client,
    )
```
---
 python/langsmith/schemas.py | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 80e112e46..dff6d212a 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -124,6 +124,10 @@ def url(self) -> Optional[str]:
             return f"{self._host_url}{path}"
         return None
 
+    def __repr__(self):
+        """Return a compact representation: class, id, dataset_id, and link only."""
+        return f"{self.__class__}(id={self.id}, dataset_id={self.dataset_id}, link='{self.url}')"
+
 
 class ExampleSearch(ExampleBase):
     """Example returned via search."""