langchain-ai · obi1kenobi · Jan 10, 2025 · Jan 10, 2025
diff --git a/python/langsmith/_internal/_serde.py b/python/langsmith/_internal/_serde.py
@@ -1,4 +1,4 @@
 from __future__ import annotations

 import base64
 import collections
@@ -146,7 +146,7 @@
         logger.debug(f"Orjson serialization failed: {repr(e)}. Falling back to json.")
         result = json.dumps(
             obj,
-            default=_simple_default,
+            default=_serialize_json,
             ensure_ascii=True,
         ).encode("utf-8")
         try:

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
@@ -19,6 +19,7 @@
 from pydantic import BaseModel
 from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor
 
+from langsmith._internal._serde import dumps_json
 from langsmith.client import ID_TYPE, Client
 from langsmith.evaluation import aevaluate, evaluate
 from langsmith.schemas import (
@@ -1155,6 +1156,37 @@ def test_surrogates():
     )
 
 
+def test_fallback_json_serialization():  # dumps_json vs. surrogate-pair input
+    class Document(BaseModel):  # pydantic model with a single str field
+        content: str
+    # (input, expected) pairs; inputs spell each emoji as two \uXXXX escapes.
+    raw_surrogates = [
+        ("Hello\ud83d\ude00", "Hello😀"),
+        ("Python\ud83d\udc0d", "Python🐍"),
+        ("Surrogate\ud834\udd1e", "Surrogate𝄞"),
+        ("Example\ud83c\udf89", "Example🎉"),
+        ("String\ud83c\udfa7", "String🎧"),
+        ("With\ud83c\udf08", "With🌈"),
+        ("Surrogates\ud83d\ude0e", "Surrogates😎"),
+        ("Embedded\ud83d\udcbb", "Embedded💻"),
+        ("In\ud83c\udf0e", "In🌎"),
+        ("The\ud83d\udcd6", "The📖"),
+        ("Text\ud83d\udcac", "Text💬"),
+        ("收花🙄·到", "收花🙄·到"),  # no escapes here: input equals expected
+    ]
+    pydantic_surrogates = [
+        (Document(content=item), expected) for item, expected in raw_surrogates
+    ]
+    # Bare strings: decoded output must be the expected text in double quotes.
+    for item, expected in raw_surrogates:
+        output = dumps_json(item).decode("utf8")  # fallback path? TODO confirm
+        assert f'"{expected}"' == output
+    # Same inputs as a model field; expects compact {"content":"..."} output.
+    for item, expected in pydantic_surrogates:
+        output = dumps_json(item).decode("utf8")
+        assert f'{{"content":"{expected}"}}' == output
+
+
 def test_runs_stats():
     langchain_client = Client()
     # We always have stuff in the "default" project...