Skip to content

Commit

Permalink
Serialize pydantic and other tricky objects correctly from Rust. (#1392)
Browse files Browse the repository at this point in the history
From the Rust bindings, import the `_serialize_json` function from
`langsmith._internal._serde`, then use it as the default fallback if
`orjson` serialization can't handle some object. This makes the Rust
serialization code equivalent to the `_orjson.dumps()` call inside
`langsmith._internal._serde.dumps_json`.

I will handle UTF surrogate characters in a subsequent PR.
  • Loading branch information
obi1kenobi authored Jan 14, 2025
1 parent 4864506 commit 89733fb
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 15 deletions.
5 changes: 5 additions & 0 deletions python/langsmith/_internal/_serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ def _simple_default(obj):
]


# IMPORTANT: This function is used from Rust code in `langsmith-pyo3` serialization,
# in order to handle serializing these tricky Python types *from Rust*.
# Do not cause this function to become inaccessible (e.g. by deleting
# or renaming it) without also fixing the corresponding Rust code found in:
# rust/crates/langsmith-pyo3/src/serialization/mod.rs
def _serialize_json(obj: Any) -> Any:
try:
if isinstance(obj, (set, tuple)):
Expand Down
14 changes: 13 additions & 1 deletion rust/crates/langsmith-pyo3/src/py_run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ impl RunCreateExtended {

impl FromPyObject<'_> for RunCreateExtended {
fn extract_bound(value: &Bound<'_, PyAny>) -> PyResult<Self> {
// Perform a runtime check that we've successfully located the `langsmith` Python code
// used to transform Python objects which aren't natively serializable by `orjson`.
//
// This assertion ensures that we won't later fail to serialize e.g. Pydantic objects.
serialization::assert_orjson_default_is_present();

let run_create = value.extract::<RunCreate>()?.into_inner();

let attachments = {
Expand Down Expand Up @@ -58,6 +64,12 @@ impl RunUpdateExtended {

impl FromPyObject<'_> for RunUpdateExtended {
fn extract_bound(value: &Bound<'_, PyAny>) -> PyResult<Self> {
// Perform a runtime check that we've successfully located the `langsmith` Python code
// used to transform Python objects which aren't natively serializable by `orjson`.
//
// This assertion ensures that we won't later fail to serialize e.g. Pydantic objects.
serialization::assert_orjson_default_is_present();

let run_update = value.extract::<RunUpdate>()?.into_inner();

// TODO: attachments are WIP at the moment, ignore them here for now.
Expand Down Expand Up @@ -273,7 +285,7 @@ fn extract_string_like(value: &Bound<'_, PyAny>) -> PyResult<String> {
// However, orjson supports serializing UUID objects, so the easiest way to get
// a Rust string from a Python UUID object is to serialize the UUID to a JSON string
// and then parse out the string.
let Ok(buffer) = self::serialization::dumps(value.as_ptr()) else {
let Ok(buffer) = serialization::dumps(value.as_ptr()) else {
// orjson failed to serialize the object. The fact that orjson is involved
// is an internal implementation detail, so return the original error instead.
// It looks like this:
Expand Down
66 changes: 52 additions & 14 deletions rust/crates/langsmith-pyo3/src/serialization/mod.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,59 @@
use std::ptr::NonNull;

use pyo3::types::PyAnyMethods as _;

mod writer;

thread_local! {
    /// Per-thread result of looking up the Python function
    /// `langsmith._internal._serde._serialize_json`.
    ///
    /// `Ok` holds a non-null pointer to the function object; `Err` holds a
    /// human-readable explanation of why the lookup failed. The initializer
    /// acquires the GIL and performs the import when a thread first accesses
    /// this value.
    ///
    /// The pointer is obtained with `into_ptr()`, so this thread-local owns a
    /// strong reference that is intentionally never released. That keeps the
    /// function object alive even if the module attribute is later rebound or
    /// deleted, so the cached pointer cannot dangle.
    static ORJSON_DEFAULT: Result<NonNull<pyo3_ffi::PyObject>, String> = {
        pyo3::Python::with_gil(|py| {
            let module = match py.import("langsmith._internal._serde") {
                Ok(m) => m,
                Err(e) => {
                    // Distinguish "the `langsmith` package is missing entirely" from
                    // "the package exists but its internal layout is not what we expect".
                    let _ = py.import("langsmith").map_err(|_| "failed to import `langsmith` package; please make sure `langsmith-pyo3` is only used via the `langsmith` package".to_string())?;
                    return Err(format!("Failed to import `langsmith._internal._serde` even though `langsmith` can be imported. Did internal `langsmith` package structure change? Underlying error: {e}"));
                }
            };

            // `into_ptr()` transfers ownership of the reference to us instead of
            // dropping it (and decrementing the refcount) at the end of the statement.
            let function = module
                .getattr("_serialize_json")
                .map_err(|e| format!("`_serialize_json` function not found; underlying error: {e}"))?
                .into_ptr();
            Ok(NonNull::new(function).expect("function was null, which shouldn't ever happen"))
        })
    }
}

/// Check that the `langsmith` Python fallback serializer was located, panicking otherwise.
///
/// The fallback is the Python code used to transform objects that `orjson` cannot
/// serialize natively. Failing loudly here means a later attempt to serialize
/// e.g. a Pydantic object will not fail for lack of that fallback.
///
/// # Panics
///
/// Panics with the stored error message if the `ORJSON_DEFAULT` lookup failed.
///
/// Once the thread-local has been initialized on the current thread, this call
/// only inspects the cached `Result`.
pub(crate) fn assert_orjson_default_is_present() {
    ORJSON_DEFAULT.with(|lookup| match lookup {
        Ok(_) => {}
        Err(message) => panic!("{message}"),
    })
}

pub(crate) fn dumps(ptr: *mut pyo3_ffi::PyObject) -> Result<Vec<u8>, String> {
let mut writer = writer::BufWriter::new();

let obj = orjson::PyObjectSerializer::new(
ptr,
orjson::SerializerState::new(Default::default()),
None,
);

let res = orjson::to_writer(&mut writer, &obj);
match res {
Ok(_) => Ok(writer.finish()),
Err(err) => {
// Make sure we drop the allocated buffer.
let _ = writer.into_inner();
Err(err.to_string())
ORJSON_DEFAULT.with(|default| {
let obj = orjson::PyObjectSerializer::new(
ptr,
orjson::SerializerState::new(Default::default()),
default.as_ref().cloned().ok(),
);

let res = orjson::to_writer(&mut writer, &obj);
match res {
Ok(_) => Ok(writer.finish()),
Err(err) => {
// Make sure we drop the allocated buffer.
let _ = writer.into_inner();
Err(err.to_string())
}
}
}
})
}

0 comments on commit 89733fb

Please sign in to comment.