diff --git a/python/langsmith/_internal/_serde.py b/python/langsmith/_internal/_serde.py index d3c7d7d10..05d3c8857 100644 --- a/python/langsmith/_internal/_serde.py +++ b/python/langsmith/_internal/_serde.py @@ -79,6 +79,11 @@ def _simple_default(obj): ] +# IMPORTANT: This function is used from Rust code in `langsmith-pyo3` serialization, +# in order to handle serializing these tricky Python types *from Rust*. +# Do not cause this function to become inaccessible (e.g. by deleting +# or renaming it) without also fixing the corresponding Rust code found in: +# rust/crates/langsmith-pyo3/src/serialization/mod.rs def _serialize_json(obj: Any) -> Any: try: if isinstance(obj, (set, tuple)): diff --git a/rust/crates/langsmith-pyo3/src/py_run.rs b/rust/crates/langsmith-pyo3/src/py_run.rs index 9725c5cd8..31ceb6cdf 100644 --- a/rust/crates/langsmith-pyo3/src/py_run.rs +++ b/rust/crates/langsmith-pyo3/src/py_run.rs @@ -22,6 +22,12 @@ impl RunCreateExtended { impl FromPyObject<'_> for RunCreateExtended { fn extract_bound(value: &Bound<'_, PyAny>) -> PyResult { + // Perform a runtime check that we've successfully located the `langsmith` Python code + // used to transform Python objects which aren't natively serializable by `orjson`. + // + // This assertion ensures that we won't later fail to serialize e.g. Pydantic objects. + serialization::assert_orjson_default_is_present(); + let run_create = value.extract::()?.into_inner(); let attachments = { @@ -58,6 +64,12 @@ impl RunUpdateExtended { impl FromPyObject<'_> for RunUpdateExtended { fn extract_bound(value: &Bound<'_, PyAny>) -> PyResult { + // Perform a runtime check that we've successfully located the `langsmith` Python code + // used to transform Python objects which aren't natively serializable by `orjson`. + // + // This assertion ensures that we won't later fail to serialize e.g. Pydantic objects. 
+ serialization::assert_orjson_default_is_present(); + let run_update = value.extract::()?.into_inner(); // TODO: attachments are WIP at the moment, ignore them here for now. @@ -273,7 +285,7 @@ fn extract_string_like(value: &Bound<'_, PyAny>) -> PyResult { // However, orjson supports serializing UUID objects, so the easiest way to get // a Rust string from a Python UUID object is to serialize the UUID to a JSON string // and then parse out the string. - let Ok(buffer) = self::serialization::dumps(value.as_ptr()) else { + let Ok(buffer) = serialization::dumps(value.as_ptr()) else { // orjson failed to deserialize the object. The fact that orjson is involved // is an internal implementation detail, so return the original error instead. // It looks like this: diff --git a/rust/crates/langsmith-pyo3/src/serialization/mod.rs b/rust/crates/langsmith-pyo3/src/serialization/mod.rs index 74972cad9..44458338a 100644 --- a/rust/crates/langsmith-pyo3/src/serialization/mod.rs +++ b/rust/crates/langsmith-pyo3/src/serialization/mod.rs @@ -1,21 +1,61 @@ +use std::ptr::NonNull; + +use pyo3::types::PyAnyMethods as _; + mod writer; +thread_local! { + static ORJSON_DEFAULT: Result, String> = { + pyo3::Python::with_gil(|py| { + let module = match py.import("langsmith._internal._serde") { + Ok(m) => m, + Err(e) => { + let _ = py.import("langsmith").map_err(|_| "failed to import `langsmith` package; please make sure `langsmith-pyo3` is only used via the `langsmith` package".to_string())?; + return Err(format!("Failed to import `langsmith._internal._serde` even though `langsmith` can be imported. Did internal `langsmith` package structure change? 
Underlying error: {e}")); + } + }; + + // Keep an owned (deliberately leaked) reference to the function: the raw pointer is cached for + // the rest of the thread's lifetime and must stay valid even if the module attribute is rebound. + let function = module.getattr("_serialize_json").map_err(|e| format!("`_serialize_json` function not found; underlying error: {e}"))?.into_ptr(); + Ok(NonNull::new(function).expect("function was null, which shouldn't ever happen")) + }) + } +} + +/// Perform a runtime check that we've successfully located the `langsmith` Python code +/// used to transform Python objects which aren't natively serializable by `orjson`. +/// +/// This assertion ensures that we won't later fail to serialize e.g. Pydantic objects. +/// +/// Once the thread-local has been initialized by its first use on this thread, the cost of +/// this call is trivial: just one easily branch-predictable comparison. +pub(crate) fn assert_orjson_default_is_present() { + ORJSON_DEFAULT.with(|res| { + if let Err(e) = res { + panic!("{e}"); + } + }) +} + pub(crate) fn dumps(ptr: *mut pyo3_ffi::PyObject) -> Result, String> { let mut writer = writer::BufWriter::new(); - let obj = orjson::PyObjectSerializer::new( - ptr, - orjson::SerializerState::new(Default::default()), - None, - ); - - let res = orjson::to_writer(&mut writer, &obj); - match res { - Ok(_) => Ok(writer.finish()), - Err(err) => { - // Make sure we drop the allocated buffer. - let _ = writer.into_inner(); - Err(err.to_string()) + ORJSON_DEFAULT.with(|default| { + let obj = orjson::PyObjectSerializer::new( + ptr, + orjson::SerializerState::new(Default::default()), + default.as_ref().cloned().ok(), + ); + + let res = orjson::to_writer(&mut writer, &obj); + match res { + Ok(_) => Ok(writer.finish()), + Err(err) => { + // Make sure we drop the allocated buffer. + let _ = writer.into_inner(); + Err(err.to_string()) + } } - } + }) }