diff --git a/Cargo.lock b/Cargo.lock index 1999fcd..da5e7b5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1066,7 +1066,7 @@ checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "openai_trtllm" -version = "0.1.0" +version = "0.2.1" dependencies = [ "anyhow", "async-stream", diff --git a/Cargo.toml b/Cargo.toml index afee6a7..cf2f13a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "openai_trtllm" -version = "0.1.0" +version = "0.2.1" edition = "2021" [dependencies] @@ -23,7 +23,7 @@ tracing-opentelemetry = { version = "0.22" } tower-http = { version = "0.5", features = ["trace"] } opentelemetry = { version = "0.21.0", features = ["metrics", "logs"] } opentelemetry_sdk = { version = "0.21.1", features = ["rt-tokio", "logs"] } -opentelemetry-otlp = { version = "0.14.0", features = ["tonic", "metrics", "logs"] } +opentelemetry-otlp = { version = "0.14.0", features = ["tonic", "metrics", "logs"] } opentelemetry-semantic-conventions = { version = "0.13.0" } axum-tracing-opentelemetry = "0.16.0" liquid = "0.26.4" diff --git a/src/routes/chat.rs b/src/routes/chat.rs index aa6dd0e..3133cd0 100644 --- a/src/routes/chat.rs +++ b/src/routes/chat.rs @@ -97,6 +97,7 @@ async fn chat_completions_stream( let raw_content = infer_response.raw_output_contents[0].clone(); let content = deserialize_bytes_tensor(raw_content)?.into_iter().collect::(); + tracing::debug!("deserialized triton infer response content: {:?}", content); if !content.is_empty() { let response = ChatCompletionChunk { @@ -182,6 +183,8 @@ async fn chat_completions( let raw_content = infer_response.raw_output_contents[0].clone(); let content = deserialize_bytes_tensor(raw_content)?.into_iter().collect(); + tracing::debug!("deserialized triton infer response content: {:?}", content); + contents.push(content); } diff --git a/src/routes/completions.rs b/src/routes/completions.rs index 3ee95b7..94ba4f4 100644 --- a/src/routes/completions.rs +++ b/src/routes/completions.rs @@ -91,6 +91,7 @@ async fn completions_stream( let content = deserialize_bytes_tensor(raw_content)? .into_iter() .collect::(); + tracing::debug!("deserialized triton infer response content: {:?}", content); if !content.is_empty() { let response = Completion { @@ -166,6 +167,8 @@ async fn completions( let raw_content = infer_response.raw_output_contents[0].clone(); let content = deserialize_bytes_tensor(raw_content)?.into_iter().collect(); + tracing::debug!("deserialized triton infer response content: {:?}", content); + contents.push(content); }