Skip to content

Commit

Permalink
core: filter empty payload points (#3549)
Browse files: browse the repository at this point in the history
  • Loading branch information
spolu authored Feb 1, 2024
1 parent 3e6a893 commit 0f687cf
Showing 1 changed file with 23 additions and 8 deletions.
31 changes: 23 additions & 8 deletions core/src/data_sources/data_source.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -1999,18 +1999,33 @@ fn parse_points_into_chunks(
) -> Result<Vec<(String, Chunk)>, anyhow::Error> {
let chunks: Vec<(String, Chunk)> = points
.iter()
.map(|r| {
let (payload, maybe_score) = match r {
QdrantPoint::Retrieved(r) => (&r.payload, None),
QdrantPoint::Scored(s) => (&s.payload, Some(s.score as f64)),
};

.map(|r| match r {
QdrantPoint::Retrieved(r) => (&r.payload, None),
QdrantPoint::Scored(s) => (&s.payload, Some(s.score as f64)),
})
.filter(|(payload, _)| {
payload.get("document_id").is_some()
&& payload.get("text").is_some()
&& payload.get("chunk_hash").is_some()
&& payload.get("chunk_offset").is_some()
})
.map(|(payload, maybe_score)| {
let document_id = match payload.get("document_id") {
Some(t) => match t.kind {
Some(qdrant::value::Kind::StringValue(ref s)) => s.clone(),
_ => Err(anyhow!("Invalid `document_id` in chunk payload (data_source_id={} internal_id={} kind={:?})", data_source_id, internal_id, t.kind))?,
_ => Err(anyhow!(
"Invalid `document_id` in chunk payload \
(data_source_id={} internal_id={} kind={:?})",
data_source_id,
internal_id,
t.kind
))?,
},
None => Err(anyhow!("Missing `document_id` in chunk payload (data_source_id={} internal_id={})", data_source_id, internal_id))?,
None => Err(anyhow!(
"Missing `document_id` in chunk payload (data_source_id={} internal_id={})",
data_source_id,
internal_id
))?,
};
let text = match payload.get("text") {
Some(t) => match t.kind {
Expand Down

0 comments on commit 0f687cf

Please sign in to comment.