Skip to content
This repository has been archived by the owner on Oct 30, 2024. It is now read-only.

fix: bm25 returned NaN scores due to wrong term frequency calculation upstream #109

Merged
merged 1 commit into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ require (
github.com/go-git/go-git/v5 v5.12.0
github.com/google/uuid v1.6.0
github.com/hupe1980/golc v0.0.112
github.com/iwilltry42/bm25-go v0.0.0-20240821172942-fe039bf617dd
github.com/iwilltry42/bm25-go v0.0.0-20240909111832-a928590cc9da
github.com/jmcarbo/stopwords v1.1.9
github.com/joho/godotenv v1.5.1
github.com/knadh/koanf/parsers/json v0.1.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ github.com/hupe1980/go-tiktoken v0.0.9 h1:qNs/XGTe7UHDUaFkU+jAPbhGzyi9BusOpxrNC8
github.com/hupe1980/go-tiktoken v0.0.9/go.mod h1:NME6d8hrE+Jo+kLUZHhXShYV8e40hYkm4BbSLQKtvAo=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/iwilltry42/bm25-go v0.0.0-20240821172942-fe039bf617dd h1:jXAPsEoBgiKLA3vS/2uZn7Ig5r0Kg2sR1QNP4ZBqDcM=
github.com/iwilltry42/bm25-go v0.0.0-20240821172942-fe039bf617dd/go.mod h1:AmA5WoRtPljzo03tlOVOoLKPh9doUNUQrtrHaq5VkUg=
github.com/iwilltry42/bm25-go v0.0.0-20240909111832-a928590cc9da h1:aqahDXw6bOtbupzBGZpnV57M3JShpN0jrtd6cyclH8I=
github.com/iwilltry42/bm25-go v0.0.0-20240909111832-a928590cc9da/go.mod h1:AmA5WoRtPljzo03tlOVOoLKPh9doUNUQrtrHaq5VkUg=
github.com/iwilltry42/chromem-go v0.0.0-20240906131132-6b288bcce06e h1:HY3d6DwV6Ipz/UJyEViRaG0tchtPSlTyVrvof62XrbU=
github.com/iwilltry42/chromem-go v0.0.0-20240906131132-6b288bcce06e/go.mod h1:hTd+wGEm/fFPQl7ilfCwQXkgEUxceYh86iIdoKMolPo=
github.com/iwilltry42/golc v0.0.113-0.20240802113826-d065a3c5b0c7 h1:2AzzbKVW1iP2F+ovqJKq801l6tgxYPt9m2zFKbs+i/Y=
Expand Down
1 change: 1 addition & 0 deletions pkg/datastore/retrievers/bm25.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ func (r *BM25Retriever) Retrieve(ctx context.Context, store store.Store, query s

for i, _ := range docs {
docs[i].SimilarityScore = float32(bm25scores[i])
slog.Debug("BM25 score", "docID", docs[i].ID, "score", docs[i].SimilarityScore)
}

slices.SortFunc(docs, scores.SortBySimilarityScore)
Expand Down