Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a sparse example #776

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/search.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Search
# Search

API to search data with the specified criteria.

Expand Down Expand Up @@ -52,7 +52,7 @@ for _, result := range sr {
}
}
if idColumn == nil {
log.Fatal("result field not math")
log.Fatal("result field not match")
}
for i := 0; i < result.ResultCount; i++ {
id, err := idColumn.ValueByIdx(i)
Expand Down
2 changes: 1 addition & 1 deletion examples/auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ func main() {
}
}
if idColumn == nil {
log.Fatal("result field not math")
log.Fatal("result field not match")
}
for i := 0; i < result.ResultCount; i++ {
id, err := idColumn.ValueByIdx(i)
Expand Down
2 changes: 1 addition & 1 deletion examples/index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func main() {
}
}
if idColumn == nil {
log.Fatal("result field not math")
log.Fatal("result field not match")
}
for i := 0; i < result.ResultCount; i++ {
id, err := idColumn.ValueByIdx(i)
Expand Down
2 changes: 1 addition & 1 deletion examples/insert/insert.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func main() {
}
}
if idColumn == nil {
log.Fatal("result field not math")
log.Fatal("result field not match")
}
for i := 0; i < result.ResultCount; i++ {
id, err := idColumn.ValueByIdx(i)
Expand Down
216 changes: 216 additions & 0 deletions examples/sparse/sparse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
package main

import (
"context"
"encoding/csv"
"fmt"
"log"
"math/rand"
"os"
"strconv"
"strings"
"time"

"github.com/milvus-io/milvus-sdk-go/v2/client"
"github.com/milvus-io/milvus-sdk-go/v2/entity"
)

// main runs the sparse vector example end to end: it connects to Milvus,
// (re)creates the example collection, inserts the films loaded from CSV,
// flushes, builds a sparse inverted index, loads the collection, searches with
// the first film's vector and finally drops the collection again.
//
// Errors are handled with log.Fatal to keep the example simple.
func main() {
	// Milvus instance proxy address, may verify in your env/settings
	milvusAddr := `localhost:19530`

	// setup context for client creation, use 2 seconds here
	ctx := context.Background()
	ctx, cancel := context.WithTimeout(ctx, 2*time.Second)
	defer cancel()

	c, err := client.NewClient(ctx, client.Config{
		Address: milvusAddr,
	})
	if err != nil {
		// handling error and exit, to make example simple here
		log.Fatal("failed to connect to milvus:", err.Error())
	}
	// in a main func, remember to close the client
	defer c.Close()

	// here is the collection name we use in this example
	collectionName := `gosdk_sparse_example`

	has, err := c.HasCollection(ctx, collectionName)
	if err != nil {
		log.Fatal("failed to check whether collection exists:", err.Error())
	}
	if has {
		// collection with same name exists, clean up the mess; this stays
		// best-effort, a real problem will surface in CreateCollection below
		if err := c.DropCollection(ctx, collectionName); err != nil {
			log.Println("failed to drop existing collection:", err.Error())
		}
	}

	// define collection schema, see film.csv
	schema := entity.NewSchema().WithName(collectionName).WithDescription("this is the example collection for sparse vector").
		WithField(entity.NewField().WithName("ID").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true)).
		WithField(entity.NewField().WithName("Year").WithDataType(entity.FieldTypeInt32)).
		WithField(entity.NewField().WithName("Vector").WithDataType(entity.FieldTypeSparseVector))

	// create the collection with the SDK default shard number
	err = c.CreateCollection(ctx, schema, entity.DefaultShardNumber)
	if err != nil {
		log.Fatal("failed to create collection:", err.Error())
	}

	films, err := loadFilmCSV()
	if err != nil {
		log.Fatal("failed to load film data csv:", err.Error())
	}
	// the first film is used as the search vector below, so at least one is required
	if len(films) == 0 {
		log.Fatal("no film loaded from csv, nothing to insert or search")
	}

	// row-based data converted to column-based
	ids := make([]int64, 0, len(films))
	years := make([]int32, 0, len(films))
	vectors := make([]entity.SparseEmbedding, 0, len(films))
	// titles are not part of the schema; keep them locally to print search results
	idTitle := make(map[int64]string, len(films))
	for _, f := range films {
		ids = append(ids, f.ID)
		idTitle[f.ID] = f.Title
		years = append(years, f.Year)
		vectors = append(vectors, f.Vector)
	}
	idColumn := entity.NewColumnInt64("ID", ids)
	yearColumn := entity.NewColumnInt32("Year", years)
	vectorColumn := entity.NewColumnSparseVectors("Vector", vectors)

	// insert into default partition
	_, err = c.Insert(ctx, collectionName, "", idColumn, yearColumn, vectorColumn)
	if err != nil {
		log.Fatal("failed to insert film data:", err.Error())
	}
	log.Println("insert completed")

	// flush, index build, load and search may take a while, use a fresh 120 seconds budget
	ctx, cancel = context.WithTimeout(context.Background(), time.Second*120)
	defer cancel()
	err = c.Flush(ctx, collectionName, false)
	if err != nil {
		log.Fatal("failed to flush collection:", err.Error())
	}
	log.Println("flush completed")

	// Now add index
	// NOTE(review): 0.3 is presumably the build-time drop ratio - confirm against the SDK docs
	idx, err := entity.NewIndexSparseInverted(entity.IP, 0.3)
	if err != nil {
		log.Fatal("fail to create sparse inverted index:", err.Error())
	}
	err = c.CreateIndex(ctx, collectionName, "Vector", idx, false)
	if err != nil {
		log.Fatal("fail to create index:", err.Error())
	}

	// load collection with async=false
	err = c.LoadCollection(ctx, collectionName, false)
	if err != nil {
		log.Fatal("failed to load collection:", err.Error())
	}
	log.Println("load collection completed")

	searchFilm := films[0].Vector // use first film to search
	// build the search param for the sparse inverted index
	sp, err := entity.NewIndexSparseInvertedSearchParam(0)
	if err != nil {
		log.Fatal("fail to create search param:", err.Error())
	}
	sr, err := c.Search(ctx, collectionName, []string{}, "Year > 1990", []string{"ID"}, []entity.Vector{searchFilm}, "Vector",
		entity.IP, 10, sp)
	if err != nil {
		log.Fatal("fail to search collection:", err.Error())
	}
	for _, result := range sr {
		// locate the "ID" output field and make sure it has the expected column type
		var idColumn *entity.ColumnInt64
		for _, field := range result.Fields {
			if field.Name() != "ID" {
				continue
			}
			if col, ok := field.(*entity.ColumnInt64); ok {
				idColumn = col
			}
		}
		if idColumn == nil {
			log.Fatal("result field not match")
		}
		for i := 0; i < result.ResultCount; i++ {
			id, err := idColumn.ValueByIdx(i)
			if err != nil {
				log.Fatal(err.Error())
			}
			title := idTitle[id]
			fmt.Printf("file id: %d title: %s scores: %f\n", id, title, result.Scores[i])
		}
	}

	// clean up; best-effort, only report a failure
	if err := c.DropCollection(ctx, collectionName); err != nil {
		log.Println("failed to drop collection on clean up:", err.Error())
	}
}

// film is one row of the films CSV in the shape this example works with.
type film struct {
// ID is parsed from the first CSV column and inserted as the primary key field "ID".
ID int64
// Title is taken from the second CSV column; it is not inserted into the
// collection, main only keeps it in a local map to print search results.
Title string
// Year is parsed from the third CSV column and inserted as field "Year".
Year int32
// Vector is the sparse embedding built by loadFilmCSV from the fourth CSV
// column's values and randomly picked positions.
Vector entity.SparseEmbedding
}

// loadFilmCSV reads ../films.csv and converts every well-formed row into a film.
//
// Unlike other examples, this converts the vector field into a random sparse
// vector: the values parsed from the fourth column are paired with randomly
// picked, unique positions in [0, 1000).
//
// Rows with fewer than four columns, an unparsable ID, a Year that is not a
// valid int32, or no parsable vector value are skipped. Individual vector
// values that fail to parse are dropped from their row.
//
// An error is returned when the file cannot be opened or read as CSV, when a
// row carries more values than there are positions to pick from, or when the
// sparse embedding cannot be constructed.
func loadFilmCSV() ([]film, error) {
	// exclusive upper bound of the randomly picked sparse positions
	const positionRange = 1000

	f, err := os.Open("../films.csv") // assume you are in examples/sparse folder, if not, please change the path
	if err != nil {
		return nil, err
	}
	// the file is only read, so an error from Close carries nothing actionable
	defer f.Close()

	raw, err := csv.NewReader(f).ReadAll()
	if err != nil {
		return nil, err
	}

	films := make([]film, 0, len(raw))
	for _, line := range raw {
		if len(line) < 4 { // insufficient columns
			continue
		}
		// ID
		id, err := strconv.ParseInt(line[0], 10, 64)
		if err != nil {
			continue
		}
		// Year, parsed with bitSize 32 so out-of-range values are rejected
		// instead of being silently truncated by the int32 conversion
		year, err := strconv.ParseInt(line[2], 10, 32)
		if err != nil {
			continue
		}

		// Vector values: strip the surrounding brackets, then parse each entry
		vectorStr := strings.ReplaceAll(line[3], "[", "")
		vectorStr = strings.ReplaceAll(vectorStr, "]", "")
		parts := strings.Split(vectorStr, ",")

		sparseValues := make([]float32, 0, len(parts))
		for _, part := range parts {
			v, err := strconv.ParseFloat(strings.TrimSpace(part), 32)
			if err != nil {
				continue
			}
			sparseValues = append(sparseValues, float32(v))
		}
		if len(sparseValues) == 0 {
			continue
		}
		// without this guard the unique-position loop below could never finish
		if len(sparseValues) > positionRange {
			return nil, fmt.Errorf("film %d: %d vector values exceed the %d available sparse positions",
				id, len(sparseValues), positionRange)
		}

		// randomly pick exactly one unique position per parsed value, so that
		// positions and values always have the same length
		uniquePositions := make(map[uint32]struct{}, len(sparseValues))
		for len(uniquePositions) < len(sparseValues) {
			uniquePositions[uint32(rand.Intn(positionRange))] = struct{}{}
		}
		sparsePositions := make([]uint32, 0, len(sparseValues))
		for pos := range uniquePositions {
			sparsePositions = append(sparsePositions, pos)
		}

		vector, err := entity.NewSliceSparseEmbedding(sparsePositions, sparseValues)
		if err != nil {
			return nil, err
		}
		films = append(films, film{
			ID:     id,
			Title:  line[1],
			Year:   int32(year),
			Vector: vector,
		})
	}
	return films, nil
}
2 changes: 1 addition & 1 deletion examples/tls/tls.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ func main() {
}
}
if idColumn == nil {
log.Fatal("result field not math")
log.Fatal("result field not match")
}
for i := 0; i < result.ResultCount; i++ {
id, err := idColumn.ValueByIdx(i)
Expand Down
Loading