From e62cf8e1a9be85be4223388f108d2f825a4cbc3c Mon Sep 17 00:00:00 2001 From: aoiasd Date: Mon, 13 Jan 2025 11:57:54 +0800 Subject: [PATCH 1/2] mark bm25 sparse field as having raw data to skip loading raw data Signed-off-by: aoiasd --- internal/querynodev2/segments/segment.go | 6 +++++- internal/querynodev2/segments/segment_loader.go | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 8cf4abc8887e5..143ee8643d6dd 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -1054,7 +1054,11 @@ func (s *LocalSegment) innerLoadIndex(ctx context.Context, return err } updateIndexInfoSpan := tr.RecordSpan() - if !typeutil.IsVectorType(fieldType) || s.HasRawData(indexInfo.GetFieldID()) { + // Skip warnup chunk cache when + // . scalar data + // . index has row data + // . vector was function output + if !typeutil.IsVectorType(fieldType) || s.HasRawData(indexInfo.GetFieldID()) || fieldSchema.IsFunctionOutput { return nil } diff --git a/internal/querynodev2/segments/segment_loader.go b/internal/querynodev2/segments/segment_loader.go index 02b17f08b8dde..730b663b4d656 100644 --- a/internal/querynodev2/segments/segment_loader.go +++ b/internal/querynodev2/segments/segment_loader.go @@ -1509,7 +1509,7 @@ func getResourceUsageEstimateOfSegment(schema *schemapb.CollectionSchema, loadIn if !estimateResult.HasRawData && !isVectorType { shouldCalculateDataSize = true } - if !estimateResult.HasRawData && isVectorType { + if !estimateResult.HasRawData && isVectorType && !fieldSchema.IsFunctionOutput { mmapChunkCache := paramtable.Get().QueryNodeCfg.MmapChunkCache.GetAsBool() if mmapChunkCache { segmentDiskSize += binlogSize From 3bb9719774f03dcbdd0bc881d6495d4f470a545b Mon Sep 17 00:00:00 2001 From: aoiasd Date: Mon, 13 Jan 2025 15:04:32 +0800 Subject: [PATCH 2/2] skip loading bm25 field raw data Signed-off-by: aoiasd --- 
internal/querynodev2/segments/segment.go | 15 ++++++++++-- .../querynodev2/segments/segment_loader.go | 23 ++++++++++++++----- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 143ee8643d6dd..dda0d82db7b4c 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -62,6 +62,7 @@ import ( "github.com/milvus-io/milvus/pkg/util/indexparams" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/metautil" + "github.com/milvus-io/milvus/pkg/util/metric" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/timerecord" "github.com/milvus-io/milvus/pkg/util/typeutil" @@ -1057,8 +1058,18 @@ func (s *LocalSegment) innerLoadIndex(ctx context.Context, // Skip warnup chunk cache when // . scalar data // . index has row data - // . vector was function output - if !typeutil.IsVectorType(fieldType) || s.HasRawData(indexInfo.GetFieldID()) || fieldSchema.IsFunctionOutput { + // . 
vector was bm25 function output + + if !typeutil.IsVectorType(fieldType) || s.HasRawData(indexInfo.GetFieldID()) { + return nil + } + + metricType, err := funcutil.GetAttrByKeyFromRepeatedKV(common.MetricTypeKey, indexInfo.IndexParams) + if err != nil { + return fmt.Errorf("metric type not exist in index params") + } + + if metricType == metric.BM25 { return nil } diff --git a/internal/querynodev2/segments/segment_loader.go b/internal/querynodev2/segments/segment_loader.go index 730b663b4d656..7d18749ceda18 100644 --- a/internal/querynodev2/segments/segment_loader.go +++ b/internal/querynodev2/segments/segment_loader.go @@ -57,6 +57,7 @@ import ( "github.com/milvus-io/milvus/pkg/util/funcutil" "github.com/milvus-io/milvus/pkg/util/hardware" "github.com/milvus-io/milvus/pkg/util/merr" + "github.com/milvus-io/milvus/pkg/util/metric" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/syncutil" "github.com/milvus-io/milvus/pkg/util/timerecord" @@ -1509,12 +1510,22 @@ func getResourceUsageEstimateOfSegment(schema *schemapb.CollectionSchema, loadIn if !estimateResult.HasRawData && !isVectorType { shouldCalculateDataSize = true } - if !estimateResult.HasRawData && isVectorType && !fieldSchema.IsFunctionOutput { - mmapChunkCache := paramtable.Get().QueryNodeCfg.MmapChunkCache.GetAsBool() - if mmapChunkCache { - segmentDiskSize += binlogSize - } else { - segmentMemorySize += binlogSize + + if !estimateResult.HasRawData && isVectorType { + metricType, err := funcutil.GetAttrByKeyFromRepeatedKV(common.MetricTypeKey, fieldIndexInfo.IndexParams) + if err != nil { + return nil, errors.Wrapf(err, "failed to estimate resource usage of index, metric type nout found, collection %d, segment %d, indexBuildID %d", + loadInfo.GetCollectionID(), + loadInfo.GetSegmentID(), + fieldIndexInfo.GetBuildID()) + } + if metricType != metric.BM25 { + mmapChunkCache := paramtable.Get().QueryNodeCfg.MmapChunkCache.GetAsBool() + if mmapChunkCache { + 
segmentDiskSize += binlogSize + } else { + segmentMemorySize += binlogSize + } } } } else {