diff --git a/internal/core/src/index/Index.h b/internal/core/src/index/Index.h
index 4038e21a707b9..87cb5ae683cb6 100644
--- a/internal/core/src/index/Index.h
+++ b/internal/core/src/index/Index.h
@@ -22,6 +22,7 @@
 #include "common/EasyAssert.h"
 #include "knowhere/comp/index_param.h"
 #include "knowhere/dataset.h"
+#include "knowhere/index/index_factory.h"
 #include "common/Tracer.h"
 #include "common/Types.h"
 #include "index/Meta.h"
@@ -64,20 +65,14 @@ class IndexBase {
 
+    // True when knowhere's IndexFactory reports the MMAP feature for
+    // index_type_, or when index_type_ is the bitmap/hybrid index type,
+    // which are allowed explicitly.
     bool
     IsMmapSupported() const {
-        return index_type_ == knowhere::IndexEnum::INDEX_HNSW ||
-               index_type_ == knowhere::IndexEnum::INDEX_FAISS_IVFFLAT ||
-               index_type_ == knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC ||
-               index_type_ == knowhere::IndexEnum::INDEX_FAISS_IVFPQ ||
-               index_type_ == knowhere::IndexEnum::INDEX_FAISS_IVFSQ8 ||
-               index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT ||
-               index_type_ == knowhere::IndexEnum::INDEX_FAISS_IDMAP ||
-               index_type_ == knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP ||
-               index_type_ ==
-                   knowhere::IndexEnum::INDEX_SPARSE_INVERTED_INDEX ||
-               index_type_ == knowhere::IndexEnum::INDEX_SPARSE_WAND ||
-               // support mmap for bitmap/hybrid index
-               index_type_ == milvus::index::BITMAP_INDEX_TYPE ||
-               index_type_ == milvus::index::HYBRID_INDEX_TYPE;
+        return knowhere::IndexFactory::Instance().FeatureCheck(
+                   index_type_, knowhere::feature::MMAP) ||
+               // support mmap for bitmap/hybrid index
+               index_type_ == milvus::index::BITMAP_INDEX_TYPE ||
+               index_type_ == milvus::index::HYBRID_INDEX_TYPE;
     }
 
     const IndexType&
diff --git a/internal/core/src/segcore/vector_index_c.cpp b/internal/core/src/segcore/vector_index_c.cpp
new file mode 100644
index 0000000000000..b45a684d57903
--- /dev/null
+++ b/internal/core/src/segcore/vector_index_c.cpp
@@ -0,0 +1,36 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License
+
+#include "segcore/vector_index_c.h"
+
+#include "knowhere/utils.h"
+#include "index/Meta.h"
+#include "index/IndexFactory.h"
+
+int
+GetIndexListSize() {
+    return knowhere::IndexFactory::Instance().GetIndexFeatures().size();
+}
+
+void
+GetIndexFeatures(void* index_key_list, uint64_t* index_feature_list) {
+    // Bind by reference, not by copy: the c_str() pointers handed back must
+    // outlive this call (assumes the factory owns the map; TODO confirm).
+    const auto& features =
+        knowhere::IndexFactory::Instance().GetIndexFeatures();
+    auto index_keys = static_cast<const char**>(index_key_list);
+    int idx = 0;
+    for (const auto& entry : features) {
+        index_keys[idx] = entry.first.c_str();
+        index_feature_list[idx++] = entry.second;
+    }
+}
+
diff --git a/internal/core/src/segcore/vector_index_c.h b/internal/core/src/segcore/vector_index_c.h
new file mode 100644
index 0000000000000..d38a06b7a1e15
--- /dev/null
+++ b/internal/core/src/segcore/vector_index_c.h
@@ -0,0 +1,38 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License
+
+#pragma once
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// NOTE(review): the target of this #include was lost when the patch was
+// extracted; <stdbool.h> is assumed here -- confirm against the real file.
+#include <stdbool.h>
+
+// Returns the number of index types known to knowhere's IndexFactory, i.e.
+// the number of entries GetIndexFeatures() writes into each output array.
+int
+GetIndexListSize();
+
+// Writes one entry per index type: index_key_list is treated as an array of
+// `const char*` receiving the index names, index_feature_list receives the
+// matching feature bitmasks. Both arrays must hold GetIndexListSize() entries.
+// The name pointers refer to C++-owned strings; callers must not free them.
+void
+GetIndexFeatures(void* index_key_list, uint64_t* index_feature_list);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/internal/core/thirdparty/knowhere/CMakeLists.txt b/internal/core/thirdparty/knowhere/CMakeLists.txt
index f082a30ec8bf2..3585ee2f7b3cd 100644
--- a/internal/core/thirdparty/knowhere/CMakeLists.txt
+++ b/internal/core/thirdparty/knowhere/CMakeLists.txt
@@ -14,8 +14,11 @@
 # Update KNOWHERE_VERSION for the first occurrence
 milvus_add_pkg_config("knowhere")
 set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES "")
-set( KNOWHERE_VERSION d20907f5 )
-set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git")
+# TODO(review): this pins a different GitHub account's repository and what
+# looks like a branch name rather than a commit hash. Before merging, point
+# back at zilliztech/knowhere with a fixed commit so builds are reproducible.
+set( KNOWHERE_VERSION add_vector_index_mgr )
+set( GIT_REPOSITORY "https://github.com/foxspy/knowhere.git")
 message(STATUS "Knowhere repo: ${GIT_REPOSITORY}")
 message(STATUS "Knowhere version: ${KNOWHERE_VERSION}")
 
@@ -60,5 +63,3 @@ endif()
 # get prometheus COMPILE_OPTIONS
 get_property( var DIRECTORY "${knowhere_SOURCE_DIR}" PROPERTY COMPILE_OPTIONS )
 message( STATUS "knowhere src compile options: ${var}" )
-
-set( KNOWHERE_INCLUDE_DIR ${knowhere_SOURCE_DIR}/include CACHE INTERNAL "Path to knowhere include directory" )
diff --git a/internal/datacoord/index_service.go b/internal/datacoord/index_service.go
index 3c519341cb732..dea94dd9a49a2 100644
--- a/internal/datacoord/index_service.go
+++ b/internal/datacoord/index_service.go
@@ -222,10 +222,10 @@ func (s *Server) CreateIndex(ctx context.Context, req *indexpb.CreateIndexReques
 		metrics.IndexRequestCounter.WithLabelValues(metrics.FailLabel).Inc()
 		return merr.Status(err), nil
 	}
-	if GetIndexType(req.GetIndexParams()) == indexparamcheck.IndexDISKANN && !s.indexNodeManager.ClientSupportDisk() {
+	if 
indexparamcheck.GetVecIndexMgrInstance().IsDiskANN(GetIndexType(req.GetIndexParams())) && !s.indexNodeManager.ClientSupportDisk() {
 		errMsg := "all IndexNodes do not support disk indexes, please verify"
 		log.Warn(errMsg)
-		err = merr.WrapErrIndexNotSupported(indexparamcheck.IndexDISKANN)
+		err = merr.WrapErrIndexNotSupported(GetIndexType(req.GetIndexParams()))
 		metrics.IndexRequestCounter.WithLabelValues(metrics.FailLabel).Inc()
 		return merr.Status(err), nil
 	}
diff --git a/internal/datacoord/task_index.go b/internal/datacoord/task_index.go
index abb3fb0f7957e..7cf782322ef4d 100644
--- a/internal/datacoord/task_index.go
+++ b/internal/datacoord/task_index.go
@@ -132,7 +132,7 @@ func (it *indexBuildTask) PreCheck(ctx context.Context, dependency *taskSchedule
 	}
 	indexParams := dependency.meta.indexMeta.GetIndexParams(segIndex.CollectionID, segIndex.IndexID)
 	indexType := GetIndexType(indexParams)
-	if isFlatIndex(indexType) || segIndex.NumRows < Params.DataCoordCfg.MinSegmentNumRowsToEnableIndex.GetAsInt64() {
+	if isBruteForce(indexType) || segIndex.NumRows < Params.DataCoordCfg.MinSegmentNumRowsToEnableIndex.GetAsInt64() {
 		log.Ctx(ctx).Info("segment does not need index really", zap.Int64("taskID", it.taskID),
 			zap.Int64("segmentID", segIndex.SegmentID), zap.Int64("num rows", segIndex.NumRows))
 		it.SetState(indexpb.JobState_JobStateFinished, "fake finished index success")
diff --git a/internal/datacoord/util.go b/internal/datacoord/util.go
index 2def3eb484151..164727ddc746a 100644
--- a/internal/datacoord/util.go
+++ b/internal/datacoord/util.go
@@ -196,16 +196,21 @@ func GetIndexType(indexParams []*commonpb.KeyValuePair) string {
 	return invalidIndex
 }
 
-func isFlatIndex(indexType string) bool {
-	return indexType == indexparamcheck.IndexFaissIDMap || indexType == indexparamcheck.IndexFaissBinIDMap
+// isBruteForce reports whether the vector index manager marks indexType with
+// the brute-force flag.
+func isBruteForce(indexType string) bool {
+	return indexparamcheck.GetVecIndexMgrInstance().IsBruteForce(indexType)
 }
 
+// isOptionalScalarFieldSupported reports whether indexType carries the
+// materialized-view flag in the vector index manager.
 func isOptionalScalarFieldSupported(indexType string) bool {
-	return indexType == indexparamcheck.IndexHNSW
+	return indexparamcheck.GetVecIndexMgrInstance().IsMvSupported(indexType)
 }
 
+// isDiskANNIndex reports whether indexType is DISKANN.
 func isDiskANNIndex(indexType string) bool {
-	return indexType == indexparamcheck.IndexDISKANN
+	return indexparamcheck.GetVecIndexMgrInstance().IsDiskANN(indexType)
 }
 
 func parseBuildIDFromFilePath(key string) (UniqueID, error) {
diff --git a/internal/indexnode/task_index.go b/internal/indexnode/task_index.go
index 1848fdd69310a..e926eb3c6a73e 100644
--- a/internal/indexnode/task_index.go
+++ b/internal/indexnode/task_index.go
@@ -208,7 +208,7 @@ func (it *indexBuildTask) Execute(ctx context.Context) error {
 		zap.Int32("currentIndexVersion", it.req.GetCurrentIndexVersion()))
 
 	indexType := it.newIndexParams[common.IndexTypeKey]
-	if indexType == indexparamcheck.IndexDISKANN {
+	if indexparamcheck.GetVecIndexMgrInstance().IsDiskANN(indexType) {
 		// check index node support disk index
 		if !Params.IndexNodeCfg.EnableDisk.GetAsBool() {
 			log.Warn("IndexNode don't support build disk index",
diff --git a/internal/proxy/task_index.go b/internal/proxy/task_index.go
index 15e6eca34920a..994912d9e292f 100644
--- a/internal/proxy/task_index.go
+++ b/internal/proxy/task_index.go
@@ -297,7 +297,7 @@ func (cit *createIndexTask) parseIndexParams() error {
 		if !exist {
 			return fmt.Errorf("IndexType not specified")
 		}
-		if indexType == indexparamcheck.IndexDISKANN {
+		if indexparamcheck.GetVecIndexMgrInstance().IsDiskANN(indexType) {
 			err := indexparams.FillDiskIndexParams(Params, indexParamsMap)
 			if err != nil {
 				return err
diff --git a/internal/querynodev2/segments/index_attr_cache.go b/internal/querynodev2/segments/index_attr_cache.go
index d4b8fbed637a9..393fdc6ce6f20 100644
--- a/internal/querynodev2/segments/index_attr_cache.go
+++ b/internal/querynodev2/segments/index_attr_cache.go
@@ -60,7 +60,7 @@ func (c *IndexAttrCache) GetIndexResourceUsage(indexInfo *querypb.FieldIndexInfo
 	if err != nil {
 		return 0, 0, fmt.Errorf("index type not exist in index params")
 	}
-	if indexType == indexparamcheck.IndexDISKANN {
+	if indexparamcheck.GetVecIndexMgrInstance().IsDiskANN(indexType) {
 		neededMemSize := indexInfo.IndexSize / UsedDiskMemoryRatio
 		neededDiskSize := indexInfo.IndexSize - neededMemSize
 		return uint64(neededMemSize), uint64(neededDiskSize), nil
diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go
index 58a21765cd90d..d342c63697bf5 100644
--- a/internal/querynodev2/segments/segment.go
+++ b/internal/querynodev2/segments/segment.go
@@ -1110,7 +1110,7 @@ func (s *LocalSegment) LoadIndex(ctx context.Context, indexInfo *querypb.FieldIn
 	delete(indexParams, common.MmapEnabledKey)
 
 	// some build params also exist in indexParams, which are useless during loading process
-	if indexParams["index_type"] == indexparamcheck.IndexDISKANN {
+	if indexparamcheck.GetVecIndexMgrInstance().IsDiskANN(indexParams["index_type"]) {
 		if err := indexparams.SetDiskIndexLoadParams(paramtable.Get(), indexParams, indexInfo.GetNumRows()); err != nil {
 			return err
 		}
diff --git a/pkg/util/indexparamcheck/index_type.go b/pkg/util/indexparamcheck/index_type.go
index 35e85a34e60e9..d0d57c35d9980 100644
--- a/pkg/util/indexparamcheck/index_type.go
+++ b/pkg/util/indexparamcheck/index_type.go
@@ -53,24 +53,12 @@ const (
 )
 
 func IsGpuIndex(indexType IndexType) bool {
-	return indexType == IndexGpuBF ||
-		indexType == IndexRaftIvfFlat ||
-		indexType == IndexRaftIvfPQ ||
-		indexType == IndexRaftCagra
+	return GetVecIndexMgrInstance().IsGPUVecIndex(indexType)
 }
 
 // IsVectorMmapIndex check if the vector index can be mmaped
 func IsVectorMmapIndex(indexType IndexType) bool {
-	return indexType == IndexFaissIDMap ||
-		indexType == IndexFaissIvfFlat ||
-		indexType == IndexFaissIvfPQ ||
-		indexType == IndexFaissIvfSQ8 ||
-		indexType == IndexFaissBinIDMap ||
-		indexType == IndexFaissBinIvfFlat ||
-		indexType == IndexHNSW ||
-		indexType == IndexScaNN ||
-		indexType == IndexSparseInverted ||
-		indexType == IndexSparseWand
+	return 
GetVecIndexMgrInstance().IsMMapSupported(indexType)
 }
 
 func IsOffsetCacheSupported(indexType IndexType) bool {
@@ -78,7 +66,7 @@ func IsOffsetCacheSupported(indexType IndexType) bool {
 }
 
 func IsDiskIndex(indexType IndexType) bool {
-	return indexType == IndexDISKANN
+	return GetVecIndexMgrInstance().IsDiskANN(indexType)
 }
 
 func IsScalarMmapIndex(indexType IndexType) bool {
diff --git a/pkg/util/indexparamcheck/vector_index_mgr.go b/pkg/util/indexparamcheck/vector_index_mgr.go
new file mode 100644
index 0000000000000..c6d9e1bcc22d9
--- /dev/null
+++ b/pkg/util/indexparamcheck/vector_index_mgr.go
@@ -0,0 +1,184 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package indexparamcheck
+
+/*
+#cgo pkg-config: milvus_segcore
+
+#include <stdlib.h> // free
+#include "segcore/vector_index_c.h"
+*/
+import "C"
+
+import (
+	"bytes"
+	"fmt"
+	"sync"
+	"unsafe"
+
+	"github.com/milvus-io/milvus/pkg/log"
+)
+
+const (
+	// BFFlag This flag indicates that there is no need to create any index structure
+	BFFlag uint64 = 1 << 16
+	// KNNFlag This flag indicates that the index defaults to KNN search, meaning the recall rate is 100%
+	KNNFlag uint64 = 1 << 17
+	// GpuFlag This flag indicates that the index is deployed on GPU (need GPU devices)
+	GpuFlag uint64 = 1 << 18
+	// MmapFlag This flag indicates that the index support using mmap manage its mainly memory, which can significant improve the capacity
+	MmapFlag uint64 = 1 << 19
+	// MvFlag This flag indicates that the index support using materialized view to accelerate filtering search
+	MvFlag uint64 = 1 << 20
+	// DiskFlag This flag indicates that the index need disk
+	DiskFlag uint64 = 1 << 21
+)
+
+// VecIndexMgr answers feature queries about vector index types, keyed by the
+// index type name.
+type VecIndexMgr interface {
+	init() error
+	IsFlatVecIndex(indexType IndexType) bool
+	IsBruteForce(indexType IndexType) bool
+	IsVecIndex(indexType IndexType) bool
+	IsDiskANN(indexType IndexType) bool
+	IsGPUVecIndex(indexType IndexType) bool
+	IsDiskVecIndex(indexType IndexType) bool
+	IsMMapSupported(indexType IndexType) bool
+	IsMvSupported(indexType IndexType) bool
+	GetMemoryRatio(indexType IndexType) float64
+	GetDiskRatio(indexType IndexType) float64
+}
+
+// VecIndexMgrImpl implements VecIndexMgr on top of the per-index feature
+// bitmasks obtained from the segcore C API.
+type VecIndexMgrImpl struct {
+	// features maps an index type name to its feature bitmask. It is
+	// populated by init and only read by the query methods.
+	features map[string]uint64
+}
+
+// hasFeature reports whether indexType is present in the feature table and
+// has every bit of flag set. Types missing from the table yield false.
+func (mgr *VecIndexMgrImpl) hasFeature(indexType IndexType, flag uint64) bool {
+	feature, ok := mgr.features[indexType]
+	return ok && (feature&flag) == flag
+}
+
+// init loads the index-type to feature-bitmask table through the C API.
+// It returns an error when the C side reports no index types.
+func (mgr *VecIndexMgrImpl) init() error {
+	size := int(C.GetIndexListSize())
+	if size <= 0 {
+		return fmt.Errorf("vector index list is null")
+	}
+	// The C call fills both arrays; the keys come back as C strings and
+	// are copied into Go strings below.
+	vecIndexList := make([]*C.char, size)
+	vecIndexFeatures := make([]uint64, size)
+
+	C.GetIndexFeatures(unsafe.Pointer(&vecIndexList[0]), (*C.uint64_t)(unsafe.Pointer(&vecIndexFeatures[0])))
+	mgr.features = make(map[string]uint64, size)
+	var featureLog bytes.Buffer
+	for i := 0; i < size; i++ {
+		key := C.GoString(vecIndexList[i])
+		mgr.features[key] = vecIndexFeatures[i]
+		featureLog.WriteString(key + " : " + fmt.Sprintf("%d", vecIndexFeatures[i]) + ",")
+	}
+	log.Info("init vecIndex features : " + featureLog.String())
+	return nil
+}
+
+// IsBruteForce reports whether indexType has BFFlag set.
+func (mgr *VecIndexMgrImpl) IsBruteForce(indexType IndexType) bool {
+	return mgr.hasFeature(indexType, BFFlag)
+}
+
+// IsFlatVecIndex reports whether indexType has KNNFlag set.
+func (mgr *VecIndexMgrImpl) IsFlatVecIndex(indexType IndexType) bool {
+	return mgr.hasFeature(indexType, KNNFlag)
+}
+
+// IsGPUVecIndex reports whether indexType has GpuFlag set.
+func (mgr *VecIndexMgrImpl) IsGPUVecIndex(indexType IndexType) bool {
+	return mgr.hasFeature(indexType, GpuFlag)
+}
+
+// IsDiskVecIndex reports whether indexType has DiskFlag set.
+func (mgr *VecIndexMgrImpl) IsDiskVecIndex(indexType IndexType) bool {
+	return mgr.hasFeature(indexType, DiskFlag)
+}
+
+// IsMMapSupported reports whether indexType has MmapFlag set.
+func (mgr *VecIndexMgrImpl) IsMMapSupported(indexType IndexType) bool {
+	return mgr.hasFeature(indexType, MmapFlag)
+}
+
+// IsMvSupported reports whether indexType has MvFlag set.
+func (mgr *VecIndexMgrImpl) IsMvSupported(indexType IndexType) bool {
+	return mgr.hasFeature(indexType, MvFlag)
+}
+
+// IsVecIndex reports whether indexType is present in the feature table.
+func (mgr *VecIndexMgrImpl) IsVecIndex(indexType IndexType) bool {
+	_, ok := mgr.features[indexType]
+	return ok
+}
+
+// IsDiskANN reports whether indexType is the DISKANN index type.
+func (mgr *VecIndexMgrImpl) IsDiskANN(indexType IndexType) bool {
+	return indexType == IndexDISKANN
+}
+
+// GetMemoryRatio returns 0.25 for DISKANN and 1.0 for every other type.
+func (mgr *VecIndexMgrImpl) GetMemoryRatio(indexType IndexType) float64 {
+	if indexType == IndexDISKANN {
+		return 0.25
+	}
+	return 1.0
+}
+
+// GetDiskRatio always returns 1.0.
+func (mgr *VecIndexMgrImpl) GetDiskRatio(indexType IndexType) float64 {
+	return 1.0
+}
+
+// newVecIndexMgr builds a manager and loads its feature table. A failed load
+// is logged instead of being dropped; the table then stays empty and every
+// flag query answers false.
+func newVecIndexMgr() *VecIndexMgrImpl {
+	log.Info("newVecIndexMgr")
+	mgr := &VecIndexMgrImpl{}
+	if err := mgr.init(); err != nil {
+		log.Warn("failed to init vecIndex features: " + err.Error())
+	}
+	return mgr
+}
+
+var vecIndexMgr VecIndexMgr
+
+var getVecIndexMgrOnce sync.Once
+
+// GetVecIndexMgrInstance returns the shared VecIndexMgr, creating it on first
+// use. It does not log per call: it is invoked from index create and load
+// paths, where an Info line on every lookup would flood the log.
+func GetVecIndexMgrInstance() VecIndexMgr {
+	getVecIndexMgrOnce.Do(func() {
+		vecIndexMgr = newVecIndexMgr()
+	})
+	return vecIndexMgr
+}
diff --git a/pkg/util/indexparamcheck/vector_index_mgr_test.go b/pkg/util/indexparamcheck/vector_index_mgr_test.go
new file mode 100644
index 0000000000000..0d0b51581ec33
--- /dev/null
+++ b/pkg/util/indexparamcheck/vector_index_mgr_test.go
@@ -0,0 +1,12 @@
+package indexparamcheck
+
+import (
+	"testing"
+)
+
+func Test_VecIndex_Init(t *testing.T) {
+	mgr := GetVecIndexMgrInstance()
+	if err := mgr.init(); err != nil {
+		t.Fatalf("init vector index manager: %v", err)
+	}
+}