From e228eed51b4f8943e5b740eee7d7aad09e5da3de Mon Sep 17 00:00:00 2001 From: chyezh Date: Mon, 2 Sep 2024 17:49:37 +0800 Subject: [PATCH] fix: SkipIndex cause segment fault Signed-off-by: chyezh --- internal/core/src/index/SkipIndex.cpp | 4 +- internal/core/src/index/SkipIndex.h | 68 +++++++++++++-------------- 2 files changed, 34 insertions(+), 38 deletions(-) diff --git a/internal/core/src/index/SkipIndex.cpp b/internal/core/src/index/SkipIndex.cpp index 11949db2bcf9c..20780a4bbc159 100644 --- a/internal/core/src/index/SkipIndex.cpp +++ b/internal/core/src/index/SkipIndex.cpp @@ -119,8 +119,8 @@ SkipIndex::LoadString(milvus::FieldId field_id, auto chunkMetrics = std::make_unique(); if (num_rows > 0) { auto info = ProcessStringFieldMetrics(var_column); - chunkMetrics->min_ = Metrics(info.min_); - chunkMetrics->max_ = Metrics(info.max_); + chunkMetrics->min_ = Metrics(std::move(info.min_)); + chunkMetrics->max_ = Metrics(std::move(info.max_)); chunkMetrics->null_count_ = info.null_count_; } diff --git a/internal/core/src/index/SkipIndex.h b/internal/core/src/index/SkipIndex.h index b6b2b33da6305..21acbf5da5359 100644 --- a/internal/core/src/index/SkipIndex.h +++ b/internal/core/src/index/SkipIndex.h @@ -19,12 +19,13 @@ namespace milvus { -using Metrics = std:: - variant; +using Metrics = + std::variant; +// MetricsDataType is used to avoid copy when get min/max value fro FieldChunkMetrics template using MetricsDataType = - std::conditional_t, std::string_view, T>; + std::conditional_t, std::string, T>; struct FieldChunkMetrics { Metrics min_; @@ -32,7 +33,23 @@ struct FieldChunkMetrics { bool hasValue_; int64_t null_count_; - FieldChunkMetrics() : hasValue_(false){}; + FieldChunkMetrics() : hasValue_(false) {}; + + template + std::pair + GetMinMax() const { + AssertInfo(hasValue_, + "GetMinMax should never be called when hasValue_ is false"); + T lower_bound; + T upper_bound; + try { + lower_bound = std::get>(min_); + upper_bound = std::get>(max_); + } catch (const std::bad_variant_access& e) { + return {}; + } + return {lower_bound, upper_bound}; + } }; class SkipIndex { @@ -99,22 +116,6 @@ class SkipIndex { static constexpr bool value = isAllowedType && !isDisabledType; }; - template - std::pair, MetricsDataType> - GetMinMax(const FieldChunkMetrics& field_chunk_metrics) const { - MetricsDataType lower_bound; - MetricsDataType upper_bound; - try { - lower_bound = - std::get>(field_chunk_metrics.min_); - upper_bound = - std::get>(field_chunk_metrics.max_); - } catch (const std::bad_variant_access&) { - return {}; - } - return {lower_bound, upper_bound}; - } - template std::enable_if_t::value, bool> MinMaxUnaryFilter(const FieldChunkMetrics& field_chunk_metrics, @@ -123,13 +124,11 @@ class SkipIndex { if (!field_chunk_metrics.hasValue_) { return false; } - std::pair, MetricsDataType> minMax = - GetMinMax(field_chunk_metrics); - if (minMax.first == MetricsDataType() || - minMax.second == MetricsDataType()) { + auto [lower_bound, upper_bound] = field_chunk_metrics.GetMinMax(); + if (lower_bound == T() || upper_bound == T()) { return false; } - return RangeShouldSkip(val, minMax.first, minMax.second, op_type); + return RangeShouldSkip(val, lower_bound, upper_bound, op_type); } template @@ -150,15 +149,11 @@ class SkipIndex { if (!field_chunk_metrics.hasValue_) { return false; } - std::pair, MetricsDataType> minMax = - GetMinMax(field_chunk_metrics); - if (minMax.first == MetricsDataType() || - minMax.second == MetricsDataType()) { + auto [lower_bound, upper_bound] = field_chunk_metrics.GetMinMax(); + if (lower_bound == T() || upper_bound == T()) { return false; } bool should_skip = false; - MetricsDataType lower_bound = minMax.first; - MetricsDataType upper_bound = minMax.second; if (lower_inclusive && upper_inclusive) { should_skip = (lower_val > upper_bound) || (upper_val < lower_bound); @@ -188,8 +183,8 @@ class SkipIndex { template bool RangeShouldSkip(const T& value, - const MetricsDataType lower_bound, - const MetricsDataType upper_bound, + const T& lower_bound, + const T& upper_bound, OpType op_type) const { bool should_skip = false; switch (op_type) { @@ -267,7 +262,7 @@ class SkipIndex { return {minValue, maxValue, null_count}; } - metricInfo + metricInfo ProcessStringFieldMetrics( const milvus::VariableColumn& var_column) { int num_rows = var_column.NumRows(); @@ -281,7 +276,7 @@ class SkipIndex { break; } if (start > num_rows - 1) { - return {std::string_view(), std::string_view(), num_rows}; + return {std::string(), std::string(), num_rows}; } std::string_view min_string = var_column.RawAt(start); std::string_view max_string = var_column.RawAt(start); @@ -299,7 +294,8 @@ class SkipIndex { max_string = val; } } - return {min_string, max_string, null_count}; + // The field data may be released, so we need to copy the string to avoid invalid memory access. + return {std::string(min_string), std::string(max_string), null_count}; } private: