Skip to content

Commit

Permalink
enhance: Rename API GenDataset to GenFieldData in unittest (#39386)
Browse files: browse the repository at this point in the history.
Issue: #38666

Signed-off-by: Cai Yudong <[email protected]>
  • Loading branch information
cydrain authored Jan 17, 2025
1 parent d14bb67 commit 64feeb0
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 67 deletions.
10 changes: 8 additions & 2 deletions internal/core/unittest/bench/bench_indexbuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ IndexBuilder_build(benchmark::State& state) {
std::to_string(knowhere::Version::GetCurrentVersion().VersionNumber());

auto is_binary = state.range(2);
auto dataset = GenDataset(NB, metric_type, is_binary);
auto dataset = GenFieldData(NB,
metric_type,
is_binary ? milvus::DataType::VECTOR_BINARY
: milvus::DataType::VECTOR_FLOAT);
auto xb_data = dataset.get_col<float>(milvus::FieldId(START_USER_FIELDID));
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());

Expand Down Expand Up @@ -98,7 +101,10 @@ IndexBuilder_build_and_codec(benchmark::State& state) {
}

auto is_binary = state.range(2);
auto dataset = GenDataset(NB, metric_type, is_binary);
auto dataset = GenFieldData(NB,
metric_type,
is_binary ? milvus::DataType::VECTOR_BINARY
: milvus::DataType::VECTOR_FLOAT);
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
auto xb_dataset = knowhere::GenDataSet(NB, DIM, xb_data.data());

Expand Down
17 changes: 9 additions & 8 deletions internal/core/unittest/test_index_c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ TEST(FloatVecIndex, All) {
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDataset(NB, metric_type, false);
auto dataset = GenFieldData(NB, metric_type);
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));

CDataType dtype = FloatVector;
Expand Down Expand Up @@ -93,8 +93,8 @@ TEST(SparseFloatVecIndex, All) {
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>(
milvus::FieldId(100));
CDataType dtype = SparseFloatVector;
Expand Down Expand Up @@ -157,8 +157,8 @@ TEST(Float16VecIndex, All) {
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));

CDataType dtype = Float16Vector;
Expand Down Expand Up @@ -216,8 +216,8 @@ TEST(BFloat16VecIndex, All) {
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));

CDataType dtype = BFloat16Vector;
Expand Down Expand Up @@ -276,7 +276,8 @@ TEST(BinaryVecIndex, All) {
ok = google::protobuf::TextFormat::PrintToString(index_params,
&index_params_str);
assert(ok);
auto dataset = GenDataset(NB, metric_type, true);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BINARY);
auto xb_data = dataset.get_col<uint8_t>(milvus::FieldId(100));

CDataType dtype = BinaryVector;
Expand Down
15 changes: 7 additions & 8 deletions internal/core/unittest/test_index_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,12 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
vec_field_data_type, config, file_manager_context);
knowhere::DataSetPtr xb_dataset;
if (vec_field_data_type == DataType::VECTOR_BINARY) {
auto dataset = GenDataset(NB, metric_type, true);
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
auto bin_vecs = dataset.get_col<uint8_t>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataSet(NB, DIM, bin_vecs.data());
ASSERT_NO_THROW(index->Build(xb_dataset));
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
auto sparse_vecs = dataset.get_col<knowhere::sparse::SparseRow<float>>(
milvus::FieldId(100));
xb_dataset =
Expand All @@ -143,7 +142,7 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
ASSERT_NO_THROW(index->Build(xb_dataset));
} else {
// VECTOR_FLOAT
auto dataset = GenDataset(NB, metric_type, false);
auto dataset = GenFieldData(NB, metric_type);
auto f_vecs = dataset.get_col<float>(milvus::FieldId(100));
xb_dataset = knowhere::GenDataSet(NB, DIM, f_vecs.data());
ASSERT_NO_THROW(index->Build(xb_dataset));
Expand Down Expand Up @@ -173,14 +172,13 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
std::unique_ptr<SearchResult> result;
if (vec_field_data_type == DataType::VECTOR_FLOAT) {
auto nb_for_nq = NQ + query_offset;
auto dataset = GenDataset(nb_for_nq, metric_type, false);
auto dataset = GenFieldData(nb_for_nq, metric_type);
auto xb_data = dataset.get_col<float>(milvus::FieldId(100));
auto xq_dataset =
knowhere::GenDataSet(NQ, DIM, xb_data.data() + DIM * query_offset);
result = vec_index->Query(xq_dataset, search_info, nullptr);
} else if (vec_field_data_type == DataType::VECTOR_SPARSE_FLOAT) {
auto dataset = GenDatasetWithDataType(
NQ, metric_type, milvus::DataType::VECTOR_SPARSE_FLOAT);
auto dataset = GenFieldData(NQ, metric_type, vec_field_data_type);
auto xb_data = dataset.get_col<knowhere::sparse::SparseRow<float>>(
milvus::FieldId(100));
auto xq_dataset =
Expand All @@ -189,7 +187,8 @@ TEST_P(IndexWrapperTest, BuildAndQuery) {
result = vec_index->Query(xq_dataset, search_info, nullptr);
} else {
auto nb_for_nq = NQ + query_offset;
auto dataset = GenDataset(nb_for_nq, metric_type, true);
auto dataset =
GenFieldData(nb_for_nq, metric_type, DataType::VECTOR_BINARY);
auto xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
// offset of binary vector is 8-aligned bit-wise representation.
auto xq_dataset = knowhere::GenDataSet(
Expand Down
13 changes: 6 additions & 7 deletions internal/core/unittest/test_indexing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,7 @@ class IndexTest : public ::testing::TestWithParam<Param> {
vec_field_data_type = milvus::DataType::VECTOR_FLOAT;
}

auto dataset =
GenDatasetWithDataType(NB, metric_type, vec_field_data_type);
auto dataset = GenFieldData(NB, metric_type, vec_field_data_type);
if (is_binary) {
// binary vector
xb_bin_data = dataset.get_col<uint8_t>(milvus::FieldId(100));
Expand Down Expand Up @@ -788,7 +787,7 @@ TEST(Indexing, SearchDiskAnnWithInvalidParam) {
};

// build disk ann index
auto dataset = GenDataset(NB, metric_type, false);
auto dataset = GenFieldData(NB, metric_type);
FixedVector<float> xb_data =
dataset.get_col<float>(milvus::FieldId(field_id));
knowhere::DataSetPtr xb_dataset =
Expand Down Expand Up @@ -871,8 +870,8 @@ TEST(Indexing, SearchDiskAnnWithFloat16) {
};

// build disk ann index
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_FLOAT16);
FixedVector<float16> xb_data =
dataset.get_col<float16>(milvus::FieldId(field_id));
knowhere::DataSetPtr xb_dataset =
Expand Down Expand Up @@ -954,8 +953,8 @@ TEST(Indexing, SearchDiskAnnWithBFloat16) {
};

// build disk ann index
auto dataset = GenDatasetWithDataType(
NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
auto dataset =
GenFieldData(NB, metric_type, milvus::DataType::VECTOR_BFLOAT16);
FixedVector<bfloat16> xb_data =
dataset.get_col<bfloat16>(milvus::FieldId(field_id));
knowhere::DataSetPtr xb_dataset =
Expand Down
52 changes: 10 additions & 42 deletions internal/core/unittest/test_utils/indexbuilder_test_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,49 +218,17 @@ generate_params(const knowhere::IndexType& index_type,
}

auto
GenDataset(int64_t N,
const knowhere::MetricType& metric_type,
bool is_binary,
int64_t dim = DIM) {
GenFieldData(int64_t N,
const knowhere::MetricType& metric_type,
milvus::DataType data_type = milvus::DataType::VECTOR_FLOAT,
int64_t dim = DIM) {
auto schema = std::make_shared<milvus::Schema>();
if (!is_binary) {
schema->AddDebugField(
"fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
} else {
schema->AddDebugField(
"fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
}
}

auto
GenDatasetWithDataType(int64_t N,
const knowhere::MetricType& metric_type,
milvus::DataType data_type,
int64_t dim = DIM) {
auto schema = std::make_shared<milvus::Schema>();
if (data_type == milvus::DataType::VECTOR_FLOAT16) {
schema->AddDebugField(
"fakevec", milvus::DataType::VECTOR_FLOAT16, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
} else if (data_type == milvus::DataType::VECTOR_BFLOAT16) {
schema->AddDebugField(
"fakevec", milvus::DataType::VECTOR_BFLOAT16, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
} else if (data_type == milvus::DataType::VECTOR_FLOAT) {
schema->AddDebugField(
"fakevec", milvus::DataType::VECTOR_FLOAT, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
} else if (data_type == milvus::DataType::VECTOR_SPARSE_FLOAT) {
schema->AddDebugField(
"fakevec", milvus::DataType::VECTOR_SPARSE_FLOAT, 0, metric_type);
return milvus::segcore::DataGen(schema, N);
} else {
schema->AddDebugField(
"fakebinvec", milvus::DataType::VECTOR_BINARY, dim, metric_type);
return milvus::segcore::DataGen(schema, N);
}
schema->AddDebugField(
"fakevec",
data_type,
(data_type != milvus::DataType::VECTOR_SPARSE_FLOAT ? dim : 0),
metric_type);
return milvus::segcore::DataGen(schema, N);
}

using QueryResultPtr = std::unique_ptr<milvus::SearchResult>;
Expand Down

0 comments on commit 64feeb0

Please sign in to comment.