Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Added NMSLIB patch allowing load/write APIs with a stream object. #2162

Merged
merged 1 commit into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions jni/cmake/init-nmslib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ if(NOT DEFINED APPLY_LIB_PATCHES OR "${APPLY_LIB_PATCHES}" STREQUAL true)
set(PATCH_FILE_LIST)
list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0001-Initialize-maxlevel-during-add-from-enterpoint-level.patch")
list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0002-Adds-ability-to-pass-ef-parameter-in-the-query-for-h.patch")
list(APPEND PATCH_FILE_LIST "${CMAKE_CURRENT_SOURCE_DIR}/patches/nmslib/0003-Adding-two-apis-using-stream-to-load-save-in-Hnsw.patch")

# Get patch id of the last commit
execute_process(COMMAND sh -c "git --no-pager show HEAD | git patch-id --stable" OUTPUT_VARIABLE PATCH_ID_OUTPUT_FROM_COMMIT WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
From 7e099ec111e5c9db4b243da249c73f0ecc206281 Mon Sep 17 00:00:00 2001
From: Dooyong Kim <[email protected]>
Date: Thu, 26 Sep 2024 15:20:53 -0700
Subject: [PATCH] Adding two apis using stream to load/save in Hnsw.

Signed-off-by: Dooyong Kim <[email protected]>
---
similarity_search/include/method/hnsw.h | 4 +++
similarity_search/src/method/hnsw.cc | 44 +++++++++++++++++++++++++
2 files changed, 48 insertions(+)

diff --git a/similarity_search/include/method/hnsw.h b/similarity_search/include/method/hnsw.h
index 57d99d0..7ff3f3d 100644
--- a/similarity_search/include/method/hnsw.h
+++ b/similarity_search/include/method/hnsw.h
@@ -455,8 +455,12 @@ namespace similarity {
public:
virtual void SaveIndex(const string &location) override;

+ void SaveIndexWithStream(std::ostream& output); // Stream counterpart of SaveIndex: writes the index to a caller-supplied std::ostream instead of a file location.
+
virtual void LoadIndex(const string &location) override;

+ void LoadIndexWithStream(std::istream& in); // Stream counterpart of LoadIndex: reads the index from a caller-supplied std::istream. (The definition in hnsw.cc names this parameter `input`.)
+
Hnsw(bool PrintProgress, const Space<dist_t> &space, const ObjectVector &data);
void CreateIndex(const AnyParams &IndexParams) override;

diff --git a/similarity_search/src/method/hnsw.cc b/similarity_search/src/method/hnsw.cc
index 35b372c..e7a2c9e 100644
--- a/similarity_search/src/method/hnsw.cc
+++ b/similarity_search/src/method/hnsw.cc
@@ -771,6 +771,25 @@ namespace similarity {
output.close();
}

+ template <typename dist_t> // Serializes this index into a caller-supplied stream; the stream is neither opened nor closed here.
+ void Hnsw<dist_t>::SaveIndexWithStream(std::ostream &output) { // @param output destination stream, owned by the caller
+ output.exceptions(ios::badbit | ios::failbit); // Make write failures throw; this changes the caller's stream exception mask and is not restored on return.
+
+ unsigned int optimIndexFlag = data_level0_memory_ != nullptr; // 1 when data_level0_memory_ is set, otherwise 0 (presumably set only for the optimized layout - TODO confirm)
+
+ writeBinaryPOD(output, optimIndexFlag); // Flag goes first; LoadIndexWithStream reads it (binary build) to choose the matching loader.
+
+ if (!optimIndexFlag) { // regular (non-optimized) index
+#if USE_TEXT_REGULAR_INDEX
+ SaveRegularIndexText(output); // NOTE(review): the flag above is still written via writeBinaryPOD, while LoadIndexWithStream in this build mode does not read it itself - confirm LoadRegularIndexText consumes it.
+#else
+ SaveRegularIndexBin(output); // binary form of the regular index
+#endif
+ } else { // optimized index
+ SaveOptimizedIndex(output);
+ }
+ }
+
template <typename dist_t>
void
Hnsw<dist_t>::SaveOptimizedIndex(std::ostream& output) {
@@ -1021,6 +1040,31 @@ namespace similarity {

}

+ template <typename dist_t> // Loads an index from a caller-supplied stream; the stream is neither opened nor closed here.
+ void Hnsw<dist_t>::LoadIndexWithStream(std::istream& input) { // @param input source stream, owned by the caller
+ LOG(LIB_INFO) << "Loading index from an input stream.";
+ CHECK_MSG(input, "Cannot open file for reading with an input stream"); // Tests the stream's boolean state; presumably fails fast when the stream is already in an error state (CHECK_MSG is defined elsewhere).
+
+ input.exceptions(ios::badbit | ios::failbit); // Make read failures throw; this changes the caller's stream exception mask and is not restored on return.
+
+#if USE_TEXT_REGULAR_INDEX
+ LoadRegularIndexText(input); // Text build: no flag is read in this function before delegating.
+#else
+ unsigned int optimIndexFlag= 0; // overwritten by the value read from the stream below
+
+ readBinaryPOD(input, optimIndexFlag); // Leading flag emitted by SaveIndexWithStream via writeBinaryPOD.
+
+ if (!optimIndexFlag) { // zero -> regular binary index
+ LoadRegularIndexBin(input);
+ } else { // non-zero -> optimized index
+ LoadOptimizedIndex(input);
+ }
+#endif
+
+ LOG(LIB_INFO) << "Finished loading index";
+ visitedlistpool = new VisitedListPool(1, totalElementsStored_); // Raw allocation assigned over any previous visitedlistpool value with no delete here - NOTE(review): confirm this is never called on an instance that already owns a pool.
+ }
+

template <typename dist_t>
void
--
2.39.5 (Apple Git-154)

Loading