-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor + maintain node's in-degree field in HNSW (#478)
* Refactor + implement in-degree maintained and update sanity test. WIP - take care of serializer for benchmarks * Move graph data structures to a new file, add new serialization version and adjust loading accordingly. * tmp adding full benchmark file path for debug * Add test for coverage of the new decoding HNSW version * Add the files for the test * revert file name * revert file name + format * test for serialization v3 only for float32 * Addressing Meirav's CR * format * remove flakiness, revert formatting * move remove to vector * include mutex
- Loading branch information
Showing
19 changed files
with
618 additions
and
450 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
|
||
#pragma once | ||
|
||
#include <cassert> | ||
#include <algorithm> | ||
#include <mutex> | ||
#include "VecSim/utils/vec_utils.h" | ||
|
||
// List of (DistType value, element id) pairs; the DistType value is presumably a distance. | ||
// The id is stored in `second` (that is the field `ElementLevelData::setLinks` reads). | ||
template <typename DistType> | ||
using candidatesList = vecsim_stl::vector<std::pair<DistType, idType>>; | ||
|
||
// Integer type of the per-level link counters (`numLinks`, `totalIncomingLinks`).
using linkListSize = uint16_t;
|
||
struct ElementLevelData { | ||
// A list of ids that are pointing to the node where each edge is *unidirectional* | ||
vecsim_stl::vector<idType> *incomingUnidirectionalEdges; | ||
// Total size of incoming links to the node (both uni and bi directional). | ||
linkListSize totalIncomingLinks; | ||
linkListSize numLinks; | ||
// Flexible array member - https://en.wikipedia.org/wiki/Flexible_array_member | ||
// Using this trick, we can have the links list as part of the ElementLevelData struct, and | ||
// avoid the need to dereference a pointer to get to the links list. We have to calculate the | ||
// size of the struct manually, as `sizeof(ElementLevelData)` will not include this member. We | ||
// do so in the constructor of the index, under the name `levelDataSize` (and | ||
// `elementGraphDataSize`). Notice that this member must be the last member of the struct and | ||
// all nesting structs. | ||
idType links[]; | ||
|
||
explicit ElementLevelData(std::shared_ptr<VecSimAllocator> allocator) | ||
: incomingUnidirectionalEdges(new (allocator) vecsim_stl::vector<idType>(allocator)), | ||
totalIncomingLinks(0), numLinks(0) {} | ||
|
||
linkListSize getNumLinks() const { return this->numLinks; } | ||
idType getLinkAtPos(size_t pos) const { | ||
assert(pos < numLinks); | ||
return this->links[pos]; | ||
} | ||
const vecsim_stl::vector<idType> &getIncomingEdges() const { | ||
return *incomingUnidirectionalEdges; | ||
} | ||
std::vector<idType> copyLinks() { | ||
std::vector<idType> links_copy; | ||
links_copy.assign(links, links + numLinks); | ||
return links_copy; | ||
} | ||
// Sets the outgoing links of the current element. | ||
// Assumes that the object has the capacity to hold all the links. | ||
void setLinks(vecsim_stl::vector<idType> &links) { | ||
numLinks = links.size(); | ||
memcpy(this->links, links.data(), numLinks * sizeof(idType)); | ||
} | ||
template <typename DistType> | ||
void setLinks(candidatesList<DistType> &links) { | ||
numLinks = 0; | ||
for (auto &link : links) { | ||
this->links[numLinks++] = link.second; | ||
} | ||
} | ||
void popLink() { this->numLinks--; } | ||
void setNumLinks(linkListSize num) { this->numLinks = num; } | ||
void setLinkAtPos(size_t pos, idType node_id) { this->links[pos] = node_id; } | ||
void appendLink(idType node_id) { this->links[this->numLinks++] = node_id; } | ||
void newIncomingUnidirectionalEdge(idType node_id) { | ||
this->incomingUnidirectionalEdges->push_back(node_id); | ||
} | ||
bool removeIncomingUnidirectionalEdgeIfExists(idType node_id) { | ||
return this->incomingUnidirectionalEdges->remove(node_id); | ||
} | ||
void increaseTotalIncomingEdgesNum() { this->totalIncomingLinks++; } | ||
void decreaseTotalIncomingEdgesNum() { this->totalIncomingLinks--; } | ||
void swapNodeIdInIncomingEdges(idType id_before, idType id_after) { | ||
auto it = std::find(this->incomingUnidirectionalEdges->begin(), | ||
this->incomingUnidirectionalEdges->end(), id_before); | ||
// This should always succeed | ||
assert(it != this->incomingUnidirectionalEdges->end()); | ||
*it = id_after; | ||
} | ||
}; | ||
|
||
struct ElementGraphData { | ||
size_t toplevel; | ||
std::mutex neighborsGuard; | ||
ElementLevelData *others; | ||
ElementLevelData level0; | ||
|
||
ElementGraphData(size_t maxLevel, size_t high_level_size, | ||
std::shared_ptr<VecSimAllocator> allocator) | ||
: toplevel(maxLevel), others(nullptr), level0(allocator) { | ||
if (toplevel > 0) { | ||
others = (ElementLevelData *)allocator->callocate(high_level_size * toplevel); | ||
if (others == nullptr) { | ||
throw std::runtime_error("VecSim index low memory error"); | ||
} | ||
for (size_t i = 0; i < maxLevel; i++) { | ||
new ((char *)others + i * high_level_size) ElementLevelData(allocator); | ||
} | ||
} | ||
} | ||
~ElementGraphData() = delete; // should be destroyed using `destroy' | ||
|
||
void destroy(size_t levelDataSize, std::shared_ptr<VecSimAllocator> allocator) { | ||
delete this->level0.incomingUnidirectionalEdges; | ||
ElementLevelData *cur_ld = this->others; | ||
for (size_t i = 0; i < this->toplevel; i++) { | ||
delete cur_ld->incomingUnidirectionalEdges; | ||
cur_ld = reinterpret_cast<ElementLevelData *>(reinterpret_cast<char *>(cur_ld) + | ||
levelDataSize); | ||
} | ||
allocator->free_allocation(this->others); | ||
} | ||
ElementLevelData &getElementLevelData(size_t level, size_t levelDataSize) { | ||
assert(level <= this->toplevel); | ||
if (level == 0) { | ||
return this->level0; | ||
} | ||
return *reinterpret_cast<ElementLevelData *>(reinterpret_cast<char *>(this->others) + | ||
(level - 1) * levelDataSize); | ||
} | ||
}; |
Oops, something went wrong.