Skip to content

Commit

Permalink
Improve formatting strategy
Browse files Browse the repository at this point in the history
  • Loading branch information
markkohdev committed Mar 29, 2024
1 parent 56028bc commit 49165f7
Show file tree
Hide file tree
Showing 9 changed files with 172 additions and 197 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/all.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
- name: Check C++ Formatting
uses: jidicula/[email protected]
with:
clang-format-version: 14
clang-format-version: 16
fallback-style: LLVM

run-java-tests:
Expand Down
15 changes: 9 additions & 6 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ make test
## Style
Use [`clang-format`](https://clang.llvm.org/docs/ClangFormat.html) for C++ code, and `black` with defaults for Python code.

In order to check and run formatting within the python module (but not the c++ core module), you can use tox to facilitate this.
### Python
In order to check and run formatting within the python module, you can use tox to facilitate this.
```bash
cd python
# Check formatting only (don't change files)
Expand All @@ -111,13 +112,15 @@ tox -e check-formatting
tox -e format
```

For C++ code within the core `cpp` module, you can use the following command to check formatting:
### C++
If you are working on any C++ code throughout the repo, ensure you have `clang-format` (version 16) installed, and then use clang-format to handle C++ formatting:
```bash
cd cpp
# Check formatting only
clang-format --verbose --dry-run -i src/*.h
# Run formatter
clang-format --verbose -i src/*.h
cmake .
# Check formatting only (don't change files)
make check-formatting
# Run formatter
make format
```

### Updating Documentation
Expand Down
17 changes: 17 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,20 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
add_subdirectory(include)
add_subdirectory(src)
add_subdirectory(test)

# Define the command
set(FIND_COMMAND find .. -type d -path ./include -prune -o -type d -path ../python/.tox -prune -o -type f -name \"*.cpp\" -o -name \"*.h\" -print)
set(CHECK_FORMAT_COMMAND clang-format --dry-run -i)
set(FORMAT_COMMAND clang-format --verbose -i)

# Check formatting only
add_custom_target(check-formatting
COMMAND ${CHECK_FORMAT_COMMAND} `${FIND_COMMAND}`
COMMENT "Checking C++ formatting"
)

# Run formatter
add_custom_target(check-formatting
COMMAND ${FORMAT_COMMAND} `${FIND_COMMAND}`
COMMENT "Running C++ formatter"
)
291 changes: 107 additions & 184 deletions python/bindings.cpp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion python/test-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pytest
tox
black
clang-format
clang-format~=16.0
zstd
3 changes: 0 additions & 3 deletions python/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ deps =
allowlist_externals =
bash
commands =
bash -c "[ -L voyager-headers ] || [ -d voyager-headers ] || cp -r {toxinidir}/../src ./voyager-headers/"
python -m pip install .
pytest {posargs}

Expand All @@ -22,7 +21,6 @@ deps =
skip_install = true
commands =
black voyager tests --diff --check --line-length 120
clang-format -style=LLVM -Werror --dry-run bindings.cpp

[testenv:format]
basepython = python3
Expand All @@ -32,7 +30,6 @@ deps =
skip_install = true
commands =
black voyager tests --line-length 120
clang-format -style=LLVM -i bindings.cpp

[testenv:lint]
basepython = python3
Expand Down
2 changes: 1 addition & 1 deletion python/typenames.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ template <> const std::string typeName<unsigned int>() { return "uint32"; }
template <> const std::string typeName<float>() { return "float32"; }
template <> const std::string typeName<long>() { return "int64"; }
template <> const std::string typeName<unsigned long>() { return "uint64"; }
template <> const std::string typeName<double>() { return "float64"; }
template <> const std::string typeName<double>() { return "float64"; }
1 change: 0 additions & 1 deletion python/voyager-headers

This file was deleted.

36 changes: 36 additions & 0 deletions python/voyager/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ class Index:
1234 in index # => returns True or False
"""

def __len__(self) -> int:
"""
Returns the number of non-deleted vectors in this index.
Expand All @@ -183,6 +184,7 @@ class Index:
.. note::
This value may differ from :py:attr:`num_elements` if elements have been deleted.
"""

@classmethod
def __new__(
cls,
Expand All @@ -199,6 +201,7 @@ class Index:
See documentation for :py:meth:`Index.__init__` for details on required arguments.
"""

def add_item(
self,
vector: numpy.ndarray[typing.Any, numpy.dtype[numpy.float32]],
Expand Down Expand Up @@ -226,6 +229,7 @@ class Index:
If calling :py:meth:`add_item` in a loop, consider batching your
calls by using :py:meth:`add_items` instead, which will be faster.
"""

def add_items(
self,
vectors: numpy.ndarray[typing.Any, numpy.dtype[numpy.float32]],
Expand Down Expand Up @@ -258,6 +262,7 @@ class Index:
The IDs that were assigned to the provided vectors (either auto-generated or provided), in the
same order as the provided vectors.
"""

def as_bytes(self) -> bytes:
"""
Returns the contents of this index as a :py:class:`bytes` object. The resulting object
Expand All @@ -275,10 +280,12 @@ class Index:
index: voyager.Index = ...
serialized_index = bytes(index)
"""

def get_distance(self, a: typing.List[float], b: typing.List[float]) -> float:
"""
Get the distance between two provided vectors. The vectors must share the dimensionality of the index.
"""

def get_vector(self, id: int) -> numpy.ndarray[typing.Any, numpy.dtype[numpy.float32]]:
"""
Get the vector stored in this index at the provided integer ID.
Expand All @@ -293,6 +300,7 @@ class Index:
will return a normalized version of the vector that was
originally added to this index.
"""

def get_vectors(self, ids: typing.List[int]) -> numpy.ndarray[typing.Any, numpy.dtype[numpy.float32]]:
"""
Get one or more vectors stored in this index at the provided integer IDs.
Expand All @@ -304,6 +312,7 @@ class Index:
will return normalized versions of the vector that were
originally added to this index.
"""

@staticmethod
@typing.overload
def load(
Expand Down Expand Up @@ -396,6 +405,7 @@ class Index:
However, chunks of data of up to 100MB in size will be read from the file-like
object at once, hopefully reducing the impact of the GIL.
"""

@staticmethod
@typing.overload
def load(filename: str) -> Index: ...
Expand Down Expand Up @@ -442,6 +452,7 @@ class Index:
index: voyager.Index = ...
del index[1234] # deletes the ID 1234
"""

def query(
self,
vectors: numpy.ndarray[typing.Any, numpy.dtype[numpy.float32]],
Expand Down Expand Up @@ -508,6 +519,7 @@ class Index:
for i, (neighbor_ids, distances) in enumerate(query_neighbor_ids, query_distances):
print(f"\t{i}-th closest neighbor is {neighbor_id}, {distance} away")
"""

def resize(self, new_size: int) -> None:
"""
Resize this index, allocating space for up to ``new_size`` elements to
Expand All @@ -526,6 +538,7 @@ class Index:
in advance, as subsequent calls to :py:meth:`add_items` will not need
to resize the index on-the-fly.
"""

@typing.overload
def save(self, output_path: str) -> None:
"""
Expand All @@ -548,6 +561,7 @@ class Index:
one or more chunks of data (of up to 100MB each) to the provided object for writing.
"""

@typing.overload
def save(self, file_like: typing.BinaryIO) -> None: ...
def unmark_deleted(self, id: int) -> None:
Expand All @@ -557,6 +571,7 @@ class Index:
Once unmarked as deleted, an existing ID will show up in the results of
calls to :py:meth:`query` again.
"""

@property
def M(self) -> int:
"""
Expand All @@ -567,6 +582,7 @@ class Index:
"""

@property
def ef(self) -> int:
"""
Expand All @@ -584,6 +600,7 @@ class Index:
"""

@ef.setter
def ef(self, arg1: int) -> None:
"""
Expand All @@ -599,6 +616,7 @@ class Index:
by passing the ``query_ef`` parameter, allowing finer-grained control over query
speed and recall.
"""

@property
def ef_construction(self) -> int:
"""
Expand All @@ -608,6 +626,7 @@ class Index:
"""

@property
def ids(self) -> LabelSetView:
"""
Expand All @@ -629,6 +648,7 @@ class Index:
"""

@property
def max_elements(self) -> int:
"""
Expand All @@ -649,6 +669,7 @@ class Index:
"""

@max_elements.setter
def max_elements(self, arg1: int) -> None:
"""
Expand All @@ -667,13 +688,15 @@ class Index:
Note that assigning to this property is functionally identical to
calling :py:meth:`resize`.
"""

@property
def num_dimensions(self) -> int:
"""
The number of dimensions in each vector stored by this index.
"""

@property
def num_elements(self) -> int:
"""
Expand All @@ -684,13 +707,15 @@ class Index:
"""

@property
def space(self) -> Space:
"""
Return the :py:class:`Space` used to calculate distances between vectors.
"""

@property
def storage_data_type(self) -> StorageDataType:
"""
Expand All @@ -709,13 +734,15 @@ class E4M3T:
"""
Cast the given E4M3 number to a float.
"""

@typing.overload
def __init__(self, value: float) -> None:
"""
Create an E4M3 number given a floating-point value. If out of range, the value will be clipped.
Create an E4M3 number given a sign, exponent, and mantissa. If out of range, the values will be clipped.
"""

@typing.overload
def __init__(self, sign: int, exponent: int, mantissa: int) -> None: ...
def __repr__(self) -> str: ...
Expand All @@ -724,41 +751,47 @@ class E4M3T:
"""
Create an E4M3 number given a raw 8-bit value.
"""

@property
def exponent(self) -> int:
"""
The effective exponent of this E4M3 number, expressed as an integer.
"""

@property
def mantissa(self) -> int:
"""
The effective mantissa (non-exponent part) of this E4M3 number, expressed as an integer.
"""

@property
def raw_exponent(self) -> int:
"""
The raw value of the exponent part of this E4M3 number, expressed as an integer.
"""

@property
def raw_mantissa(self) -> float:
"""
The raw value of the mantissa (non-exponent part) of this E4M3 number, expressed as a floating point value.
"""

@property
def sign(self) -> int:
"""
The sign bit from this E4M3 number. Will be ``1`` if the number is negative, ``0`` otherwise.
"""

@property
def size(self) -> int:
"""
Expand All @@ -785,6 +818,7 @@ class Float8Index(Index):
"""
Create a new, empty index.
"""

def __repr__(self) -> str: ...
pass

Expand All @@ -805,6 +839,7 @@ class FloatIndex(Index):
"""
Create a new, empty index.
"""

def __repr__(self) -> str: ...
pass

Expand All @@ -825,6 +860,7 @@ class E4M3Index(Index):
"""
Create a new, empty index.
"""

def __repr__(self) -> str: ...
pass

Expand Down

0 comments on commit 49165f7

Please sign in to comment.