Skip to content

Commit

Permalink
Merge branch 'main' into backport/backport-2378-to-main
Browse files Browse the repository at this point in the history
Signed-off-by: Tommy Shao <[email protected]>
  • Loading branch information
anntians authored Jan 24, 2025
2 parents 0ec8952 + d142366 commit 6d7d62a
Show file tree
Hide file tree
Showing 92 changed files with 2,610 additions and 340 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# This should match the owning team set up in https://github.com/orgs/opensearch-project/teams
* @heemin32 @navneet1v @VijayanB @vamshin @jmazanec15 @naveentatikonda @junqiu-lei @martin-gaievski @ryanbogan @luyuncheng @shatejas
* @heemin32 @navneet1v @VijayanB @vamshin @jmazanec15 @naveentatikonda @junqiu-lei @martin-gaievski @ryanbogan @luyuncheng @shatejas @0ctopus13prime
14 changes: 10 additions & 4 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,21 @@ jobs:
chown -R 1000:1000 `pwd`
if lscpu | grep -i avx512f | grep -i avx512cd | grep -i avx512vl | grep -i avx512dq | grep -i avx512bw
then
echo "avx512 available on system"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Dnproc.count=`nproc`"
if lscpu | grep -q "GenuineIntel" && lscpu | grep -i avx512_fp16 | grep -i avx512_bf16 | grep -i avx512_vpopcntdq
then
echo "the system is an Intel(R) Sapphire Rapids or a newer-generation processor"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Davx512_spr.enabled=true -Dnproc.count=`nproc`"
else
echo "avx512 available on system"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Davx512_spr.enabled=false -Dnproc.count=`nproc`"
fi
elif lscpu | grep -i avx2
then
echo "avx2 available on system"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Dnproc.count=`nproc` -Davx512.enabled=false"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Davx512.enabled=false -Davx512_spr.enabled=false -Dnproc.count=`nproc`"
else
echo "avx512 and avx2 not available on system"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Davx2.enabled=false -Davx512.enabled=false -Dnproc.count=`nproc`"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Davx2.enabled=false -Davx512.enabled=false -Davx512_spr.enabled=false -Dnproc.count=`nproc`"
fi
Expand Down
12 changes: 9 additions & 3 deletions .github/workflows/backwards_compatibility_tests_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,21 @@ jobs:
echo "Running restart-upgrade backwards compatibility tests ..."
if lscpu | grep -i avx512f | grep -i avx512cd | grep -i avx512vl | grep -i avx512dq | grep -i avx512bw
then
if lscpu | grep -q "GenuineIntel" && lscpu | grep -i avx512_fp16 | grep -i avx512_bf16 | grep -i avx512_vpopcntdq
then
echo "the system is an Intel(R) Sapphire Rapids or a newer-generation processor"
./gradlew :qa:restart-upgrade:testRestartUpgrade -Dtests.bwc.version=$BWC_VERSION_RESTART_UPGRADE -Dnproc.count=`nproc` -Davx512_spr.enabled=true
else
echo "avx512 available on system"
./gradlew :qa:restart-upgrade:testRestartUpgrade -Dtests.bwc.version=$BWC_VERSION_RESTART_UPGRADE -Dnproc.count=`nproc`
./gradlew :qa:restart-upgrade:testRestartUpgrade -Dtests.bwc.version=$BWC_VERSION_RESTART_UPGRADE -Dnproc.count=`nproc` -Davx512_spr.enabled=false
fi
elif lscpu | grep -i avx2
then
echo "avx2 available on system"
./gradlew :qa:restart-upgrade:testRestartUpgrade -Dtests.bwc.version=$BWC_VERSION_RESTART_UPGRADE -Dnproc.count=`nproc` -Davx512.enabled=false
./gradlew :qa:restart-upgrade:testRestartUpgrade -Dtests.bwc.version=$BWC_VERSION_RESTART_UPGRADE -Davx512.enabled=false -Davx512_spr.enabled=false -Dnproc.count=`nproc`
else
echo "avx512 and avx2 not available on system"
./gradlew :qa:restart-upgrade:testRestartUpgrade -Dtests.bwc.version=$BWC_VERSION_RESTART_UPGRADE -Davx2.enabled=false -Davx512.enabled=false -Dsimd.enabled=false -Dnproc.count=`nproc`
./gradlew :qa:restart-upgrade:testRestartUpgrade -Dtests.bwc.version=$BWC_VERSION_RESTART_UPGRADE -Davx2.enabled=false -Davx512.enabled=false -Davx512_spr.enabled=false -Dsimd.enabled=false -Dnproc.count=`nproc`
fi
Rolling-Upgrade-BWCTests-k-NN:
Expand Down
14 changes: 10 additions & 4 deletions .github/workflows/test_security.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,19 @@ jobs:
chown -R 1000:1000 `pwd`
if lscpu | grep -i avx512f | grep -i avx512cd | grep -i avx512vl | grep -i avx512dq | grep -i avx512bw
then
echo "avx512 available on system"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Dnproc.count=`nproc`"
if lscpu | grep -q "GenuineIntel" && lscpu | grep -i avx512_fp16 | grep -i avx512_bf16 | grep -i avx512_vpopcntdq
then
echo "the system is an Intel(R) Sapphire Rapids or a newer-generation processor"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Davx512_spr.enabled=true -Dnproc.count=`nproc`"
else
echo "avx512 available on system"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Davx512_spr.enabled=false -Dnproc.count=`nproc`"
fi
elif lscpu | grep -i avx2
then
echo "avx2 available on system"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Dnproc.count=`nproc` -Davx512.enabled=false"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Davx512.enabled=false -Davx512_spr.enabled=false -Dnproc.count=`nproc`"
else
echo "avx512 and avx2 not available on system"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Davx2.enabled=false -Davx512.enabled=false -Dnproc.count=`nproc`"
su `id -un 1000` -c "whoami && java -version && ./gradlew build -Davx2.enabled=false -Davx512.enabled=false -Davx512_spr.enabled=false -Dnproc.count=`nproc`"
fi
18 changes: 18 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Bug Fixes
### Infrastructure
* Removed JDK 11 and 17 version from CI runs [#1921](https://github.com/opensearch-project/k-NN/pull/1921)
* Upgrade min JDK compatibility to JDK 21 [#2422](https://github.com/opensearch-project/k-NN/pull/2422)
### Documentation
### Maintenance
### Refactoring
Expand All @@ -19,6 +20,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add Support for Multi Values in innerHit for Nested k-NN Fields in Lucene and FAISS (#2283)[https://github.com/opensearch-project/k-NN/pull/2283]
- Add binary index support for Lucene engine. (#2292)[https://github.com/opensearch-project/k-NN/pull/2292]
- Add expand_nested_docs Parameter support to NMSLIB engine (#2331)[https://github.com/opensearch-project/k-NN/pull/2331]
- Add a new build mode, `FAISS_OPT_LEVEL=avx512_spr`, which enables the use of advanced AVX-512 instructions introduced with Intel(R) Sapphire Rapids (#2404)[https://github.com/opensearch-project/k-NN/pull/2404]
- Add cosine similarity support for faiss engine (#2376)[https://github.com/opensearch-project/k-NN/pull/2376]
### Enhancements
- Introduced a writing layer in native engines that relies on the writing interface to process IO. (#2241)[https://github.com/opensearch-project/k-NN/pull/2241]
- Allow method parameter override for training based indices (#2290)[https://github.com/opensearch-project/k-NN/pull/2290]
Expand All @@ -29,18 +32,33 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Make the build work for M series MacOS without manual code changes and local JAVA_HOME config (#2397)[https://github.com/opensearch-project/k-NN/pull/2397]
- Remove DocsWithFieldSet reference from NativeEngineFieldVectorsWriter (#2408)[https://github.com/opensearch-project/k-NN/pull/2408]
- Remove skip building graph check for quantization use case (#2430)[https://github.com/opensearch-project/k-NN/pull/2430]
- Add check to directly use ANN Search when filters match all docs. (#2320)[https://github.com/opensearch-project/k-NN/pull/2320]
- Use one formula to calculate cosine similarity (#2357)[https://github.com/opensearch-project/k-NN/pull/2357]
- Add WithFieldName implementation to KNNQueryBuilder (#2398)[https://github.com/opensearch-project/k-NN/pull/2398]
- Make the build work for M series MacOS without manual code changes and local JAVA_HOME config (#2397)[https://github.com/opensearch-project/k-NN/pull/2397]
- Remove DocsWithFieldSet reference from NativeEngineFieldVectorsWriter (#2408)[https://github.com/opensearch-project/k-NN/pull/2408]
### Bug Fixes
* Fixing the bug when a segment has no vector field present for disk based vector search (#2282)[https://github.com/opensearch-project/k-NN/pull/2282]
* Fixing the bug where search fails with "fields" parameter for an index with a knn_vector field (#2314)[https://github.com/opensearch-project/k-NN/pull/2314]
* Fix for NPE while merging segments after all the vector fields docs are deleted (#2365)[https://github.com/opensearch-project/k-NN/pull/2365]
* Allow validation for non knn index only after 2.17.0 (#2315)[https://github.com/opensearch-project/k-NN/pull/2315]
* Fixing the bug to prevent updating the index.knn setting after index creation (#2348)[https://github.com/opensearch-project/k-NN/pull/2348]
* Release query vector memory after execution (#2346)[https://github.com/opensearch-project/k-NN/pull/2346]
* Fix shard level rescoring disabled setting flag (#2352)[https://github.com/opensearch-project/k-NN/pull/2352]
* Fix filter rewrite logic which was resulting in getting inconsistent / incorrect results for cases where filter was getting rewritten for shards (#2359)[https://github.com/opensearch-project/k-NN/pull/2359]
* Fixing it to retrieve space_type from index setting when both method and top level don't have the value. [#2374](https://github.com/opensearch-project/k-NN/pull/2374)
* Fixing the bug where setting rescore as false for on_disk knn_vector query is a no-op (#2399)[https://github.com/opensearch-project/k-NN/pull/2399]
* Fixing bug where mapping accepts both dimension and model-id (#2410)[https://github.com/opensearch-project/k-NN/pull/2410]
### Infrastructure
* Updated C++ version in JNI from c++11 to c++17 [#2259](https://github.com/opensearch-project/k-NN/pull/2259)
* Upgrade bytebuddy and objenesis version to match OpenSearch core and, update github ci runner for macos [#2279](https://github.com/opensearch-project/k-NN/pull/2279)
### Documentation
### Maintenance
* Select index settings based on cluster version [#2236](https://github.com/opensearch-project/k-NN/pull/2236)
* Added periodic cache maintenance for QuantizationStateCache and NativeMemoryCache [#2308](https://github.com/opensearch-project/k-NN/pull/2308)
* Added null checks for fieldInfo in ExactSearcher to avoid NPE while running exact search for segments with no vector field (#2278)[https://github.com/opensearch-project/k-NN/pull/2278]
* Added Lucene BWC tests (#2313)[https://github.com/opensearch-project/k-NN/pull/2313]
* Upgrade jsonpath from 2.8.0 to 2.9.0 [#2325](https://github.com/opensearch-project/k-NN/pull/2325)
* Bump Faiss commit from 1f42e81 to 0cbc2a8 to accelerate hamming distance calculation using _mm512_popcnt_epi64 intrinsic and also add avx512-fp16 instructions to boost performance [#2381](https://github.com/opensearch-project/k-NN/pull/2381)
* Enabled indices.breaker.total.use_real_memory setting via build.gradle for integTest Cluster to catch heap CB in local ITs and github CI actions [#2395](https://github.com/opensearch-project/k-NN/pull/2395/)
### Refactoring
73 changes: 51 additions & 22 deletions DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
- [Getting Started](#getting-started)
- [Fork OpenSearch k-NN Repo](#fork-opensearch-k-nn-repo)
- [Install Prerequisites](#install-prerequisites)
- [JDK 11](#jdk-11)
- [JDK 21](#jdk-21)
- [CMake](#cmake)
- [Faiss Dependencies](#Faiss-Dependencies)
- [Environment](#Environment)
Expand Down Expand Up @@ -39,23 +39,20 @@ git clone https://github.com/[your username]/OpenSearch.git

### Install Prerequisites

#### JDK 11
#### JDK 21

OpenSearch builds using Java 11 at a minimum. This means you must have a JDK 11 installed with the environment variable
`JAVA_HOME` referencing the path to Java home for your JDK 11 installation, e.g. `JAVA_HOME=/usr/lib/jvm/jdk-11`.
OpenSearch builds using Java 21 at a minimum. This means you must have a JDK 21 installed with the environment variable
`JAVA_HOME` referencing the path to Java home for your JDK 21 installation, e.g. `JAVA_HOME=/usr/lib/jvm/jdk-21`.

One easy way to get Java 11 on *nix is to use [sdkman](https://sdkman.io/).
One easy way to get Java 21 on *nix is to use [sdkman](https://sdkman.io/).

```bash
curl -s "https://get.sdkman.io" | bash
source ~/.sdkman/bin/sdkman-init.sh
sdk install java 11.0.2-open
sdk use java 11.0.2-open
sdk install java 21.0.2-open
sdk use java 21.0.2-open
```

Team has to replace minimum JDK version 14 as it was not an LTS release. JDK 14 should still work for most scenarios.
In addition to this, the plugin has been tested with JDK 17, and this JDK version is fully supported.

#### CMake

The plugin requires that cmake >= 3.24.0 is installed in order to build the JNI libraries.
Expand Down Expand Up @@ -143,6 +140,21 @@ export OPENSEARCH_HOME=the directory of opensearch...
export JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:$OPENSEARCH_HOME/plugins/opensearch-knn/lib
```
CMake will use whatever `JAVA_HOME` your Gradle build is currently using. For example:
```bash
Java home directory found by gradle: /opt/homebrew/Cellar/openjdk@21/21.0.5/libexec/openjdk.jdk/Contents/Home
=======================================
OpenSearch Build Hamster says Hello!
Gradle Version : 8.4
OS Info : Mac OS X 14.4 (aarch64)
JDK Version : 21 (Homebrew JDK)
JAVA_HOME : /opt/homebrew/Cellar/openjdk@21/21.0.5/libexec/openjdk.jdk/Contents/Home
Random Testing Seed : 8AB32A4719AA345E
In FIPS 140 mode : false
=======================================
```
The JAVA_HOME used by gradle will be the default that the project will be using.
#### Environment
Currently, the plugin only supports Linux on x64 and arm platforms.
Expand Down Expand Up @@ -186,10 +198,10 @@ Please follow these formatting guidelines:
OpenSearch k-NN uses a [Gradle](https://docs.gradle.org/6.6.1/userguide/userguide.html) wrapper for its build.
Run `gradlew` on Unix systems.
Tests use `JAVA11_HOME` environment variable, make sure to add it in the export path else the tests might fail.
Tests use the `JAVA21_HOME` environment variable; make sure to export it in your shell profile, otherwise the tests might fail.
e.g.
```
echo "export JAVA11_HOME=<JDK11 path>" >> ~/.zshrc
echo "export JAVA21_HOME=<JDK21 path>" >> ~/.zshrc
source ~/.zshrc
```
Expand Down Expand Up @@ -285,21 +297,38 @@ make -j 4
### Enable SIMD Optimization
SIMD(Single Instruction/Multiple Data) Optimization is enabled by default on Linux and Mac which boosts the performance
by enabling `AVX2` and `AVX512` on `x86 architecture` and `NEON` on `ARM64 architecture` where applicable while building the Faiss library. But to enable SIMD,
the underlying processor should support these capabilities (AVX512, AVX2 or NEON). It can be disabled by setting the parameter `avx2.enabled` to `false` and
`avx512.enabled` to `false`. If your processor supports `AVX512` or `AVX2`, they can be set by enabling the setting . By default, these values are enabled on
OpenSearch. Some exceptions: As of now, SIMD support is not supported on Windows OS, and AVX512 is not present on MAC systems due to hardware not supporting the
feature.
the underlying processor should support these capabilities (AVX512, AVX2 or NEON). It can be disabled by setting the parameter `avx2.enabled`, `avx512.enabled`,
and `avx512_spr.enabled` to `false`. If your processor supports `AVX512` or `AVX2`, they can be set by enabling the setting. On Intel(R) Sapphire Rapids and
newer-generation systems, enabling `avx512_spr` offers support for `AVX512-FP16` and other features. By default, these values are enabled on OpenSearch.
Some exceptions: As of now, SIMD is not supported on Windows OS, and AVX512 is not available on Mac systems because the hardware does not support the feature.
```
# While building OpenSearch k-NN
./gradlew build -Davx2.enabled=true -Davx512.enabled=true
# if (system_supports_avx512_spr) generate_avx512_spr_binaries()
# else if (system_supports_avx512) generate_avx512_binaries()
# else if (system_supports_avx2) generate_avx2_binaries()
# else() generate_generic_binaries()
# While running OpenSearch k-NN
./gradlew run -Davx2.enabled=true -Davx512.enabled=true
# generate avx2 binaries
./gradlew build -Davx2.enabled=true -Davx512.enabled=false -Davx512_spr.enabled=false
# if (system_supports_avx512_spr) generate_avx512_spr_binaries()
# else if (system_supports_avx512) generate_avx512_binaries()
# else() generate_generic_binaries()
./gradlew build -Davx2.enabled=false -Davx512.enabled=true
# if (system_supports_avx512_spr) generate_avx512_spr_binaries()
# else if (system_supports_avx2) generate_avx2_binaries()
# else() generate_generic_binaries()
./gradlew build -Davx512.enabled=false -Davx512_spr.enabled=true
# if (system_supports_avx512) generate_avx512_binaries()
# else if (system_supports_avx2) generate_avx2_binaries()
# else() generate_generic_binaries()
./gradlew build -Davx512.enabled=true -Davx512_spr.enabled=false
# While building the JNI libraries
# similar logic applies for jni
cd jni
cmake . -DAVX2_ENABLED=true -DAVX512_ENABLED=true
cmake . -DAVX2_ENABLED=true -DAVX512_ENABLED=true -DAVX512_SPR_ENABLED=true
```
## Run OpenSearch k-NN
Expand Down
1 change: 1 addition & 0 deletions MAINTAINERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ This document contains a list of maintainers in this repo. See [opensearch-proje

| Maintainer | GitHub ID | Affiliation |
|-------------------------|-------------------------------------------------------|-------------|
| Doo Yong Kim | [0ctopus13prime](https://github.com/0ctopus13prime) | Amazon |
| Heemin Kim | [heemin32](https://github.com/heemin32) | Amazon |
| Jack Mazanec | [jmazanec15](https://github.com/jmazanec15) | Amazon |
| Junqiu Lei | [junqiu-lei](https://github.com/junqiu-lei) | Amazon |
Expand Down
Loading

0 comments on commit 6d7d62a

Please sign in to comment.