From 366d5101ae471dc93bcb842541078d6616d6fdbd Mon Sep 17 00:00:00 2001 From: manel1874 Date: Tue, 7 Jan 2025 17:04:53 +0000 Subject: [PATCH] feat: adding nilai_chat_completion function and readme clean up Co-authored-by: Dimitris Mouris --- README.md | 228 ++++++++------------- examples/client_query.py | 96 +++++---- examples/data_owner_upload.py | 43 ++-- examples/query_nildb_config.json | 22 ++ pyproject.toml | 7 + src/nilrag/__init__.py | 30 +-- src/nilrag/nildb_requests.py | 339 ++++++++++++++++++++----------- src/nilrag/util.py | 88 ++++---- uv.lock | 148 ++++++++++++++ 9 files changed, 612 insertions(+), 389 deletions(-) create mode 100644 examples/query_nildb_config.json diff --git a/README.md b/README.md index 653138a..3a2460d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # nilRAG [![GitHub license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/NillionNetwork/nilrag/blob/main/LICENSE) Retrieval Augmented Generation (RAG) using Nillion's -[nilDB](https://github.com/NillionNetwork/nildb) and +[nilAI](https://github.com/NillionNetwork/nilAI), +[nilDB](https://github.com/NillionNetwork/nildb), and [nilQL](https://github.com/NillionNetwork/nilql-py). RAG is a technique that grants large language models information retrieval capabilities and context that they might be missing. @@ -8,67 +9,77 @@ they might be missing. nilRAG combines multiparty computation (MPC) and trusted execution environment (TEE) technologies. -# Use case +# Overview -Data owners often possess valuable files that clients wish to query to enhance their LLM-based inferences. However, ensuring privacy is a key challenge: data owners want to keep their data confidential, and clients are equally concerned about safeguarding their queries. +Data owners often possess valuable files that clients wish to query to enhance +their LLM-based inferences. 
However, ensuring privacy is a key challenge: data +owners want to keep their data confidential, and clients are equally concerned +about safeguarding their queries. -nilRAG addresses this challenge by enabling secure data sharing and querying. It allows data owners to store their data securely in a nilDB cluster while allowing clients to query the data without exposing their queries or compromising the data's privacy. +nilRAG addresses this challenge by enabling secure data sharing and querying. It +allows data owners to store their data securely in a nilDB cluster while +allowing clients to query the data without exposing their queries or +compromising the data's privacy. -The process involves leveraging a Trusted Execution Environment (TEE) server for secure computation. Data owners upload their information to the nilDB cluster, while the TEE server processes client queries and retrieves the most relevant results (top-k) without revealing sensitive information from either party. +The process involves leveraging a Trusted Execution Environment (TEE) server for +secure computation through nilAI. Data owners upload their information to the +nilDB cluster, while nilAI processes client queries and retrieves the most +relevant results (top-k) without revealing sensitive information from either +party. ## Entities summary Let us deep dive into the entities and their roles in the system. -### Data Owners: Secure stores files for RAG -Data owners contribute multiple files, where each file contains several paragraphs. Before sending the files to the nilDB instances, they are processed into N chunks of data and their corresponding embeddings: - -Data Representation: -Chunks (ch_i): Represented as encoded strings. -Embeddings (e_i): Represented as vectors of floats (fixed-point values). - -Once the files are encoded into chunks and embeddings, they are blinded before being uploaded to the NilDB, where each chunk and embedding is secret-shared. 
- - -### Client: Issues a query q -A client submits a query q to search against the data owners' files stored in NilDB and perform RAG (retrieve the most relevant data and use the top-k results for privacy-preserving machine learning (PPML) inference). - -Similar to the data encoding by data owners, the query is processed into its corresponding embeddings: - -### NilDB: Secure Storage and Query Handling -NilDB stores the blinded chunks and embeddings provided by data owners. When a client submits a query, NilDB computes the differences between the query’s embeddings and each stored embedding in a privacy-preserving manner: - -```python -differences = [embedding - query for embedding in embeddings] -``` - -Key Points: -- The number of differences (N) corresponds to the number of chunks uploaded by the data owners. -- For secret-sharing-based NilDB, the computation is performed on the shares. - -### nilTEE: Secure Processing and Retrieval -The nilTEE performs the following steps: - -1. Retrieve and Reveal Differences: -- Connect to NilDB to fetch the blinded differences. -- Reveal the differences by reconstructing shares. - -2. Identify Top-k Indices: -- Sort the differences while retaining their indices to find the `top_k` matches: -```python -indexed_diff = list(enumerate(differences)) -sorted_indexed_diff = sorted(indexed_diff, key=lambda x: x[1]) -indices = [x[0] for x in sorted_indexed_diff] -k = 5 -top_k_indices = indices[:k] -``` - -3. Fetch Relevant Chunks: -- Request NilDB to retrieve the blinded chunks corresponding to the `top_k_indices`. - -4. Prepare for Inference: -- Combine the retrieved `top_k_chunks` with the original query. -- Use the data with an LLM inside the nilTEE for secure inference. +1) **Data Owners:** Secure stores files for RAG Data owners contribute multiple +files, where each file contains several paragraphs. 
Before sending the files to +the nilDB instances, they are processed into N chunks of data and their +corresponding embeddings: + ``` + Chunks (ch_i): Represented as encoded strings. + Embeddings (e_i): Represented as vectors of floats (fixed-point values). + ``` + + Once the files are encoded into chunks and embeddings, they are blinded before being uploaded to the NilDB, where each chunk and embedding is secret-shared. + +2) **Client:** Issues a query q A client submits a query q to search against the +data owners' files stored in NilDB and perform RAG (retrieve the most relevant +data and use the top-k results for privacy-preserving machine learning (PPML) +inference). + + Similar to the data encoding by data owners, the query is processed into its corresponding embeddings: + +3) **NilDB:** Secure Storage and Query Handling + NilDB stores the blinded chunks and embeddings provided by data owners. When a client submits a query, NilDB computes the differences between the query’s embeddings and each stored embedding in a privacy-preserving manner: + ```python + differences = [embedding - query for embedding in embeddings] + ``` + + Key Points: + - The number of differences (N) corresponds to the number of chunks uploaded by the data owners. + - For secret-sharing-based NilDB, the computation is performed on the shares. + +4) **nilAI:** Secure Processing and Retrieval The nilTEE performs the following +steps: + 1. Retrieve and Reveal Differences: Connect to NilDB to fetch the blinded + differences and then reveal the differences by reconstructing shares. + + 2. Identify Top-k Indices: Sort the differences while retaining their + indices to find the `top_k` matches: + ```python + indexed_diff = list(enumerate(differences)) + sorted_indexed_diff = sorted(indexed_diff, key=lambda x: x[1]) + indices = [x[0] for x in sorted_indexed_diff] + k = 5 + top_k_indices = indices[:k] + ``` + + 3. 
Fetch Relevant Chunks: Request NilDB to retrieve the blinded chunks + corresponding to the `top_k_indices`. + + 4. Prepare for Inference: Combine the retrieved `top_k_chunks` with the + original query. Use the data with an LLM inside the nilTEE for secure + inference. # How to use @@ -85,72 +96,44 @@ Then either follow the local installation: # Install package in development mode uv pip install -e . ``` -or use pip *(not available yet)*: +or use `pip`: ```bash pip install nilrag ``` ## Data owner -### Initialization -This initialization step needs to happen before anything else. Note, the -initialization *only needs to be run once* by the data owner. - -This initialization introduces: +### 1. Initialization +If you would like to initialize your own schema, contact Nillion or open an +issue in https://github.com/NillionNetwork/nilrag. You will be provided with: 1. `schema`: which is the structure of the data that the data owner will store. - In this case we have `embedding` (`vector`) and `chunk` - (`string`). Each data owner will upload multiple `embedding`s and `chunk`. + In this case we have `embedding` (`vector`) and `chunk` (`string`). + Each data owner will upload multiple `embedding`s and `chunk`. 2. `query`: This is the nilDB query that will compute the differences under MPC between the stored data owner embeddings and the client's embedding. -In [examples/data_owner_init.py](examples/data_owner_init.py), we provide an example of how to -define the nilDB nodes. Modify [examples/uninitialized_nildb_config.py](examples/uninitialized_nildb_config.py) by adding more nodes and defining the correct URLs, ORGs, and Tokens. 
- -The nilDB instance is initialized as follows: -```python -nilDB = NilDB(nilDB_nodes) -``` -The schema and query are initialized as follows: -```python -# Initialize schema and queries -nilDB.init_schema() -nilDB.init_diff_query() -``` - -By running the script, the `schema` and `query` are saved to the `initialized_nildb_config.json` file: -```bash -uv run examples/data_owner_init.py -``` - -### Uploading Documents -After initialization, the data owner can upload their documents to the nilDB instance. We provide an example of how to do this in [examples/data_owner_upload.py](examples/data_owner_upload.py). +### 2. Uploading Documents +After initialization, the data owner can upload their documents to the nilDB +instance. We provide an example of how to do this in +[examples/data_owner_upload.py](examples/data_owner_upload.py). By running the script, the documents are uploaded to the nilDB instance in secret-shared form: ```bash uv run examples/data_owner_upload.py ``` -## TEE Server -Start the TEE server with a specific config file: - -```bash -# Using default config (tee_nildb_config.json) -uv run examples/launch_tee.py - -# Using a custom config file -uv run examples/launch_tee.py -c custom_nildb_config.json -# or -uv run examples/launch_tee.py --config path/to/config.json -``` - -The server will start on http://0.0.0.0:8000 with: -- API documentation at `/docs` +## 3. nilAI (TEE Server) +If you would like access to nilAI contact Nillion or open an issue in +https://github.com/NillionNetwork/nilrag. -## Client query -After having nilDB initialized, documents uploaded, and the TEE server running, the client can query the nilDB instance. We provide an example of how to do this in [examples/client_query.py](examples/client_query.py). +## 4. Client Query +After having nilDB initialized, documents uploaded, and access to nilAI, the +client can query the nilDB instance. We provide an example of how to do this in +[examples/client_query.py](examples/client_query.py). 
-By running the script, the client's query is sent to the TEE server and the response is returned: +By running the script, the client's query is sent to nilAI and the response is +returned: ```bash uv run examples/client_query.py ``` @@ -160,46 +143,3 @@ uv run examples/client_query.py # Run a specific test file uv run -m unittest test.rag ``` - -You can also add verbose output with -v: -```bash -uv run -m unittest test.rag -v -``` - -## Project Structure -``` -nilrag/ -├── src/ -│ └── nilrag/ -│ ├── __init__.py # Package exports -│ ├── __main__.py -│ ├── app.py # FastAPI application and TEE server -│ ├── nildb.py # NilDB and Node classes -│ └── util.py # Utility functions for RAG -├── test/ -│ ├── __init__.py -│ └── rag.py # Test suite for RAG functionality -├── examples/ -| ├── data/ -| │ └── cities.txt # Sample data for testing -│ ├── client_query.py # Client query example -│ ├── data_owner_init.py # Data owner initialization example -│ ├── data_owner_upload.py # Data owner upload example -| ├── launch_tee.py # TEE server launcher -│ ├── nildb_config.json # Example config with initialized nodes -│ ├── tee_nildb_config.json -│ └── uninitialized_nildb_config.json -├── scripts/ # Utility scripts -├── pyproject.toml # Project metadata and dependencies -├── requirements.txt # Pinned dependencies -├── uv.lock # UV lockfile -└── README.md # Documentation -``` - -### Key Components: -- `src/nilrag/app.py`: TEE server implementation with FastAPI -- `src/nilrag/nildb.py`: Core NilDB interaction logic -- `src/nilrag/util.py`: RAG utilities (embeddings, chunking, rational encoding) -- `test/rag.py`: Test suite for RAG functionality -- `examples`: Script examples for data owner, client, and TEE server -- `scripts`: Utility scripts examples for maintenance and initialization diff --git a/examples/client_query.py b/examples/client_query.py index 40308bc..85842c6 100644 --- a/examples/client_query.py +++ b/examples/client_query.py @@ -1,53 +1,49 @@ -import requests +""" 
+Example of querying nilDB with NilAI using nilRAG. +""" + +import os +import sys import json +from nilrag.nildb_requests import NilDB, Node + + +JSON_FILE = "examples/query_nildb_config.json" + +# Load NilDB from JSON file if it exists +if os.path.exists(JSON_FILE): + print("Loading NilDB configuration from file...") + with open(JSON_FILE, "r", encoding="utf-8") as f: + data = json.load(f) + nodes = [] + for node_data in data["nodes"]: + nodes.append( + Node( + url=node_data["url"], + node_id=None, + org=None, + bearer_token=node_data.get("bearer_token"), + schema_id=node_data.get("schema_id"), + diff_query_id=node_data.get("diff_query_id"), + ) + ) + nilDB = NilDB(nodes) +else: + print("Error: NilDB configuration file not found.") + sys.exit(1) -def query_rag_system(query_text: str, base_url: str = "http://localhost:8000") -> dict: - """ - Send a query to the RAG system's FastAPI endpoint. - - Args: - query_text (str): The query text to send - base_url (str): Base URL of the FastAPI server - - Returns: - dict: The response from the server - """ - # Construct the endpoint URL - endpoint = f"{base_url}/process-client-query" - - # Prepare the request payload - payload = { - "query": query_text - } - - # Set headers for JSON content - headers = { - "accept": "application/json", - "Content-Type": "application/json" - } - - try: - # Send POST request - response = requests.post(endpoint, json=payload, headers=headers) - - # Raise an exception for bad status codes - response.raise_for_status() - - # Return the JSON response - return response.json() - - except requests.exceptions.RequestException as e: - print(f"Error making request: {e}") - return None +print("NilDB instance:", nilDB) +print() -if __name__ == "__main__": - # Example query - query = "Tell me about places in Asia." 
- - # Make the request - result = query_rag_system(query) - - # Print the result - if result: - print("Response from server:") - print(json.dumps(result, indent=2)) \ No newline at end of file +print('Query nilAI with nilRAG...') +response = nilDB.nilai_chat_completion( + nilai_url="http://127.0.0.1:8080/", + token="1770c101-dd83-4fbc-b996-ef8121889172", + messages=[ + {"role": "user", "content": "Tell me about Asia."} + ], + temperature=0.2, + max_tokens=2048, + stream=False, +) +print(response) diff --git a/examples/data_owner_upload.py b/examples/data_owner_upload.py index df78659..64d3640 100644 --- a/examples/data_owner_upload.py +++ b/examples/data_owner_upload.py @@ -1,29 +1,47 @@ +""" +Script to upload data to nilDB using nilRAG. +""" + import os -import nilql import json -from nilrag.util import create_chunks, encrypt_float_list, generate_embeddings_huggingface, load_file +import sys +import nilql +from nilrag.util import ( + create_chunks, + encrypt_float_list, + generate_embeddings_huggingface, + load_file, +) from nilrag.nildb_requests import NilDB, Node -json_file = "examples/nildb_config.json" +JSON_FILE = "examples/nildb_config.json" +# Update with your secret key +SECRET_KEY = "XXXXXXXXXXXXXXXXXXXXXXXX" +FILE_PATH = 'examples/data/cities.txt' # Load NilDB from JSON file if it exists -if os.path.exists(json_file): +if os.path.exists(JSON_FILE): print("Loading NilDB configuration from file...") - with open(json_file, "r") as f: + with open(JSON_FILE, "r", encoding="utf-8") as f: data = json.load(f) nodes = [] for node_data in data["nodes"]: nodes.append( - Node(node_data["url"], node_data["node_id"], node_data["org"], None, node_data.get("schema_id")) + Node( + node_data["url"], + node_data["node_id"], + node_data["org"], + None, + node_data.get("schema_id"), + ) ) nilDB = NilDB(nodes) else: print("Error: NilDB configuration file not found.") - exit(1) + sys.exit(1) -secret_key = "add_here_your_secret_key" -nilDB.generate_jwt(secret_key) 
+nilDB.generate_jwt(SECRET_KEY, ttl=100000000) print("NilDB instance:", nilDB) print() @@ -34,8 +52,7 @@ xor_key = nilql.secret_key({'nodes': [{}] * num_nodes}, {'store': True}) # Load and process input file -file_path = 'examples/data/cities.txt' -paragraphs = load_file(file_path) +paragraphs = load_file(FILE_PATH) chunks = create_chunks(paragraphs, chunk_size=50, overlap=10) # Generate embeddings @@ -45,7 +62,9 @@ # Encrypt chunks and embeddings chunks_shares = [nilql.encrypt(xor_key, chunk) for chunk in chunks] -embeddings_shares = [encrypt_float_list(additive_key, embedding) for embedding in embeddings] +embeddings_shares = [ + encrypt_float_list(additive_key, embedding) for embedding in embeddings +] # Upload encrypted data to nilDB print('Uploading data...') diff --git a/examples/query_nildb_config.json b/examples/query_nildb_config.json new file mode 100644 index 0000000..ca525c2 --- /dev/null +++ b/examples/query_nildb_config.json @@ -0,0 +1,22 @@ +{ + "nodes": [ + { + "url": "https://nildb-node-a50d.sandbox.app-cluster.sandbox.nilogy.xyz/api/v1", + "bearer_token": "XXXXXXXXXXXXXXXX", + "schema_id": "6aa651af-7762-4aaa-9089-82f8eab16201", + "diff_query_id": "dfcee886-231d-4a9d-9bdd-857f74a72964" + }, + { + "url": "https://nildb-node-dvml.sandbox.app-cluster.sandbox.nilogy.xyz/api/v1", + "bearer_token": "XXXXXXXXXXXXXXXX", + "schema_id": "6aa651af-7762-4aaa-9089-82f8eab16201", + "diff_query_id": "dfcee886-231d-4a9d-9bdd-857f74a72964" + }, + { + "url": "https://nildb-node-guue.sandbox.app-cluster.sandbox.nilogy.xyz/api/v1", + "bearer_token": "XXXXXXXXXXXXXXXX", + "schema_id": "6aa651af-7762-4aaa-9089-82f8eab16201", + "diff_query_id": "dfcee886-231d-4a9d-9bdd-857f74a72964" + } + ] +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 36c330c..5cd6923 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,13 @@ dependencies = [ "sentence-transformers>=3.3.1", ] +[dependency-groups] +dev = [ + "black>=24.10.0", + 
"isort>=5.13.2", + "pylint>=3.3.3", +] + [tool.setuptools.packages.find] where = ["src"] include = ["nilrag*"] diff --git a/src/nilrag/__init__.py b/src/nilrag/__init__.py index ee6bf01..af99511 100644 --- a/src/nilrag/__init__.py +++ b/src/nilrag/__init__.py @@ -1,21 +1,13 @@ -from .util import ( # noqa: F401 - load_file, - create_chunks, - generate_embeddings_huggingface, - euclidean_distance, - find_closest_chunks, - group_shares_by_id, - to_fixed_point, - from_fixed_point, - encrypt_float_list, - decrypt_float_list, - encrypt_string_list, - decrypt_string_list, -) +""" +nilRAG init file. +""" -from .nildb_requests import ( # noqa: F401 - Node, - NilDB, -) +from .nildb_requests import NilDB, Node # noqa: F401 +from .util import decrypt_float_list # noqa: F401 +from .util import (create_chunks, decrypt_string_list, encrypt_float_list, + encrypt_string_list, euclidean_distance, + find_closest_chunks, from_fixed_point, + generate_embeddings_huggingface, group_shares_by_id, + load_file, to_fixed_point) -__version__ = "0.1.0" \ No newline at end of file +__version__ = "0.1.0" diff --git a/src/nilrag/nildb_requests.py b/src/nilrag/nildb_requests.py index d5f96c3..31be314 100644 --- a/src/nilrag/nildb_requests.py +++ b/src/nilrag/nildb_requests.py @@ -1,12 +1,19 @@ -import requests -import json +""" +nilDB class definition for secure data storage and RAG inference. +""" + import base64 -import jwt +import json import time -from ecdsa import SigningKey, SECP256k1 +from typing import Optional from uuid import uuid4 -class Node: +import jwt +import requests +from ecdsa import SECP256k1, SigningKey + + +class Node: # pylint: disable=too-few-public-methods """ Represents a node in the NilDB network. 
@@ -22,7 +29,17 @@ class Node: diff_query_id (str, optional): ID of the differences query for this node """ - def __init__(self, url, node_id, org, bearer_token=None, schema_id=None, diff_query_id=None): + def __init__( + # pylint: disable=too-many-positional-arguments + # pylint: disable=too-many-arguments + self, + url: str, + node_id: Optional[str] = None, + org: Optional[str] = None, + bearer_token: Optional[str] = None, + schema_id: Optional[str] = None, + diff_query_id: Optional[str] = None, + ): """ Initialize a new Node instance. @@ -33,14 +50,13 @@ def __init__(self, url, node_id, org, bearer_token=None, schema_id=None, diff_qu schema_id (str, optional): Associated schema ID diff_query_id (str, optional): Associated differences query ID """ - self.url = url[:-1] if url.endswith('/') else url - self.node_id = str(node_id) - self.org = str(org) + self.url = url[:-1] if url.endswith("/") else url + self.node_id = node_id + self.org = org self.bearer_token = bearer_token self.schema_id = schema_id self.diff_query_id = diff_query_id - def __repr__(self): """ Returns a string representation of the Node instance. @@ -55,6 +71,7 @@ def __repr__(self): \nSchema ID: {self.schema_id}\ \nDifferences Query ID: {self.diff_query_id}" + class NilDB: """ A class to manage distributed nilDB nodes for secure data storage and retrieval. @@ -66,7 +83,7 @@ class NilDB: nodes (list): List of Node instances representing the distributed nilDB nodes """ - def __init__(self, nodes): + def __init__(self, nodes: list[Node]): """ Initialize NilDB with a list of nilDB nodes. @@ -77,9 +94,11 @@ def __init__(self, nodes): def __repr__(self): """Return string representation of NilDB showing all nodes.""" - return "\n".join(f"\nNode({i}):\n{repr(node)}" for i, node in enumerate(self.nodes)) + return "\n".join( + f"\nNode({i}):\n{repr(node)}" for i, node in enumerate(self.nodes) + ) - def init_schema(self): + def _init_schema(self): """ Initialize the nilDB schema across all nodes. 
@@ -90,14 +109,16 @@ def init_schema(self): Raises: ValueError: If schema creation fails on any nilDB node """ - schema_id = str(uuid4()) # the schema_id is assumed to be the same across different nildb instances + schema_id = str( + uuid4() + ) # the schema_id is assumed to be the same across different nildb instances for node in self.nodes: node.schema_id = schema_id url = node.url + "/schemas" headers = { "Authorization": "Bearer " + str(node.bearer_token), - "Content-Type": "application/json" + "Content-Type": "application/json", } payload = { "_id": schema_id, @@ -111,35 +132,32 @@ def init_schema(self): "items": { "type": "object", "properties": { - "_id": { - "type": "string", - "format": "uuid", - "coerce": True - }, + "_id": {"type": "string", "format": "uuid", "coerce": True}, "embedding": { "description": "Chunks embeddings", "type": "array", - "items": { - "type": "integer" - } + "items": {"type": "integer"}, }, "chunk": { "type": "string", - "description": "Chunks of text inserted by the user" - } + "description": "Chunks of text inserted by the user", + }, }, "required": ["_id", "embedding", "chunk"], - "additionalProperties": False - } - } + "additionalProperties": False, + }, + }, } - response = requests.post(url, headers=headers, data=json.dumps(payload)) + response = requests.post( + url, headers=headers, data=json.dumps(payload), timeout=3600 + ) if response.status_code != 200: - raise ValueError(f"Error in POST request: {response.status_code}, {response.text}") + raise ValueError( + f"Error in POST request: {response.status_code}, {response.text}" + ) print("Response JSON:", response.json()) - - def init_diff_query(self): + def _init_diff_query(self): """ Initialize the difference query across all nilDB nodes. 
@@ -149,14 +167,16 @@ def init_diff_query(self): Raises: ValueError: If query creation fails on any nilDB node """ - diff_query_id = str(uuid4()) # the diff_query_id is assumed to be the same across different nildb instances + diff_query_id = str( + uuid4() + ) # the diff_query_id is assumed to be the same across different nildb instances for node in self.nodes: node.diff_query_id = diff_query_id url = node.url + "/queries" headers = { "Authorization": "Bearer " + str(node.bearer_token), - "Content-Type": "application/json" + "Content-Type": "application/json", } payload = { "_id": node.diff_query_id, @@ -167,17 +187,11 @@ def init_diff_query(self): "query_embedding": { "description": "The query embedding", "type": "array", - "items": { - "type": "number" - } + "items": {"type": "number"}, } }, "pipeline": [ - { - "$addFields": { - "query_embedding": "##query_embedding" - } - }, + {"$addFields": {"query_embedding": "##query_embedding"}}, { "$project": { "_id": 1, @@ -185,36 +199,32 @@ def init_diff_query(self): "$map": { "input": { "$zip": { - "inputs": [ - "$embedding", - "$query_embedding" - ] + "inputs": ["$embedding", "$query_embedding"] } }, "as": "pair", "in": { "$subtract": [ - { - "$arrayElemAt": ["$$pair", 0] - }, - { - "$arrayElemAt": ["$$pair", 1] - } + {"$arrayElemAt": ["$$pair", 0]}, + {"$arrayElemAt": ["$$pair", 1]}, ] - } + }, } - } + }, } - } - ] + }, + ], } - response = requests.post(url, headers=headers, data=json.dumps(payload)) + response = requests.post( + url, headers=headers, data=json.dumps(payload), timeout=3600 + ) if response.status_code != 200: - raise ValueError(f"Error in POST request: {response.status_code}, {response.text}") + raise ValueError( + f"Error in POST request: {response.status_code}, {response.text}" + ) print("Response JSON:", response.json()) - - def generate_jwt(self, secret_key, ttl=3600): + def generate_jwt(self, secret_key: str, ttl: int = 3600): """ Create JWTs signed with ES256K for multiple node_ids. 
@@ -232,19 +242,14 @@ def generate_jwt(self, secret_key, ttl=3600): payload = { "iss": node.org, "aud": node.node_id, - "exp": int(time.time()) + ttl + "exp": int(time.time()) + ttl, } # Create and sign the JWT - node.bearer_token = jwt.encode( - payload, - signer.to_pem(), - algorithm="ES256K" - ) + node.bearer_token = jwt.encode(payload, signer.to_pem(), algorithm="ES256K") print(f"Generated JWT for {node.node_id}: {node.bearer_token}") - - def diff_query_execute(self, nilql_query_embedding): + def diff_query_execute(self, nilql_query_embedding: list[list[bytes]]): """ Execute the difference query across all nilDB nodes. @@ -323,22 +328,24 @@ def diff_query_execute(self, nilql_query_embedding): # Authorization header with the provided token headers = { "Authorization": "Bearer " + str(node.bearer_token), - "Content-Type": "application/json" + "Content-Type": "application/json", } diff_query_id = node.diff_query_id # Schema payload payload = { - "id": str(diff_query_id), - "variables": { - "query_embedding": query_embedding_shares[i] - } + "id": str(diff_query_id), + "variables": {"query_embedding": query_embedding_shares[i]}, } # Send POST request - response = requests.post(url, headers=headers, data=json.dumps(payload)) + response = requests.post( + url, headers=headers, data=json.dumps(payload), timeout=3600 + ) if response.status_code != 200: - raise ValueError(f"Error in POST request: {response.status_code}, {response.text}") + raise ValueError( + f"Error in POST request: {response.status_code}, {response.text}" + ) try: difference_shares_party_i = response.json().get("data") if difference_shares_party_i is None: @@ -350,8 +357,7 @@ def diff_query_execute(self, nilql_query_embedding): return difference_shares - - def chunk_query_execute(self, chunk_ids): + def chunk_query_execute(self, chunk_ids: list[str]): """ Retrieve chunks by their IDs from all nilDB nodes. 
@@ -363,12 +369,12 @@ def chunk_query_execute(self, chunk_ids): [ # Shares from node 1 [ - { - '_id': '123e4567-e89b-12d3-a456-426614174000', # Same ID across all nodes for the same secret + { # Same ID across all nodes for the same secret + '_id': '123e4567-e89b-12d3-a456-426614174000', 'chunk': 'base64EncodedShare1ForChunk1' }, - { - '_id': '987fcdeb-51a2-43d7-9012-345678901234', # Same ID across all nodes for the same secret + { # Same ID across all nodes for the same secret + '_id': '987fcdeb-51a2-43d7-9012-345678901234', 'chunk': 'base64EncodedShare1ForChunk2' } ], @@ -413,23 +419,20 @@ def chunk_query_execute(self, chunk_ids): # Authorization header with the provided token headers = { "Authorization": "Bearer " + str(node.bearer_token), - "Content-Type": "application/json" + "Content-Type": "application/json", } # Schema payload - payload = { - "schema": node.schema_id, - "filter": { - "_id": { - "$in": chunk_ids - } - } - } + payload = {"schema": node.schema_id, "filter": {"_id": {"$in": chunk_ids}}} # Send POST request - response = requests.post(url, headers=headers, data=json.dumps(payload)) + response = requests.post( + url, headers=headers, data=json.dumps(payload), timeout=3600 + ) if response.status_code != 200: - raise ValueError(f"Error in POST request: {response.status_code}, {response.text}") + raise ValueError( + f"Error in POST request: {response.status_code}, {response.text}" + ) try: chunk_shares_party_i = response.json().get("data") if chunk_shares_party_i is None: @@ -441,18 +444,22 @@ def chunk_query_execute(self, chunk_ids): return chunk_shares - - - def upload_data(self, lst_embedding_shares, lst_chunk_shares): + def upload_data( + self, lst_embedding_shares: list[list[int]], lst_chunk_shares: list[list[bytes]] + ): """ Upload embeddings and chunks to all nilDB nodes. Args: - lst_embedding_shares (list): List of embedding shares for each document, e.g. 
for 3 nodes: + lst_embedding_shares (list): List of embedding shares for each document, + e.g. for 3 nodes: [ - [ # First document's embedding vector (384 dimensions) - [1234567890, 987654321, 2072745085], # First dimension split into 3 shares (sum mod 2^32) - [3141592653, 2718281828, 3435092815], # Second dimension split into 3 shares (sum mod 2^32) + # First document's embedding vector (384 dimensions) + [ + # First dimension split into 3 shares (sum mod 2^32) + [1234567890, 987654321, 2072745085], + # Second dimension split into 3 shares (sum mod 2^32) + [3141592653, 2718281828, 3435092815], # ... 382 more dimensions, each split into 3 shares ], # More documents... @@ -474,7 +481,8 @@ def upload_data(self, lst_embedding_shares, lst_chunk_shares): >>> >>> # Generate embeddings and chunks >>> chunks = create_chunks(paragraphs, chunk_size=50, overlap=10) - >>> embeddings = generate_embeddings_huggingface(chunks) # Each embedding is 384-dimensional + >>> # Each embedding is 384-dimensional + >>> embeddings = generate_embeddings_huggingface(chunks) >>> >>> # Create shares >>> chunks_shares = [nilql.encrypt(xor_key, chunk) for chunk in chunks] @@ -487,13 +495,14 @@ def upload_data(self, lst_embedding_shares, lst_chunk_shares): AssertionError: If number of embeddings and chunks don't match ValueError: If upload fails on any nilDB node """ - # lst_embeddings_shares [20][384][3] - # lst_chunks_shares [20][3][268] - # Check sizes: same number of embeddings and chunks - assert len(lst_embedding_shares) == len(lst_chunk_shares), f"Mismatch: {len(lst_embedding_shares)} embeddings vs {len(lst_chunk_shares)} chunks." + assert len(lst_embedding_shares) == len( + lst_chunk_shares + ), f"Mismatch: {len(lst_embedding_shares)} embeddings vs {len(lst_chunk_shares)} chunks." 
- for (embedding_shares, chunk_shares) in zip(lst_embedding_shares, lst_chunk_shares): + for embedding_shares, chunk_shares in zip( + lst_embedding_shares, lst_chunk_shares + ): # embeddings_shares [384][3] # chunks_shares [3][268] @@ -504,13 +513,15 @@ def upload_data(self, lst_embedding_shares, lst_chunk_shares): # Authorization header with the provided token headers = { "Authorization": "Bearer " + str(node.bearer_token), - "Content-Type": "application/json" + "Content-Type": "application/json", } # Join the shares of one embedding in one vector node_i_embedding_shares = [e[i] for e in embedding_shares] node_i_chunk_share = chunk_shares[i] # encode to be parsed in json - encoded_node_i_chunk_share = base64.b64encode(node_i_chunk_share).decode('utf-8') + encoded_node_i_chunk_share = base64.b64encode( + node_i_chunk_share + ).decode("utf-8") # Schema payload payload = { "schema": node.schema_id, @@ -518,20 +529,112 @@ def upload_data(self, lst_embedding_shares, lst_chunk_shares): { "_id": data_id, "embedding": node_i_embedding_shares, - "chunk": encoded_node_i_chunk_share + "chunk": encoded_node_i_chunk_share, } - ] + ], } # Send POST request - response = requests.post(url, headers=headers, data=json.dumps(payload)) + response = requests.post( + url, headers=headers, data=json.dumps(payload), timeout=3600 + ) if response.status_code != 200: - raise ValueError(f"Error in POST request: {response.status_code}, {response.text}") - else: - print( - { - "status_code": response.status_code, - "message": "Success", - "response_json": response.json() - } + raise ValueError( + f"Error in POST request: {response.status_code}, {response.text}" ) + print( + { + "status_code": response.status_code, + "message": "Success", + "response_json": response.json(), + } + ) + + def nilai_chat_completion( + # pylint: disable=too-many-positional-arguments + # pylint: disable=too-many-arguments + self, + nilai_url: str, + token: str, + messages: list[dict], + model: str = 
"Llama-3.2-1B-Instruct", + temperature: float = 0.7, + max_tokens: int = 2048, + stream: bool = False, + ) -> dict: + """ + Query the chat completion endpoint of the nilai API. + + Args: + nilai_url (str): Base URL for the nilai API + token (str): Bearer token for authentication + messages (list[dict]): List of message dictionaries (role and content) + model (str): AI model to use for completion (default: "Llama-3.2-1B-Instruct") + temperature (float): Sampling temperature (default: 0.7) + max_tokens (int): Maximum tokens to generate (default: 2048) + stream (bool): Whether to stream the response (default: False) + + Returns: + dict: Chat response from the nilai API + """ + # Ensure URL format + nilai_url = nilai_url.rstrip("/") + "/v1/chat/completions" + + # Authorization header + headers = { + "Authorization": f"Bearer {token}", + "accept": "application/json", + "Content-Type": "application/json", + } + + # Ensure messages include required roles + has_system = any(message.get("role") == "system" for message in messages) + has_user = any(message.get("role") == "user" for message in messages) + + if not has_system: + messages.insert( + 0, {"role": "system", "content": "You are a helpful assistant."} + ) + if not has_user: + messages.append({"role": "user", "content": "What is your name?"}) + + # Construct the `nilrag` payload + nilrag = { + "nodes": [ + { + "url": node.url, + "bearer_token": node.bearer_token, + "schema_id": node.schema_id, + "diff_query_id": node.diff_query_id, + } + for node in self.nodes + ] + } + + # Construct payload + payload = { + "model": model, + "messages": messages, + "temperature": temperature, + "max_tokens": max_tokens, + "stream": stream, + "nilrag": nilrag, + } + + try: + # Send POST request + response = requests.post( + nilai_url, headers=headers, json=payload, timeout=3600 + ) + + # Handle response + if response.status_code != 200: + raise ValueError( + f"Error in POST request: {response.status_code}, {response.text}" + ) + + 
return response.json() # Return the parsed JSON response + except Exception as e: + raise RuntimeError( + f"An error occurred while querying the chat completion endpoint: {str(e)}" + ) from e diff --git a/src/nilrag/util.py b/src/nilrag/util.py index 350f74c..0ff713a 100644 --- a/src/nilrag/util.py +++ b/src/nilrag/util.py @@ -1,9 +1,16 @@ -from sentence_transformers import SentenceTransformer -import numpy as np +""" +Utility functions for nilRAG. +""" + +from typing import Union + import nilql +import numpy as np +from sentence_transformers import SentenceTransformer + # Load text from file -def load_file(file_path): +def load_file(file_path: str): """ Load text from a file and split it into paragraphs. @@ -13,13 +20,15 @@ def load_file(file_path): Returns: list: List of non-empty paragraphs with whitespace stripped """ - with open(file_path, 'r', encoding='utf-8') as f: + with open(file_path, "r", encoding="utf-8") as f: text = f.read() - paragraphs = text.split('\n\n') # Split by double newline to get paragraphs - return [para.strip() for para in paragraphs if para.strip()] # Clean empty paragraphs + paragraphs = text.split("\n\n") # Split by double newline to get paragraphs + return [ + para.strip() for para in paragraphs if para.strip() + ] # Clean empty paragraphs -def create_chunks(paragraphs, chunk_size=500, overlap=100): +def create_chunks(paragraphs: list[str], chunk_size: int = 500, overlap: int = 100): """ Split paragraphs into overlapping chunks of words. 
@@ -35,18 +44,21 @@ def create_chunks(paragraphs, chunk_size=500, overlap=100): for para in paragraphs: words = para.split() for i in range(0, len(words), chunk_size - overlap): - chunk = ' '.join(words[i:i + chunk_size]) + chunk = " ".join(words[i : i + chunk_size]) chunks.append(chunk) return chunks -def generate_embeddings_huggingface(chunks_or_query, model_name='sentence-transformers/all-MiniLM-L6-v2'): +def generate_embeddings_huggingface( + chunks_or_query: Union[str, list], + model_name: str = "sentence-transformers/all-MiniLM-L6-v2", +): """ Generate embeddings for text using a HuggingFace sentence transformer model. Args: chunks_or_query (str or list): Text string(s) to generate embeddings for - model_name (str, optional): Name of the HuggingFace model to use. + model_name (str, optional): Name of the HuggingFace model to use. Defaults to 'sentence-transformers/all-MiniLM-L6-v2'. Returns: @@ -57,7 +69,7 @@ def generate_embeddings_huggingface(chunks_or_query, model_name='sentence-transf return embeddings -def euclidean_distance(a, b): +def euclidean_distance(a: list, b: list): """ Calculate Euclidean distance between two vectors. @@ -71,7 +83,9 @@ def euclidean_distance(a, b): return np.linalg.norm(np.array(a) - np.array(b)) -def find_closest_chunks(query_embedding, chunks, embeddings, top_k=2): +def find_closest_chunks( + query_embedding: list, chunks: list, embeddings: list, top_k: int = 2 +): """ Find chunks closest to a query embedding using Euclidean distance. @@ -88,32 +102,33 @@ def find_closest_chunks(query_embedding, chunks, embeddings, top_k=2): sorted_indices = np.argsort(distances) return [(chunks[idx], distances[idx]) for idx in sorted_indices[:top_k]] -def group_shares_by_id(shares_per_party, transform_share_fn): + +def group_shares_by_id(shares_per_party: list, transform_share_fn: callable): """ Groups shares by their ID and applies a transform function to each share. 
-    
+
     Args:
         shares_per_party (list): List of shares from each party
         transform_share_fn (callable): Function to transform each share value
-    
+
     Returns:
         dict: Dictionary mapping IDs to list of transformed shares
     """
     shares_by_id = {}
     for party_shares in shares_per_party:
         for share in party_shares:
-            id = share['_id']
-            if id not in shares_by_id:
-                shares_by_id[id] = []
-            shares_by_id[id].append(transform_share_fn(share))
+            share_id = share["_id"]
+            if share_id not in shares_by_id:
+                shares_by_id[share_id] = []
+            shares_by_id[share_id].append(transform_share_fn(share))
     return shares_by_id
 
 
 PRECISION = 7
-SCALING_FACTOR = 10 ** PRECISION
+SCALING_FACTOR = 10**PRECISION
 
-def to_fixed_point(value):
+def to_fixed_point(value: float) -> int:
     """
     Convert a floating-point value to fixed-point representation.
 
@@ -126,8 +141,8 @@ def to_fixed_point(value):
     return int(round(value * SCALING_FACTOR))
 
 
-def from_fixed_point(value):
-    """
+def from_fixed_point(value: int) -> float:
+    """
     Convert a fixed-point value back to floating-point.
 
     Args:
@@ -139,7 +154,7 @@ def from_fixed_point(value):
     return value / SCALING_FACTOR
 
 
-def encrypt_float_list(sk, lst):
+def encrypt_float_list(sk, lst: list[float]) -> list[list]:
     """
     Encrypt a list of floats using a secret key.
 
@@ -153,7 +168,7 @@ def encrypt_float_list(sk, lst):
     return [nilql.encrypt(sk, to_fixed_point(l)) for l in lst]
 
 
-def decrypt_float_list(sk, lst):
+def decrypt_float_list(sk, lst: list[list]) -> list[float]:
     """
     Decrypt a list of encrypted fixed-point values to floats.
 
@@ -167,7 +182,7 @@ def decrypt_float_list(sk, lst):
     return [from_fixed_point(nilql.decrypt(sk, l)) for l in lst]
 
 
-def encrypt_string_list(sk, lst):
+def encrypt_string_list(sk, lst: list) -> list:
     """
     Encrypt a list of strings using a secret key.
@@ -181,7 +196,7 @@ def encrypt_string_list(sk, lst): return [nilql.encrypt(sk, l) for l in lst] -def decrypt_string_list(sk, lst): +def decrypt_string_list(sk, lst: list) -> list: """ Decrypt a list of encrypted strings. @@ -193,22 +208,3 @@ def decrypt_string_list(sk, lst): list: List of decrypted strings """ return [nilql.decrypt(sk, l) for l in lst] - - -# if __name__ == "__main__": -# secret_key = "b1f6a40ae05a69d8fefd43af420b5ecb1a75e736eb2cce3d34eebfe9b45fb688" -# org_did = "did:nil:testnet:nillion12d545xtad899pqp6xzvvnwqdkwlz0klysxljzn" -# node_ids = [ -# "did:nil:testnet:nillion15lcjxgafgvs40rypvqu73gfvx6pkx7ugdja50d", -# "did:nil:testnet:nillion17bkjqvcqyfjdnf04hfztrh9rfkj9qfjlzjqvn2", -# "did:nil:testnet:nillion18zmcgyfjqz94lq7tfd8w4qvxdw99jfdmznd7hv" -# ] -# generate_jwt(secret_key, org_did, node_ids) - - -# nilrag.generate_jwt(secret_key, org, ttl, node_config) -# Generates and outputs new jwt -# Stores it in nilDB nodes -# nilrag.query(jwt, query_string, schema_id, query_id, tee_config, node_config) -# Generates json request for chat completion endpoint -# Calls chat completion endpoint (link with nilAI/TEE) diff --git a/uv.lock b/uv.lock index 6c9b83b..68c548c 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,18 @@ version = 1 requires-python = ">=3.11" +resolution-markers = [ + "python_full_version < '3.12'", + "python_full_version >= '3.12'", +] + +[[package]] +name = "astroid" +version = "3.3.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/80/c5/5c83c48bbf547f3dd8b587529db7cf5a265a3368b33e85e76af8ff6061d3/astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b", size = 398196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/28/0bc8a17d6cd4cc3c79ae41b7105a2b9a327c110e5ddd37a8a27b29a5c8a2/astroid-3.3.8-py3-none-any.whl", hash = "sha256:187ccc0c248bfbba564826c26f070494f7bc964fd286b6d9fff4420e55de828c", size = 275153 }, +] 
[[package]] name = "bcl" @@ -29,6 +42,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/45/302d6712a8ff733a259446a7d24ff3c868715103032f50eef0d93ba70221/bcl-2.3.1-cp39-abi3-win_amd64.whl", hash = "sha256:52cf26c4ecd76e806c6576c4848633ff44ebfff528fca63ad0e52085b6ba5aa9", size = 96394 }, ] +[[package]] +name = "black" +version = "24.10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "mypy-extensions" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d8/0d/cc2fb42b8c50d80143221515dd7e4766995bd07c56c9a3ed30baf080b6dc/black-24.10.0.tar.gz", hash = "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875", size = 645813 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/cc/7496bb63a9b06a954d3d0ac9fe7a73f3bf1cd92d7a58877c27f4ad1e9d41/black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad", size = 1607468 }, + { url = "https://files.pythonhosted.org/packages/2b/e3/69a738fb5ba18b5422f50b4f143544c664d7da40f09c13969b2fd52900e0/black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50", size = 1437270 }, + { url = "https://files.pythonhosted.org/packages/c9/9b/2db8045b45844665c720dcfe292fdaf2e49825810c0103e1191515fc101a/black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392", size = 1737061 }, + { url = "https://files.pythonhosted.org/packages/a3/95/17d4a09a5be5f8c65aa4a361444d95edc45def0de887810f508d3f65db7a/black-24.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175", size = 1423293 }, + { url = 
"https://files.pythonhosted.org/packages/90/04/bf74c71f592bcd761610bbf67e23e6a3cff824780761f536512437f1e655/black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3", size = 1644256 }, + { url = "https://files.pythonhosted.org/packages/4c/ea/a77bab4cf1887f4b2e0bce5516ea0b3ff7d04ba96af21d65024629afedb6/black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65", size = 1448534 }, + { url = "https://files.pythonhosted.org/packages/4e/3e/443ef8bc1fbda78e61f79157f303893f3fddf19ca3c8989b163eb3469a12/black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f", size = 1761892 }, + { url = "https://files.pythonhosted.org/packages/52/93/eac95ff229049a6901bc84fec6908a5124b8a0b7c26ea766b3b8a5debd22/black-24.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:30d2c30dc5139211dda799758559d1b049f7f14c580c409d6ad925b74a4208a8", size = 1434796 }, + { url = "https://files.pythonhosted.org/packages/d0/a0/a993f58d4ecfba035e61fca4e9f64a2ecae838fc9f33ab798c62173ed75c/black-24.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cbacacb19e922a1d75ef2b6ccaefcd6e93a2c05ede32f06a21386a04cedb981", size = 1643986 }, + { url = "https://files.pythonhosted.org/packages/37/d5/602d0ef5dfcace3fb4f79c436762f130abd9ee8d950fa2abdbf8bbc555e0/black-24.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1f93102e0c5bb3907451063e08b9876dbeac810e7da5a8bfb7aeb5a9ef89066b", size = 1448085 }, + { url = "https://files.pythonhosted.org/packages/47/6d/a3a239e938960df1a662b93d6230d4f3e9b4a22982d060fc38c42f45a56b/black-24.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddacb691cdcdf77b96f549cf9591701d8db36b2f19519373d60d31746068dbf2", size = 1760928 }, + { url = 
"https://files.pythonhosted.org/packages/dd/cf/af018e13b0eddfb434df4d9cd1b2b7892bab119f7a20123e93f6910982e8/black-24.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:680359d932801c76d2e9c9068d05c6b107f2584b2a5b88831c83962eb9984c1b", size = 1436875 }, + { url = "https://files.pythonhosted.org/packages/8d/a7/4b27c50537ebca8bec139b872861f9d2bf501c5ec51fcf897cb924d9e264/black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d", size = 206898 }, +] + [[package]] name = "certifi" version = "2024.8.30" @@ -137,6 +178,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/9b/08c0432272d77b04803958a4598a51e2a4b51c06640af8b8f0f908c18bf2/charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079", size = 49446 }, ] +[[package]] +name = "click" +version = "8.1.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "platform_system == 'Windows'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -146,6 +199,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "dill" +version = "0.3.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/70/43/86fe3f9e130c4137b0f1b50784dd70a5087b911fe07fa81e53e0c4c47fea/dill-0.3.9.tar.gz", hash = "sha256:81aa267dddf68cbfe8029c42ca9ec6a4ab3b22371d1c450abc54422577b4512c", size = 187000 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/d1/e73b6ad76f0b1fb7f23c35c6d95dbc506a9c8804f43dda8cb5b0fa6331fd/dill-0.3.9-py3-none-any.whl", hash = "sha256:468dff3b89520b474c0397703366b7b95eebe6303f108adf9b19da1f702be87a", size = 119418 }, +] + [[package]] name = "ecdsa" version = "0.19.0" @@ -203,6 +265,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, ] +[[package]] +name = "isort" +version = "5.13.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/87/f9/c1eb8635a24e87ade2efce21e3ce8cd6b8630bb685ddc9cdaca1349b2eb5/isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109", size = 175303 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/b3/8def84f539e7d2289a02f0524b944b15d7c75dab7628bedf1c4f0992029c/isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6", size = 92310 }, +] + [[package]] name = "jinja2" version = "3.1.4" @@ -272,6 +343,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739 }, ] +[[package]] +name = "mccabe" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/e7/ff/0ffefdcac38932a54d2b5eed4e0ba8a408f215002cd178ad1df0f2806ff8/mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325", size = 9658 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/1a/1f68f9ba0c207934b35b86a8ca3aad8395a3d6dd7921c0686e23853ff5a9/mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e", size = 7350 }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -281,6 +361,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 }, ] +[[package]] +name = "mypy-extensions" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/a4/1ab47638b92648243faf97a5aeb6ea83059cc3624972ab6b8d2316078d3f/mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782", size = 4433 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/e2/5d3f6ada4297caebe1a2add3b126fe800c96f56dbe5d1988a2cbe0b267aa/mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", size = 4695 }, +] + [[package]] name = "networkx" version = "3.4.2" @@ -314,6 +403,13 @@ dependencies = [ { name = "sentence-transformers" }, ] +[package.dependency-groups] +dev = [ + { name = "black" }, + { name = "isort" }, + { name = "pylint" }, +] + [package.metadata] requires-dist = [ { name = "ecdsa", specifier = ">=0.19.0" }, @@ -323,6 +419,13 @@ requires-dist = [ { name = "sentence-transformers", specifier = ">=3.3.1" }, ] +[package.metadata.dependency-groups] +dev = [ + { name = "black", specifier = ">=24.10.0" }, + { name = "isort", specifier = 
">=5.13.2" }, + { name = "pylint", specifier = ">=3.3.3" }, +] + [[package]] name = "numpy" version = "2.2.0" @@ -500,6 +603,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, ] +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, +] + [[package]] name = "pillow" version = "11.0.0" @@ -549,6 +661,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/85/9c33f2517add612e17f3381aee7c4072779130c634921a756c97bc29fb49/pillow-11.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:75acbbeb05b86bc53cbe7b7e6fe00fbcf82ad7c684b3ad82e3d711da9ba287d3", size = 2256828 }, ] +[[package]] +name = "platformdirs" +version = "4.3.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/fc/128cc9cb8f03208bdbf93d3aa862e16d376844a14f9a0ce5cf4507372de4/platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907", size = 21302 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/a6/bc1012356d8ece4d66dd75c4b9fc6c1f6650ddd5991e421177d9f8f671be/platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb", size = 18439 }, +] + [[package]] name = 
"pycparser" version = "2.22" @@ -567,6 +688,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, ] +[[package]] +name = "pylint" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "astroid" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "dill" }, + { name = "isort" }, + { name = "mccabe" }, + { name = "platformdirs" }, + { name = "tomlkit" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/fd/e9a739afac274a39596bbe562e9d966db6f3917fdb2bd7322ffc56da0ba2/pylint-3.3.3.tar.gz", hash = "sha256:07c607523b17e6d16e2ae0d7ef59602e332caa762af64203c24b41c27139f36a", size = 1516550 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/e1/26d55acea92b1ea4d33672e48f09ceeb274e84d7d542a4fb9a32a556db46/pylint-3.3.3-py3-none-any.whl", hash = "sha256:26e271a2bc8bce0fc23833805a9076dd9b4d5194e2a02164942cb3cdc37b4183", size = 521918 }, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -862,6 +1001,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/44/69/d21eb253fa91622da25585d362a874fa4710be600f0ea9446d8d0217cec1/tokenizers-0.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c", size = 2389192 }, ] +[[package]] +name = "tomlkit" +version = "0.13.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b1/09/a439bec5888f00a54b8b9f05fa94d7f901d6735ef4e55dcec9bc37b5d8fa/tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79", size = 192885 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/b6/a447b5e4ec71e13871be01ba81f5dfc9d0af7e473da256ff46bc0e24026f/tomlkit-0.13.2-py3-none-any.whl", 
hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde", size = 37955 }, +] + [[package]] name = "torch" version = "2.5.1"