Commit
Merge branch 'dev' of github.com:blakeblackshear/frigate into testing
ivanjx committed Oct 11, 2024
2 parents 66c2d04 + ee8091b commit b2f8f08
Showing 34 changed files with 1,065 additions and 672 deletions.
1 change: 1 addition & 0 deletions .cspell/frigate-dictionary.txt
@@ -212,6 +212,7 @@ rcond
RDONLY
rebranded
referer
reindex
Reolink
restream
restreamed
11 changes: 0 additions & 11 deletions docker/main/Dockerfile
@@ -180,9 +180,6 @@ RUN /build_pysqlite3.sh
COPY docker/main/requirements-wheels.txt /requirements-wheels.txt
RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt

COPY docker/main/requirements-wheels-post.txt /requirements-wheels-post.txt
RUN pip3 wheel --no-deps --wheel-dir=/wheels-post -r /requirements-wheels-post.txt


# Collect deps in a single layer
FROM scratch AS deps-rootfs
@@ -225,14 +222,6 @@ RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \
python3 -m pip install --upgrade pip && \
pip3 install -U /deps/wheels/*.whl

# We have to uninstall this dependency specifically
# as it will break onnxruntime-openvino
RUN pip3 uninstall -y onnxruntime

RUN --mount=type=bind,from=wheels,source=/wheels-post,target=/deps/wheels \
python3 -m pip install --upgrade pip && \
pip3 install -U /deps/wheels/*.whl

COPY --from=deps-rootfs / /

RUN ldconfig
3 changes: 0 additions & 3 deletions docker/main/requirements-wheels-post.txt

This file was deleted.

5 changes: 3 additions & 2 deletions docker/main/requirements-wheels.txt
@@ -30,11 +30,12 @@ norfair == 2.2.*
setproctitle == 1.3.*
ws4py == 0.5.*
unidecode == 1.3.*
# OpenVino (ONNX installed in wheels-post)
# OpenVino & ONNX
openvino == 2024.3.*
onnxruntime-openvino == 1.19.* ; platform_machine == 'x86_64'
onnxruntime == 1.19.* ; platform_machine == 'aarch64'
# Embeddings
transformers == 4.45.*
onnx_clip == 4.0.*
# Generative AI
google-generativeai == 0.8.*
ollama == 0.3.*
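The split between `onnxruntime-openvino` and `onnxruntime` above relies on PEP 508 environment markers (`platform_machine == '…'`), which pip evaluates against the installing machine. A minimal sketch of the selection pip performs (the helper function is illustrative, not part of Frigate):

```python
import platform

def select_onnx_requirement(machine: str) -> str:
    # Mirrors the environment markers in requirements-wheels.txt:
    # onnxruntime-openvino on x86_64, plain onnxruntime elsewhere.
    if machine == "x86_64":
        return "onnxruntime-openvino == 1.19.*"
    return "onnxruntime == 1.19.*"

# pip evaluates the marker against the real platform_machine at install time.
print(select_onnx_requirement(platform.machine()))
```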
4 changes: 4 additions & 0 deletions docs/docs/configuration/reference.md
@@ -518,6 +518,10 @@ semantic_search:
  enabled: False
  # Optional: Re-index embeddings database from historical tracked objects (default: shown below)
  reindex: False
  # Optional: Set device used to run embeddings, options are AUTO, CPU, GPU. (default: shown below)
  device: "AUTO"
  # Optional: Set the model size used for embeddings. (default: shown below)
  model_size: "small"

# Optional: Configuration for AI generated tracked object descriptions
# NOTE: Semantic Search must be enabled for this to do anything.
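Taken together, the new semantic search options added to the reference config look like this in a Frigate config file (values shown are the documented defaults):

```yaml
semantic_search:
  enabled: False
  reindex: False
  device: "AUTO"       # AUTO, CPU, or GPU
  model_size: "small"  # "small" (quantized, CPU-friendly) or "large" (full model, GPU)
```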
24 changes: 19 additions & 5 deletions docs/docs/configuration/semantic_search.md
@@ -5,7 +5,7 @@ title: Using Semantic Search

Semantic Search in Frigate allows you to find tracked objects within your review items using either the image itself, a user-defined text description, or an automatically generated one. This feature works by creating _embeddings_ — numerical vector representations — for both the images and text descriptions of your tracked objects. By comparing these embeddings, Frigate assesses their similarities to deliver relevant search results.

Frigate has support for two models to create embeddings, both of which run locally: [OpenAI CLIP](https://openai.com/research/clip) and [all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). Embeddings are then saved to Frigate's database.
Frigate has support for [Jina AI's CLIP model](https://huggingface.co/jinaai/jina-clip-v1) to create embeddings, which runs locally. Embeddings are then saved to Frigate's database.

Semantic Search is accessed via the _Explore_ view in the Frigate UI.

@@ -27,13 +27,27 @@ If you are enabling the Search feature for the first time, be advised that Friga

:::

### OpenAI CLIP
### Jina AI CLIP

This model is able to embed both images and text into the same vector space, which allows `image -> image` and `text -> image` similarity searches. Frigate uses this model on tracked objects to encode the thumbnail image and store it in the database. When searching for tracked objects via text in the search box, Frigate will perform a `text -> image` similarity search against this embedding. When clicking "Find Similar" in the tracked object detail pane, Frigate will perform an `image -> image` similarity search to retrieve the closest matching thumbnails.
:::tip

The CLIP models are downloaded in ONNX format, which means they will be accelerated using GPU hardware when available. This depends on the Docker build that is used. See [the object detector docs](../configuration/object_detectors.md) for more information.

:::

The vision model is able to embed both images and text into the same vector space, which allows `image -> image` and `text -> image` similarity searches. Frigate uses this model on tracked objects to encode the thumbnail image and store it in the database. When searching for tracked objects via text in the search box, Frigate will perform a `text -> image` similarity search against this embedding. When clicking "Find Similar" in the tracked object detail pane, Frigate will perform an `image -> image` similarity search to retrieve the closest matching thumbnails.

The text model is used to embed tracked object descriptions and perform searches against them. Descriptions can be created, viewed, and modified on the Search page when clicking on the gray tracked object chip at the top left of each review item. See [the Generative AI docs](/configuration/genai.md) for more information on how to automatically generate tracked object descriptions.

### all-MiniLM-L6-v2
Differently weighted CLIP models are available and can be selected by setting the `model_size` config option:

```yaml
semantic_search:
  enabled: True
  model_size: small
```

This is a sentence embedding model that has been fine tuned on over 1 billion sentence pairs. This model is used to embed tracked object descriptions and perform searches against them. Descriptions can be created, viewed, and modified on the Search page when clicking on the gray tracked object chip at the top left of each review item. See [the Generative AI docs](/configuration/genai.md) for more information on how to automatically generate tracked object descriptions.
Using `large` as the model size setting employs the full Jina model, which is appropriate for high-performance systems running a GPU. The `small` size uses a quantized version of the model that uses much less RAM and runs faster on CPU, with a negligible difference in embedding quality. Most users will not need to change this setting from the default of `small`.
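Under the hood, a `text -> image` search is a nearest-neighbour lookup over the stored embedding vectors, typically scored by cosine similarity. A simplified sketch of the idea (toy vectors and names, not Frigate's actual implementation):

```python
import math

def cosine_similarity(a, b):
    # Cosine of the angle between two vectors: dot product over norms.
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(x * x for x in b))
    return dot / (na * nb)

# Toy embeddings: in Frigate these would come from the Jina CLIP model.
thumbnails = {
    "event_a": [0.9, 0.1, 0.0],
    "event_b": [0.1, 0.9, 0.2],
}
query = [0.8, 0.2, 0.1]  # embedding of the search text

# The best match is the stored vector most similar to the query.
best = max(thumbnails, key=lambda k: cosine_similarity(query, thumbnails[k]))
assert best == "event_a"
```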

## Usage

4 changes: 1 addition & 3 deletions frigate/api/defs/events_body.py
@@ -11,9 +11,7 @@ class EventsSubLabelBody(BaseModel):


class EventsDescriptionBody(BaseModel):
    description: Union[str, None] = Field(
        title="The description of the event", min_length=1
    )
    description: Union[str, None] = Field(title="The description of the event")


class EventsCreateBody(BaseModel):
32 changes: 12 additions & 20 deletions frigate/api/event.py
@@ -472,7 +472,7 @@ def events_search(request: Request, params: EventsSearchQueryParams = Depends())
            status_code=404,
        )

        thumb_result = context.embeddings.search_thumbnail(search_event)
        thumb_result = context.search_thumbnail(search_event)
        thumb_ids = dict(
            zip(
                [result[0] for result in thumb_result],
@@ -487,7 +487,7 @@ def events_search(request: Request, params: EventsSearchQueryParams = Depends())
        search_types = search_type.split(",")

        if "thumbnail" in search_types:
            thumb_result = context.embeddings.search_thumbnail(query)
            thumb_result = context.search_thumbnail(query)
            thumb_ids = dict(
                zip(
                    [result[0] for result in thumb_result],
@@ -504,7 +504,7 @@ def events_search(request: Request, params: EventsSearchQueryParams = Depends())
            )

        if "description" in search_types:
            desc_result = context.embeddings.search_description(query)
            desc_result = context.search_description(query)
            desc_ids = dict(
                zip(
                    [result[0] for result in desc_result],
@@ -927,27 +927,19 @@ def set_description(

    new_description = body.description

    if new_description is None or len(new_description) == 0:
        return JSONResponse(
            content=(
                {
                    "success": False,
                    "message": "description cannot be empty",
                }
            ),
            status_code=400,
        )

    event.data["description"] = new_description
    event.save()

    # If semantic search is enabled, update the index
    if request.app.frigate_config.semantic_search.enabled:
        context: EmbeddingsContext = request.app.embeddings
        context.embeddings.upsert_description(
            event_id=event_id,
            description=new_description,
        )
        if len(new_description) > 0:
            context.update_description(
                event_id,
                new_description,
            )
        else:
            context.db.delete_embeddings_description(event_ids=[event_id])

    response_message = (
        f"Event {event_id} description is now blank"
@@ -1033,8 +1025,8 @@ def delete_event(request: Request, event_id: str):
    # If semantic search is enabled, update the index
    if request.app.frigate_config.semantic_search.enabled:
        context: EmbeddingsContext = request.app.embeddings
        context.embeddings.delete_thumbnail(id=[event_id])
        context.embeddings.delete_description(id=[event_id])
        context.db.delete_embeddings_thumbnail(event_ids=[event_id])
        context.db.delete_embeddings_description(event_ids=[event_id])
    return JSONResponse(
        content=({"success": True, "message": "Event " + event_id + " deleted"}),
        status_code=200,
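The rewritten `set_description` endpoint no longer rejects empty descriptions: an empty (or null) description is saved as blank and its embedding is removed, while a non-empty one updates the index. The dispatch can be sketched as follows (function and return values are illustrative, not Frigate's actual API):

```python
from typing import Optional

def apply_description(event_data: dict, description: Optional[str]) -> str:
    """Sketch of the updated set_description flow (illustrative only)."""
    new_description = description or ""
    event_data["description"] = new_description
    if new_description:
        # Non-empty: update the embeddings index.
        return "update_embedding"  # context.update_description(event_id, ...)
    # Empty or None: clear the stored embedding instead of erroring out.
    return "delete_embedding"  # context.db.delete_embeddings_description(...)

event = {}
assert apply_description(event, "person at the front door") == "update_embedding"
assert apply_description(event, None) == "delete_embedding"
assert event["description"] == ""
```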
6 changes: 3 additions & 3 deletions frigate/app.py
@@ -581,12 +581,12 @@ def start(self) -> None:
        self.init_recording_manager()
        self.init_review_segment_manager()
        self.init_go2rtc()
        self.start_detectors()
        self.init_embeddings_manager()
        self.bind_database()
        self.check_db_data_migrations()
        self.init_inter_process_communicator()
        self.init_dispatcher()
        self.start_detectors()
        self.init_embeddings_manager()
        self.init_embeddings_client()
        self.start_video_output_processor()
        self.start_ptz_autotracker()
@@ -699,7 +699,7 @@ def stop(self) -> None:

        # Save embeddings stats to disk
        if self.embeddings:
            self.embeddings.save_stats()
            self.embeddings.stop()

        # Stop Communicators
        self.inter_process_communicator.stop()