OpenTrafficCam · randy-seng · Oct 29, 2024 · Sep 20, 2024 · Sep 24, 2024 · Oct 1, 2024
diff --git a/OTVision/detect/yolo.py b/OTVision/detect/yolo.py
@@ -25,7 +25,10 @@
 from time import perf_counter
 from typing import Generator
 
+import av
+import numpy
 import torch
+from numpy import ndarray
 from tqdm import tqdm
 from ultralytics import YOLO as YOLOv8
 from ultralytics.engine.results import Boxes, Results
@@ -45,6 +48,8 @@
 from OTVision.helpers.log import LOGGER_NAME
 from OTVision.track.preprocess import Detection
 
+# Key of the video stream side data entry that rotate() reads the rotation
+# angle from.
+DISPLAYMATRIX = "DISPLAYMATRIX"
+
 log = logging.getLogger(LOGGER_NAME)
 
 
@@ -74,6 +79,29 @@ def detect(self, video: Path) -> list[list[Detection]]:
         pass
 
 
+def rotate(array: ndarray, side_data: dict) -> ndarray:
+    """
+    Rotate a numpy array using the DISPLAYMATRIX rotation angle defined in side_data.
+
+    The rotation is applied in quarter turns with ``numpy.rot90`` in the plane of
+    the first two axes. Positive angles rotate counterclockwise, negative angles
+    clockwise.
+
+    Args:
+        array: to rotate
+        side_data: metadata dictionary to read the angle (in degrees) from
+
+    Returns: rotated array, or the unchanged input array if side_data contains
+        no DISPLAYMATRIX entry
+
+    Raises:
+        ValueError: if the angle is not a multiple of 90 degrees
+
+    """
+    if DISPLAYMATRIX not in side_data:
+        return array
+
+    angle = side_data[DISPLAYMATRIX]
+    if angle % 90 != 0:
+        raise ValueError(
+            f"Rotation angle must be multiple of 90 degrees, but is {angle}"
+        )
+    # numpy.rot90 documents k as an integer number of quarter turns. True
+    # division would hand it a float, so floor-divide and convert explicitly
+    # (this also covers angles that arrive as floats such as -90.0).
+    quarter_turns = int(angle // 90)
+    return numpy.rot90(array, quarter_turns)
+
+
 class Yolov8(ObjectDetection):
     """Wrapper to YOLOv8 object detection model.
 
@@ -145,17 +173,25 @@ def _load_model(self) -> YOLOv8:
         return model
 
     def _predict(self, video: Path) -> Generator[Results, None, None]:
-        return self.model.predict(
-            source=video,
-            conf=self.confidence,
-            iou=self.iou,
-            half=self.half_precision,
-            imgsz=self.img_size,
-            device=0 if torch.cuda.is_available() else "cpu",
-            stream=True,
-            verbose=False,
-            agnostic_nms=True,
-        )
+        """Decode the video frame by frame and yield the model's results.
+
+        Frames are decoded with PyAV, rotated according to the DISPLAYMATRIX
+        entry in the side data of the first video stream and passed to the
+        model one frame at a time.
+
+        Args:
+            video: path to the video file to run the prediction on.
+
+        Yields:
+            The results the model returns for each decoded frame.
+        """
+        # The device does not change between frames, so resolve it only once.
+        device = 0 if torch.cuda.is_available() else "cpu"
+        with av.open(str(video.absolute())) as container:
+            video_stream = container.streams.video[0]
+            video_stream.thread_type = "AUTO"
+            side_data = video_stream.side_data
+            for frame in container.decode(video=0):
+                # Ultralytics interprets numpy sources as BGR images (OpenCV
+                # convention), so decode to bgr24; rgb24 would swap the red
+                # and blue channels before inference.
+                image = frame.to_ndarray(format="bgr24")
+                rotated_image = rotate(image, side_data)
+                yield from self.model.predict(
+                    source=rotated_image,
+                    conf=self.confidence,
+                    iou=self.iou,
+                    half=self.half_precision,
+                    imgsz=self.img_size,
+                    device=device,
+                    stream=False,
+                    verbose=False,
+                    agnostic_nms=True,
+                )
 
     def _parse_detections(self, detection_result: Boxes) -> list[Detection]:
         bboxes = detection_result.xywhn if self.normalized else detection_result.xywh

diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
+av==13.0.0
 geopandas==1.0.1
 ijson==3.3.0
 moviepy==1.0.3

diff --git a/tests/data/detect/rotated-Testvideo_Cars-Cyclist_FR20_2020-01-01_00-00-00.mp4 b/tests/data/detect/rotated-Testvideo_Cars-Cyclist_FR20_2020-01-01_00-00-00.mp4
diff --git a/tests/detect/detect_test.py b/tests/detect/detect_test.py
@@ -4,7 +4,6 @@
 import os
 import platform
 import shutil
-import subprocess
 from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
@@ -14,7 +13,6 @@
 
 import OTVision.config as config
 from OTVision.config import DEFAULT_EXPECTED_DURATION
-from OTVision.convert.convert import _get_ffmpeg_command, convert
 from OTVision.dataformat import (
     CLASS,
     CONFIDENCE,
@@ -37,6 +35,15 @@
 from OTVision.detect.yolo import Yolov8, loadmodel
 from tests.conftest import YieldFixture
 
+# Passed as expected_duration when running detection on the cyclist videos.
+CYCLIST_VIDEO_LENGTH = timedelta(seconds=3)
+# Relative tolerance used to derive the lower and upper detection count limits.
+DEVIATION = 0.22
+CAR_LOWER_LIMIT = int(120 * (1 - DEVIATION))
+CAR_UPPER_LIMIT = int(120 * (1 + DEVIATION))
+PERSON_LOWER_LIMIT = int(120 * (1 - DEVIATION))
+PERSON_UPPER_LIMIT = int(120 * (1 + DEVIATION))
+BICYCLE_LOWER_LIMIT = int(60 * (1 - DEVIATION))
+BICYCLE_UPPER_LIMIT = int(60 * (1 + DEVIATION))
+
 CAR = "car"
 TRUCK = "truck"
 PERSON = "person"
@@ -177,6 +184,15 @@ def cyclist_mp4(detect_test_data_dir: Path, detect_test_tmp_dir: Path) -> Path:
     return dest
 
 
+@pytest.fixture(scope="module")
+def rotated_cyclist_mp4(detect_test_data_dir: Path, detect_test_tmp_dir: Path) -> Path:
+    """Copy the rotated cyclist test video into the tmp dir and return the copy."""
+    video_name = "rotated-Testvideo_Cars-Cyclist_FR20_2020-01-01_00-00-00.mp4"
+    copied_video = detect_test_tmp_dir / video_name
+    shutil.copy2(detect_test_data_dir / video_name, copied_video)
+    return copied_video
+
+
 @pytest.fixture(scope="module")
 def truck_mp4(detect_test_data_dir: Path, detect_test_tmp_dir: Path) -> Path:
     file_name = "Testvideo_Cars-Truck_FR20_2020-01-01_00-00-00.mp4"
@@ -390,71 +406,47 @@ def test_detect_overwrite(
     def test_detect_fulfill_minimum_detection_requirements(
         self, yolov8m: Yolov8, cyclist_mp4: Path
     ) -> None:
-        deviation = 0.2
-
-        class_counts = self._get_detection_counts_for(cyclist_mp4, yolov8m)
+        """Per-class detection counts of the cyclist video stay within limits."""
+        counts = self._get_detection_counts_for(
+            converted_video=cyclist_mp4,
+            yolov8m=yolov8m,
+            expected_duration=CYCLIST_VIDEO_LENGTH,
+        )
 
-        assert class_counts[CAR] >= 120 * (1 - deviation)
-        assert class_counts[PERSON] >= 120 * (1 - deviation)
-        assert class_counts[BICYCLE] >= 60 * (1 - deviation)
-        assert class_counts[CAR] <= 120 * (1 + deviation)
-        assert class_counts[PERSON] <= 120 * (1 + deviation)
-        assert class_counts[BICYCLE] <= 60 * (1 + deviation)
+        assert CAR_LOWER_LIMIT <= counts[CAR] <= CAR_UPPER_LIMIT
+        assert PERSON_LOWER_LIMIT <= counts[PERSON] <= PERSON_UPPER_LIMIT
+        assert BICYCLE_LOWER_LIMIT <= counts[BICYCLE] <= BICYCLE_UPPER_LIMIT
 
     def test_detection_in_rotated_video(
         self,
         yolov8m: Yolov8,
-        cyclist_mp4: Path,
-        test_data_dir: Path,
-        test_data_tmp_dir: Path,
+        rotated_cyclist_mp4: Path,
     ) -> None:
-        output_filetype = ".mp4"
-        input_file = (
-            test_data_dir / "Testvideo_Cars-Cyclist_FR20_2020-01-01_00-00-00.h264"
-        )
-        rotated_video = test_data_tmp_dir / f"rotate-{input_file.name}"
-        filter_cmds = ["-vf", "transpose=1, transpose=1"]
-        ffmpeg_cmd = _get_ffmpeg_command(
-            input_file,
-            20,
-            0,
-            20,
-            rotated_video,
-            filter_cmds=filter_cmds,
-        )
-
-        subprocess.run(
-            ffmpeg_cmd,
-            check=True,
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.STDOUT,
-        )
-
-        convert(
-            input_video_file=rotated_video,
-            output_filetype=output_filetype,
-            rotation=180,
-            fps_from_filename=False,
+        """Per-class detection counts of the rotated video stay within limits.
+
+        Only the pre-rotated test video is needed; the fixtures that the former
+        ffmpeg-based setup required are no longer requested.
+        """
+        rotated_counts = self._get_detection_counts_for(
+            converted_video=rotated_cyclist_mp4,
+            yolov8m=yolov8m,
+            expected_duration=CYCLIST_VIDEO_LENGTH,
         )
 
-        converted_video = test_data_tmp_dir / f"{rotated_video.stem}{output_filetype}"
-
-        rotated_counts = self._get_detection_counts_for(converted_video, yolov8m)
-
-        normal_counts = self._get_detection_counts_for(cyclist_mp4, yolov8m)
-        deviation = 0.05
-        for key in [CAR, PERSON, BICYCLE]:
-            assert rotated_counts[key] >= normal_counts[key] * (1 - deviation)
-            assert rotated_counts[key] <= normal_counts[key] * (1 + deviation)
+        assert CAR_LOWER_LIMIT <= rotated_counts[CAR] <= CAR_UPPER_LIMIT
+        assert PERSON_LOWER_LIMIT <= rotated_counts[PERSON] <= PERSON_UPPER_LIMIT
+        assert BICYCLE_LOWER_LIMIT <= rotated_counts[BICYCLE] <= BICYCLE_UPPER_LIMIT
 
     def _get_detection_counts_for(
-        self, converted_video: Path, yolov8m: Yolov8
+        self,
+        converted_video: Path,
+        yolov8m: Yolov8,
+        expected_duration: timedelta = DEFAULT_EXPECTED_DURATION,
     ) -> dict[str, float]:
         yolov8m.confidence = 0.5
         detect(
             paths=[converted_video],
             model=yolov8m,
-            expected_duration=DEFAULT_EXPECTED_DURATION,
+            expected_duration=expected_duration,
         )
         result_otdet = converted_video.parent / converted_video.with_suffix(".otdet")
         otdet_dict = read_bz2_otdet(result_otdet)

diff --git a/tests/detect/yolo_test.py b/tests/detect/yolo_test.py
@@ -1,14 +1,41 @@
 from pathlib import Path
 from unittest.mock import Mock, patch
 
+import numpy
 import pytest
 from cv2 import VideoCapture
+from numpy.testing import assert_array_equal
 from torch import Tensor
 
-from OTVision.detect.yolo import Yolov8
+from OTVision.detect.yolo import DISPLAYMATRIX, Yolov8, rotate
 from OTVision.track.preprocess import Detection
 
 
+@pytest.mark.parametrize(
+    "angle, expected",
+    [
+        (90, [[2, 4], [1, 3]]),
+        (-90, [[3, 1], [4, 2]]),
+        (-180, [[4, 3], [2, 1]]),
+        (180, [[4, 3], [2, 1]]),
+    ],
+)
+def test_rotate(angle: int, expected: list[list[int]]) -> None:
+    """Multiples of 90 degrees rotate the 2x2 matrix into the expected layout."""
+    matrix = numpy.array([[1, 2], [3, 4]], int)
+    side_data = {DISPLAYMATRIX: angle}
+
+    rotated = rotate(matrix, side_data)
+
+    assert_array_equal(rotated, numpy.array(expected, int))
+
+
+def test_rotate_by_non_90_degree() -> None:
+    """An angle that is not a multiple of 90 degrees raises a ValueError."""
+    matrix = numpy.array([[1, 2], [3, 4]], int)
+    side_data = {DISPLAYMATRIX: 20}
+
+    with pytest.raises(ValueError):
+        rotate(matrix, side_data)
+
+
 @pytest.fixture
 def video_path() -> str:
     return str(