diff --git a/OTVision/detect/yolo.py b/OTVision/detect/yolo.py
index 3744187c..9ace4f2d 100644
--- a/OTVision/detect/yolo.py
+++ b/OTVision/detect/yolo.py
@@ -25,7 +25,10 @@
 from time import perf_counter
 from typing import Generator
 
+import av
+import numpy
 import torch
+from numpy import ndarray
 from tqdm import tqdm
 from ultralytics import YOLO as YOLOv8
 from ultralytics.engine.results import Boxes, Results
@@ -45,6 +48,8 @@
 from OTVision.helpers.log import LOGGER_NAME
 from OTVision.track.preprocess import Detection
 
+DISPLAYMATRIX = "DISPLAYMATRIX"
+
 log = logging.getLogger(LOGGER_NAME)
 
 
@@ -74,6 +79,29 @@ def detect(self, video: Path) -> list[list[Detection]]:
         pass
 
 
+def rotate(array: ndarray, side_data: dict) -> ndarray:
+    """
+    Rotate a numpy array using the DISPLAYMATRIX rotation angle defined in side_data.
+
+    Args:
+        array: to rotate
+        side_data: metadata dictionary to read the angle from
+
+    Returns: rotated array
+
+    """
+    if DISPLAYMATRIX in side_data:
+        angle = side_data[DISPLAYMATRIX]
+        if angle % 90 != 0:
+            raise ValueError(
+                f"Rotation angle must be multiple of 90 degrees, but is {angle}"
+            )
+        rotation = angle / 90
+        rotated_image = numpy.rot90(array, rotation)
+        return rotated_image
+    return array
+
+
 class Yolov8(ObjectDetection):
     """Wrapper to YOLOv8 object detection model.
 
@@ -145,17 +173,25 @@ def _load_model(self) -> YOLOv8:
         return model
 
     def _predict(self, video: Path) -> Generator[Results, None, None]:
-        return self.model.predict(
-            source=video,
-            conf=self.confidence,
-            iou=self.iou,
-            half=self.half_precision,
-            imgsz=self.img_size,
-            device=0 if torch.cuda.is_available() else "cpu",
-            stream=True,
-            verbose=False,
-            agnostic_nms=True,
-        )
+        with av.open(str(video.absolute())) as container:
+            container.streams.video[0].thread_type = "AUTO"
+            side_data = container.streams.video[0].side_data
+            for frame in container.decode(video=0):
+                ndarray = frame.to_ndarray(format="rgb24")
+                rotated_image = rotate(ndarray, side_data)
+                results = self.model.predict(
+                    source=rotated_image,
+                    conf=self.confidence,
+                    iou=self.iou,
+                    half=self.half_precision,
+                    imgsz=self.img_size,
+                    device=0 if torch.cuda.is_available() else "cpu",
+                    stream=False,
+                    verbose=False,
+                    agnostic_nms=True,
+                )
+                for result in results:
+                    yield result
 
     def _parse_detections(self, detection_result: Boxes) -> list[Detection]:
         bboxes = detection_result.xywhn if self.normalized else detection_result.xywh
diff --git a/requirements.txt b/requirements.txt
index 21d6360d..ed2f5c8a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+av==13.0.0
 geopandas==1.0.1
 ijson==3.3.0
 moviepy==1.0.3
diff --git a/tests/data/detect/rotated-Testvideo_Cars-Cyclist_FR20_2020-01-01_00-00-00.mp4 b/tests/data/detect/rotated-Testvideo_Cars-Cyclist_FR20_2020-01-01_00-00-00.mp4
new file mode 100644
index 00000000..3a6d2a7b
Binary files /dev/null and b/tests/data/detect/rotated-Testvideo_Cars-Cyclist_FR20_2020-01-01_00-00-00.mp4 differ
diff --git a/tests/detect/detect_test.py b/tests/detect/detect_test.py
index ae848aa3..9dcc3fa3 100644
--- a/tests/detect/detect_test.py
+++ b/tests/detect/detect_test.py
@@ -4,7 +4,6 @@
 import os
 import platform
 import shutil
-import subprocess
 from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
@@ -14,7 +13,6 @@
 
 import OTVision.config as config
 from OTVision.config import DEFAULT_EXPECTED_DURATION
-from OTVision.convert.convert import _get_ffmpeg_command, convert
 from OTVision.dataformat import (
     CLASS,
     CONFIDENCE,
@@ -37,6 +35,15 @@
 from OTVision.detect.yolo import Yolov8, loadmodel
 from tests.conftest import YieldFixture
 
+CYCLIST_VIDEO_LENGTH = timedelta(seconds=3)
+DEVIATION = 0.22
+BICYCLE_UPPER_LIMIT = int(60 * (1 + DEVIATION))
+PERSON_UPPER_LIMIT = int(120 * (1 + DEVIATION))
+CAR_UPPER_LIMIT = int(120 * (1 + DEVIATION))
+BICYCLE_LOWER_LIMIT = int(60 * (1 - DEVIATION))
+PERSON_LOWER_LIMIT = int(120 * (1 - DEVIATION))
+CAR_LOWER_LIMIT = int(120 * (1 - DEVIATION))
+
 CAR = "car"
 TRUCK = "truck"
 PERSON = "person"
@@ -177,6 +184,15 @@ def cyclist_mp4(detect_test_data_dir: Path, detect_test_tmp_dir: Path) -> Path:
     return dest
 
 
+@pytest.fixture(scope="module")
+def rotated_cyclist_mp4(detect_test_data_dir: Path, detect_test_tmp_dir: Path) -> Path:
+    file_name = "rotated-Testvideo_Cars-Cyclist_FR20_2020-01-01_00-00-00.mp4"
+    src = detect_test_data_dir / file_name
+    dest = detect_test_tmp_dir / file_name
+    shutil.copy2(src, dest)
+    return dest
+
+
 @pytest.fixture(scope="module")
 def truck_mp4(detect_test_data_dir: Path, detect_test_tmp_dir: Path) -> Path:
     file_name = "Testvideo_Cars-Truck_FR20_2020-01-01_00-00-00.mp4"
@@ -390,71 +406,47 @@ def test_detect_overwrite(
     def test_detect_fulfill_minimum_detection_requirements(
         self, yolov8m: Yolov8, cyclist_mp4: Path
     ) -> None:
-        deviation = 0.2
-
-        class_counts = self._get_detection_counts_for(cyclist_mp4, yolov8m)
+        class_counts = self._get_detection_counts_for(
+            cyclist_mp4, yolov8m, CYCLIST_VIDEO_LENGTH
+        )
 
-        assert class_counts[CAR] >= 120 * (1 - deviation)
-        assert class_counts[PERSON] >= 120 * (1 - deviation)
-        assert class_counts[BICYCLE] >= 60 * (1 - deviation)
-        assert class_counts[CAR] <= 120 * (1 + deviation)
-        assert class_counts[PERSON] <= 120 * (1 + deviation)
-        assert class_counts[BICYCLE] <= 60 * (1 + deviation)
+        assert class_counts[CAR] >= CAR_LOWER_LIMIT
+        assert class_counts[PERSON] >= PERSON_LOWER_LIMIT
+        assert class_counts[BICYCLE] >= BICYCLE_LOWER_LIMIT
+        assert class_counts[CAR] <= CAR_UPPER_LIMIT
+        assert class_counts[PERSON] <= PERSON_UPPER_LIMIT
+        assert class_counts[BICYCLE] <= BICYCLE_UPPER_LIMIT
 
     def test_detection_in_rotated_video(
         self,
         yolov8m: Yolov8,
         cyclist_mp4: Path,
+        rotated_cyclist_mp4: Path,
         test_data_dir: Path,
         test_data_tmp_dir: Path,
     ) -> None:
-        output_filetype = ".mp4"
-        input_file = (
-            test_data_dir / "Testvideo_Cars-Cyclist_FR20_2020-01-01_00-00-00.h264"
-        )
-        rotated_video = test_data_tmp_dir / f"rotate-{input_file.name}"
-        filter_cmds = ["-vf", "transpose=1, transpose=1"]
-        ffmpeg_cmd = _get_ffmpeg_command(
-            input_file,
-            20,
-            0,
-            20,
-            rotated_video,
-            filter_cmds=filter_cmds,
-        )
-
-        subprocess.run(
-            ffmpeg_cmd,
-            check=True,
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.STDOUT,
-        )
-
-        convert(
-            input_video_file=rotated_video,
-            output_filetype=output_filetype,
-            rotation=180,
-            fps_from_filename=False,
+        rotated_counts = self._get_detection_counts_for(
+            rotated_cyclist_mp4, yolov8m, CYCLIST_VIDEO_LENGTH
         )
-        converted_video = test_data_tmp_dir / f"{rotated_video.stem}{output_filetype}"
-
-        rotated_counts = self._get_detection_counts_for(converted_video, yolov8m)
-
-        normal_counts = self._get_detection_counts_for(cyclist_mp4, yolov8m)
-        deviation = 0.05
-        for key in [CAR, PERSON, BICYCLE]:
-            assert rotated_counts[key] >= normal_counts[key] * (1 - deviation)
-            assert rotated_counts[key] <= normal_counts[key] * (1 + deviation)
+        assert rotated_counts[CAR] >= CAR_LOWER_LIMIT
+        assert rotated_counts[PERSON] >= PERSON_LOWER_LIMIT
+        assert rotated_counts[BICYCLE] >= BICYCLE_LOWER_LIMIT
+        assert rotated_counts[CAR] <= CAR_UPPER_LIMIT
+        assert rotated_counts[PERSON] <= PERSON_UPPER_LIMIT
+        assert rotated_counts[BICYCLE] <= BICYCLE_UPPER_LIMIT
 
     def _get_detection_counts_for(
-        self, converted_video: Path, yolov8m: Yolov8
+        self,
+        converted_video: Path,
+        yolov8m: Yolov8,
+        expected_duration: timedelta = DEFAULT_EXPECTED_DURATION,
     ) -> dict[str, float]:
         yolov8m.confidence = 0.5
 
         detect(
             paths=[converted_video],
             model=yolov8m,
-            expected_duration=DEFAULT_EXPECTED_DURATION,
+            expected_duration=expected_duration,
         )
         result_otdet = converted_video.parent / converted_video.with_suffix(".otdet")
         otdet_dict = read_bz2_otdet(result_otdet)
diff --git a/tests/detect/yolo_test.py b/tests/detect/yolo_test.py
index 887cb49a..222880b6 100644
--- a/tests/detect/yolo_test.py
+++ b/tests/detect/yolo_test.py
@@ -1,14 +1,41 @@
 from pathlib import Path
 from unittest.mock import Mock, patch
 
+import numpy
 import pytest
 from cv2 import VideoCapture
+from numpy.testing import assert_array_equal
 from torch import Tensor
 
-from OTVision.detect.yolo import Yolov8
+from OTVision.detect.yolo import DISPLAYMATRIX, Yolov8, rotate
 from OTVision.track.preprocess import Detection
 
 
+@pytest.mark.parametrize(
+    "angle, expected",
+    [
+        (90, [[2, 4], [1, 3]]),
+        (-90, [[3, 1], [4, 2]]),
+        (-180, [[4, 3], [2, 1]]),
+        (180, [[4, 3], [2, 1]]),
+    ],
+)
+def test_rotate(angle: int, expected: list[list[int]]) -> None:
+    actual_array = numpy.array([[1, 2], [3, 4]], int)
+    expected_array = numpy.array(expected, int)
+
+    result = rotate(actual_array, {DISPLAYMATRIX: angle})
+
+    assert_array_equal(result, expected_array)
+
+
+def test_rotate_by_non_90_degree() -> None:
+    actual_array = numpy.array([[1, 2], [3, 4]], int)
+
+    with pytest.raises(ValueError):
+        rotate(actual_array, {DISPLAYMATRIX: 20})
+
+
 @pytest.fixture
 def video_path() -> str:
     return str(
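
Note for reviewers: the side-data handling can be sanity-checked on a single file
without running the full detect pipeline. The sketch below reuses the rotate()
helper and the same PyAV calls introduced in this patch; "my_video.mp4" is a
placeholder for a local test file, e.g. the rotated video added under
tests/data/detect/.

import av

from OTVision.detect.yolo import DISPLAYMATRIX, rotate

# Placeholder path; point this at a local video file.
with av.open("my_video.mp4") as container:
    # Rotation metadata lives in the video stream's side data, keyed by
    # DISPLAYMATRIX, exactly as Yolov8._predict reads it.
    side_data = container.streams.video[0].side_data
    if DISPLAYMATRIX in side_data:
        print(f"DISPLAYMATRIX rotation: {side_data[DISPLAYMATRIX]} degrees")
    else:
        print("no DISPLAYMATRIX side data, frames are used as decoded")
    for frame in container.decode(video=0):
        image = frame.to_ndarray(format="rgb24")
        corrected = rotate(image, side_data)
        # For 90/270 degree rotations, height and width are swapped.
        print(f"decoded: {image.shape}, corrected: {corrected.shape}")
        break  # one frame is enough for a quick check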