Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug/5821-bounding-boxes-are-offset #370

Merged
merged 12 commits into from
Oct 29, 2024
58 changes: 47 additions & 11 deletions OTVision/detect/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
from time import perf_counter
from typing import Generator

import av
import numpy
import torch
from numpy import ndarray
from tqdm import tqdm
from ultralytics import YOLO as YOLOv8
from ultralytics.engine.results import Boxes, Results
Expand All @@ -45,6 +48,8 @@
from OTVision.helpers.log import LOGGER_NAME
from OTVision.track.preprocess import Detection

DISPLAYMATRIX = "DISPLAYMATRIX"

log = logging.getLogger(LOGGER_NAME)


Expand Down Expand Up @@ -74,6 +79,29 @@ def detect(self, video: Path) -> list[list[Detection]]:
pass


def rotate(array: ndarray, side_data: dict) -> ndarray:
    """
    Rotate a numpy array using the DISPLAYMATRIX rotation angle defined in side_data.

    The array is rotated in steps of 90 degrees in the plane of its first two
    axes. Positive angles rotate counterclockwise, which is the direction used
    by numpy.rot90. If side_data has no DISPLAYMATRIX entry, the array is
    returned unchanged.

    Args:
        array: to rotate
        side_data: metadata dictionary to read the angle from

    Returns: rotated array

    Raises:
        ValueError: if the angle is not a multiple of 90 degrees

    """
    if DISPLAYMATRIX not in side_data:
        return array

    angle = side_data[DISPLAYMATRIX]
    if angle % 90 != 0:
        raise ValueError(
            f"Rotation angle must be multiple of 90 degrees, but is {angle}"
        )
    # numpy.rot90 documents k as an integer. The angle may arrive as a float,
    # so convert the (already validated) number of quarter turns explicitly.
    quarter_turns = int(angle // 90)
    return numpy.rot90(array, quarter_turns)


class Yolov8(ObjectDetection):
"""Wrapper to YOLOv8 object detection model.

Expand Down Expand Up @@ -145,17 +173,25 @@ def _load_model(self) -> YOLOv8:
return model

def _predict(self, video: Path) -> Generator[Results, None, None]:
return self.model.predict(
source=video,
conf=self.confidence,
iou=self.iou,
half=self.half_precision,
imgsz=self.img_size,
device=0 if torch.cuda.is_available() else "cpu",
stream=True,
verbose=False,
agnostic_nms=True,
)
with av.open(str(video.absolute())) as container:
container.streams.video[0].thread_type = "AUTO"
side_data = container.streams.video[0].side_data
for frame in container.decode(video=0):
ndarray = frame.to_ndarray(format="rgb24")
rotated_image = rotate(ndarray, side_data)
results = self.model.predict(
source=rotated_image,
conf=self.confidence,
iou=self.iou,
half=self.half_precision,
imgsz=self.img_size,
device=0 if torch.cuda.is_available() else "cpu",
stream=False,
verbose=False,
agnostic_nms=True,
)
for result in results:
yield result

def _parse_detections(self, detection_result: Boxes) -> list[Detection]:
bboxes = detection_result.xywhn if self.normalized else detection_result.xywh
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
av==13.0.0
geopandas==1.0.1
ijson==3.3.0
moviepy==1.0.3
Expand Down
Binary file not shown.
90 changes: 41 additions & 49 deletions tests/detect/detect_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import platform
import shutil
import subprocess
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path
Expand All @@ -14,7 +13,6 @@

import OTVision.config as config
from OTVision.config import DEFAULT_EXPECTED_DURATION
from OTVision.convert.convert import _get_ffmpeg_command, convert
from OTVision.dataformat import (
CLASS,
CONFIDENCE,
Expand All @@ -37,6 +35,15 @@
from OTVision.detect.yolo import Yolov8, loadmodel
from tests.conftest import YieldFixture

CYCLIST_VIDEO_LENGTH = timedelta(seconds=3)
DEVIATION = 0.22
BICYCLE_UPPER_LIMIT = int(60 * (1 + DEVIATION))
PERSON_UPPER_LIMIT = int(120 * (1 + DEVIATION))
CAR_UPPER_LIMIT = int(120 * (1 + DEVIATION))
BICYCLE_LOWER_LIMIT = int(60 * (1 - DEVIATION))
PERSON_LOWER_LIMIT = int(120 * (1 - DEVIATION))
CAR_LOWER_LIMIT = int(120 * (1 - DEVIATION))

CAR = "car"
TRUCK = "truck"
PERSON = "person"
Expand Down Expand Up @@ -177,6 +184,15 @@ def cyclist_mp4(detect_test_data_dir: Path, detect_test_tmp_dir: Path) -> Path:
return dest


@pytest.fixture(scope="module")
def rotated_cyclist_mp4(detect_test_data_dir: Path, detect_test_tmp_dir: Path) -> Path:
    """Copy the rotated cyclist video into the temporary test directory.

    The file is copied from `detect_test_data_dir` to `detect_test_tmp_dir`
    under the same name.

    Returns: path to the copied video
    """
    video_name = "rotated-Testvideo_Cars-Cyclist_FR20_2020-01-01_00-00-00.mp4"
    copied_video = detect_test_tmp_dir / video_name
    shutil.copy2(detect_test_data_dir / video_name, copied_video)
    return copied_video


@pytest.fixture(scope="module")
def truck_mp4(detect_test_data_dir: Path, detect_test_tmp_dir: Path) -> Path:
file_name = "Testvideo_Cars-Truck_FR20_2020-01-01_00-00-00.mp4"
Expand Down Expand Up @@ -390,71 +406,47 @@ def test_detect_overwrite(
def test_detect_fulfill_minimum_detection_requirements(
self, yolov8m: Yolov8, cyclist_mp4: Path
) -> None:
deviation = 0.2

class_counts = self._get_detection_counts_for(cyclist_mp4, yolov8m)
class_counts = self._get_detection_counts_for(
cyclist_mp4, yolov8m, CYCLIST_VIDEO_LENGTH
)

assert class_counts[CAR] >= 120 * (1 - deviation)
assert class_counts[PERSON] >= 120 * (1 - deviation)
assert class_counts[BICYCLE] >= 60 * (1 - deviation)
assert class_counts[CAR] <= 120 * (1 + deviation)
assert class_counts[PERSON] <= 120 * (1 + deviation)
assert class_counts[BICYCLE] <= 60 * (1 + deviation)
assert class_counts[CAR] >= CAR_LOWER_LIMIT
assert class_counts[PERSON] >= PERSON_LOWER_LIMIT
assert class_counts[BICYCLE] >= BICYCLE_LOWER_LIMIT
assert class_counts[CAR] <= CAR_UPPER_LIMIT
assert class_counts[PERSON] <= PERSON_UPPER_LIMIT
assert class_counts[BICYCLE] <= BICYCLE_UPPER_LIMIT

def test_detection_in_rotated_video(
self,
yolov8m: Yolov8,
cyclist_mp4: Path,
rotated_cyclist_mp4: Path,
test_data_dir: Path,
test_data_tmp_dir: Path,
) -> None:
output_filetype = ".mp4"
input_file = (
test_data_dir / "Testvideo_Cars-Cyclist_FR20_2020-01-01_00-00-00.h264"
)
rotated_video = test_data_tmp_dir / f"rotate-{input_file.name}"
filter_cmds = ["-vf", "transpose=1, transpose=1"]
ffmpeg_cmd = _get_ffmpeg_command(
input_file,
20,
0,
20,
rotated_video,
filter_cmds=filter_cmds,
)

subprocess.run(
ffmpeg_cmd,
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.STDOUT,
)

convert(
input_video_file=rotated_video,
output_filetype=output_filetype,
rotation=180,
fps_from_filename=False,
rotated_counts = self._get_detection_counts_for(
rotated_cyclist_mp4, yolov8m, CYCLIST_VIDEO_LENGTH
)

converted_video = test_data_tmp_dir / f"{rotated_video.stem}{output_filetype}"

rotated_counts = self._get_detection_counts_for(converted_video, yolov8m)

normal_counts = self._get_detection_counts_for(cyclist_mp4, yolov8m)
deviation = 0.05
for key in [CAR, PERSON, BICYCLE]:
assert rotated_counts[key] >= normal_counts[key] * (1 - deviation)
assert rotated_counts[key] <= normal_counts[key] * (1 + deviation)
assert rotated_counts[CAR] >= CAR_LOWER_LIMIT
assert rotated_counts[PERSON] >= PERSON_LOWER_LIMIT
assert rotated_counts[BICYCLE] >= BICYCLE_LOWER_LIMIT
assert rotated_counts[CAR] <= CAR_UPPER_LIMIT
assert rotated_counts[PERSON] <= PERSON_UPPER_LIMIT
assert rotated_counts[BICYCLE] <= BICYCLE_UPPER_LIMIT

def _get_detection_counts_for(
self, converted_video: Path, yolov8m: Yolov8
self,
converted_video: Path,
yolov8m: Yolov8,
expected_duration: timedelta = DEFAULT_EXPECTED_DURATION,
) -> dict[str, float]:
yolov8m.confidence = 0.5
detect(
paths=[converted_video],
model=yolov8m,
expected_duration=DEFAULT_EXPECTED_DURATION,
expected_duration=expected_duration,
)
result_otdet = converted_video.parent / converted_video.with_suffix(".otdet")
otdet_dict = read_bz2_otdet(result_otdet)
Expand Down
29 changes: 28 additions & 1 deletion tests/detect/yolo_test.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,41 @@
from pathlib import Path
from unittest.mock import Mock, patch

import numpy
import pytest
from cv2 import VideoCapture
from numpy.testing import assert_array_equal
from torch import Tensor

from OTVision.detect.yolo import Yolov8
from OTVision.detect.yolo import DISPLAYMATRIX, Yolov8, rotate
from OTVision.track.preprocess import Detection


@pytest.mark.parametrize(
    "angle, expected",
    [
        (90, [[2, 4], [1, 3]]),
        (-90, [[3, 1], [4, 2]]),
        (-180, [[4, 3], [2, 1]]),
        (180, [[4, 3], [2, 1]]),
    ],
)
def test_rotate(angle: int, expected: list[list[int]]) -> None:
    """Check that `rotate` turns a 2x2 array by the given DISPLAYMATRIX angle."""
    source = numpy.array([[1, 2], [3, 4]], int)
    side_data = {DISPLAYMATRIX: angle}

    rotated = rotate(source, side_data)

    assert_array_equal(rotated, numpy.array(expected, int))


def test_rotate_by_non_90_degree() -> None:
    """Check that `rotate` rejects an angle that is not a multiple of 90."""
    source = numpy.array([[1, 2], [3, 4]], int)
    side_data = {DISPLAYMATRIX: 20}

    with pytest.raises(ValueError):
        rotate(source, side_data)


@pytest.fixture
def video_path() -> str:
return str(
Expand Down
Loading