Skip to content

Commit

Permalink
Support grayf32le and gbrapf32le in numpy conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
robinechuca authored Jan 15, 2025
1 parent d527571 commit 6bee799
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Features

- Add hardware decoding by :gh-user:`matthewlai` and :gh-user:`WyattBlue` in (:pr:`1685`).
- Add ``VideoFrame.rotation`` by :gh-user:`lgeiger` in (:pr:`1675`).
- Support grayf32le and gbrapf32le in numpy conversion by :gh-user:`robinechuca` in (:pr:`1712`).


v14.0.1
Expand Down
63 changes: 47 additions & 16 deletions av/video/frame.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ cdef class VideoFrame(Frame):
.. note:: For ``pal8``, an ``(image, palette)`` tuple will be returned,
with the palette being in ARGB (PyAV will swap bytes if needed).
.. note:: For ``gbrp`` formats, channels are flipped to RGB order.
"""
cdef VideoFrame frame = self.reformat(**kwargs)

Expand All @@ -312,29 +314,36 @@ cdef class VideoFrame(Frame):
return np.hstack((
useful_array(frame.planes[0]),
useful_array(frame.planes[1]),
useful_array(frame.planes[2])
useful_array(frame.planes[2]),
)).reshape(-1, frame.height, frame.width)
elif frame.format.name == "yuyv422":
assert frame.width % 2 == 0
assert frame.height % 2 == 0
return useful_array(frame.planes[0], 2).reshape(frame.height, frame.width, -1)
elif frame.format.name == "gbrp":
array = np.empty((frame.height, frame.width, 3), dtype="uint8")
array[:, :, 0] = useful_array(frame.planes[2], 1).reshape(-1, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 1).reshape(-1, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 1).reshape(-1, frame.width)
array[:, :, 0] = useful_array(frame.planes[2], 1).reshape(frame.height, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 1).reshape(frame.height, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 1).reshape(frame.height, frame.width)
return array
elif frame.format.name in ("gbrp10be", "gbrp12be", "gbrp14be", "gbrp16be", "gbrp10le", "gbrp12le", "gbrp14le", "gbrp16le"):
array = np.empty((frame.height, frame.width, 3), dtype="uint16")
array[:, :, 0] = useful_array(frame.planes[2], 2, "uint16").reshape(-1, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 2, "uint16").reshape(-1, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 2, "uint16").reshape(-1, frame.width)
array[:, :, 0] = useful_array(frame.planes[2], 2, "uint16").reshape(frame.height, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 2, "uint16").reshape(frame.height, frame.width)
return byteswap_array(array, frame.format.name.endswith("be"))
elif frame.format.name in ("gbrpf32be", "gbrpf32le"):
array = np.empty((frame.height, frame.width, 3), dtype="float32")
array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(-1, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(-1, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(-1, frame.width)
array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width)
return byteswap_array(array, frame.format.name.endswith("be"))
elif frame.format.name in ("gbrapf32be", "gbrapf32le"):
array = np.empty((frame.height, frame.width, 4), dtype="float32")
array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width)
array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width)
array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width)
array[:, :, 3] = useful_array(frame.planes[3], 4, "float32").reshape(frame.height, frame.width)
return byteswap_array(array, frame.format.name.endswith("be"))
elif frame.format.name in ("rgb24", "bgr24"):
return useful_array(frame.planes[0], 3).reshape(frame.height, frame.width, -1)
Expand All @@ -345,17 +354,22 @@ cdef class VideoFrame(Frame):
elif frame.format.name in ("gray16be", "gray16le"):
return byteswap_array(
useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width),
frame.format.name == "gray16be",
frame.format.name.endswith("be"),
)
elif frame.format.name in ("grayf32be", "grayf32le"):
return byteswap_array(
useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width),
frame.format.name.endswith("be"),
)
elif frame.format.name in ("rgb48be", "rgb48le"):
return byteswap_array(
useful_array(frame.planes[0], 6, "uint16").reshape(frame.height, frame.width, -1),
frame.format.name == "rgb48be",
frame.format.name.endswith("be"),
)
elif frame.format.name in ("rgba64be", "rgba64le"):
return byteswap_array(
useful_array(frame.planes[0], 8, "uint16").reshape(frame.height, frame.width, -1),
frame.format.name == "rgba64be",
frame.format.name.endswith("be"),
)
elif frame.format.name == "pal8":
image = useful_array(frame.planes[0]).reshape(frame.height, frame.width)
Expand Down Expand Up @@ -491,6 +505,8 @@ cdef class VideoFrame(Frame):
must be in the system's native byte order.
.. note:: for ``pal8``, an ``(image, palette)`` pair must be passed. `palette` must have shape (256, 4) and is given in ARGB format (PyAV will swap bytes if needed).
.. note:: for ``gbrp`` formats, channels are assumed to be given in RGB order.
"""
if format == "pal8":
array, palette = array
Expand Down Expand Up @@ -568,19 +584,34 @@ cdef class VideoFrame(Frame):
elif format in ("gray16be", "gray16le"):
check_ndarray(array, "uint16", 2)
frame = VideoFrame(array.shape[1], array.shape[0], format)
copy_array_to_plane(byteswap_array(array, format == "gray16be"), frame.planes[0], 2)
copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 2)
return frame
elif format in ("grayf32be", "grayf32le"):
check_ndarray(array, "float32", 2)
frame = VideoFrame(array.shape[1], array.shape[0], format)
copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 4)
return frame
elif format in ("rgb48be", "rgb48le"):
check_ndarray(array, "uint16", 3)
check_ndarray_shape(array, array.shape[2] == 3)
frame = VideoFrame(array.shape[1], array.shape[0], format)
copy_array_to_plane(byteswap_array(array, format == "rgb48be"), frame.planes[0], 6)
copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 6)
return frame
elif format in ("rgba64be", "rgba64le"):
check_ndarray(array, "uint16", 3)
check_ndarray_shape(array, array.shape[2] == 4)
frame = VideoFrame(array.shape[1], array.shape[0], format)
copy_array_to_plane(byteswap_array(array, format == "rgba64be"), frame.planes[0], 8)
copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 8)
return frame
elif format in ("gbrapf32be", "gbrapf32le"):
check_ndarray(array, "float32", 3)
check_ndarray_shape(array, array.shape[2] == 4)

frame = VideoFrame(array.shape[1], array.shape[0], format)
copy_array_to_plane(byteswap_array(array[:, :, 1], format.endswith("be")), frame.planes[0], 4)
copy_array_to_plane(byteswap_array(array[:, :, 2], format.endswith("be")), frame.planes[1], 4)
copy_array_to_plane(byteswap_array(array[:, :, 0], format.endswith("be")), frame.planes[2], 4)
copy_array_to_plane(byteswap_array(array[:, :, 3], format.endswith("be")), frame.planes[3], 4)
return frame
elif format == "nv12":
check_ndarray(array, "uint8", 2)
Expand Down
36 changes: 36 additions & 0 deletions tests/test_videoframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,24 @@ def test_ndarray_gray_align() -> None:
assertNdarraysEqual(frame.to_ndarray(), array)


def test_ndarray_grayf32() -> None:
    """Round-trip a float32 grayscale array through both ``grayf32`` byte orders.

    A random 480x640 float32 image is converted to a ``VideoFrame`` and back;
    the frame must report the expected size and format name, and the array
    recovered by ``to_ndarray`` must equal the input.
    """
    height, width = 480, 640
    array = numpy.random.random_sample(size=(height, width)).astype(numpy.float32)
    for pix_fmt in ("grayf32be", "grayf32le"):
        frame = VideoFrame.from_ndarray(array, format=pix_fmt)
        assert (frame.width, frame.height) == (width, height)
        assert frame.format.name == pix_fmt
        assertNdarraysEqual(frame.to_ndarray(), array)


def test_ndarray_grayf32_align() -> None:
    """Round-trip a 238x318 float32 grayscale array through ``grayf32be``/``grayf32le``.

    Same check as the 480x640 grayf32 test, but with a smaller, less "round"
    frame size (presumably chosen so rows do not match the plane's padded
    line size -- confirm against the plane-copy helpers).
    """
    height, width = 238, 318
    array = numpy.random.random_sample(size=(height, width)).astype(numpy.float32)
    for pix_fmt in ("grayf32be", "grayf32le"):
        frame = VideoFrame.from_ndarray(array, format=pix_fmt)
        assert (frame.width, frame.height) == (width, height)
        assert frame.format.name == pix_fmt
        assertNdarraysEqual(frame.to_ndarray(), array)


def test_ndarray_rgb() -> None:
array = numpy.random.randint(0, 256, size=(480, 640, 3), dtype=numpy.uint8)
for format in ("rgb24", "bgr24"):
Expand Down Expand Up @@ -365,6 +383,24 @@ def test_ndarray_gbrpf32_align() -> None:
assertNdarraysEqual(frame.to_ndarray(), array)


def test_ndarray_gbrapf32() -> None:
    """Round-trip a 4-channel float32 array through both ``gbrapf32`` byte orders.

    A random 480x640x4 float32 array is converted to a ``VideoFrame`` and
    back; the frame must report the expected size and format name, and the
    array recovered by ``to_ndarray`` must equal the input.
    """
    height, width, channels = 480, 640, 4
    array = numpy.random.random_sample(size=(height, width, channels)).astype(numpy.float32)
    for pix_fmt in ("gbrapf32be", "gbrapf32le"):
        frame = VideoFrame.from_ndarray(array, format=pix_fmt)
        assert (frame.width, frame.height) == (width, height)
        assert frame.format.name == pix_fmt
        assertNdarraysEqual(frame.to_ndarray(), array)


# NOTE(review): "allign" is a misspelling of "align" (compare
# test_ndarray_grayf32_align); the name is kept as-is so existing test
# selections keep working.
def test_ndarray_gbrapf32_allign() -> None:
    """Round-trip a 238x318x4 float32 array through ``gbrapf32be``/``gbrapf32le``.

    Same check as the 480x640 gbrapf32 test, but with a smaller, less "round"
    frame size (presumably chosen so rows do not match the plane's padded
    line size -- confirm against the plane-copy helpers).
    """
    height, width, channels = 238, 318, 4
    array = numpy.random.random_sample(size=(height, width, channels)).astype(numpy.float32)
    for pix_fmt in ("gbrapf32be", "gbrapf32le"):
        frame = VideoFrame.from_ndarray(array, format=pix_fmt)
        assert (frame.width, frame.height) == (width, height)
        assert frame.format.name == pix_fmt
        assertNdarraysEqual(frame.to_ndarray(), array)


def test_ndarray_yuv420p() -> None:
array = numpy.random.randint(0, 256, size=(720, 640), dtype=numpy.uint8)
frame = VideoFrame.from_ndarray(array, format="yuv420p")
Expand Down

0 comments on commit 6bee799

Please sign in to comment.