From 6bee7992d2919f2d6a43e5036c09f4b6cb00383e Mon Sep 17 00:00:00 2001 From: robinechuca Date: Wed, 15 Jan 2025 18:13:43 +0100 Subject: [PATCH] Support grayf32le and gbrapf32le in numpy conversion --- CHANGELOG.rst | 1 + av/video/frame.pyx | 63 ++++++++++++++++++++++++++++++---------- tests/test_videoframe.py | 36 +++++++++++++++++++++++ 3 files changed, 84 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index eed7b2528..7baadb4a8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -24,6 +24,7 @@ Features - Add hardware decoding by :gh-user:`matthewlai` and :gh-user:`WyattBlue` in (:pr:`1685`). - Add ``VideoFrame.rotation`` by :gh-user:`lgeiger` in (:pr:`1675`). +- Support grayf32le and gbrapf32le in numpy conversion by :gh-user:`robinechuca` in (:pr:`1712`). v14.0.1 diff --git a/av/video/frame.pyx b/av/video/frame.pyx index 02cde3187..6e4a1dbdf 100644 --- a/av/video/frame.pyx +++ b/av/video/frame.pyx @@ -295,6 +295,8 @@ cdef class VideoFrame(Frame): .. note:: For ``pal8``, an ``(image, palette)`` tuple will be returned, with the palette being in ARGB (PyAV will swap bytes if needed). + .. note:: For ``gbrp`` formats, channels are flipped to RGB order. 
+ """ cdef VideoFrame frame = self.reformat(**kwargs) @@ -312,7 +314,7 @@ cdef class VideoFrame(Frame): return np.hstack(( useful_array(frame.planes[0]), useful_array(frame.planes[1]), - useful_array(frame.planes[2]) + useful_array(frame.planes[2]), )).reshape(-1, frame.height, frame.width) elif frame.format.name == "yuyv422": assert frame.width % 2 == 0 @@ -320,21 +322,28 @@ cdef class VideoFrame(Frame): return useful_array(frame.planes[0], 2).reshape(frame.height, frame.width, -1) elif frame.format.name == "gbrp": array = np.empty((frame.height, frame.width, 3), dtype="uint8") - array[:, :, 0] = useful_array(frame.planes[2], 1).reshape(-1, frame.width) - array[:, :, 1] = useful_array(frame.planes[0], 1).reshape(-1, frame.width) - array[:, :, 2] = useful_array(frame.planes[1], 1).reshape(-1, frame.width) + array[:, :, 0] = useful_array(frame.planes[2], 1).reshape(frame.height, frame.width) + array[:, :, 1] = useful_array(frame.planes[0], 1).reshape(frame.height, frame.width) + array[:, :, 2] = useful_array(frame.planes[1], 1).reshape(frame.height, frame.width) return array elif frame.format.name in ("gbrp10be", "gbrp12be", "gbrp14be", "gbrp16be", "gbrp10le", "gbrp12le", "gbrp14le", "gbrp16le"): array = np.empty((frame.height, frame.width, 3), dtype="uint16") - array[:, :, 0] = useful_array(frame.planes[2], 2, "uint16").reshape(-1, frame.width) - array[:, :, 1] = useful_array(frame.planes[0], 2, "uint16").reshape(-1, frame.width) - array[:, :, 2] = useful_array(frame.planes[1], 2, "uint16").reshape(-1, frame.width) + array[:, :, 0] = useful_array(frame.planes[2], 2, "uint16").reshape(frame.height, frame.width) + array[:, :, 1] = useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width) + array[:, :, 2] = useful_array(frame.planes[1], 2, "uint16").reshape(frame.height, frame.width) return byteswap_array(array, frame.format.name.endswith("be")) elif frame.format.name in ("gbrpf32be", "gbrpf32le"): array = np.empty((frame.height, frame.width, 3), 
dtype="float32") - array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(-1, frame.width) - array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(-1, frame.width) - array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(-1, frame.width) + array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width) + array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width) + array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width) + return byteswap_array(array, frame.format.name.endswith("be")) + elif frame.format.name in ("gbrapf32be", "gbrapf32le"): + array = np.empty((frame.height, frame.width, 4), dtype="float32") + array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width) + array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width) + array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width) + array[:, :, 3] = useful_array(frame.planes[3], 4, "float32").reshape(frame.height, frame.width) return byteswap_array(array, frame.format.name.endswith("be")) elif frame.format.name in ("rgb24", "bgr24"): return useful_array(frame.planes[0], 3).reshape(frame.height, frame.width, -1) @@ -345,17 +354,22 @@ cdef class VideoFrame(Frame): elif frame.format.name in ("gray16be", "gray16le"): return byteswap_array( useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width), - frame.format.name == "gray16be", + frame.format.name.endswith("be"), + ) + elif frame.format.name in ("grayf32be", "grayf32le"): + return byteswap_array( + useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width), + frame.format.name.endswith("be"), ) elif frame.format.name in ("rgb48be", "rgb48le"): return byteswap_array( useful_array(frame.planes[0], 6, "uint16").reshape(frame.height, frame.width, -1), - 
frame.format.name == "rgb48be", + frame.format.name.endswith("be"), ) elif frame.format.name in ("rgba64be", "rgba64le"): return byteswap_array( useful_array(frame.planes[0], 8, "uint16").reshape(frame.height, frame.width, -1), - frame.format.name == "rgba64be", + frame.format.name.endswith("be"), ) elif frame.format.name == "pal8": image = useful_array(frame.planes[0]).reshape(frame.height, frame.width) @@ -491,6 +505,8 @@ cdef class VideoFrame(Frame): must be in the system's native byte order. .. note:: for ``pal8``, an ``(image, palette)`` pair must be passed. `palette` must have shape (256, 4) and is given in ARGB format (PyAV will swap bytes if needed). + + .. note:: for ``gbrp`` formats, channels are assumed to be given in RGB order. """ if format == "pal8": array, palette = array @@ -568,19 +584,34 @@ cdef class VideoFrame(Frame): elif format in ("gray16be", "gray16le"): check_ndarray(array, "uint16", 2) frame = VideoFrame(array.shape[1], array.shape[0], format) - copy_array_to_plane(byteswap_array(array, format == "gray16be"), frame.planes[0], 2) + copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 2) + return frame + elif format in ("grayf32be", "grayf32le"): + check_ndarray(array, "float32", 2) + frame = VideoFrame(array.shape[1], array.shape[0], format) + copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 4) return frame elif format in ("rgb48be", "rgb48le"): check_ndarray(array, "uint16", 3) check_ndarray_shape(array, array.shape[2] == 3) frame = VideoFrame(array.shape[1], array.shape[0], format) - copy_array_to_plane(byteswap_array(array, format == "rgb48be"), frame.planes[0], 6) + copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 6) return frame elif format in ("rgba64be", "rgba64le"): check_ndarray(array, "uint16", 3) check_ndarray_shape(array, array.shape[2] == 4) frame = VideoFrame(array.shape[1], array.shape[0], format) - 
copy_array_to_plane(byteswap_array(array, format == "rgba64be"), frame.planes[0], 8) + copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 8) + return frame + elif format in ("gbrapf32be", "gbrapf32le"): + check_ndarray(array, "float32", 3) + check_ndarray_shape(array, array.shape[2] == 4) + + frame = VideoFrame(array.shape[1], array.shape[0], format) + copy_array_to_plane(byteswap_array(array[:, :, 1], format.endswith("be")), frame.planes[0], 4) + copy_array_to_plane(byteswap_array(array[:, :, 2], format.endswith("be")), frame.planes[1], 4) + copy_array_to_plane(byteswap_array(array[:, :, 0], format.endswith("be")), frame.planes[2], 4) + copy_array_to_plane(byteswap_array(array[:, :, 3], format.endswith("be")), frame.planes[3], 4) return frame elif format == "nv12": check_ndarray(array, "uint8", 2) diff --git a/tests/test_videoframe.py b/tests/test_videoframe.py index f044be949..250641676 100644 --- a/tests/test_videoframe.py +++ b/tests/test_videoframe.py @@ -223,6 +223,24 @@ def test_ndarray_gray_align() -> None: assertNdarraysEqual(frame.to_ndarray(), array) +def test_ndarray_grayf32() -> None: + array = numpy.random.random_sample(size=(480, 640)).astype(numpy.float32) + for format in ("grayf32be", "grayf32le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 640 and frame.height == 480 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + + +def test_ndarray_grayf32_align() -> None: + array = numpy.random.random_sample(size=(238, 318)).astype(numpy.float32) + for format in ("grayf32be", "grayf32le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 318 and frame.height == 238 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + + def test_ndarray_rgb() -> None: array = numpy.random.randint(0, 256, size=(480, 640, 3), dtype=numpy.uint8) for format in ("rgb24", "bgr24"): @@ -365,6 +383,24 @@ def 
test_ndarray_gbrpf32_align() -> None: assertNdarraysEqual(frame.to_ndarray(), array) +def test_ndarray_gbrapf32() -> None: + array = numpy.random.random_sample(size=(480, 640, 4)).astype(numpy.float32) + for format in ("gbrapf32be", "gbrapf32le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 640 and frame.height == 480 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + + +def test_ndarray_gbrapf32_allign() -> None: + array = numpy.random.random_sample(size=(238, 318, 4)).astype(numpy.float32) + for format in ("gbrapf32be", "gbrapf32le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert frame.width == 318 and frame.height == 238 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + + def test_ndarray_yuv420p() -> None: array = numpy.random.randint(0, 256, size=(720, 640), dtype=numpy.uint8) frame = VideoFrame.from_ndarray(array, format="yuv420p")