Support grayf32le and gbrapf32le in numpy conversion

PyAV-Org · Jan 15, 2025 · 6bee799 · 6bee799
1 parent d527571
commit 6bee799
Show file tree

Hide file tree

Showing 3 changed files with 84 additions and 16 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -24,6 +24,7 @@ Features
 
 - Add hardware decoding by :gh-user:`matthewlai` and :gh-user:`WyattBlue` in (:pr:`1685`).
 - Add ``VideoFrame.rotation`` by :gh-user:`lgeiger` in (:pr:`1675`).
+- Support grayf32le and gbrapf32le in numpy conversion by :gh-user:`robinechuca` in (:pr:`1712`).
 
 
 v14.0.1

diff --git a/av/video/frame.pyx b/av/video/frame.pyx
@@ -295,6 +295,8 @@ cdef class VideoFrame(Frame):
         .. note:: For ``pal8``, an ``(image, palette)`` tuple will be returned,
         with the palette being in ARGB (PyAV will swap bytes if needed).
 
+        .. note:: For ``gbrp`` formats, channels are flipped to RGB order.
+
         """
         cdef VideoFrame frame = self.reformat(**kwargs)
 
@@ -312,29 +314,36 @@ cdef class VideoFrame(Frame):
             return np.hstack((
                 useful_array(frame.planes[0]),
                 useful_array(frame.planes[1]),
-                useful_array(frame.planes[2])
+                useful_array(frame.planes[2]),
             )).reshape(-1, frame.height, frame.width)
         elif frame.format.name == "yuyv422":
             assert frame.width % 2 == 0
             assert frame.height % 2 == 0
             return useful_array(frame.planes[0], 2).reshape(frame.height, frame.width, -1)
         elif frame.format.name == "gbrp":
             array = np.empty((frame.height, frame.width, 3), dtype="uint8")
-            array[:, :, 0] = useful_array(frame.planes[2], 1).reshape(-1, frame.width)
-            array[:, :, 1] = useful_array(frame.planes[0], 1).reshape(-1, frame.width)
-            array[:, :, 2] = useful_array(frame.planes[1], 1).reshape(-1, frame.width)
+            array[:, :, 0] = useful_array(frame.planes[2], 1).reshape(frame.height, frame.width)
+            array[:, :, 1] = useful_array(frame.planes[0], 1).reshape(frame.height, frame.width)
+            array[:, :, 2] = useful_array(frame.planes[1], 1).reshape(frame.height, frame.width)
             return array
         elif frame.format.name in ("gbrp10be", "gbrp12be", "gbrp14be", "gbrp16be", "gbrp10le", "gbrp12le", "gbrp14le", "gbrp16le"):
             array = np.empty((frame.height, frame.width, 3), dtype="uint16")
-            array[:, :, 0] = useful_array(frame.planes[2], 2, "uint16").reshape(-1, frame.width)
-            array[:, :, 1] = useful_array(frame.planes[0], 2, "uint16").reshape(-1, frame.width)
-            array[:, :, 2] = useful_array(frame.planes[1], 2, "uint16").reshape(-1, frame.width)
+            array[:, :, 0] = useful_array(frame.planes[2], 2, "uint16").reshape(frame.height, frame.width)
+            array[:, :, 1] = useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width)
+            array[:, :, 2] = useful_array(frame.planes[1], 2, "uint16").reshape(frame.height, frame.width)
             return byteswap_array(array, frame.format.name.endswith("be"))
         elif frame.format.name in ("gbrpf32be", "gbrpf32le"):
             array = np.empty((frame.height, frame.width, 3), dtype="float32")
-            array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(-1, frame.width)
-            array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(-1, frame.width)
-            array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(-1, frame.width)
+            array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width)
+            array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width)
+            array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width)
+            return byteswap_array(array, frame.format.name.endswith("be"))
+        elif frame.format.name in ("gbrapf32be", "gbrapf32le"):
+            array = np.empty((frame.height, frame.width, 4), dtype="float32")
+            array[:, :, 0] = useful_array(frame.planes[2], 4, "float32").reshape(frame.height, frame.width)
+            array[:, :, 1] = useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width)
+            array[:, :, 2] = useful_array(frame.planes[1], 4, "float32").reshape(frame.height, frame.width)
+            array[:, :, 3] = useful_array(frame.planes[3], 4, "float32").reshape(frame.height, frame.width)
             return byteswap_array(array, frame.format.name.endswith("be"))
         elif frame.format.name in ("rgb24", "bgr24"):
             return useful_array(frame.planes[0], 3).reshape(frame.height, frame.width, -1)
@@ -345,17 +354,22 @@ cdef class VideoFrame(Frame):
         elif frame.format.name in ("gray16be", "gray16le"):
             return byteswap_array(
                 useful_array(frame.planes[0], 2, "uint16").reshape(frame.height, frame.width),
-                frame.format.name == "gray16be",
+                frame.format.name.endswith("be"),
+            )
+        elif frame.format.name in ("grayf32be", "grayf32le"):
+            return byteswap_array(
+                useful_array(frame.planes[0], 4, "float32").reshape(frame.height, frame.width),
+                frame.format.name.endswith("be"),
             )
         elif frame.format.name in ("rgb48be", "rgb48le"):
             return byteswap_array(
                 useful_array(frame.planes[0], 6, "uint16").reshape(frame.height, frame.width, -1),
-                frame.format.name == "rgb48be",
+                frame.format.name.endswith("be"),
             )
         elif frame.format.name in ("rgba64be", "rgba64le"):
             return byteswap_array(
                 useful_array(frame.planes[0], 8, "uint16").reshape(frame.height, frame.width, -1),
-                frame.format.name == "rgba64be",
+                frame.format.name.endswith("be"),
             )
         elif frame.format.name == "pal8":
             image = useful_array(frame.planes[0]).reshape(frame.height, frame.width)
@@ -491,6 +505,8 @@ cdef class VideoFrame(Frame):
         must be in the system's native byte order.
 
         .. note:: for ``pal8``, an ``(image, palette)`` pair must be passed. `palette` must have shape (256, 4) and is given in ARGB format (PyAV will swap bytes if needed).
+
+        .. note:: for ``gbrp`` formats, channels are assumed to be given in RGB order.
         """
         if format == "pal8":
             array, palette = array
@@ -568,19 +584,34 @@ cdef class VideoFrame(Frame):
         elif format in ("gray16be", "gray16le"):
             check_ndarray(array, "uint16", 2)
             frame = VideoFrame(array.shape[1], array.shape[0], format)
-            copy_array_to_plane(byteswap_array(array, format == "gray16be"), frame.planes[0], 2)
+            copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 2)
+            return frame
+        elif format in ("grayf32be", "grayf32le"):
+            check_ndarray(array, "float32", 2)
+            frame = VideoFrame(array.shape[1], array.shape[0], format)
+            copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 4)
             return frame
         elif format in ("rgb48be", "rgb48le"):
             check_ndarray(array, "uint16", 3)
             check_ndarray_shape(array, array.shape[2] == 3)
             frame = VideoFrame(array.shape[1], array.shape[0], format)
-            copy_array_to_plane(byteswap_array(array, format == "rgb48be"), frame.planes[0], 6)
+            copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 6)
             return frame
         elif format in ("rgba64be", "rgba64le"):
             check_ndarray(array, "uint16", 3)
             check_ndarray_shape(array, array.shape[2] == 4)
             frame = VideoFrame(array.shape[1], array.shape[0], format)
-            copy_array_to_plane(byteswap_array(array, format == "rgba64be"), frame.planes[0], 8)
+            copy_array_to_plane(byteswap_array(array, format.endswith("be")), frame.planes[0], 8)
+            return frame
+        elif format in ("gbrapf32be", "gbrapf32le"):
+            check_ndarray(array, "float32", 3)
+            check_ndarray_shape(array, array.shape[2] == 4)
+
+            frame = VideoFrame(array.shape[1], array.shape[0], format)
+            copy_array_to_plane(byteswap_array(array[:, :, 1], format.endswith("be")), frame.planes[0], 4)
+            copy_array_to_plane(byteswap_array(array[:, :, 2], format.endswith("be")), frame.planes[1], 4)
+            copy_array_to_plane(byteswap_array(array[:, :, 0], format.endswith("be")), frame.planes[2], 4)
+            copy_array_to_plane(byteswap_array(array[:, :, 3], format.endswith("be")), frame.planes[3], 4)
             return frame
         elif format == "nv12":
             check_ndarray(array, "uint8", 2)

diff --git a/tests/test_videoframe.py b/tests/test_videoframe.py
@@ -223,6 +223,24 @@ def test_ndarray_gray_align() -> None:
         assertNdarraysEqual(frame.to_ndarray(), array)
 
 
+def test_ndarray_grayf32() -> None:
+    array = numpy.random.random_sample(size=(480, 640)).astype(numpy.float32)
+    for format in ("grayf32be", "grayf32le"):
+        frame = VideoFrame.from_ndarray(array, format=format)
+        assert frame.width == 640 and frame.height == 480
+        assert frame.format.name == format
+        assertNdarraysEqual(frame.to_ndarray(), array)
+
+
+def test_ndarray_grayf32_align() -> None:
+    array = numpy.random.random_sample(size=(238, 318)).astype(numpy.float32)
+    for format in ("grayf32be", "grayf32le"):
+        frame = VideoFrame.from_ndarray(array, format=format)
+        assert frame.width == 318 and frame.height == 238
+        assert frame.format.name == format
+        assertNdarraysEqual(frame.to_ndarray(), array)
+
+
 def test_ndarray_rgb() -> None:
     array = numpy.random.randint(0, 256, size=(480, 640, 3), dtype=numpy.uint8)
     for format in ("rgb24", "bgr24"):
@@ -365,6 +383,24 @@ def test_ndarray_gbrpf32_align() -> None:
         assertNdarraysEqual(frame.to_ndarray(), array)
 
 
+def test_ndarray_gbrapf32() -> None:
+    array = numpy.random.random_sample(size=(480, 640, 4)).astype(numpy.float32)
+    for format in ("gbrapf32be", "gbrapf32le"):
+        frame = VideoFrame.from_ndarray(array, format=format)
+        assert frame.width == 640 and frame.height == 480
+        assert frame.format.name == format
+        assertNdarraysEqual(frame.to_ndarray(), array)
+
+
+def test_ndarray_gbrapf32_allign() -> None:
+    array = numpy.random.random_sample(size=(238, 318, 4)).astype(numpy.float32)
+    for format in ("gbrapf32be", "gbrapf32le"):
+        frame = VideoFrame.from_ndarray(array, format=format)
+        assert frame.width == 318 and frame.height == 238
+        assert frame.format.name == format
+        assertNdarraysEqual(frame.to_ndarray(), array)
+
+
 def test_ndarray_yuv420p() -> None:
     array = numpy.random.randint(0, 256, size=(720, 640), dtype=numpy.uint8)
     frame = VideoFrame.from_ndarray(array, format="yuv420p")
-Original file line number
+Diff line change
@@ Expand Up / @@ -24,6 +24,7 @@ Features @@
     - Add hardware decoding by :gh-user:`matthewlai` and :gh-user:`WyattBlue` in (:pr:`1685`).
     - Add ``VideoFrame.rotation`` by :gh-user:`lgeiger` in (:pr:`1675`).
+    - Support grayf32le and gbrapf32le in numpy conversion by :gh-user:`robinechuca` in (:pr:`1712`).
     v14.0.1
@@ Expand Down @@