From 172d8a80461e244366c4e8f3996d1a681e2f1279 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Sun, 23 Jul 2023 10:48:09 -0400 Subject: [PATCH] Create a dedicated code path to convert YUV frames to numpy for speed This avoids the use of hstack, which inevitably copies the data to a new memory location. The speedup is small, but measurable. I can go from 185 fps decoding of a ~3000 x 2000 video to 200 fps decoding. --- av/video/frame.pyx | 22 ++++++++++++++++------ av/video/plane.pxd | 4 ++++ av/video/plane.pyx | 17 +++++++++++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/av/video/frame.pyx b/av/video/frame.pyx index 84a74a018..b294531b4 100644 --- a/av/video/frame.pyx +++ b/av/video/frame.pyx @@ -6,7 +6,7 @@ from av.enum cimport define_enum from av.error cimport err_check from av.utils cimport check_ndarray, check_ndarray_shape from av.video.format cimport get_pix_fmt, get_video_format -from av.video.plane cimport VideoPlane +from av.video.plane cimport VideoPlane, YUVPlanes cdef object _cinit_bypass_sentinel @@ -265,11 +265,21 @@ cdef class VideoFrame(Frame): if frame.format.name in ('yuv420p', 'yuvj420p'): assert frame.width % 2 == 0 assert frame.height % 2 == 0 - return np.hstack(( - useful_array(frame.planes[0]), - useful_array(frame.planes[1]), - useful_array(frame.planes[2]) - )).reshape(-1, frame.width) + # Fast path for the case that the entire YUV data is contiguous + if ( + frame.planes[0].line_size == frame.planes[0].width and + frame.planes[1].line_size == frame.planes[1].width and + frame.planes[2].line_size == frame.planes[2].width + ): + yuv_planes = YUVPlanes(frame, 0) + return useful_array(yuv_planes).reshape(frame.height * 3 // 2, frame.width) + else: + # Otherwise, we need to copy the data through the use of np.hstack + return np.hstack(( + useful_array(frame.planes[0]), + useful_array(frame.planes[1]), + useful_array(frame.planes[2]) + )).reshape(-1, frame.width) elif frame.format.name in ('yuv444p', 'yuvj444p'): 
return np.hstack(( useful_array(frame.planes[0]), diff --git a/av/video/plane.pxd b/av/video/plane.pxd index f9abf22b6..a74eea206 100644 --- a/av/video/plane.pxd +++ b/av/video/plane.pxd @@ -6,3 +6,7 @@ cdef class VideoPlane(Plane): cdef readonly size_t buffer_size cdef readonly unsigned int width, height + + +cdef class YUVPlanes(VideoPlane): + pass diff --git a/av/video/plane.pyx b/av/video/plane.pyx index 6f1286ca3..3d0a523b0 100644 --- a/av/video/plane.pyx +++ b/av/video/plane.pyx @@ -37,3 +37,20 @@ cdef class VideoPlane(Plane): """ def __get__(self): return self.frame.ptr.linesize[self.index] + + +cdef class YUVPlanes(VideoPlane): + def __cinit__(self, VideoFrame frame, int index): + if index != 0: + raise RuntimeError("YUVPlanes only supports index 0") + if frame.format.name not in ['yuvj420p', 'yuv420p']: + raise RuntimeError("YUVPlane only supports yuv420p and yuvj420p") + if frame.ptr.linesize[0] < 0: + raise RuntimeError("YUVPlane only supports positive linesize") + self.width = frame.width + self.height = frame.height * 3 // 2 + self.buffer_size = self.height * abs(self.frame.ptr.linesize[0]) + self.frame = frame + + cdef void* _buffer_ptr(self): + return self.frame.ptr.extended_data[self.index]