From bdacaac87902eeb4401e0fe5ed4f822e435071d4 Mon Sep 17 00:00:00 2001 From: Ulrik Mikaelsson Date: Sat, 25 Nov 2023 10:26:10 +0100 Subject: [PATCH] Add encode_lazy method to CodecContext Some codecs (VP9) can both buffer _many_ frames, and take a long time encoding each frame. Accumulated, the last `encode(None)`-flush can end up taking a long time (>30s), without detectable progress. FFmpeg and many encoders themselves output one frame at a time, but PyAV currently buffers them all up into the returned lists. This change adds an `encode_lazy` method yielding frames as they are made ready. The change was benchmarked to also yield a net performance improvement. For both `encode()` and `encode_lazy`, encoding really small (24x18) frames using the `mpeg4` encoder seems to take ~11% less time. --- av/codec/context.pxd | 2 +- av/codec/context.pyx | 28 ++++++++++++++++------------ tests/test_encode.py | 4 ++-- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/av/codec/context.pxd b/av/codec/context.pxd index 6cc8bd899..4a59b106e 100644 --- a/av/codec/context.pxd +++ b/av/codec/context.pxd @@ -40,6 +40,7 @@ cdef class CodecContext: # Used by both transcode APIs to setup user-land objects. # TODO: Remove the `Packet` from `_setup_decoded_frame` (because flushing # packets are bogus). It should take all info it needs from the context and/or stream. + cdef _prepare_and_time_rebase_frames_for_encode(self, Frame frame) cdef _prepare_frames_for_encode(self, Frame frame) cdef _setup_encoded_packet(self, Packet) cdef _setup_decoded_frame(self, Frame, Packet) @@ -50,7 +51,6 @@ cdef class CodecContext: # resampling audio to a higher rate but with fixed size frames), and the # send/recv buffer may be limited to a single frame. Ergo, we need to flush # the buffer as often as possible. 
- cdef _send_frame_and_recv(self, Frame frame) cdef _recv_packet(self) cdef _send_packet_and_recv(self, Packet packet) cdef _recv_frame(self) diff --git a/av/codec/context.pyx b/av/codec/context.pyx index bc8b35d57..112ab32e7 100644 --- a/av/codec/context.pyx +++ b/av/codec/context.pyx @@ -388,7 +388,7 @@ cdef class CodecContext: return packets - cdef _send_frame_and_recv(self, Frame frame): + def _send_frame_and_recv(self, Frame frame): cdef Packet packet @@ -397,14 +397,10 @@ cdef class CodecContext: res = lib.avcodec_send_frame(self.ptr, frame.ptr if frame is not None else NULL) err_check(res) - out = [] - while True: + packet = self._recv_packet() + while packet: + yield packet packet = self._recv_packet() - if packet: - out.append(packet) - else: - break - return out cdef _send_packet_and_recv(self, Packet packet): @@ -462,9 +458,7 @@ cdef class CodecContext: if not res: return packet - cpdef encode(self, Frame frame=None): - """Encode a list of :class:`.Packet` from the given :class:`.Frame`.""" - + cdef _prepare_and_time_rebase_frames_for_encode(self, Frame frame): if self.ptr.codec_type not in [lib.AVMEDIA_TYPE_VIDEO, lib.AVMEDIA_TYPE_AUDIO]: raise NotImplementedError('Encoding is only supported for audio and video.') @@ -478,13 +472,23 @@ cdef class CodecContext: if frame is not None: frame._rebase_time(self.ptr.time_base) + return frames + + cpdef encode(self, Frame frame=None): + """Encode a list of :class:`.Packet` from the given :class:`.Frame`.""" res = [] - for frame in frames: + for frame in self._prepare_and_time_rebase_frames_for_encode(frame): for packet in self._send_frame_and_recv(frame): self._setup_encoded_packet(packet) res.append(packet) return res + def encode_lazy(self, Frame frame=None): + for frame in self._prepare_and_time_rebase_frames_for_encode(frame): + for packet in self._send_frame_and_recv(frame): + self._setup_encoded_packet(packet) + yield packet + cdef _setup_encoded_packet(self, Packet packet): # We coerced the frame's 
time_base into the CodecContext's during encoding, # and FFmpeg copied the frame's pts/dts to the packet, so keep track of diff --git a/tests/test_encode.py b/tests/test_encode.py index e79bf539f..a9690bb5e 100644 --- a/tests/test_encode.py +++ b/tests/test_encode.py @@ -57,10 +57,10 @@ def write_rgb_rotate(output): ) frame.planes[0].update(image.tobytes()) - for packet in stream.encode(frame): + for packet in stream.encode_lazy(frame): output.mux(packet) - for packet in stream.encode(None): + for packet in stream.encode_lazy(None): output.mux(packet)