Skip to content

Commit

Permalink
Use PyAV for atempo, volume filters
Browse files Browse the repository at this point in the history
  • Loading branch information
WyattBlue committed Sep 18, 2024
1 parent ee492f2 commit 0bf09fb
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 50 deletions.
107 changes: 69 additions & 38 deletions auto_editor/render/audio.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import annotations

import io
from pathlib import Path
from platform import system
from subprocess import PIPE

import av
import numpy as np

from auto_editor.ffwrapper import FFmpeg, FileInfo
Expand All @@ -12,12 +14,12 @@
from auto_editor.lib.contracts import andc, between_c, is_int_or_float
from auto_editor.lib.err import MyError
from auto_editor.output import Ensure
from auto_editor.timeline import v3
from auto_editor.timeline import TlAudio, v3
from auto_editor.utils.bar import Bar
from auto_editor.utils.cmdkw import ParserError, parse_with_palet, pAttr, pAttrs
from auto_editor.utils.log import Log
from auto_editor.utils.types import Args
from auto_editor.wavfile import AudioData, read, write
from auto_editor.wavfile import AudioData, read, read_fid, write

norm_types = {
"ebu": pAttrs(
Expand Down Expand Up @@ -165,6 +167,68 @@ def apply_audio_normalization(
ffmpeg.run(["-i", f"{pre_master}"] + cmd + [f"{path}"])


def _atempo_factors(speed: float) -> list[float]:
    """Split ``speed`` into factors that each fit one ``atempo`` filter.

    The product of the returned factors equals ``speed``. A single filter
    instance is only used for factors between 0.5 and 100, so faster speeds
    are split into equal roots and slower speeds into repeated halvings
    followed by the remainder.

    Raises:
        ValueError: if ``speed`` is not positive (no finite chain of
            halvings can ever reach it).
    """
    if speed <= 0:
        raise ValueError(f"clip speed must be positive, got {speed}")

    if speed > 10_000:
        return [speed ** (1 / 3)] * 3
    if speed > 100:
        return [speed**0.5] * 2
    if speed >= 0.5:
        return [speed]

    # Every 0.5 stage emitted must be divided out of what is left; otherwise
    # the chain ends up twice as fast as requested.
    factors: list[float] = []
    remaining = float(speed)
    while remaining < 0.5:
        factors.append(0.5)
        remaining *= 2
    factors.append(remaining)
    return factors


def process_audio_clip(
    clip: TlAudio, samp_list: AudioData, samp_start: int, samp_end: int, sr: int
) -> AudioData:
    """Apply a clip's speed and volume to a slice of samples with PyAV filters.

    The slice ``samp_list[samp_start:samp_end]`` is serialized to an in-memory
    WAV, decoded by PyAV, pushed through an ``atempo``/``volume`` filter graph,
    re-encoded as ``pcm_s16le`` WAV into memory, and parsed back into an array.

    Args:
        clip: Timeline clip; only ``clip.speed`` and ``clip.volume`` are read.
        samp_list: Source samples for the clip's track.
        samp_start: First sample index of the slice (inclusive).
        samp_end: Last sample index of the slice (exclusive).
        sr: Sample rate used for both the temporary input and the output.

    Returns:
        The processed samples as parsed by ``read_fid`` from the encoded WAV.

    Raises:
        ValueError: if ``clip.speed`` is not positive.
    """
    input_buffer = io.BytesIO()
    write(input_buffer, sr, samp_list[samp_start:samp_end])
    input_buffer.seek(0)

    output_bytes = io.BytesIO()

    # Context managers guarantee both containers are closed even when
    # decoding, filtering or encoding raises part-way through.
    with av.open(input_buffer, "r") as input_file:
        with av.open(output_bytes, mode="w", format="wav") as output_file:
            input_stream = input_file.streams.audio[0]

            output_stream = output_file.add_stream("pcm_s16le", rate=sr)
            assert isinstance(output_stream, av.audio.AudioStream)

            graph = av.filter.Graph()
            args = [graph.add_abuffer(template=input_stream)]

            if clip.speed != 1:
                args.extend(
                    graph.add("atempo", f"{factor}")
                    for factor in _atempo_factors(clip.speed)
                )

            if clip.volume != 1:
                args.append(graph.add("volume", f"{clip.volume}"))

            args.append(graph.add("abuffersink"))
            graph.link_nodes(*args).configure()

            # NOTE(review): the graph is never sent an end-of-stream, so any
            # samples a filter still buffers after the last frame may be
            # dropped -- confirm whether a final flush is needed.
            for frame in input_file.decode(input_stream):
                graph.push(frame)
                # Drain everything the graph can produce for this frame.
                while True:
                    try:
                        aframe = graph.pull()
                        assert isinstance(aframe, av.audio.AudioFrame)
                        for packet in output_stream.encode(aframe):
                            output_file.mux(packet)
                    except (av.BlockingIOError, av.EOFError):
                        break

            # Flush the stream
            for packet in output_stream.encode(None):
                output_file.mux(packet)

    # The output container is closed at this point, so the WAV is complete.
    output_bytes.seek(0)
    return read_fid(output_bytes)[1]


def make_new_audio(
tl: v3, ensure: Ensure, args: Args, ffmpeg: FFmpeg, bar: Bar, log: Log
) -> list[str]:
Expand All @@ -175,7 +239,6 @@ def make_new_audio(

norm = parse_norm(args.audio_normalize, log)

af_tick = 0
temp = log.temp

if not tl.a or not tl.a[0]:
Expand Down Expand Up @@ -214,42 +277,10 @@ def make_new_audio(
if samp_end > len(samp_list):
samp_end = len(samp_list)

filters: list[str] = []

if clip.speed != 1:
if clip.speed > 10_000:
filters.extend([f"atempo={clip.speed}^.33333"] * 3)
elif clip.speed > 100:
filters.extend(
[f"atempo=sqrt({clip.speed})", f"atempo=sqrt({clip.speed})"]
)
elif clip.speed >= 0.5:
filters.append(f"atempo={clip.speed}")
else:
start = 0.5
while start * 0.5 > clip.speed:
start *= 0.5
filters.append("atempo=0.5")
filters.append(f"atempo={clip.speed / start}")

if clip.volume != 1:
filters.append(f"volume={clip.volume}")

if not filters:
clip_arr = samp_list[samp_start:samp_end]
if clip.speed != 1 or clip.volume != 1:
clip_arr = process_audio_clip(clip, samp_list, samp_start, samp_end, sr)
else:
af = Path(temp, f"af{af_tick}.wav")
af_out = Path(temp, f"af{af_tick}_out.wav")

# Windows can't replace a file that's already in use, so we have to
# cycle through file names.
af_tick = (af_tick + 1) % 3

with open(af, "wb") as fid:
write(fid, sr, samp_list[samp_start:samp_end])

ffmpeg.run(["-i", f"{af}", "-af", ",".join(filters), f"{af_out}"])
clip_arr = read(f"{af_out}")[1]
clip_arr = samp_list[samp_start:samp_end]

# Mix numpy arrays
start = clip.start * sr // tb
Expand Down
37 changes: 25 additions & 12 deletions auto_editor/wavfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import io
import struct
import sys
from typing import Literal
from typing import TYPE_CHECKING, Literal

import numpy as np

Expand All @@ -15,13 +15,17 @@
Endian = Literal[">", "<"] # Big Endian, Little Endian
ByteOrd = Literal["big", "little"]

if TYPE_CHECKING:
Reader = io.BufferedReader | io.BytesIO
Writer = io.BufferedWriter | io.BytesIO


class WavError(Exception):
    """Error raised by the WAV reader, e.g. when no data is found."""


def _read_fmt_chunk(
fid: io.BufferedReader, bytes_order: ByteOrd
fid: Reader, bytes_order: ByteOrd
) -> tuple[int, int, int, int, int]:
size = int.from_bytes(fid.read(4), bytes_order)

Expand Down Expand Up @@ -69,7 +73,7 @@ def _read_fmt_chunk(


def _read_data_chunk(
fid: io.BufferedReader,
fid: Reader,
format_tag: int,
channels: int,
bit_depth: int,
Expand Down Expand Up @@ -114,16 +118,22 @@ def _read_data_chunk(
else:
n_samples = (size - 1) // block_align

data = np.memmap(
fid, dtype=dtype, mode="c", offset=fid.tell(), shape=(n_samples, channels)
)
fid.seek(size, 1)
if isinstance(fid, io.BufferedReader):
data: AudioData = np.memmap(
fid, dtype=dtype, mode="c", offset=fid.tell(), shape=(n_samples, channels)
)
fid.seek(size, 1)
else:
bytes_per_sample = np.dtype(dtype).itemsize
buffer = fid.read(n_samples * channels * bytes_per_sample)
data = np.frombuffer(buffer, dtype=dtype).reshape((n_samples, channels))

_handle_pad_byte(fid, size)

return data


def _skip_unknown_chunk(fid: io.BufferedReader, en: Endian) -> None:
def _skip_unknown_chunk(fid: Reader, en: Endian) -> None:
data = fid.read(4)

if len(data) == 4:
Expand All @@ -140,7 +150,7 @@ def _skip_unknown_chunk(fid: io.BufferedReader, en: Endian) -> None:
)


def _read_rf64_chunk(fid: io.BufferedReader) -> tuple[int, int, Endian]:
def _read_rf64_chunk(fid: Reader) -> tuple[int, int, Endian]:
# https://tech.ebu.ch/docs/tech/tech3306v1_0.pdf
# https://www.itu.int/dms_pubrec/itu-r/rec/bs/R-REC-BS.2088-1-201910-I!!PDF-E.pdf

Expand Down Expand Up @@ -171,7 +181,7 @@ def _read_rf64_chunk(fid: io.BufferedReader) -> tuple[int, int, Endian]:
return data_size, file_size, en


def _read_riff_chunk(sig: bytes, fid: io.BufferedReader) -> tuple[None, int, Endian]:
def _read_riff_chunk(sig: bytes, fid: Reader) -> tuple[None, int, Endian]:
en: Endian = "<" if sig == b"RIFF" else ">"
bytes_order: ByteOrd = "big" if en == ">" else "little"

Expand All @@ -184,14 +194,17 @@ def _read_riff_chunk(sig: bytes, fid: io.BufferedReader) -> tuple[None, int, End
return None, file_size, en


def _handle_pad_byte(fid: io.BufferedReader, size: int) -> None:
def _handle_pad_byte(fid: Reader, size: int) -> None:
    """Move ``fid`` one byte forward when ``size`` is odd; otherwise do nothing."""
    size_is_odd = size % 2 == 1
    if not size_is_odd:
        return
    # Relative seek: step over the single byte following an odd-sized chunk.
    fid.seek(1, io.SEEK_CUR)


def read(filename: str) -> tuple[int, AudioData]:
    """Read the WAV file at ``filename``.

    Opens the file in binary mode and delegates parsing to ``read_fid``,
    returning its result unchanged.
    """
    # Close the handle once parsing is done instead of leaking it. The sample
    # array may be an ``np.memmap``, whose mapping does not depend on this
    # Python file object staying open.
    with open(filename, "rb") as fid:
        return read_fid(fid)


def read_fid(fid: Reader) -> tuple[int, AudioData]:
file_sig = fid.read(4)
if file_sig in (b"RIFF", b"RIFX"):
data_size, file_size, en = _read_riff_chunk(file_sig, fid)
Expand Down Expand Up @@ -241,7 +254,7 @@ def read(filename: str) -> tuple[int, AudioData]:
raise WavError("Found no data")


def write(fid: io.BufferedWriter, sr: int, arr: np.ndarray) -> None:
def write(fid: Writer, sr: int, arr: np.ndarray) -> None:
channels = 1 if arr.ndim == 1 else arr.shape[1]
bit_depth = arr.dtype.itemsize * 8
block_align = channels * (bit_depth // 8)
Expand Down

0 comments on commit 0bf09fb

Please sign in to comment.