Skip to content

Commit

Permalink
Use PyAV for atempo, volume filters
Browse files Browse the repository at this point in the history
  • Loading branch information
WyattBlue committed Sep 18, 2024
1 parent ee492f2 commit 393db50
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 42 deletions.
113 changes: 75 additions & 38 deletions auto_editor/render/audio.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import annotations

import io
from pathlib import Path
from platform import system
from subprocess import PIPE

import av
import numpy as np

from auto_editor.ffwrapper import FFmpeg, FileInfo
Expand All @@ -12,12 +14,12 @@
from auto_editor.lib.contracts import andc, between_c, is_int_or_float
from auto_editor.lib.err import MyError
from auto_editor.output import Ensure
from auto_editor.timeline import v3
from auto_editor.timeline import TlAudio, v3
from auto_editor.utils.bar import Bar
from auto_editor.utils.cmdkw import ParserError, parse_with_palet, pAttr, pAttrs
from auto_editor.utils.log import Log
from auto_editor.utils.types import Args
from auto_editor.wavfile import AudioData, read, write
from auto_editor.wavfile import AudioData, read, read_fid, write

norm_types = {
"ebu": pAttrs(
Expand Down Expand Up @@ -165,6 +167,72 @@ def apply_audio_normalization(
ffmpeg.run(["-i", f"{pre_master}"] + cmd + [f"{path}"])


def _atempo_factors(speed: float) -> list[str]:
    """Split ``speed`` into a chain of factors for FFmpeg's ``atempo`` filter.

    A single ``atempo`` instance only accepts a tempo between 0.5 and 100,
    so speeds outside that range are expressed as several chained filters
    whose factors multiply to ``speed``.

    Returns an empty list when ``speed`` is 1 (no filter needed).
    Raises ``ValueError`` for a non-positive speed, which cannot be
    expressed as a product of positive factors.
    """
    if speed == 1:
        return []
    if speed <= 0:
        raise ValueError(f"speed must be positive, got {speed}")
    if speed > 10_000:
        return [f"{speed ** (1 / 3)}"] * 3
    if speed > 100:
        return [f"{speed ** 0.5}"] * 2
    if speed >= 0.5:
        return [f"{speed}"]

    # Below 0.5: peel off "0.5" stages until the remainder fits in
    # [0.5, 1).  Every halving stage must be emitted, including the first
    # one, otherwise the chain's product is twice the requested speed.
    factors: list[str] = []
    remaining = speed
    while remaining < 0.5:
        factors.append("0.5")
        remaining *= 2  # exact in binary floating point
    factors.append(f"{remaining}")
    return factors


def process_audio_clip(
    clip: TlAudio,
    samp_list: AudioData,
    samp_start: int,
    samp_end: int,
    sr: int,
    temp: str,
) -> AudioData:
    """Apply a clip's speed and volume to a slice of samples using PyAV.

    The slice ``samp_list[samp_start:samp_end]`` is written to
    ``<temp>/input.wav``, decoded, run through an ``atempo``/``volume``
    filter graph, re-encoded as 16-bit PCM WAV into memory, and read back.

    Parameters:
        clip: timeline clip; only ``clip.speed`` and ``clip.volume`` are read.
        samp_list: source samples to slice from.
        samp_start, samp_end: slice bounds into ``samp_list``.
        sr: sample rate used for both the temporary input and the output.
        temp: directory in which the temporary ``input.wav`` is created.

    Returns:
        The second element of ``read_fid``'s result for the filtered audio
        (presumably the sample array; the first is presumably the rate).

    Raises:
        ValueError: if ``clip.speed`` is not positive.
    """
    input_path = Path(temp, "input.wav")

    with open(input_path, "wb") as fid:
        write(fid, sr, samp_list[samp_start:samp_end])

    output_bytes = io.BytesIO()

    # Context managers guarantee both containers are closed even when
    # decoding, filtering or encoding raises, so ``input.wav`` is never left
    # open (an open file cannot be replaced on Windows).
    with av.open(input_path, "r") as input_file, av.open(
        output_bytes, mode="w", format="wav"
    ) as output_file:
        input_stream = input_file.streams.audio[0]
        output_stream = output_file.add_stream("pcm_s16le", rate=sr)
        assert isinstance(output_stream, av.audio.AudioStream)

        graph = av.filter.Graph()
        nodes = [graph.add_abuffer(template=input_stream)]
        nodes.extend(
            graph.add("atempo", factor) for factor in _atempo_factors(clip.speed)
        )
        if clip.volume != 1:
            nodes.append(graph.add("volume", f"{clip.volume}"))
        nodes.append(graph.add("abuffersink"))
        graph.link_nodes(*nodes).configure()

        for frame in input_file.decode(input_stream):
            graph.push(frame)
            # Drain every frame the graph can currently produce before
            # feeding it more input.
            while True:
                try:
                    for packet in output_stream.encode(graph.pull()):
                        output_file.mux(packet)
                except (av.BlockingIOError, av.EOFError):
                    break

        # NOTE(review): only the encoder is flushed here, not the filter
        # graph; confirm whether atempo can hold back trailing samples.
        for packet in output_stream.encode(None):
            output_file.mux(packet)

    # The container has been closed (trailer written); rewind and parse.
    output_bytes.seek(0)
    return read_fid(io.BufferedReader(output_bytes))[1]


def make_new_audio(
tl: v3, ensure: Ensure, args: Args, ffmpeg: FFmpeg, bar: Bar, log: Log
) -> list[str]:
Expand All @@ -175,7 +243,6 @@ def make_new_audio(

norm = parse_norm(args.audio_normalize, log)

af_tick = 0
temp = log.temp

if not tl.a or not tl.a[0]:
Expand Down Expand Up @@ -214,42 +281,12 @@ def make_new_audio(
if samp_end > len(samp_list):
samp_end = len(samp_list)

filters: list[str] = []

if clip.speed != 1:
if clip.speed > 10_000:
filters.extend([f"atempo={clip.speed}^.33333"] * 3)
elif clip.speed > 100:
filters.extend(
[f"atempo=sqrt({clip.speed})", f"atempo=sqrt({clip.speed})"]
)
elif clip.speed >= 0.5:
filters.append(f"atempo={clip.speed}")
else:
start = 0.5
while start * 0.5 > clip.speed:
start *= 0.5
filters.append("atempo=0.5")
filters.append(f"atempo={clip.speed / start}")

if clip.volume != 1:
filters.append(f"volume={clip.volume}")

if not filters:
clip_arr = samp_list[samp_start:samp_end]
if clip.speed != 1 or clip.volume != 1:
clip_arr = process_audio_clip(
clip, samp_list, samp_start, samp_end, sr, temp
)
else:
af = Path(temp, f"af{af_tick}.wav")
af_out = Path(temp, f"af{af_tick}_out.wav")

# Windows can't replace a file that's already in use, so we have to
# cycle through file names.
af_tick = (af_tick + 1) % 3

with open(af, "wb") as fid:
write(fid, sr, samp_list[samp_start:samp_end])

ffmpeg.run(["-i", f"{af}", "-af", ",".join(filters), f"{af_out}"])
clip_arr = read(f"{af_out}")[1]
clip_arr = samp_list[samp_start:samp_end]

# Mix numpy arrays
start = clip.start * sr // tb
Expand Down
25 changes: 21 additions & 4 deletions auto_editor/wavfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,24 @@ def _read_data_chunk(
else:
n_samples = (size - 1) // block_align

data = np.memmap(
fid, dtype=dtype, mode="c", offset=fid.tell(), shape=(n_samples, channels)
)
fid.seek(size, 1)
assert isinstance(fid, io.BufferedIOBase)

try:
fid.fileno()
is_file = True
except io.UnsupportedOperation:
is_file = False

if is_file:
data: AudioData = np.memmap(
fid, dtype=dtype, mode="c", offset=fid.tell(), shape=(n_samples, channels)
)
fid.seek(size, 1)
else:
bytes_per_sample = np.dtype(dtype).itemsize
buffer = fid.read(n_samples * channels * bytes_per_sample)
data = np.frombuffer(buffer, dtype=dtype).reshape((n_samples, channels))

_handle_pad_byte(fid, size)

return data
Expand Down Expand Up @@ -191,7 +205,10 @@ def _handle_pad_byte(fid: io.BufferedReader, size: int) -> None:

def read(filename: str) -> tuple[int, AudioData]:
    """Open ``filename`` in binary mode and parse it with ``read_fid``.

    Returns whatever ``read_fid`` returns for the file (presumably the
    sample rate followed by the sample data).
    """
    # Close the handle once parsing is done instead of leaking it.  The
    # sample data is either memory-mapped or fully read during parsing;
    # a memory map stays valid after the originating file object is closed.
    with open(filename, "rb") as fid:
        return read_fid(fid)


def read_fid(fid: io.BufferedReader) -> tuple[int, AudioData]:
file_sig = fid.read(4)
if file_sig in (b"RIFF", b"RIFX"):
data_size, file_size, en = _read_riff_chunk(file_sig, fid)
Expand Down

0 comments on commit 393db50

Please sign in to comment.