From 03721aacf7f7b2e3bf8918903fdd03f52a8e1231 Mon Sep 17 00:00:00 2001
From: Philipp Mandler
Date: Fri, 8 Dec 2023 16:07:59 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B1=20Scale=20preview=20video=20to=204?=
 =?UTF-8?q?80p=20and=20keep=20audio=20file?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Summary:
  * config.py: add an audio-only "mp4" profile; "video:mp4" gains a "vf"
    filter chain (scale capped at 854x480 without upscaling, then pad to an
    even width/height, which libx264 needs).
  * reencode.py: drop has_video(); reencode() now relies on include_video.
  * worker.py: media_has_video() probes the input once per reencode task and
    ignores attached-picture (cover art) streams; "video:" profiles are
    skipped when it returns False, and their outputs are tagged "video".
  * player.tsx: media files tagged "video" are listed before the others.

Review notes (this copy differs from commit 03721aac in two places, so the
"index" blob ids for config.py and worker.py no longer match the content):
  * closed the unterminated single quote at the end of the pad expression;
  * media_has_video() reads "codec_type" and "disposition" with .get() so a
    probe entry missing either key does not raise KeyError.
Line breaks and indentation were restored from the hunk line counts; confirm
against the upstream commit before applying.

 frontend/src/editor/player.tsx        |  5 ++++-
 worker/transcribee_worker/config.py   |  9 +++++++++
 worker/transcribee_worker/reencode.py | 10 +---------
 worker/transcribee_worker/worker.py   | 27 ++++++++++++++++++++++++++-
 4 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/frontend/src/editor/player.tsx b/frontend/src/editor/player.tsx
index a6a59fd0..7cd1555f 100644
--- a/frontend/src/editor/player.tsx
+++ b/frontend/src/editor/player.tsx
@@ -39,7 +39,10 @@ export function PlayerBar({ documentId, editor }: { documentId: string; editor:
   const relevantMediaFiles =
     data?.media_files.filter((media) => !media.tags.includes('original')) || [];
 
-  const mappedFiles = relevantMediaFiles.map((media) => {
+  const videoFiles = relevantMediaFiles.filter((media) => media.tags.includes('video'));
+  const audioFiles = relevantMediaFiles.filter((media) => !media.tags.includes('video'));
+
+  const mappedFiles = [...videoFiles, ...audioFiles].map((media) => {
     return {
       src: media.url,
       type: media.content_type,
diff --git a/worker/transcribee_worker/config.py b/worker/transcribee_worker/config.py
index 1b2c5e22..9b8ff855 100644
--- a/worker/transcribee_worker/config.py
+++ b/worker/transcribee_worker/config.py
@@ -17,6 +17,11 @@ class Settings(BaseSettings):
             "audio_bitrate": "128k",
             "ac": "1",
         },
+        "mp4": {
+            "format": "mp4",
+            "audio_bitrate": "128k",
+            "ac": "1",
+        },
         "video:mp4": {
             "format": "mp4",
             "audio_bitrate": "128k",
@@ -24,6 +29,10 @@ class Settings(BaseSettings):
             "c:v": "libx264",
             "crf": "26",
             "preset": "faster",
+            # downscale to 480p and pad to multiple of 2 (needed for libx264)
+            "vf": "scale='min(854,iw)':'min(480,ih)'"
+            ":force_original_aspect_ratio=decrease,"
+            "pad='iw+mod(iw\\,2)':'ih+mod(ih\\,2)'",
         },
     }
 
diff --git a/worker/transcribee_worker/reencode.py b/worker/transcribee_worker/reencode.py
index b588adcb..bf29fcbe 100644
--- a/worker/transcribee_worker/reencode.py
+++ b/worker/transcribee_worker/reencode.py
@@ -10,14 +10,6 @@ def get_duration(input_path: Path):
     return float(ffmpeg.probe(input_path)["format"]["duration"])
 
 
-def has_video(input_path: Path):
-    streams = ffmpeg.probe(input_path)["streams"]
-    for stream in streams:
-        if stream["codec_type"] == "video":
-            return True
-    return False
-
-
 async def reencode(
     input_path: Path,
     output_path: Path,
@@ -29,7 +21,7 @@ async def reencode(
     def work(_):
         pipeline = ffmpeg.input(input_path)
         streams = [pipeline.audio]
-        if include_video and has_video(input_path):
+        if include_video:
             streams.append(pipeline.video)
 
         cmd: subprocess.Popen = ffmpeg.output(
diff --git a/worker/transcribee_worker/worker.py b/worker/transcribee_worker/worker.py
index c07638cf..f77c6789 100644
--- a/worker/transcribee_worker/worker.py
+++ b/worker/transcribee_worker/worker.py
@@ -9,6 +9,7 @@
 from typing import Any, AsyncGenerator, Optional, Tuple
 
 import automerge
+import ffmpeg
 import numpy.typing as npt
 from pydantic import parse_raw_as
 from transcribee_proto.api import (
@@ -74,6 +75,19 @@ def get_last_atom_end(doc: EditorDocument):
     return 0
 
 
+def media_has_video(path: Path):
+    streams = ffmpeg.probe(path)["streams"]
+    for stream in streams:
+        if stream.get("codec_type") == "video":
+            if stream.get("disposition", {}).get("attached_pic", 0) != 0:
+                # ignore album covers
+                continue
+
+            return True
+
+    return False
+
+
 class Worker:
     base_url: str
     token: str
@@ -256,9 +270,17 @@ async def reencode(
         self.set_duration(task, duration)
 
         n_profiles = len(settings.REENCODE_PROFILES)
+
+        has_video = media_has_video(document_audio)
+
         for i, (profile, parameters) in enumerate(settings.REENCODE_PROFILES.items()):
             output_path = self._get_tmpfile(f"reencode_{profile.replace(':', '_')}")
 
+            video_profile = profile.startswith("video:")
+
+            if video_profile and not has_video:
+                continue
+
             await reencode(
                 document_audio,
                 output_path,
@@ -269,11 +291,14 @@ async def reencode(
                     **kwargs,
                 ),
                 duration,
-                include_video=(profile.startswith("video:")),
+                include_video=video_profile,
             )
 
             tags = [f"profile:{profile}"] + [f"{k}:{v}" for k, v in parameters.items()]
 
+            if video_profile:
+                tags.append("video")
+
             loop = asyncio.get_running_loop()
             await loop.run_in_executor(
                 None, self.add_document_media_file, task, output_path, tags