From 03721aacf7f7b2e3bf8918903fdd03f52a8e1231 Mon Sep 17 00:00:00 2001
From: Philipp Mandler <info@philipp-mandler.com>
Date: Fri, 8 Dec 2023 16:07:59 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B1=20Scale=20preview=20video=20to=204?=
 =?UTF-8?q?80p=20and=20keep=20audio=20file?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 frontend/src/editor/player.tsx        |  5 ++++-
 worker/transcribee_worker/config.py   |  9 +++++++++
 worker/transcribee_worker/reencode.py | 10 +---------
 worker/transcribee_worker/worker.py   | 27 ++++++++++++++++++++++++++-
 4 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/frontend/src/editor/player.tsx b/frontend/src/editor/player.tsx
index a6a59fd0..7cd1555f 100644
--- a/frontend/src/editor/player.tsx
+++ b/frontend/src/editor/player.tsx
@@ -39,7 +39,10 @@ export function PlayerBar({ documentId, editor }: { documentId: string; editor:
     const relevantMediaFiles =
       data?.media_files.filter((media) => !media.tags.includes('original')) || [];
 
-    const mappedFiles = relevantMediaFiles.map((media) => {
+    const videoFiles = relevantMediaFiles.filter((media) => media.tags.includes('video'));
+    const audioFiles = relevantMediaFiles.filter((media) => !media.tags.includes('video'));
+
+    const mappedFiles = [...videoFiles, ...audioFiles].map((media) => {
       return {
         src: media.url,
         type: media.content_type,
diff --git a/worker/transcribee_worker/config.py b/worker/transcribee_worker/config.py
index 1b2c5e22..9b8ff855 100644
--- a/worker/transcribee_worker/config.py
+++ b/worker/transcribee_worker/config.py
@@ -17,6 +17,11 @@ class Settings(BaseSettings):
             "audio_bitrate": "128k",
             "ac": "1",
         },
+        "mp4": {
+            "format": "mp4",
+            "audio_bitrate": "128k",
+            "ac": "1",
+        },
         "video:mp4": {
             "format": "mp4",
             "audio_bitrate": "128k",
@@ -24,6 +29,10 @@ class Settings(BaseSettings):
             "c:v": "libx264",
             "crf": "26",
             "preset": "faster",
+            # downscale to 480p and pad to multiple of 2 (needed for libx264)
+            "vf": "scale='min(854,iw)':'min(480,ih)'"
+            ":force_original_aspect_ratio=decrease,"
+            "pad='iw+mod(iw\\,2)':'ih+mod(ih\\,2)'",
         },
     }
 
diff --git a/worker/transcribee_worker/reencode.py b/worker/transcribee_worker/reencode.py
index b588adcb..bf29fcbe 100644
--- a/worker/transcribee_worker/reencode.py
+++ b/worker/transcribee_worker/reencode.py
@@ -10,14 +10,6 @@ def get_duration(input_path: Path):
     return float(ffmpeg.probe(input_path)["format"]["duration"])
 
 
-def has_video(input_path: Path):
-    streams = ffmpeg.probe(input_path)["streams"]
-    for stream in streams:
-        if stream["codec_type"] == "video":
-            return True
-    return False
-
-
 async def reencode(
     input_path: Path,
     output_path: Path,
@@ -29,7 +21,7 @@ async def reencode(
     def work(_):
         pipeline = ffmpeg.input(input_path)
         streams = [pipeline.audio]
-        if include_video and has_video(input_path):
+        if include_video:
             streams.append(pipeline.video)
 
         cmd: subprocess.Popen = ffmpeg.output(
diff --git a/worker/transcribee_worker/worker.py b/worker/transcribee_worker/worker.py
index c07638cf..f77c6789 100644
--- a/worker/transcribee_worker/worker.py
+++ b/worker/transcribee_worker/worker.py
@@ -9,6 +9,7 @@
 from typing import Any, AsyncGenerator, Optional, Tuple
 
 import automerge
+import ffmpeg
 import numpy.typing as npt
 from pydantic import parse_raw_as
 from transcribee_proto.api import (
@@ -74,6 +75,19 @@ def get_last_atom_end(doc: EditorDocument):
     return 0
 
 
+def media_has_video(path: Path) -> bool:
+    """Return True if *path* has a real video stream (not cover art)."""
+    for stream in ffmpeg.probe(path)["streams"]:
+        # tolerate probe entries lacking these keys instead of raising KeyError
+        if stream.get("codec_type") != "video":
+            continue
+        if stream.get("disposition", {}).get("attached_pic", 0) != 0:
+            # ignore album covers
+            continue
+        return True
+    return False
+
+
 class Worker:
     base_url: str
     token: str
@@ -256,9 +270,17 @@ async def reencode(
         self.set_duration(task, duration)
 
         n_profiles = len(settings.REENCODE_PROFILES)
+
+        has_video = media_has_video(document_audio)
+
         for i, (profile, parameters) in enumerate(settings.REENCODE_PROFILES.items()):
             output_path = self._get_tmpfile(f"reencode_{profile.replace(':', '_')}")
 
+            video_profile = profile.startswith("video:")
+
+            if video_profile and not has_video:
+                continue
+
             await reencode(
                 document_audio,
                 output_path,
@@ -269,11 +291,14 @@ async def reencode(
                     **kwargs,
                 ),
                 duration,
-                include_video=(profile.startswith("video:")),
+                include_video=video_profile,
             )
 
             tags = [f"profile:{profile}"] + [f"{k}:{v}" for k, v in parameters.items()]
 
+            if video_profile:
+                tags.append("video")
+
             loop = asyncio.get_running_loop()
             await loop.run_in_executor(
                 None, self.add_document_media_file, task, output_path, tags