diff --git a/frontend/src/editor/player.tsx b/frontend/src/editor/player.tsx
index 88a46ca2..1d7a6097 100644
--- a/frontend/src/editor/player.tsx
+++ b/frontend/src/editor/player.tsx
@@ -23,7 +23,15 @@ const SKIP_SHORTCUT_SEC = 3;
let lastTabPressTs = 0;
-export function PlayerBar({ documentId, editor }: { documentId: string; editor: Editor }) {
+export function PlayerBar({
+ documentId,
+ editor,
+ onShowVideo,
+}: {
+ documentId: string;
+ editor: Editor;
+ onShowVideo?: (show: boolean) => void;
+}) {
const { data } = useGetDocument(
{ document_id: documentId },
{
@@ -34,23 +42,39 @@ export function PlayerBar({ documentId, editor }: { documentId: string; editor:
const [playbackRate, setPlaybackRate] = useLocalStorage('playbackRate', 1);
- const sources = useMemo(() => {
- const mappedFiles =
- data?.media_files.map((media) => {
- return {
- src: media.url,
- type: media.content_type,
- };
- }) || [];
+ const { sources, hasVideo } = useMemo(() => {
+ // do not play the original file, it may be large
+ const relevantMediaFiles =
+ data?.media_files.filter((media) => !media.tags.includes('original')) || [];
+
+ const videoFiles = relevantMediaFiles.filter((media) => media.tags.includes('video'));
+ const audioFiles = relevantMediaFiles.filter((media) => !media.tags.includes('video'));
- return sortMediaFiles(mappedFiles);
+ const mappedFiles = [...videoFiles, ...audioFiles].map((media) => {
+ return {
+ src: media.url,
+ type: media.content_type,
+ };
+ });
+
+ return {
+ sources: sortMediaFiles(mappedFiles),
+ hasVideo: videoFiles.length > 0,
+ };
}, [data?.media_files]);
const audio = useAudio({
playbackRate,
sources,
+ videoPreview: hasVideo,
});
+ useEffect(() => {
+ if (onShowVideo) {
+ onShowVideo(hasVideo);
+ }
+ }, [hasVideo, onShowVideo]);
+
// calculate the start of the current element to color it
const [currentElementStartTime, setCurrentElementStartTime] = useState(0.0);
diff --git a/frontend/src/editor/transcription_editor.tsx b/frontend/src/editor/transcription_editor.tsx
index 46688bb6..c5becb43 100644
--- a/frontend/src/editor/transcription_editor.tsx
+++ b/frontend/src/editor/transcription_editor.tsx
@@ -224,12 +224,14 @@ export function TranscriptionEditor({
documentId,
readOnly,
initialValue,
+ onShowVideo,
...props
}: {
editor?: Editor;
documentId: string;
readOnly: boolean;
initialValue?: Paragraph[];
+ onShowVideo?: (show: boolean) => void;
} & ComponentProps<'div'>) {
const systemPrefersDark = useMediaQuery('(prefers-color-scheme: dark)');
// prevent ctrl+s
@@ -312,7 +314,7 @@ export function TranscriptionEditor({
className={clsx('2xl:-ml-20')}
/>
-
+
diff --git a/frontend/src/pages/document.tsx b/frontend/src/pages/document.tsx
index 9b6da10a..1ed18558 100644
--- a/frontend/src/pages/document.tsx
+++ b/frontend/src/pages/document.tsx
@@ -112,6 +112,7 @@ export function DocumentPage({
const [_location, navigate] = useLocation();
const debugMode = useDebugMode();
const { isLoggedIn } = useAuthData();
+ const [videoVisible, setVideoVisible] = useState(false);
const url = getDocumentWsUrl(documentId);
@@ -188,6 +189,7 @@ export function DocumentPage({
+ {/* Spacer to prevent video preview from hiding text */}
+ {videoVisible &&
}
+
{editor && debugMode && {}}
);
diff --git a/frontend/src/utils/use_audio.ts b/frontend/src/utils/use_audio.ts
index 6e934b29..67410447 100644
--- a/frontend/src/utils/use_audio.ts
+++ b/frontend/src/utils/use_audio.ts
@@ -1,12 +1,13 @@
-import { actions, audio, events, props } from '@podlove/html5-audio-driver';
+import { actions, video, events, props, audio } from '@podlove/html5-audio-driver';
import { useCallback, useEffect, useRef, useState } from 'react';
type UseAudioOptions = {
playbackRate?: number;
sources: Array<{ src: string; type: string }>;
+ videoPreview?: boolean;
};
-export function useAudio({ sources, playbackRate }: UseAudioOptions) {
+export function useAudio({ sources, playbackRate, videoPreview }: UseAudioOptions) {
const [playing, setPlayingState] = useState(false);
const [duration, setDuration] = useState();
const [buffering, setBuffering] = useState(false);
@@ -16,9 +17,23 @@ export function useAudio({ sources, playbackRate }: UseAudioOptions) {
const [audioElement, setAudioElement] = useState(null);
useEffect(() => {
- const myAudioElement = audio([]);
+ const myAudioElement = videoPreview ? video([]) : audio([]);
setAudioElement(myAudioElement);
+ if (videoPreview) {
+ myAudioElement.style.cssText = `
+ position: fixed;
+ bottom: 90px;
+ right: 20px;
+ height: 170px;
+ width: 300px;
+ `;
+ } else {
+ myAudioElement.style.cssText = `
+ display: none;
+ `;
+ }
+
const e = events(myAudioElement);
e.onDurationChange(() => {
setDuration(props(myAudioElement).duration);
@@ -40,7 +55,7 @@ export function useAudio({ sources, playbackRate }: UseAudioOptions) {
myAudioElement.innerHTML = '';
myAudioElement.remove();
};
- }, []);
+ }, [videoPreview]);
useEffect(() => {
if (!audioElement) return;
diff --git a/worker/transcribee_worker/config.py b/worker/transcribee_worker/config.py
index 2a020784..9dbaec9a 100644
--- a/worker/transcribee_worker/config.py
+++ b/worker/transcribee_worker/config.py
@@ -22,6 +22,18 @@ class Settings(BaseSettings):
"audio_bitrate": "128k",
"ac": "1",
},
+ "video:mp4": {
+ "format": "mp4",
+ "audio_bitrate": "128k",
+ "ac": "1",
+ "c:v": "libx264",
+ "crf": "26",
+ "preset": "faster",
+ # downscale to 480p and pad to multiple of 2 (needed for libx264)
+ "vf": "scale='min(854,iw)':'min(480,ih)'"
+ ":force_original_aspect_ratio=decrease,"
+ "pad='iw+mod(iw\\,2)':'ih+mod(ih\\,2)'",
+ },
}
KEEPALIVE_INTERVAL: float = 0.5
diff --git a/worker/transcribee_worker/reencode.py b/worker/transcribee_worker/reencode.py
index 1a6632b3..bf29fcbe 100644
--- a/worker/transcribee_worker/reencode.py
+++ b/worker/transcribee_worker/reencode.py
@@ -16,21 +16,23 @@ async def reencode(
output_params: dict[str, str],
progress_callback: ProgressCallbackType,
duration: float,
+ include_video: bool,
):
def work(_):
- cmd: subprocess.Popen = (
- ffmpeg.input(input_path)
- .output(
- filename=output_path,
- map="0:a",
- loglevel="quiet",
- stats=None,
- progress="-",
- map_metadata="-1",
- **output_params
- )
- .run_async(pipe_stdout=True)
- )
+ pipeline = ffmpeg.input(input_path)
+ streams = [pipeline.audio]
+ if include_video:
+ streams.append(pipeline.video)
+
+ cmd: subprocess.Popen = ffmpeg.output(
+ *streams,
+ filename=output_path,
+ loglevel="quiet",
+ stats=None,
+ progress="-",
+ map_metadata="-1",
+ **output_params
+ ).run_async(pipe_stdout=True)
assert cmd.stdout
raw_line: bytes
progress_dict = {}
diff --git a/worker/transcribee_worker/worker.py b/worker/transcribee_worker/worker.py
index a8670fac..1ecefa67 100644
--- a/worker/transcribee_worker/worker.py
+++ b/worker/transcribee_worker/worker.py
@@ -9,6 +9,7 @@
from typing import Any, AsyncGenerator, Optional, Tuple
import automerge
+import ffmpeg
import numpy.typing as npt
from pydantic import parse_raw_as
from transcribee_proto.api import (
@@ -74,6 +75,23 @@ def get_last_atom_end(doc: EditorDocument):
return 0
+def media_has_video(path: Path):
+ streams = ffmpeg.probe(path)["streams"]
+ for stream in streams:
+ if stream["codec_type"] == "video":
+ if stream.get("disposition", {}).get("attached_pic", 0) != 0:
+ # ignore album covers
+ continue
+
+ return True
+
+ return False
+
+
+def is_video_profile(profile_name: str):
+ return profile_name.startswith("video:")
+
+
class Worker:
base_url: str
token: str
@@ -255,9 +273,17 @@ async def reencode(
duration = get_duration(document_audio)
self.set_duration(task, duration)
- n_profiles = len(settings.REENCODE_PROFILES)
- for i, (profile, parameters) in enumerate(settings.REENCODE_PROFILES.items()):
- output_path = self._get_tmpfile(f"reencode_{profile}")
+ has_video = media_has_video(document_audio)
+ applicable_profiles = {
+ profile_name: parameters
+ for profile_name, parameters in settings.REENCODE_PROFILES.items()
+ if has_video or not is_video_profile(profile_name)
+ }
+ n_profiles = len(applicable_profiles)
+
+ for i, (profile, parameters) in enumerate(applicable_profiles.items()):
+ output_path = self._get_tmpfile(f"reencode_{profile.replace(':', '_')}")
+ video_profile = is_video_profile(profile)
await reencode(
document_audio,
@@ -269,10 +295,14 @@ async def reencode(
**kwargs,
),
duration,
+ include_video=video_profile,
)
tags = [f"profile:{profile}"] + [f"{k}:{v}" for k, v in parameters.items()]
+ if video_profile:
+ tags.append("video")
+
loop = asyncio.get_running_loop()
await loop.run_in_executor(
None, self.add_document_media_file, task, output_path, tags