From 868dc875cc61597bff24a094ca855b9929ee2986 Mon Sep 17 00:00:00 2001 From: Philipp Mandler Date: Wed, 6 Dec 2023 12:46:59 +0100 Subject: [PATCH 1/6] =?UTF-8?q?=E2=9C=A8=20Add=20basic=20video=20preview?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/editor/player.tsx | 1 + frontend/src/utils/use_audio.ts | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/frontend/src/editor/player.tsx b/frontend/src/editor/player.tsx index 88a46ca2..4052fe96 100644 --- a/frontend/src/editor/player.tsx +++ b/frontend/src/editor/player.tsx @@ -49,6 +49,7 @@ export function PlayerBar({ documentId, editor }: { documentId: string; editor: const audio = useAudio({ playbackRate, sources, + videoPreview: true, }); // calculate the start of the current element to color it diff --git a/frontend/src/utils/use_audio.ts b/frontend/src/utils/use_audio.ts index 6e934b29..7fe1ca9c 100644 --- a/frontend/src/utils/use_audio.ts +++ b/frontend/src/utils/use_audio.ts @@ -1,12 +1,13 @@ -import { actions, audio, events, props } from '@podlove/html5-audio-driver'; +import { actions, video, events, props, audio } from '@podlove/html5-audio-driver'; import { useCallback, useEffect, useRef, useState } from 'react'; type UseAudioOptions = { playbackRate?: number; sources: Array<{ src: string; type: string }>; + videoPreview?: boolean; }; -export function useAudio({ sources, playbackRate }: UseAudioOptions) { +export function useAudio({ sources, playbackRate, videoPreview }: UseAudioOptions) { const [playing, setPlayingState] = useState(false); const [duration, setDuration] = useState(); const [buffering, setBuffering] = useState(false); @@ -16,11 +17,22 @@ export function useAudio({ sources, playbackRate }: UseAudioOptions) { const [audioElement, setAudioElement] = useState(null); useEffect(() => { - const myAudioElement = audio([]); + const myAudioElement = videoPreview ? video([]) : audio([]); + setAudioElement(myAudioElement); const e = events(myAudioElement); e.onDurationChange(() => { + if (videoPreview && myAudioElement.videoHeight > 0) { + myAudioElement.style = ` + position: fixed; + bottom: 90px; + right: 20px; + height: 170px; + width: 300px; + `; + } + setDuration(props(myAudioElement).duration); }); e.onPlay(() => setPlayingState(true)); From 8c05d9d6b6a010a3a3089484a7eae783d4334d25 Mon Sep 17 00:00:00 2001 From: Philipp Mandler Date: Thu, 7 Dec 2023 02:44:59 +0100 Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=A7=B1=20Include=20video=20in=20reenc?= =?UTF-8?q?oded=20preview=20media?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/editor/player.tsx | 17 +++++++------ worker/transcribee_worker/config.py | 5 +++- worker/transcribee_worker/reencode.py | 36 +++++++++++++++++---------- worker/transcribee_worker/worker.py | 3 ++- 4 files changed, 39 insertions(+), 22 deletions(-) diff --git a/frontend/src/editor/player.tsx b/frontend/src/editor/player.tsx index 4052fe96..a6a59fd0 100644 --- a/frontend/src/editor/player.tsx +++ b/frontend/src/editor/player.tsx @@ -35,13 +35,16 @@ export function PlayerBar({ documentId, editor }: { documentId: string; editor: const [playbackRate, setPlaybackRate] = useLocalStorage('playbackRate', 1); const sources = useMemo(() => { - const mappedFiles = - data?.media_files.map((media) => { - return { - src: media.url, - type: media.content_type, - }; - }) || []; + // do not play the original file, it may be large + const relevantMediaFiles = + data?.media_files.filter((media) => !media.tags.includes('original')) || []; + + const mappedFiles = relevantMediaFiles.map((media) => { + return { + src: media.url, + type: media.content_type, + }; + }); return sortMediaFiles(mappedFiles); }, [data?.media_files]); diff --git a/worker/transcribee_worker/config.py b/worker/transcribee_worker/config.py index 2a020784..1b2c5e22 100644 --- a/worker/transcribee_worker/config.py +++ b/worker/transcribee_worker/config.py @@ -17,10 +17,13 @@ class Settings(BaseSettings): "audio_bitrate": "128k", "ac": "1", }, - "m4a": { + "video:mp4": { "format": "mp4", "audio_bitrate": "128k", "ac": "1", + "c:v": "libx264", + "crf": "26", + "preset": "faster", }, } diff --git a/worker/transcribee_worker/reencode.py b/worker/transcribee_worker/reencode.py index 1a6632b3..b588adcb 100644 --- a/worker/transcribee_worker/reencode.py +++ b/worker/transcribee_worker/reencode.py @@ -10,27 +10,37 @@ def get_duration(input_path: Path): return float(ffmpeg.probe(input_path)["format"]["duration"]) +def has_video(input_path: Path): + streams = ffmpeg.probe(input_path)["streams"] + for stream in streams: + if stream["codec_type"] == "video": + return True + return False + + async def reencode( input_path: Path, output_path: Path, output_params: dict[str, str], progress_callback: ProgressCallbackType, duration: float, + include_video: bool, ): def work(_): - cmd: subprocess.Popen = ( - ffmpeg.input(input_path) - .output( - filename=output_path, - map="0:a", - loglevel="quiet", - stats=None, - progress="-", - map_metadata="-1", - **output_params - ) - .run_async(pipe_stdout=True) - ) + pipeline = ffmpeg.input(input_path) + streams = [pipeline.audio] + if include_video and has_video(input_path): + streams.append(pipeline.video) + + cmd: subprocess.Popen = ffmpeg.output( + *streams, + filename=output_path, + loglevel="quiet", + stats=None, + progress="-", + map_metadata="-1", + **output_params + ).run_async(pipe_stdout=True) assert cmd.stdout raw_line: bytes progress_dict = {} diff --git a/worker/transcribee_worker/worker.py b/worker/transcribee_worker/worker.py index a8670fac..c07638cf 100644 --- a/worker/transcribee_worker/worker.py +++ b/worker/transcribee_worker/worker.py @@ -257,7 +257,7 @@ async def reencode( n_profiles = len(settings.REENCODE_PROFILES) for i, (profile, parameters) in enumerate(settings.REENCODE_PROFILES.items()): - output_path = self._get_tmpfile(f"reencode_{profile}") + output_path = self._get_tmpfile(f"reencode_{profile.replace(':', '_')}") await reencode( document_audio, @@ -269,6 +269,7 @@ async def reencode( **kwargs, ), duration, + include_video=(profile.startswith("video:")), ) tags = [f"profile:{profile}"] + [f"{k}:{v}" for k, v in parameters.items()] From 06ba47a6b72e4cd213a61803daf9f876a778a7c8 Mon Sep 17 00:00:00 2001 From: Philipp Mandler Date: Fri, 8 Dec 2023 16:10:06 +0100 Subject: [PATCH 3/6] =?UTF-8?q?=F0=9F=A7=B1=20Scale=20preview=20video=20to?= =?UTF-8?q?=20480p=20and=20keep=20audio=20file?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/editor/player.tsx | 5 ++++- worker/transcribee_worker/config.py | 9 +++++++++ worker/transcribee_worker/reencode.py | 10 +--------- worker/transcribee_worker/worker.py | 27 ++++++++++++++++++++++++++- 4 files changed, 40 insertions(+), 11 deletions(-) diff --git a/frontend/src/editor/player.tsx b/frontend/src/editor/player.tsx index a6a59fd0..7cd1555f 100644 --- a/frontend/src/editor/player.tsx +++ b/frontend/src/editor/player.tsx @@ -39,7 +39,10 @@ export function PlayerBar({ documentId, editor }: { documentId: string; editor: const relevantMediaFiles = data?.media_files.filter((media) => !media.tags.includes('original')) || []; - const mappedFiles = relevantMediaFiles.map((media) => { + const videoFiles = relevantMediaFiles.filter((media) => media.tags.includes('video')); + const audioFiles = relevantMediaFiles.filter((media) => !media.tags.includes('video')); + + const mappedFiles = [...videoFiles, ...audioFiles].map((media) => { return { src: media.url, type: media.content_type, diff --git a/worker/transcribee_worker/config.py b/worker/transcribee_worker/config.py index 1b2c5e22..9dbaec9a 100644 --- a/worker/transcribee_worker/config.py +++ b/worker/transcribee_worker/config.py @@ -17,6 +17,11 @@ class Settings(BaseSettings): "audio_bitrate": "128k", "ac": "1", }, + "m4a": { + "format": "mp4", + "audio_bitrate": "128k", + "ac": "1", + }, "video:mp4": { "format": "mp4", "audio_bitrate": "128k", @@ -24,6 +29,10 @@ class Settings(BaseSettings): "c:v": "libx264", "crf": "26", "preset": "faster", + # downscale to 480p and pad to multiple of 2 (needed for libx264) + "vf": "scale='min(854,iw)':'min(480,ih)'" + ":force_original_aspect_ratio=decrease," + "pad='iw+mod(iw\\,2)':'ih+mod(ih\\,2)", }, } diff --git a/worker/transcribee_worker/reencode.py b/worker/transcribee_worker/reencode.py index b588adcb..bf29fcbe 100644 --- a/worker/transcribee_worker/reencode.py +++ b/worker/transcribee_worker/reencode.py @@ -10,14 +10,6 @@ def get_duration(input_path: Path): return float(ffmpeg.probe(input_path)["format"]["duration"]) -def has_video(input_path: Path): - streams = ffmpeg.probe(input_path)["streams"] - for stream in streams: - if stream["codec_type"] == "video": - return True - return False - - async def reencode( input_path: Path, output_path: Path, @@ -29,7 +21,7 @@ async def reencode( def work(_): pipeline = ffmpeg.input(input_path) streams = [pipeline.audio] - if include_video and has_video(input_path): + if include_video: streams.append(pipeline.video) cmd: subprocess.Popen = ffmpeg.output( diff --git a/worker/transcribee_worker/worker.py b/worker/transcribee_worker/worker.py index c07638cf..f77c6789 100644 --- a/worker/transcribee_worker/worker.py +++ b/worker/transcribee_worker/worker.py @@ -9,6 +9,7 @@ from typing import Any, AsyncGenerator, Optional, Tuple import automerge +import ffmpeg import numpy.typing as npt from pydantic import parse_raw_as from transcribee_proto.api import ( @@ -74,6 +75,19 @@ def get_last_atom_end(doc: EditorDocument): return 0 +def media_has_video(path: Path): + streams = ffmpeg.probe(path)["streams"] + for stream in streams: + if stream["codec_type"] == "video": + if stream["disposition"]["attached_pic"] != 0: + # ignore album covers + continue + + return True + + return False + + class Worker: base_url: str token: str @@ -256,9 +270,17 @@ async def reencode( self.set_duration(task, duration) n_profiles = len(settings.REENCODE_PROFILES) + + has_video = media_has_video(document_audio) + for i, (profile, parameters) in enumerate(settings.REENCODE_PROFILES.items()): output_path = self._get_tmpfile(f"reencode_{profile.replace(':', '_')}") + video_profile = profile.startswith("video:") + + if video_profile and not has_video: + continue + await reencode( document_audio, output_path, @@ -269,11 +291,14 @@ async def reencode( **kwargs, ), duration, - include_video=(profile.startswith("video:")), + include_video=video_profile, ) tags = [f"profile:{profile}"] + [f"{k}:{v}" for k, v in parameters.items()] + if video_profile: + tags.append("video") + loop = asyncio.get_running_loop() await loop.run_in_executor( None, self.add_document_media_file, task, output_path, tags From a4f90118b74913f8f855f58a43fabd0d62f5bf88 Mon Sep 17 00:00:00 2001 From: Philipp Mandler Date: Fri, 8 Dec 2023 16:18:43 +0100 Subject: [PATCH 4/6] =?UTF-8?q?=F0=9F=A7=B9=20Enable=20video=20preview=20b?= =?UTF-8?q?ased=20on=20media=20tags?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/editor/player.tsx | 9 ++++++--- frontend/src/utils/use_audio.ts | 27 +++++++++++++++------------ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/frontend/src/editor/player.tsx b/frontend/src/editor/player.tsx index 7cd1555f..5e1a43de 100644 --- a/frontend/src/editor/player.tsx +++ b/frontend/src/editor/player.tsx @@ -34,7 +34,7 @@ export function PlayerBar({ documentId, editor }: { documentId: string; editor: const [playbackRate, setPlaybackRate] = useLocalStorage('playbackRate', 1); - const sources = useMemo(() => { + const { sources, hasVideo } = useMemo(() => { // do not play the original file, it may be large const relevantMediaFiles = data?.media_files.filter((media) => !media.tags.includes('original')) || []; @@ -49,13 +49,16 @@ export function PlayerBar({ documentId, editor }: { documentId: string; editor: }; }); - return sortMediaFiles(mappedFiles); + return { + sources: sortMediaFiles(mappedFiles), + hasVideo: videoFiles.length > 0, + }; }, [data?.media_files]); const audio = useAudio({ playbackRate, sources, - videoPreview: true, + videoPreview: hasVideo, }); // calculate the start of the current element to color it diff --git a/frontend/src/utils/use_audio.ts b/frontend/src/utils/use_audio.ts index 7fe1ca9c..67410447 100644 --- a/frontend/src/utils/use_audio.ts +++ b/frontend/src/utils/use_audio.ts @@ -18,21 +18,24 @@ export function useAudio({ sources, playbackRate, videoPreview }: UseAudioOption useEffect(() => { const myAudioElement = videoPreview ? video([]) : audio([]); - setAudioElement(myAudioElement); + if (videoPreview) { + myAudioElement.style = ` + position: fixed; + bottom: 90px; + right: 20px; + height: 170px; + width: 300px; + `; + } else { + myAudioElement.style = ` + display: none; + `; + } + const e = events(myAudioElement); e.onDurationChange(() => { - if (videoPreview && myAudioElement.videoHeight > 0) { - myAudioElement.style = ` - position: fixed; - bottom: 90px; - right: 20px; - height: 170px; - width: 300px; - `; - } - setDuration(props(myAudioElement).duration); }); e.onPlay(() => setPlayingState(true)); @@ -52,7 +55,7 @@ export function useAudio({ sources, playbackRate, videoPreview }: UseAudioOption myAudioElement.innerHTML = ''; myAudioElement.remove(); }; - }, []); + }, [videoPreview]); useEffect(() => { if (!audioElement) return; From b601f3edf47a2e6220f3c8d5a2a69efda0feb38e Mon Sep 17 00:00:00 2001 From: Philipp Mandler Date: Fri, 8 Dec 2023 16:39:21 +0100 Subject: [PATCH 5/6] =?UTF-8?q?=E2=9C=A8=20Add=20space=20for=20video=20bel?= =?UTF-8?q?ow=20document=20content?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/editor/player.tsx | 16 +++++++++++++++- frontend/src/editor/transcription_editor.tsx | 4 +++- frontend/src/pages/document.tsx | 5 +++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/frontend/src/editor/player.tsx b/frontend/src/editor/player.tsx index 5e1a43de..1d7a6097 100644 --- a/frontend/src/editor/player.tsx +++ b/frontend/src/editor/player.tsx @@ -23,7 +23,15 @@ const SKIP_SHORTCUT_SEC = 3; let lastTabPressTs = 0; -export function PlayerBar({ documentId, editor }: { documentId: string; editor: Editor }) { +export function PlayerBar({ + documentId, + editor, + onShowVideo, +}: { + documentId: string; + editor: Editor; + onShowVideo?: (show: boolean) => void; +}) { const { data } = useGetDocument( { document_id: documentId }, { @@ -61,6 +69,12 @@ export function PlayerBar({ documentId, editor }: { documentId: string; editor: videoPreview: hasVideo, }); + useEffect(() => { + if (onShowVideo) { + onShowVideo(hasVideo); + } + }, [hasVideo]); + // calculate the start of the current element to color it const [currentElementStartTime, setCurrentElementStartTime] = useState(0.0); diff --git a/frontend/src/editor/transcription_editor.tsx b/frontend/src/editor/transcription_editor.tsx index 46688bb6..c5becb43 100644 --- a/frontend/src/editor/transcription_editor.tsx +++ b/frontend/src/editor/transcription_editor.tsx @@ -224,12 +224,14 @@ export function TranscriptionEditor({ documentId, readOnly, initialValue, + onShowVideo, ...props }: { editor?: Editor; documentId: string; readOnly: boolean; initialValue?: Paragraph[]; + onShowVideo?: (show: boolean) => void; } & ComponentProps<'div'>) { const systemPrefersDark = useMediaQuery('(prefers-color-scheme: dark)'); // prevent ctrl+s @@ -312,7 +314,7 @@ export function TranscriptionEditor({ className={clsx('2xl:-ml-20')} /> - + diff --git a/frontend/src/pages/document.tsx b/frontend/src/pages/document.tsx index 9b6da10a..1ed18558 100644 --- a/frontend/src/pages/document.tsx +++ b/frontend/src/pages/document.tsx @@ -112,6 +112,7 @@ export function DocumentPage({ const [_location, navigate] = useLocation(); const debugMode = useDebugMode(); const { isLoggedIn } = useAuthData(); + const [videoVisible, setVideoVisible] = useState(false); const url = getDocumentWsUrl(documentId); @@ -188,6 +189,7 @@ export function DocumentPage({ + {/* Spacer to prevent video preview from hiding text */} + {videoVisible &&
} + {editor && debugMode && {}} ); From 1e3a74e6ed67740668597a480ee7e845475f8592 Mon Sep 17 00:00:00 2001 From: Philipp Mandler Date: Fri, 8 Dec 2023 17:41:12 +0100 Subject: [PATCH 6/6] =?UTF-8?q?=F0=9F=90=9B=20Fix=20reencode=20progress=20?= =?UTF-8?q?calculation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- worker/transcribee_worker/worker.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/worker/transcribee_worker/worker.py b/worker/transcribee_worker/worker.py index f77c6789..1ecefa67 100644 --- a/worker/transcribee_worker/worker.py +++ b/worker/transcribee_worker/worker.py @@ -88,6 +88,10 @@ def media_has_video(path: Path): return False +def is_video_profile(profile_name: str): + return profile_name.startswith("video:") + + class Worker: base_url: str token: str @@ -269,17 +273,17 @@ async def reencode( duration = get_duration(document_audio) self.set_duration(task, duration) - n_profiles = len(settings.REENCODE_PROFILES) - has_video = media_has_video(document_audio) - - for i, (profile, parameters) in enumerate(settings.REENCODE_PROFILES.items()): + applicable_profiles = { + profile_name: parameters + for profile_name, parameters in settings.REENCODE_PROFILES.items() + if has_video or not is_video_profile(profile_name) + } + n_profiles = len(applicable_profiles) + + for i, (profile, parameters) in enumerate(applicable_profiles.items()): output_path = self._get_tmpfile(f"reencode_{profile.replace(':', '_')}") - - video_profile = profile.startswith("video:") - - if video_profile and not has_video: - continue + video_profile = is_video_profile(profile) await reencode( document_audio,