diff --git a/worker/transcribee_worker/identify_speakers.py b/worker/transcribee_worker/identify_speakers.py index 610573b0..f1bf2a13 100644 --- a/worker/transcribee_worker/identify_speakers.py +++ b/worker/transcribee_worker/identify_speakers.py @@ -35,7 +35,14 @@ def time_to_sample(time: float | None): segments = [ ( - time_to_sample(child.children[0].start), + min( + time_to_sample(child.children[0].start), + # we always use at least 0.1s, + # otherwise the fingerprinting model explodes sometimes + # since the start of the segment might be less than 0.1s + # from end of the audio, we use this as a safety + len(audio) - time_to_sample(0.1), + ), max( time_to_sample(child.children[-1].end), # we always use at least 0.1s,