From b6175bc7351f3d6935364156efc2acd3af153b01 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Tue, 29 Oct 2024 13:20:56 +0100 Subject: [PATCH] feat: add SpeechToText component for speech recognition --- apps/web/app/components/LSTT.tsx | 244 ++++++++++++++++++ .../app/modules/Dashboard/Dashboard.page.tsx | 2 + 2 files changed, 246 insertions(+) create mode 100644 apps/web/app/components/LSTT.tsx diff --git a/apps/web/app/components/LSTT.tsx b/apps/web/app/components/LSTT.tsx new file mode 100644 index 00000000..d64dfdb3 --- /dev/null +++ b/apps/web/app/components/LSTT.tsx @@ -0,0 +1,244 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import React, { useState, useEffect, useCallback, useRef } from "react"; + +interface SpeechRecognitionEvent extends Event { + resultIndex: number; + results: { + isFinal: boolean; + length: number; + item(index: number): SpeechRecognitionResult; + [index: number]: SpeechRecognitionResult; + }; +} + +interface SpeechRecognitionResult { + isFinal: boolean; + length: number; + item(index: number): SpeechRecognitionAlternative; + [index: number]: SpeechRecognitionAlternative; +} + +interface SpeechRecognitionAlternative { + transcript: string; + confidence: number; +} + +interface SpeechRecognitionErrorEvent extends Event { + error: + | "aborted" + | "audio-capture" + | "bad-grammar" + | "language-not-supported" + | "network" + | "no-speech" + | "not-allowed" + | "service-not-allowed"; + message: string; +} + +interface SpeechRecognition extends EventTarget { + continuous: boolean; + grammars: SpeechGrammarList; + interimResults: boolean; + lang: string; + maxAlternatives: number; + onaudioend: ((this: SpeechRecognition, ev: Event) => any) | null; + onaudiostart: ((this: SpeechRecognition, ev: Event) => any) | null; + onend: ((this: SpeechRecognition, ev: Event) => any) | null; + onerror: + | ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => any) + | null; + onnomatch: + | ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) + | null; + onresult: + | ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) + | null; + onsoundend: ((this: SpeechRecognition, ev: Event) => any) | null; + onsoundstart: ((this: SpeechRecognition, ev: Event) => any) | null; + onspeechend: ((this: SpeechRecognition, ev: Event) => any) | null; + onspeechstart: ((this: SpeechRecognition, ev: Event) => any) | null; + onstart: ((this: SpeechRecognition, ev: Event) => any) | null; + start: () => void; + stop: () => void; + abort: () => void; +} + +interface SpeechGrammarList { + length: number; + item(index: number): SpeechGrammar; + addFromString(string: string, weight?: number): void; + addFromURI(src: string, weight?: number): void; + [index: number]: SpeechGrammar; +} + +interface SpeechGrammar { + src: string; + weight: number; +} + +declare global { + interface Window { + SpeechRecognition: new () => SpeechRecognition; + webkitSpeechRecognition: new () => SpeechRecognition; + } +} + +export const SpeechToText: React.FC = () => { + const [isListening, setIsListening] = useState(false); + const [isStopping, setIsStopping] = useState(false); + const [text, setText] = useState(""); + const [error, setError] = useState(""); + const [recognition, setRecognition] = useState( + null + ); + const stopTimeout = useRef(); + + const initializeSpeechRecognition = useCallback(() => { + const SpeechRecognition = + window.SpeechRecognition || window.webkitSpeechRecognition; + if (!SpeechRecognition) { + setError("Speech recognition is not supported in this browser."); + return null; + } + + const recognition = new SpeechRecognition(); + recognition.continuous = true; + recognition.interimResults = true; + recognition.lang = "pl-PL"; + + recognition.onstart = () => { + console.log("Speech recognition started"); + setIsListening(true); + setIsStopping(false); + setError(""); + }; + + recognition.onerror = (event: SpeechRecognitionErrorEvent) => { + console.log("Speech recognition error", event); + setError(`Error: ${event.error}`); + setIsListening(false); + setIsStopping(false); + }; + + recognition.onend = () => { + console.log("Speech recognition ended"); + setIsListening(false); + setIsStopping(false); + + if (stopTimeout.current) { + clearTimeout(stopTimeout.current); + stopTimeout.current = undefined; + } + }; + + recognition.onresult = (event: SpeechRecognitionEvent) => { + let finalTranscript = ""; + for (let i = 0; i < event.results.length; i++) { + const result = event.results[i]; + if (result.isFinal) { + finalTranscript += result[0].transcript + " "; + } + } + + if (finalTranscript) { + setText((prev) => prev + finalTranscript); + } + }; + + return recognition; + }, []); + + useEffect(() => { + const recognition = initializeSpeechRecognition(); + if (recognition) { + setRecognition(recognition); + } + + return () => { + if (stopTimeout.current) { + clearTimeout(stopTimeout.current); + } + if (recognition) { + recognition.stop(); + } + }; + }, [initializeSpeechRecognition]); + + const toggleListening = useCallback(() => { + if (!recognition) { + setError("Speech recognition not initialized"); + return; + } + + if (isListening) { + setIsStopping(true); + + recognition.stop(); + + stopTimeout.current = setTimeout(() => { + setIsListening(false); + setIsStopping(false); + }, 15000); + } else { + setText(""); + try { + recognition.start(); + } catch (err) { + console.error("Failed to start speech recognition:", err); + setError("Failed to start speech recognition"); + setIsStopping(false); + } + } + }, [recognition, isListening]); + + return ( +
+
+
+

Speech to Text

+ +
+ + {error && ( +
{error}
+ )} + +
+ {text || "Transcription will appear here..."} + {isStopping && ( +
+ Stopping recognition... +
+ )} +
+ + {(isListening || isStopping) && ( +
+ {isStopping + ? "Finishing up... This might take a few seconds." + : "Listening..."} +
+ )} +
+
+ ); +}; diff --git a/apps/web/app/modules/Dashboard/Dashboard.page.tsx b/apps/web/app/modules/Dashboard/Dashboard.page.tsx index eec9eb20..4893f49f 100644 --- a/apps/web/app/modules/Dashboard/Dashboard.page.tsx +++ b/apps/web/app/modules/Dashboard/Dashboard.page.tsx @@ -12,6 +12,7 @@ import { useStudentCourses } from "~/api/queries/useStudentCourses"; import { queryClient } from "~/api/queryClient"; import { ButtonGroup } from "~/components/ButtonGroup/ButtonGroup"; import { Icon } from "~/components/Icon"; +import { SpeechToText } from "~/components/LSTT"; import { cn } from "~/lib/utils"; import { SORT_OPTIONS, type SortOption } from "~/types/sorting"; import { useLayoutsStore } from "../common/Layout/LayoutsStore"; @@ -127,6 +128,7 @@ export default function DashboardPage() { return (
+