diff --git a/src/main/autocut/check.ts b/src/main/autocut/check.ts
index 678d52f..5508b39 100644
--- a/src/main/autocut/check.ts
+++ b/src/main/autocut/check.ts
@@ -1,5 +1,5 @@
 import { exec } from "child_process"
-import { safePath } from "~~/utils"
+import { safePath } from "~~/utils/path"
 
 export function ffmpegCheck() {
   return new Promise((resolve, reject) => {
diff --git a/src/main/autocut/index.ts b/src/main/autocut/index.ts
index ad2b473..172aa2f 100644
--- a/src/main/autocut/index.ts
+++ b/src/main/autocut/index.ts
@@ -1,8 +1,14 @@
 import { spawn } from "child_process"
 import readline from "readline"
 import fs from "fs"
-import { safePath } from "~~/utils"
+import { timestampToSecond } from "~~/utils"
+import { safePath } from "~~/utils/path"
 import { AutocutConfig } from "~~/../types"
+import { slice } from "~~/ffmpeg"
+import { detectVoiceActivity } from "~~/vad"
+import { transcribe, WhisperResItem } from "~~/whisper"
+import { type NodeCue, type NodeList, stringifySync } from "subtitle"
+
 type GenerateStatus = "processing" | "error" | "success"
 
 /**
@@ -201,3 +207,78 @@ export function cutVideo(
   });
 }
 
+
+
+/**
+ * Transcribe `file` with whisper and write the subtitles next to it as an `.srt` file.
+ *
+ * With `config.vad` the input is first cut into voiced segments, the segments are
+ * transcribed concurrently, and every cue is shifted by its segment's start time.
+ * Without it the whole file is transcribed in one pass.
+ *
+ * @param file path of the media file to transcribe
+ * @param config whisper language and model path, plus the optional VAD switch
+ * @param cb progress callback; `process` is a percentage from 0 to 100
+ */
+export async function generateSubtitle1(
+  file: string,
+  config: {language: string, modelPath: string, vad?: boolean},
+  cb?: (status: GenerateStatus, msg: string, process?: number) => any,
+) {
+  // Only treat the last "." as an extension separator when it sits inside the
+  // file name; otherwise (no extension, dot in a directory name) keep the full path.
+  const dotIdx = file.lastIndexOf(".")
+  const sepIdx = Math.max(file.lastIndexOf("/"), file.lastIndexOf("\\"))
+  const srtFile = (dotIdx > sepIdx + 1 ? file.slice(0, dotIdx) : file) + ".srt"
+
+  const srt: NodeList = []
+
+  let res: Array<WhisperResItem[]> = []
+
+  let sliceRes: Array<{start: string, end: string, file: string}> = []
+
+  if(config.vad) {
+    // Voice-activity detection is only needed on this path, so run it here.
+    const times = detectVoiceActivity(file)
+    const { sliceRes: _sliceRes, removeTemps } = await slice(file, times)
+    sliceRes = _sliceRes
+
+    let done = 0
+
+    try {
+      cb?.("processing", "transcribing", 0)
+      res = await Promise.all(sliceRes.map((item, _idx) => {
+        return transcribe(config.modelPath, item.file, {language: config.language}, _idx, () => {
+          done += 1
+          cb?.("processing", "transcribing", Math.floor(done / sliceRes.length * 100))
+        })
+      }))
+    } finally {
+      // Remove the temporary segment files even when a transcription fails.
+      removeTemps()
+    }
+  } else {
+    cb?.("processing", "transcribing", 0)
+    res.push(await transcribe(config.modelPath, file, {language: config.language}))
+    cb?.("processing", "transcribing", 100)
+  }
+
+  // Build the SRT cues and save them
+  res.forEach((p, pIdx) => {
+    // Segment start in seconds; stays 0 when the file was transcribed in one pass.
+    const offset = sliceRes[pIdx] ? Number(Number(sliceRes[pIdx].start).toFixed(3)) : 0
+    p.forEach(l => {
+      const cue: NodeCue = {
+        type: "cue",
+        data: {
+          // Round to whole milliseconds to absorb floating-point noise from the sum.
+          start: Math.round((timestampToSecond(l[0]) + offset) * 1000),
+          end: Math.round((timestampToSecond(l[1]) + offset) * 1000),
+          text: l[2],
+        },
+      }
+      srt.push(cue)
+    })
+  })
+  fs.writeFileSync(srtFile, stringifySync(srt, { format: "SRT" }))
+}
\ No newline at end of file
diff --git a/src/main/ffmpeg/index.ts b/src/main/ffmpeg/index.ts
index 161c4e9..1ff7a0e 100644
--- a/src/main/ffmpeg/index.ts
+++ b/src/main/ffmpeg/index.ts
@@ -4,7 +4,8 @@ import path from "path";
 import { v4 as uuidv4 } from "uuid";
 import { spawn } from "child_process"
 import readline from "readline"
-import { safePath } from "~~/utils"
+import { secondToTimestamp } from "~~/utils"
+import { safePath } from "~~/utils/path"
 import { type Vad } from "~~/vad"
 
 type ProcessStatus = "error" | "processing" | "success"
@@ -122,43 +123,27 @@ export function convertVideo(
   });
 }
 
-/** like 2.304000 -> 00:00:02.304 */
-function _transformTimeformat(time: string | number): string {
-  const [second, millisecond] = `${time}`.split(".")
-  const date = new Date(0)
-  date.setSeconds(+second)
-  return date.toISOString().substr(11, 8).replace("T", "").replace("Z", "") + "." + millisecond.slice(0, 3)
-}
-
 function _ffmpegSlice(file: string, start: string, end: string) {
   const tempDir = path.join(os.tmpdir(), "./autocut-client")
   if(!fs.existsSync(tempDir)){
     fs.mkdirSync(tempDir)
   }
   return new Promise<ReturnType<Vad>[0] & {file: string}>((resolve, reject) => {
+
     const id = uuidv4()
     const exportPath = `${tempDir}/${id}.wav`
-    console.log("ffmpeg",
-      [
-        "-i", safePath(file), "-y",
-        "-ss", _transformTimeformat(start),
-        "-t", _transformTimeformat(Number(end) - Number(start)),
-        "-c:a", "pcm_s16le",
-        exportPath,
-      ])
     const p = spawn(
       "ffmpeg",
       [
         "-i", safePath(file), "-y",
-        "-ss", _transformTimeformat(start),
-        "-t", _transformTimeformat(Number(end) - Number(start)),
+        "-ss", secondToTimestamp(start),
+        "-t", secondToTimestamp(Number(end) - Number(start)),
         "-c:a", "pcm_s16le",
         exportPath,
       ],
     )
 
     p.on("close", (code) => {
-      console.log(`child process exited with code ${code}`);
       if(code === 0) {
         resolve({
           start,
@@ -166,6 +151,7 @@ function _ffmpegSlice(file: string, start: string, end: string) {
           file: exportPath,
         })
       } else {
+        console.log(`child process exited with code ${code}`);
         reject()
       }
     })
@@ -176,8 +162,6 @@ function _ffmpegSlice(file: string, start: string, end: string) {
 
 export async function slice(file: string, times: ReturnType<Vad>) {
   // ffmpeg -i input_audio.mp3 -ss 00:00:02 -t 00:00:03 -c:a pcm_s16le output_audio.wav
-  console.log(os.tmpdir())
-
   const sliceRes = await Promise.all(times.map(time => {
     return _ffmpegSlice(file, time.start, time.end)
   }))
diff --git a/src/main/utils/index.ts b/src/main/utils/index.ts
index 86fb2b7..dd75184 100644
--- a/src/main/utils/index.ts
+++ b/src/main/utils/index.ts
@@ -1 +1,30 @@
-export * from "./path"
\ No newline at end of file
+/**
+ * Convert a number of seconds into an `HH:mm:ss.SSS` timestamp,
+ * e.g. 2.304000 -> 00:00:02.304 and 3 -> 00:00:03.000.
+ *
+ * The fraction is truncated (not rounded) to three digits and zero-padded, so
+ * whole-second inputs work too. The hour field wraps around after 24 hours.
+ */
+export function secondToTimestamp(time: string | number): string {
+  // Default the fraction to "" so an input without "." does not crash on `.slice`.
+  const [second, millisecond = ""] = `${time}`.split(".")
+  const date = new Date(0)
+  // Set the seconds in UTC because `toISOString` below reports UTC as well.
+  date.setUTCSeconds(+second)
+  return date.toISOString().slice(11, 19) + "." + millisecond.slice(0, 3).padEnd(3, "0")
+}
+
+/**
+ * Convert an `HH:mm:ss,SSS` timestamp into seconds, e.g. 00:00:02,304 -> 2.304.
+ * A timestamp without the `,SSS` part is read as having zero milliseconds.
+ */
+export function timestampToSecond(timestamp: string): number {
+  const [hours, minutes, _seconds] = timestamp.split(":");
+  // Default the milliseconds so a bare `HH:mm:ss` does not turn into NaN.
+  const [seconds, milliseconds = "0"] = _seconds.split(",");
+
+  const totalSeconds = +hours * 3600 + +minutes * 60 + +seconds;
+  const fractionalSeconds = +milliseconds / 1000;
+
+  return totalSeconds + fractionalSeconds;
+}
\ No newline at end of file
diff --git a/src/main/whisper/index.ts b/src/main/whisper/index.ts
index 895edfd..e247001 100644
--- a/src/main/whisper/index.ts
+++ b/src/main/whisper/index.ts
@@ -14,6 +14,15 @@ if(!fs.existsSync(addonPath)) {
 
 const { whisper } = require(addonPath);
 
+export type WhisperResItem = [
+  /** start time , format 00:00:00,000 */
+  string,
+  /** end time , format 00:00:00,000 */
+  string,
+  /** subtitle */
+  string
+]
+
 type WhisperAsync = (options: {
   language: string,
   model: string,
@@ -22,28 +31,23 @@ type WhisperAsync = (options: {
   max_len?: number
   /** default: false */
   translate?: boolean
-}) => Promise<Array<[
-  /** start time , format 00:00:00,000 */
-  string,
-  /** end time , format 00:00:00,000 */
-  string,
-  /** subtitle */
-  string
-]>>
+}) => Promise<Array<WhisperResItem>>
 
 const whisperAsync: WhisperAsync = promisify(whisper);
 
-export function transcribe(
+export async function transcribe(
   modelPath: string,
   filePath: string,
   _options: Omit<Parameters<WhisperAsync>[0], "model" | "fname_inp"> = {language: "en"},
+  idx?: number,
+  cb?: (idx: number) => any,
 ) {
   const defaultOptions = {
     language: "en",
     max_len: 0,
     translate: false,
   }
-  return whisperAsync({
+  const res = await whisperAsync({
     model: modelPath,
     fname_inp: filePath,
     ...{
@@ -51,4 +55,6 @@ export function transcribe(
       ..._options,
     },
   })
+  cb?.(idx)
+  return res
 }
\ No newline at end of file