Skip to content

Commit

Permalink
feat: transcribe with addon
Browse files Browse the repository at this point in the history
  • Loading branch information
zcf0508 committed Sep 22, 2023
1 parent 736cdf6 commit 5cc78b0
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 35 deletions.
2 changes: 1 addition & 1 deletion src/main/autocut/check.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { exec } from "child_process"
import { safePath } from "~~/utils"
import { safePath } from "~~/utils/path"

export function ffmpegCheck() {
return new Promise<boolean>((resolve, reject) => {
Expand Down
65 changes: 64 additions & 1 deletion src/main/autocut/index.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
import { spawn } from "child_process"
import readline from "readline"
import fs from "fs"
import { safePath } from "~~/utils"
import { timestampToSecond } from "~~/utils"
import { safePath } from "~~/utils/path"
import { AutocutConfig } from "~~/../types"
import { slice } from "~~/ffmpeg"
import { detectVoiceActivity } from "~~/vad"
import { transcribe, WhisperResItem } from "~~/whisper"
import { type NodeCue, type NodeList, stringifySync } from "subtitle"

type GenerateStatus = "processing" | "error" | "success"

/**
Expand Down Expand Up @@ -201,3 +207,60 @@ export function cutVideo(
});

}


/**
 * Transcribe `file` with the whisper addon and write the result as an SRT
 * file next to it (same path, extension replaced by `.srt`).
 *
 * With `config.vad` enabled, voice-activity detection is run, the input is cut
 * into temporary slices, all slices are handed to `transcribe` via
 * `Promise.all`, and every cue is shifted by its slice's start offset.
 * Otherwise the whole file is transcribed in a single call.
 *
 * @param file - Path of the media file to transcribe.
 * @param config - `language` and `modelPath` are forwarded to `transcribe`;
 *   `vad` switches on the slice-based mode.
 * @param cb - Optional progress callback. It is invoked with status
 *   `"processing"`, message `"transcribing"` and a percentage (0-100).
 */
export async function generateSubtitle1(
  file: string,
  config: {language: string, modelPath: string, vad?: boolean},
  cb?: (status: GenerateStatus, msg: string, process?: number) => any,
) {
  // Only treat a dot as the extension separator when it belongs to the file
  // name itself: `lastIndexOf(".")` alone chops the last character of
  // extension-less paths and truncates at dots inside directory names.
  const lastSeparator = Math.max(file.lastIndexOf("/"), file.lastIndexOf("\\"))
  const extensionDot = file.lastIndexOf(".")
  const basePath = extensionDot > lastSeparator + 1 ? file.slice(0, extensionDot) : file
  const srtFile = basePath + ".srt"

  const srt: NodeList = []

  let res: Array<WhisperResItem[]> = []

  let sliceRes: Array<{start: string, end: string, file: string}> = []

  if (config.vad) {
    // Voice-activity detection is only needed in slice mode, so it is not run
    // for whole-file transcription.
    const times = detectVoiceActivity(file)
    const { sliceRes: slices, removeTemps } = await slice(file, times)
    sliceRes = slices

    try {
      let finished = 0

      cb?.("processing", "transcribing", 0)
      res = await Promise.all(sliceRes.map((item, idx) => {
        return transcribe(config.modelPath, item.file, {language: config.language}, idx, () => {
          finished += 1
          cb?.("processing", "transcribing", Math.floor(finished / sliceRes.length * 100))
        })
      }))
    } finally {
      // Clean up the temporary slices even when a transcription fails.
      removeTemps()
    }
  } else {
    cb?.("processing", "transcribing", 0)
    res.push(await transcribe(config.modelPath, file, {language: config.language}))
    cb?.("processing", "transcribing", 100)
  }

  // Build the SRT cues and save them.
  res.forEach((lines, partIdx) => {
    // Slice start (seconds) is the offset of every cue in that slice; it is 0
    // when the whole file was transcribed at once.
    const segment = sliceRes[partIdx]
    const offset = segment ? Number(Number(segment.start).toFixed(3)) : 0

    for (const [start, end, text] of lines) {
      const cue: NodeCue = {
        type: "cue",
        data: {
          // Cue times are in milliseconds; rounding keeps float artifacts
          // (e.g. 2304.0000000000005) out of the written timestamps.
          start: Math.round((timestampToSecond(start) + offset) * 1000),
          end: Math.round((timestampToSecond(end) + offset) * 1000),
          text,
        },
      }
      srt.push(cue)
    }
  })
  fs.writeFileSync(srtFile, stringifySync(srt, { format: "SRT" }))
}
28 changes: 6 additions & 22 deletions src/main/ffmpeg/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import path from "path";
import { v4 as uuidv4 } from "uuid";
import { spawn } from "child_process"
import readline from "readline"
import { safePath } from "~~/utils"
import { secondToTimestamp } from "~~/utils"
import { safePath } from "~~/utils/path"
import { type Vad } from "~~/vad"

type ProcessStatus = "error" | "processing" | "success"
Expand Down Expand Up @@ -122,50 +123,35 @@ export function convertVideo(
});
}

/**
 * Convert a number of seconds into an `HH:MM:SS.mmm` timestamp,
 * e.g. 2.304000 -> 00:00:02.304.
 *
 * The value is rounded to the nearest millisecond. Whole numbers (no decimal
 * point) and short fractions are accepted and always produce three
 * millisecond digits; hours are not wrapped at 24.
 *
 * @param time - Seconds, as a number or a numeric string.
 * @returns The formatted timestamp.
 * @throws RangeError if `time` is not a finite, non-negative number.
 */
function _transformTimeformat(time: string | number): string {
  const totalMs = Math.round(Number(time) * 1000)
  if (!Number.isFinite(totalMs) || totalMs < 0) {
    throw new RangeError(`expected a non-negative number of seconds, got "${time}"`)
  }

  const pad = (value: number, width: number): string => String(value).padStart(width, "0")

  const hours = Math.floor(totalMs / 3600000)
  const minutes = Math.floor(totalMs / 60000) % 60
  const seconds = Math.floor(totalMs / 1000) % 60
  const millis = totalMs % 1000

  return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)}.${pad(millis, 3)}`
}

function _ffmpegSlice(file: string, start: string, end: string) {
const tempDir = path.join(os.tmpdir(), "./autocut-client")
if(!fs.existsSync(tempDir)){
fs.mkdirSync(tempDir)
}
return new Promise<ReturnType<Vad>[0] & {file: string}>((resolve, reject) => {

const id = uuidv4()
const exportPath = `${tempDir}/${id}.wav`
console.log("ffmpeg",
[
"-i", safePath(file), "-y",
"-ss", _transformTimeformat(start),
"-t", _transformTimeformat(Number(end) - Number(start)),
"-c:a", "pcm_s16le",
exportPath,
])
const p = spawn(
"ffmpeg",
[
"-i", safePath(file), "-y",
"-ss", _transformTimeformat(start),
"-t", _transformTimeformat(Number(end) - Number(start)),
"-ss", secondToTimestamp(start),
"-t", secondToTimestamp(Number(end) - Number(start)),
"-c:a", "pcm_s16le",
exportPath,
],
)

p.on("close", (code) => {
console.log(`child process exited with code ${code}`);
if(code === 0) {
resolve({
start,
end,
file: exportPath,
})
} else {
console.log(`child process exited with code ${code}`);
reject()
}
})
Expand All @@ -176,8 +162,6 @@ function _ffmpegSlice(file: string, start: string, end: string) {
export async function slice(file: string, times: ReturnType<Vad>) {
// ffmpeg -i input_audio.mp3 -ss 00:00:02 -t 00:00:03 -c:a pcm_s16le output_audio.wav

console.log(os.tmpdir())

const sliceRes = await Promise.all(times.map(time => {
return _ffmpegSlice(file, time.start, time.end)
}))
Expand Down
20 changes: 19 additions & 1 deletion src/main/utils/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1,19 @@
export * from "./path"
/**
 * Convert a number of seconds into an `HH:MM:SS.mmm` timestamp,
 * e.g. 2.304000 -> 00:00:02.304.
 *
 * The value is rounded to the nearest millisecond. Whole numbers (no decimal
 * point) and short fractions are accepted and always produce three
 * millisecond digits; hours are not wrapped at 24.
 *
 * @param time - Seconds, as a number or a numeric string.
 * @returns The formatted timestamp.
 * @throws RangeError if `time` is not a finite, non-negative number.
 */
export function secondToTimestamp(time: string | number): string {
  const totalMs = Math.round(Number(time) * 1000)
  if (!Number.isFinite(totalMs) || totalMs < 0) {
    throw new RangeError(`expected a non-negative number of seconds, got "${time}"`)
  }

  const pad = (value: number, width: number): string => String(value).padStart(width, "0")

  const hours = Math.floor(totalMs / 3600000)
  const minutes = Math.floor(totalMs / 60000) % 60
  const seconds = Math.floor(totalMs / 1000) % 60
  const millis = totalMs % 1000

  return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)}.${pad(millis, 3)}`
}

/**
 * Convert an `HH:MM:SS,mmm` timestamp into seconds,
 * e.g. 00:00:02,304 -> 2.304000.
 *
 * The fractional part may be separated by `,` or `.` and may be omitted
 * (`00:00:02` -> 2); it is read as a decimal fraction of a second.
 *
 * @param timestamp - Timestamp with exactly three `:`-separated fields.
 * @returns The time in seconds (NaN if a field is not numeric).
 * @throws Error if `timestamp` does not have exactly three fields.
 */
export function timestampToSecond(timestamp: string): number {
  const fields = timestamp.trim().split(":");
  if (fields.length !== 3) {
    throw new Error(`expected a timestamp like 00:00:02,304, got "${timestamp}"`);
  }

  const [hours, minutes, secondsField] = fields;
  // Split the seconds field into its whole and fractional parts; the fraction
  // defaults to empty so that a missing one does not become NaN.
  const [seconds, fraction = ""] = secondsField.split(/[,.]/);

  const totalSeconds = +hours * 3600 + +minutes * 60 + +seconds;
  const fractionalSeconds = fraction === "" ? 0 : +`0.${fraction}`;

  return totalSeconds + fractionalSeconds;
}
26 changes: 16 additions & 10 deletions src/main/whisper/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ if(!fs.existsSync(addonPath)) {

const { whisper } = require(addonPath);

/**
 * One transcription row: a `[start, end, subtitle]` tuple.
 * `start` and `end` are timestamps in the format `00:00:00,000`.
 */
export type WhisperResItem = [
  start: string,
  end: string,
  subtitle: string,
]

type WhisperAsync = (options: {
language: string,
model: string,
Expand All @@ -22,33 +31,30 @@ type WhisperAsync = (options: {
max_len?: number
/** default: false */
translate?: boolean
}) => Promise<Array<[
/** start time , format 00:00:00,000 */
string,
/** end time , format 00:00:00,000 */
string,
/** subtitle */
string
]>>
}) => Promise<Array<WhisperResItem>>

// Promise-returning form of the addon's `whisper` export, typed as WhisperAsync.
// NOTE(review): `promisify` is presumably Node's `util.promisify`; its import is not visible here.
const whisperAsync: WhisperAsync = promisify(whisper);

/**
 * Transcribe one file with the whisper addon.
 *
 * @param modelPath - Passed to the addon as `model`.
 * @param filePath - Passed to the addon as `fname_inp`.
 * @param _options - Remaining addon options, merged over the defaults
 *   (`language: "en"`, `max_len: 0`, `translate: false`).
 * @param idx - Caller-defined index handed back through `cb`. It defaults to 0
 *   so the callback always receives a number.
 * @param cb - Invoked once with `idx` after the addon call resolves; it is not
 *   invoked when the call rejects.
 * @returns The rows resolved by the addon call.
 */
export async function transcribe(
  modelPath: string,
  filePath: string,
  _options: Omit<Parameters<WhisperAsync>[0], "model" | "fname_inp"> = {language: "en"},
  idx = 0,
  cb?: (idx: number) => any,
) {
  const defaultOptions = {
    language: "en",
    max_len: 0,
    translate: false,
  }
  const res = await whisperAsync({
    model: modelPath,
    fname_inp: filePath,
    ...defaultOptions,
    ..._options,
  })
  cb?.(idx)
  return res
}

0 comments on commit 5cc78b0

Please sign in to comment.