Skip to content

Commit

Permalink
feat: update whisper.cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
zcf0508 committed Dec 10, 2023
1 parent b3c6967 commit 0f27833
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 43 deletions.
10 changes: 3 additions & 7 deletions script/post.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ function info(msg) {

if(!fs.existsSync(path.resolve(__dirname, "../lib/whisper.cpp/Makefile"))) {
info("Clone whisper.cpp")
shelljs.exec("git clone https://github.com/ggerganov/whisper.cpp.git lib/whisper.cpp -b v1.4.2 --depth=1")
shelljs.exec("git clone https://github.com/ggerganov/whisper.cpp.git lib/whisper.cpp -b v1.5.1 --depth=1")

info("Patch whisper addon.cpp")
fs.cpSync(
Expand All @@ -47,11 +47,8 @@ if(
}

if(
(
fs.existsSync(path.resolve(__dirname, "../lib/whisper.cpp/build/Release/whisper-addon.node"))
|| fs.existsSync(path.resolve(__dirname, "../lib/whisper.cpp/build/bin/Release/whisper-addon.node"))
)
&& !fs.existsSync(path.resolve(__dirname, "../public/resources/whisper/whisper-addon.node"))
fs.existsSync(path.resolve(__dirname, "../lib/whisper.cpp/build/Release/whisper-addon.node"))
|| fs.existsSync(path.resolve(__dirname, "../lib/whisper.cpp/build/bin/Release/whisper-addon.node"))
) {
info("Copy whisper.cpp addon")
if(process.platform === "win32") {
Expand Down Expand Up @@ -84,7 +81,6 @@ if(!fs.existsSync(path.resolve(__dirname, "../lib/vad/build/Release/vad_addon.no

if(
fs.existsSync(path.resolve(__dirname, "../lib/vad/build/Release/vad_addon.node"))
&& !fs.existsSync(path.resolve(__dirname, "../public/resources/vad/vad_addon.node"))
) {
info("Copy VAD-addon")
cpDirSync(
Expand Down
13 changes: 10 additions & 3 deletions script/whisper-addon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ struct whisper_params {
bool print_colors = false;
bool print_progress = false;
bool no_timestamps = false;
bool use_gpu = true;

std::string language = "en";
std::string prompt;
Expand Down Expand Up @@ -153,7 +154,9 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {

// whisper init

struct whisper_context * ctx = whisper_init_from_file(params.model.c_str());
struct whisper_context_params cparams;
cparams.use_gpu = params.use_gpu;
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);

if (ctx == nullptr) {
fprintf(stderr, "error: failed to initialize whisper context\n");
Expand Down Expand Up @@ -315,14 +318,18 @@ Napi::Value whisper(const Napi::CallbackInfo& info) {
std::string language = whisper_params.Get("language").As<Napi::String>();
std::string model = whisper_params.Get("model").As<Napi::String>();
std::string input = whisper_params.Get("fname_inp").As<Napi::String>();
int32_t max_len = whisper_params.Get("max_len").As<Napi::Number>();
std::string prompt = whisper_params.Get("prompt").As<Napi::String>();
int32_t n_threads = whisper_params.Get("n_threads").As<Napi::Number>();
bool translate = whisper_params.Get("translate").As<Napi::Boolean>();
bool use_gpu = whisper_params.Get("use_gpu").As<Napi::Boolean>();

params.language = language;
params.model = model;
params.fname_inp.emplace_back(input);
params.max_len = max_len;
params.prompt = prompt;
params.n_threads = n_threads;
params.translate = translate;
params.use_gpu = use_gpu;

Napi::Function callback = info[1].As<Napi::Function>();
Worker* worker = new Worker(callback, params);
Expand Down
55 changes: 43 additions & 12 deletions src/main/autocut/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { spawn } from "child_process"
import readline from "readline"
import fs from "fs"
import os from "os";
import { timestampToSecond } from "~~/utils"
import { safePath } from "~~/utils/path"
import { AutocutConfig } from "~~/../types"
Expand Down Expand Up @@ -216,31 +217,60 @@ export async function generateSubtitle1(
) {
const srtFile = file.slice(0, file.lastIndexOf(".")) + ".srt"

if (fs.existsSync(srtFile)) {
cb?.("success", "srt file already exist")
return
}

const srt: NodeList = []
const times = await detectVoiceActivity(file)

let res: Array<WhisperResItem[]> = []

let sliceRes: Array<{start: string, end: string, file: string}> = []

if(config.vad) {
const times = await detectVoiceActivity(file)

const { sliceRes: _sliceRes, removeTemps } = await slice(file, times)
sliceRes = _sliceRes

const done: number[] = []

cb?.("processing", "transcribing", 0)
res = await Promise.all(sliceRes.map((item, _idx) => {
return transcribe(config.modelPath, item.file, {language: config.language}, _idx, (idx: number) => {
done.push(idx)
cb?.("processing", "transcribing", Math.floor(done.length / sliceRes.length * 100))
})
}))

// Report transcription progress
let lastProgress = 0
for (let i = 0; i < times.length; i++) {
const item = sliceRes[i]

const { res: t, cost } = await transcribe(
config.modelPath,
item.file,
{
language: config.language,
n_threads: os.cpus().length - 1,
},
)

console.log(`speed: ${(cost/(Number(item.end) - Number(item.start))).toFixed(2)}`)

res.push(t)

const progress = Math.floor(i / times.length * 100)
if(progress > lastProgress) {
lastProgress = progress
cb?.("processing", "transcribing", progress > 100 ? 100 : progress)
}
}

removeTemps()
} else {
cb?.("processing", "transcribing", 0)
res.push(await transcribe(config.modelPath, file, {language: config.language}))
res.push((await transcribe(
config.modelPath,
file,
{
language: config.language,
n_threads: os.cpus().length - 1,
},
)).res)
cb?.("processing", "transcribing", 100)
}

Expand All @@ -263,4 +293,5 @@ export async function generateSubtitle1(
})
})
fs.writeFileSync(srtFile, stringifySync(srt, { format: "SRT" }))
}
cb?.("success", "saved", 100)
}
28 changes: 24 additions & 4 deletions src/main/ffmpeg/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ import { spawn } from "child_process"
import readline from "readline"
import { secondToTimestamp } from "~~/utils"
import { safePath } from "~~/utils/path"
import { type Vad } from "~~/vad"
import { type Vad } from "~~/vad/index"
import { i } from "vitest/dist/index-5aad25c1";

type ProcessStatus = "error" | "processing" | "success"

Expand Down Expand Up @@ -161,9 +162,28 @@ function _ffmpegSlice(file: string, start: string, end: string) {
export async function slice(file: string, times: ReturnType<Vad>) {
// ffmpeg -i input_audio.mp3 -ss 00:00:02 -t 00:00:03 -c:a pcm_s16le output_audio.wav

const sliceRes = await Promise.all(times.map(time => {
return _ffmpegSlice(file, time.start, time.end)
}))
const sliceRes = [] as Array<{
start: string;
end: string;
file: string;
}>

const cpuNum = os.cpus().length
const threads = Math.floor(cpuNum / 2) || 1

// Print slicing progress
let lastProgress = 0
for (let i = 0; i < Math.ceil(times.length / threads); i++) {
const _res = await Promise.all(times.slice(i * threads, i * threads + threads).map(time => {
return _ffmpegSlice(file, time.start, time.end)
}))
sliceRes.push(..._res)
const progress = Math.floor((i * threads + threads) / times.length * 100)
if(progress > lastProgress) {
lastProgress = progress
console.log(`slice progress: ${progress > 100 ? 100 : progress}%`)
}
}

return {
sliceRes,
Expand Down
38 changes: 31 additions & 7 deletions src/main/vad/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,21 @@ function getDuration(file: string) {
}

export async function detectVoiceActivity(filePath: string) {
const res = vad(model, filePath)

const mediaDuration = await getDuration(filePath)

const res = vad(model, filePath)

res[0].start = "0.000000"
res[res.length - 1].end = `${mediaDuration}`
if(res[0].start !== "0.000000") {
res.unshift({
start: "0.000000",
end: res[0].start,
})
}
if(res[res.length - 1].end !== `${mediaDuration}`) {
res.push({
start: res[res.length - 1].end,
end: `${mediaDuration}`,
})
}

for(let i = 0; i < res.length - 1; i++) {
const current = res[i]
Expand All @@ -52,6 +61,21 @@ export async function detectVoiceActivity(filePath: string) {
next.start = `${middle}`
}
}
console.log(res)
return res

let merged = [];
let current = res[0];

for(let i = 1; i < res.length; i++) {
if(Number(res[i].end) - Number(current.start) < 15) {
current.end = res[i].end;
} else {
merged.push(current);
current = res[i];
}
}

// Push the last segment
merged.push(current);

return merged
}
32 changes: 22 additions & 10 deletions src/main/whisper/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import fs from "fs";
import os from "os";
import path from "path";
import { promisify } from "node:util";

Expand Down Expand Up @@ -27,26 +28,32 @@ type WhisperAsync = (options: {
language: string,
model: string,
fname_inp: string
/** default: 0 */
max_len?: number
/** default: Math.min(4, os.cpus().length) */
n_threads: number
prompt: string
/** default: false */
translate?: boolean
translate: boolean
/** NOTE: does not work yet. Default: true */
use_gpu?: boolean
}) => Promise<Array<WhisperResItem>>

const whisperAsync: WhisperAsync = promisify(whisper);

export async function transcribe(
modelPath: string,
filePath: string,
_options: Omit<Parameters<WhisperAsync>[0], "model" | "fname_inp"> = {language: "en"},
idx?: number,
cb?: (idx: number) => any,
_options: Partial<Omit<Parameters<WhisperAsync>[0], "model" | "fname_inp" | "use_gpu">> = {},
) {
const defaultOptions = {
const defaultOptions: Omit<Parameters<WhisperAsync>[0], "model" | "fname_inp"> = {
language: "en",
max_len: 0,
n_threads: Math.min(4, os.cpus().length),
translate: false,
prompt: "",
use_gpu: true,
}

let time = Date.now()

const res = await whisperAsync({
model: modelPath,
fname_inp: filePath,
Expand All @@ -55,6 +62,11 @@ export async function transcribe(
..._options,
},
})
cb?.(idx)
return res

const cost = Date.now() - time

return {
res,
cost: cost / 1000,
}
}

0 comments on commit 0f27833

Please sign in to comment.