Skip to content

Commit

Permalink
[O] Refactor backend logic
Browse files Browse the repository at this point in the history
  • Loading branch information
hykilpikonna committed Feb 11, 2024
1 parent 1575e01 commit 270726e
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 11 deletions.
66 changes: 55 additions & 11 deletions back/main.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import os
import threading
import time
import traceback
import uuid
from pathlib import Path
from subprocess import check_call
from tempfile import TemporaryDirectory
from tempfile import NamedTemporaryFile
from typing import NamedTuple

import GPUtil
import magic
import uvicorn
from fastapi import FastAPI, UploadFile, File
from hypy_utils import write, write_json
from starlette.middleware.cors import CORSMiddleware
from starlette.staticfiles import StaticFiles

from wp import transcribe, diarized_transcribe
from wp import diarized_transcribe

app = FastAPI()

Expand All @@ -26,31 +29,44 @@
allow_headers=["*"],
)

DATA_DIR = Path("/data/private/whisper")
TMP_DIR = Path("/tmp/whisper")
TMP_DIR.mkdir(exist_ok=True)
DATA_DIR = Path("/ws/tmp-whisper")
DATA_DIR.mkdir(exist_ok=True)
MAGIC = magic.Magic(flags=magic.MAGIC_MIME_TYPE)

process_queue = []
processing = ""
start_time = 0
lock = threading.Lock()
errors = {}

app.mount("/result", StaticFiles(directory=DATA_DIR / "transcription"), name="result")


class PendingProcess(NamedTuple):
audio_id: str
file: Path


@app.post("/upload")
async def upload(file: UploadFile = File(...)):
try:
contents = await file.read()

# Get time in YYYY-MM-DD HH:MM:SS format
time_str = time.strftime("%Y-%m-%d %H:%M:%S")
print(f"[{time_str}] Received {file.filename}")

# Generate uuid for the audio file
audio_id = str(uuid.uuid4())

# Write to file
write(DATA_DIR / "audio" / f"{audio_id}.mp3", contents)
ext = file.filename.split('.')[-1]
fp = DATA_DIR / "audio" / f"{audio_id}.{ext}"
write(fp, contents)

# Add to processing queue
with lock:
process_queue.append(audio_id)
process_queue.append(PendingProcess(audio_id, fp))

return {"audio_id": audio_id}

Expand All @@ -71,8 +87,15 @@ async def progress(uuid: str):
elapsed = time.time() - start_time

return {"done": False, "status": f"Processing ({lavg / num_cpus * 100:.0f}% CPU, {nvidia * 100:.0f}% GPU, {elapsed:.0f}s elapsed)"}
elif uuid in errors:
return {"done": False, "status": "Error", "error": errors[uuid]}
else:
return {"done": False, "status": f"Queued ({process_queue.index(uuid)} in queue before this one)"}
index = 0
for i, (audio_id, _) in enumerate(process_queue):
if audio_id == uuid:
index = i
break
return {"done": False, "status": f"Queued ({index} in queue before this one)"}


def process():
Expand All @@ -81,23 +104,44 @@ def process():
time.sleep(0.1)
with lock:
if len(process_queue) > 0:
audio_id = process_queue.pop(0)
audio_id, fp = process_queue.pop(0)
processing = audio_id
start_time = time.time()
else:
continue

try:
# Check file magic
mime = MAGIC.id_buffer(open(fp, "rb").read(2048))
is_tmp = False

if mime not in {'audio/mpeg', 'audio/x-wav', 'audio/wav'}:
print(f"Converting {fp} to mp3")
mp3_file = TMP_DIR / f"{audio_id}.mp3"

# Check if file is audio by converting to mp3 (fp) using ffmpeg
check_call(["ffmpeg", "-i", fp, "-acodec", "mp3", "-y", mp3_file])

# Set new fp
fp = mp3_file
is_tmp = True

# Start transcription
output, elapsed = diarized_transcribe(DATA_DIR / "audio" / f"{audio_id}.mp3", num_speakers=2)
output, elapsed = diarized_transcribe(fp, num_speakers=2)

# Write to file
write_json(DATA_DIR / "transcription" / f"{audio_id}.json", {
"output": output,
"elapsed": elapsed
})

# Remove tmp file
if is_tmp:
fp.unlink()

except Exception as e:
pass
errors[audio_id] = str(e) + "\n\n" + traceback.format_exc()
print(f"Error processing {audio_id}: {e}")

# Clear processing
with lock:
Expand Down
1 change: 1 addition & 0 deletions back/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ transformers
torch
uvicorn
intervaltree
filemagic

0 comments on commit 270726e

Please sign in to comment.