Skip to content

Commit

Permalink
switch to faster-whisper and upgrade deltabot-cli
Browse files Browse the repository at this point in the history
  • Loading branch information
adbenitez committed Mar 22, 2024
1 parent 6097b6c commit 792f617
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 69 deletions.
10 changes: 2 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,8 @@ A voice-to-text converter bot for Delta Chat.
pip install voice2text-deltabot
```

The bot uses [Whisper](https://github.com/openai/whisper) to extract the text from voice messages,
Whisper requires the command-line tool `ffmpeg` to be installed on your system, which is available
from most package managers:

```sh
# on Ubuntu or Debian
sudo apt update && sudo apt install ffmpeg
```
The bot uses [Faster Whisper](https://github.com/guillaumekln/faster-whisper/) to extract the text
from voice messages.

## Usage

Expand Down
18 changes: 10 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
[build-system]
requires = ["setuptools"]
requires = ["setuptools>=64", "setuptools_scm>=8"]
build-backend = "setuptools.build_meta"

[project]
version = "0.1.3"
name = "voice2text-deltabot"
description = "Delta Chat bot to extract text from voice messages"
dynamic = ["version"]
readme = "README.md"
requires-python = ">=3.8"
license = {file = "LICENSE.txt"}
keywords = ["deltachat", "bot"]
authors = [
{name = "adbenitez", email = "[email protected]"},
{name = "adbenitez", email = "[email protected]"},
]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python"
"Programming Language :: Python",
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
]
dependencies = [
"deltabot-cli>=0.2.0",
"deltachat-rpc-server>=1.127.0",
"openai-whisper",
"deltabot-cli>=5.0.0,<6.0",
"faster-whisper",
]

[project.optional-dependencies]
Expand All @@ -36,6 +35,9 @@ dev = [
[project.scripts]
voice2text-bot = "voice2text_deltabot:main"

[tool.setuptools_scm]
# This section can be empty if no extra settings are needed; its presence alone enables setuptools_scm.

[tool.isort]
profile = "black"

Expand Down
2 changes: 0 additions & 2 deletions voice2text_deltabot/const.py

This file was deleted.

87 changes: 56 additions & 31 deletions voice2text_deltabot/hooks.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,79 @@
"""Event handlers and hooks."""

import logging
from argparse import Namespace

import whisper
from deltabot_cli import AttrDict, Bot, BotCli, EventType, const, events

from .const import MODEL_CFG_KEY
from .subcommands import add_subcommands
from deltabot_cli import AttrDict, Bot, BotCli, ChatType, EventType, ViewType, events
from faster_whisper import WhisperModel

cli = BotCli("voice2text-bot")
add_subcommands(cli)
STATUS = "I am a Delta Chat bot, send me any voice message to convert it to text"
MODEL: whisper.Whisper = None # noqa
cli.add_generic_option(
"--model",
help="set the whisper model to use, for example: small, medium, large. (default: %(default)s)",
default="large",
)
cli.add_generic_option(
"--device",
help="set the device type (default: %(default)s)",
choices=["cuda", "cpu"],
default="cpu",
)
cli.add_generic_option(
"--compute-type",
help="set the compute type (default: %(default)s)",
choices=["int8", "float16", "int8_float16"],
default="int8",
)
STATUS = "I'm a Delta Chat bot, send me any voice message to convert it to text"
MODEL: WhisperModel = None # noqa


@cli.on_init
def on_init(bot: Bot, _args: Namespace) -> None:
if not bot.account.get_config("displayname"):
bot.account.set_config("displayname", "Voice To Text")
bot.account.set_config("selfstatus", STATUS)
def _on_init(bot: Bot, _args: Namespace) -> None:
for accid in bot.rpc.get_all_account_ids():
if not bot.rpc.get_config(accid, "displayname"):
bot.rpc.set_config(accid, "displayname", "Voice To Text")
bot.rpc.set_config(accid, "selfstatus", STATUS)
bot.rpc.set_config(accid, "delete_server_after", "1")
bot.rpc.set_config(accid, "delete_device_after", str(60 * 60 * 24))


@cli.on_start
def on_start(bot: Bot, _args: Namespace) -> None:
def on_start(_bot: Bot, args: Namespace) -> None:
global MODEL # pylint: disable=W0603
model = bot.account.get_config(MODEL_CFG_KEY) or "medium"
MODEL = whisper.load_model(model)
MODEL = WhisperModel(args.model, device=args.device, compute_type=args.compute_type)


@cli.on(events.RawEvent)
def log_event(event: AttrDict) -> None:
def _log_event(bot: Bot, accid: int, event: AttrDict) -> None:
if event.kind == EventType.INFO:
logging.info(event.msg)
bot.logger.debug(event.msg)
elif event.kind == EventType.WARNING:
logging.warning(event.msg)
bot.logger.warning(event.msg)
elif event.kind == EventType.ERROR:
logging.error(event.msg)
bot.logger.error(event.msg)
elif event.kind == EventType.SECUREJOIN_INVITER_PROGRESS:
if event.progress == 1000:
bot.logger.debug("QR scanned by contact id=%s", event.contact_id)
chatid = bot.rpc.create_chat_by_contact_id(accid, event.contact_id)
bot.rpc.send_msg(accid, chatid, {"text": STATUS})


@cli.on(events.NewMessage(is_info=False))
def on_newmsg(event: AttrDict) -> None:
msg = event.message_snapshot
if msg.view_type in (const.ViewType.VOICE, const.ViewType.AUDIO):
result = MODEL.transcribe(msg.file)
msg.chat.send_message(text=result["text"], quoted_msg=msg.id)
return
@cli.on(events.NewMessage(is_info=False, is_bot=None))
def on_newmsg(bot: Bot, accid: int, event: AttrDict) -> None:
msg = event.msg
chat = bot.rpc.get_basic_chat_info(accid, msg.chat_id)
if chat.chat_type == ChatType.SINGLE:
bot.rpc.markseen_msgs(accid, [msg.id])
if msg.is_bot:
return

chat = event.message_snapshot.chat.get_basic_snapshot()
if chat.chat_type == const.ChatType.SINGLE:
event.message_snapshot.chat.send_message(
text=STATUS, quoted_msg=event.message_snapshot.id
if msg.view_type in (ViewType.VOICE, ViewType.AUDIO):
segments, info = MODEL.transcribe(msg.file)
bot.logger.info(
f"[chat={msg.chat_id}, msg={msg.id}] Detected language"
f" {info.language!r} with probability {info.language_probability}"
)
text = " ".join(seg.text for seg in segments)
bot.rpc.send_msg(accid, msg.chat_id, {"text": text, "quotedMessageId": msg.id})
elif chat.chat_type == ChatType.SINGLE:
bot.rpc.send_msg(accid, msg.chat_id, {"text": STATUS})
20 changes: 0 additions & 20 deletions voice2text_deltabot/subcommands.py

This file was deleted.

0 comments on commit 792f617

Please sign in to comment.