Skip to content

Commit

Permalink
Add automatic checking for profanity
Browse files Browse the repository at this point in the history
This adds functionality to automatically check for profanity in text
messages written in any of the XMPP MUC rooms monitored by the
moderation bot.

The terms considered profanity can be configured using the
database and are language-specific. They have to be stored in
their lemmatized form. If a supported language gets detected with an
accuracy of 100%, only terms for that language will be checked;
otherwise, English terms will be checked as well. The languages
supported for now are English, French, German, Polish, Portuguese,
Russian, Spanish and Turkish.

The first two times a user uses profanity within a sliding window of
three months, they'll receive a warning. Starting from the third time,
the user will get muted. The first mute lasts five minutes, and the
duration increases exponentially, up to one week, with each continued
use of profanity afterwards.

To enable this functionality, the `--enable-profanity-monitoring`
command-line option has to be provided.
  • Loading branch information
Dunedan committed Oct 8, 2024
1 parent 64d6cf2 commit 1ed99e3
Show file tree
Hide file tree
Showing 3 changed files with 273 additions and 27 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ dependencies = [
"cachetools",
"defusedxml",
"dateparser",
"simplemma[marisa-trie]>=1.1.1",
"slixmpp>=1.8.0",
"sqlalchemy>=2.0.4",
]
Expand Down Expand Up @@ -87,5 +88,5 @@ max-doc-length = 72
convention = "pep257"

[tool.ruff.lint.pylint]
max-args = 8
max-args = 10
max-nested-blocks = 4
22 changes: 9 additions & 13 deletions xpartamupp/lobby_moderation_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from typing import Any, ClassVar

from sqlalchemy import (
JSON,
DateTime,
ForeignKey,
String,
Expand Down Expand Up @@ -69,20 +70,13 @@ class Base(DeclarativeBase):
}


class Blacklist(Base):
class ProfanityTerms(Base):
"""Model for profanity terms."""

__tablename__ = "profanity_blacklist"
__tablename__ = "profanity_terms"

word: Mapped[str] = mapped_column(String(255), primary_key=True)


class Whitelist(Base):
"""Model for terms which are whitelisted from profanity."""

__tablename__ = "profanity_whitelist"

word: Mapped[str] = mapped_column(String(255), primary_key=True)
term: Mapped[str] = mapped_column(String(255), primary_key=True)
language: Mapped[str] = mapped_column(String(2), primary_key=True)


class ProfanityIncident(Base):
Expand All @@ -91,10 +85,12 @@ class ProfanityIncident(Base):
__tablename__ = "profanity_incidents"

id: Mapped[int] = mapped_column(primary_key=True)
timestamp: Mapped[datetime]
timestamp: Mapped[datetime] = mapped_column(default=partial(datetime.now, tz=UTC))
player: Mapped[str] = mapped_column(String(255))
room: Mapped[str] = mapped_column(String(255))
offending_content: Mapped[str] = mapped_column(UnicodeText)
deleted: Mapped[bool]
detected_languages: Mapped[list[str]] = mapped_column(JSON)
matched_terms: Mapped[list[str]] = mapped_column(JSON)


class JIDNickWhitelist(Base):
Expand Down
Loading

0 comments on commit 1ed99e3

Please sign in to comment.