Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add inference endpoint #9

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions app/crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,10 @@ def update_telegram_message_is_music(self, message_id, is_music):
self.db.add(telegram_message)
self.db.commit()
return telegram_message

def read_telegram_messages_by_ids(self, message_ids):
    """Return the TelegramMessage rows whose ``id`` is one of ``message_ids``.

    Args:
        message_ids: Iterable of message ids to look up. ``None`` and empty
            iterables are accepted and yield an empty result.

    Returns:
        A list of ``TelegramMessage`` objects. Ids with no matching row are
        silently absent from the result.
    """
    # Materialise once so one-shot iterables (generators, sets, ...) can be
    # both tested for emptiness and handed to ``in_``.
    wanted_ids = list(message_ids) if message_ids is not None else []
    if not wanted_ids:
        # Nothing can match: skip the round trip instead of issuing an
        # ``IN ()`` predicate against the database.
        return []
    return (
        self.db.query(TelegramMessage)
        .filter(TelegramMessage.id.in_(wanted_ids))
        .all()
    )
3 changes: 2 additions & 1 deletion app/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
# Settings read through `config` (imported outside this view); the second
# argument is presumably the fallback used when the setting is absent.
TELEGRAM_API_HASH = config("TELEGRAM_API_HASH", None)
# NOTE(review): the fallback is the int 0, but no cast is passed here, so a
# configured value may come back as a string - confirm how `config` converts it.
TELEGRAM_CHANNEL_ID = config("TELEGRAM_CHANNEL_ID", 0)
ARTISTS_FILE_PATH = config("ARTISTS_FILE_PATH", None)
DATABASE_PATH = config("DATABASE_PATH", None)
# NOTE(review): DATABASE_PATH is assigned a second time with the same
# expression; the repeat is redundant and looks like a diff artifact.
DATABASE_PATH = config("DATABASE_PATH", None)
# Filesystem path of the spaCy model; falls back to None like the paths above.
SPACY_MODEL_PATH = config("SPACY_MODEL_PATH", None)
70 changes: 66 additions & 4 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import time
import uvicorn

from dask import dataframe as dd
from fastapi import Depends, HTTPException, FastAPI, Query, Request
from fastapi.middleware.cors import CORSMiddleware
from sqlalchemy.exc import IntegrityError
Expand All @@ -16,10 +15,9 @@

from crud import TelegramCrud, TelegramSessionCrud
from database import SessionLocal, engine
from RapidFuzz import RapidFuzz
from schemas import TelegramMessageResponse, TelegramMessageArtistCreate, TelegramMessageArtistResponse
from definitions import SPACY_MODEL_PATH
from schemas import TelegramMessageResponse, TelegramMessageResponseWithSuggestions, TelegramMessageArtistCreate, TelegramMessageArtistResponse
from TelegramApi import TelegramApi
from utils import check_artist_names_file

# Runs at import time: asks the metadata of `models.Base` to create its tables
# on `engine` (presumably SQLAlchemy's MetaData.create_all - confirm).
models.Base.metadata.create_all(bind=engine)

Expand Down Expand Up @@ -164,6 +162,70 @@ async def update_telegram_message_is_music(
raise HTTPException(status_code=404, detail="Telegram message not found")
return telegram_message

def _find_entity_spans(field_text, entity_texts):
    """Locate each entity text inside ``field_text``.

    Args:
        field_text: The string to search, or ``None``.
        entity_texts: Iterable of entity strings to look for.

    Returns:
        A list of unique ``(start, end, text)`` tuples in first-seen order,
        where ``start`` is the index of the first occurrence of ``text`` in
        ``field_text`` and ``end`` is ``start + len(text)``. Entities that do
        not occur in ``field_text`` are omitted; a ``None`` field yields ``[]``.
    """
    spans = []
    if field_text is None:
        return spans
    for entity_text in entity_texts:
        start = field_text.find(entity_text)
        if start == -1:
            continue
        span = (start, start + len(entity_text), entity_text)
        # ``find`` always resolves the same text to the same first occurrence,
        # so an entity recognised several times would otherwise be duplicated.
        if span not in spans:
            spans.append(span)
    return spans


@app.post("/telegram_messages/suggestions", response_model=List[TelegramMessageResponseWithSuggestions])
async def get_spacy_nlp_suggestions(
    telegram_message_ids: List[int],
    db: Session = Depends(get_db),
):
    """Return the requested Telegram messages annotated with spaCy entity suggestions.

    For every message that is a YouTube webpage, the title and description are
    joined, run through the spaCy model at ``SPACY_MODEL_PATH``, and each
    recognised entity is located in the title and in the description.

    Args:
        telegram_message_ids: Ids of the messages to annotate.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        The message objects, each carrying a ``suggestions`` dict with the keys
        ``"webpage_title"`` and ``"webpage_description"``; every value is a
        list of ``(start, end, text)`` tuples. Messages that are not YouTube
        webpages get empty lists.

    Raises:
        HTTPException: 500 when ``SPACY_MODEL_PATH`` is unset or does not exist.
    """
    # Fetch the messages from the database.
    telegram_crud = TelegramCrud(db)
    telegram_messages = telegram_crud.read_telegram_messages_by_ids(telegram_message_ids)

    # Load the spaCy model.
    # NOTE(review): the model is loaded from disk on every request; consider
    # loading it once at startup if this endpoint becomes hot.
    if SPACY_MODEL_PATH is None or not os.path.exists(SPACY_MODEL_PATH):
        raise HTTPException(status_code=500, detail="No available spaCy model was found.")
    nlp_ner = spacy.load(SPACY_MODEL_PATH)

    for msg in telegram_messages:
        # Attach the (initially empty) suggestions to every message, including
        # the ones skipped below: the response model declares ``suggestions``
        # as a required field, so a message without it cannot be serialised.
        suggestions = {
            "webpage_title": [],
            "webpage_description": []
        }
        msg.suggestions = suggestions

        # Only YouTube webpages are analysed.
        if not msg.is_webpage or msg.site_name != "YouTube":
            continue

        # A missing title is treated as empty so the concatenation and the
        # span search below cannot fail on None.
        video_title = msg.webpage_title or ""
        video_description = msg.webpage_description

        # Join title and description when a description exists.
        if video_description is not None:
            text = video_title + " " + video_description
        else:
            text = video_title

        # Flatten newlines and trim surrounding whitespace before running NER.
        cleaned_text = text.replace("\n", " ").strip()
        if not cleaned_text:
            continue

        doc = nlp_ner(cleaned_text)
        entity_texts = [ent.text for ent in doc.ents]

        # Entities are matched back against the original, unmodified fields.
        suggestions["webpage_title"] = _find_entity_spans(video_title, entity_texts)
        suggestions["webpage_description"] = _find_entity_spans(video_description, entity_texts)

    return telegram_messages




@app.get("/spacy/dataset")
def generate_spacy_dataset(
db: Session = Depends(get_db),
Expand Down
3 changes: 3 additions & 0 deletions app/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,8 @@ class TelegramMessageResponse(BaseModel):
class Config:
orm_mode = True

class TelegramMessageResponseWithSuggestions(TelegramMessageResponse):
    """Telegram message response extended with a ``suggestions`` mapping.

    Inherits every field of ``TelegramMessageResponse`` and adds one field.
    """

    # Required: no default is declared, so every serialised object must
    # provide a ``suggestions`` value.
    suggestions: dict

class TelegramMessageArtistCreate(BaseModel):
artist_name: str
11 changes: 11 additions & 0 deletions frontend/.prettierrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"printWidth": 75,
"tabWidth": 4,
"singleQuote": true,
"trailingComma": "all",
"bracketSpacing": true,
"useTabs": false,
"arrowParens": "avoid",
"endOfLine": "auto",
"semi": true
}
24 changes: 22 additions & 2 deletions frontend/src/App.css
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
}

/* Fixed-height box for the description content.
   NOTE(review): `height` and `overflow-y` are each declared twice in this
   rule; the later declarations (125px / scroll) override the earlier ones
   (100px / hidden). The first pair looks like leftover lines. */
.webpage_description div {
height:100px;
overflow-y: hidden;
height: 125px;
overflow-y: scroll;
}

.webpage_description.expanded div {
Expand All @@ -30,3 +30,23 @@
/* Light red background for elements carrying the nlp-suggestion class
   (presumably the text spans suggested by the NLP model - confirm in the markup). */
.nlp-suggestion {
background-color: #ffcdcd;
}

/* Fixed table layout: column widths come from the declared widths below
   rather than from the cell contents. */
table {
table-layout: fixed;
}
/* Clip cell content that does not fit its fixed-width column. */
table th,
table td {
overflow: hidden;
}

/* Widths of the first three header columns; these rules set no width
   for any further column. */
table th:nth-child(1) {
width: 10%;
}

table th:nth-child(2) {
width: 20%;
}

table th:nth-child(3) {
width: 30%;
}
Loading