guites · guites · Apr 23, 2023 · Apr 23, 2023 · Apr 27, 2023 · Apr 27, 2023
diff --git a/app/crud.py b/app/crud.py
@@ -179,3 +179,19 @@ def update_telegram_message_is_music(self, message_id, is_music):
         self.db.add(telegram_message)
         self.db.commit()
         return telegram_message
+
+    def read_telegram_messages_by_ids(self, message_ids):
+        """Return the TelegramMessage rows whose id is in *message_ids*.
+
+        Ids with no matching row are simply absent from the result, and the
+        query has no ORDER BY, so callers should not rely on the result
+        following the order of *message_ids*.
+        """
+        # An empty id list can never match; skip the database round trip.
+        if not message_ids:
+            return []
+        return (
+            self.db.query(TelegramMessage)
+            .filter(TelegramMessage.id.in_(message_ids))
+            .all()
+        )
diff --git a/app/definitions.py b/app/definitions.py
@@ -4,4 +4,5 @@
 TELEGRAM_API_HASH = config("TELEGRAM_API_HASH", None)
 TELEGRAM_CHANNEL_ID = config("TELEGRAM_CHANNEL_ID", 0)
 ARTISTS_FILE_PATH = config("ARTISTS_FILE_PATH", None)
-DATABASE_PATH = config("DATABASE_PATH", None)
+DATABASE_PATH = config("DATABASE_PATH", None)
+SPACY_MODEL_PATH = config("SPACY_MODEL_PATH", None)  # passed to spacy.load; the suggestions endpoint answers 500 if unset or missing on disk
diff --git a/app/main.py b/app/main.py
@@ -4,7 +4,6 @@
 import time
 import uvicorn
 
-from dask import dataframe as dd
 from fastapi import Depends, HTTPException, FastAPI, Query, Request
 from fastapi.middleware.cors import CORSMiddleware
 from sqlalchemy.exc import IntegrityError
@@ -16,10 +15,9 @@
 
 from crud import TelegramCrud, TelegramSessionCrud
 from database import SessionLocal, engine
-from RapidFuzz import RapidFuzz
-from schemas import TelegramMessageResponse, TelegramMessageArtistCreate, TelegramMessageArtistResponse
+from definitions import SPACY_MODEL_PATH
+from schemas import TelegramMessageResponse, TelegramMessageResponseWithSuggestions, TelegramMessageArtistCreate, TelegramMessageArtistResponse
 from TelegramApi import TelegramApi
-from utils import check_artist_names_file
 
 models.Base.metadata.create_all(bind=engine)
 
@@ -164,6 +162,85 @@ async def update_telegram_message_is_music(
         raise HTTPException(status_code=404, detail="Telegram message not found")
     return telegram_message
 
+def _find_entity_spans(text, entity_texts):
+    """Locate each entity text inside *text*.
+
+    Returns a list of unique ``(start, end, entity_text)`` tuples using the
+    first occurrence of each entity; entities that do not occur are skipped.
+    A ``None`` or empty *text* yields an empty list.
+    """
+    spans = []
+    if not text:
+        return spans
+    for entity_text in entity_texts:
+        start = text.find(entity_text)
+        if start == -1:
+            continue
+        span = (start, start + len(entity_text), entity_text)
+        # str.find always reports the first occurrence, so an entity the
+        # model emits more than once would otherwise be listed repeatedly.
+        if span not in spans:
+            spans.append(span)
+    return spans
+
+
+@app.post("/telegram_messages/suggestions", response_model=List[TelegramMessageResponseWithSuggestions])
+async def get_spacy_nlp_suggestions(
+    telegram_message_ids: List[int],
+    db: Session = Depends(get_db),
+):
+    """Attach spaCy entity suggestions to the requested Telegram messages.
+
+    Every returned message carries a ``suggestions`` dict with the keys
+    ``webpage_title`` and ``webpage_description``, each a list of
+    ``(start, end, text)`` spans. Only YouTube webpage messages are run
+    through the model; all other messages get empty lists.
+
+    Raises:
+        HTTPException: 500 when SPACY_MODEL_PATH is unset or does not exist.
+    """
+    # get the messages from the database
+    telegram_crud = TelegramCrud(db)
+    telegram_messages = telegram_crud.read_telegram_messages_by_ids(telegram_message_ids)
+
+    # load the spacy model
+    if SPACY_MODEL_PATH is None or not os.path.exists(SPACY_MODEL_PATH):
+        raise HTTPException(status_code=500, detail="No available spaCy model was found.")
+    nlp_ner = spacy.load(SPACY_MODEL_PATH)
+
+    for msg in telegram_messages:
+        # The response model requires `suggestions` on every message, so the
+        # ones skipped below must still get an (empty) value.
+        msg.suggestions = {"webpage_title": [], "webpage_description": []}
+
+        # skip if message is not webpage or if website is not youtube
+        if not msg.is_webpage or msg.site_name != "YouTube":
+            continue
+
+        # a missing title is treated as empty so concatenation cannot fail
+        video_title = msg.webpage_title or ""
+        video_description = msg.webpage_description
+
+        # the model sees title and description as one text, without newlines
+        if video_description is not None:
+            text = video_title + " " + video_description
+        else:
+            text = video_title
+        cleaned_text = text.replace("\n", " ").strip()
+
+        # run the spacy model
+        entity_texts = [ent.text for ent in nlp_ner(cleaned_text).ents]
+
+        # map each entity back to offsets within the individual fields
+        msg.suggestions = {
+            "webpage_title": _find_entity_spans(video_title, entity_texts),
+            "webpage_description": _find_entity_spans(video_description, entity_texts),
+        }
+    return telegram_messages
+
+
+
+
 @app.get("/spacy/dataset")
 def generate_spacy_dataset(
     db: Session = Depends(get_db),

diff --git a/app/schemas.py b/app/schemas.py
@@ -26,5 +26,8 @@ class TelegramMessageResponse(BaseModel):
     class Config:
         orm_mode = True
 
+class TelegramMessageResponseWithSuggestions(TelegramMessageResponse):  # message response plus NLP entity suggestions
+    suggestions: dict  # as filled by POST /telegram_messages/suggestions: "webpage_title"/"webpage_description" -> list of (start, end, text)
+
 class TelegramMessageArtistCreate(BaseModel):
     artist_name: str
diff --git a/frontend/.prettierrc b/frontend/.prettierrc
@@ -0,0 +1,11 @@
+{
+    "printWidth": 75,
+    "tabWidth": 4,
+    "singleQuote": true,
+    "trailingComma": "all",
+    "bracketSpacing": true,
+    "useTabs": false,
+    "arrowParens": "avoid",
+    "endOfLine": "auto",
+    "semi": true
+}
diff --git a/frontend/src/App.css b/frontend/src/App.css
@@ -3,8 +3,8 @@
 }
 
 .webpage_description div {
-    height:100px;
-    overflow-y: hidden;
+    height: 125px;
+    overflow-y: scroll; /* content taller than the fixed height scrolls instead of being clipped */
 }
 
 .webpage_description.expanded div {
@@ -30,3 +30,23 @@
 .nlp-suggestion {
     background-color: #ffcdcd;
 }
+
+table { /* NOTE: unscoped selector, matches every table this stylesheet applies to */
+    table-layout: fixed; /* size columns from the widths below rather than from cell content (needs a non-auto table width) */
+}
+table th,
+table td {
+    overflow: hidden; /* clip content that does not fit the column */
+}
+
+table th:nth-child(1) {
+    width: 10%;
+}
+
+table th:nth-child(2) {
+    width: 20%;
+}
+
+table th:nth-child(3) {
+    width: 30%; /* the first three columns take 60%; no width is set here for any further columns */
+}