forked from bilge-ince/aidb-rag-app
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathembedding.py
33 lines (26 loc) · 869 Bytes
/
embedding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import os
import PyPDF2
from db import get_connection
def create_retriever():
    """Create the ``documents_embeddings`` retriever in the database.

    Reads the model name from the ``AIDB_MODEL_NAME`` environment variable,
    runs ``SELECT aidb.create_pg_retriever(...)`` on a connection obtained
    from ``get_connection()``, and commits the transaction.

    Returns:
        None.

    Raises:
        RuntimeError: if ``AIDB_MODEL_NAME`` is unset or empty. Without this
            check the literal string ``'None'`` would be sent as the model
            name.
    """
    # Validate configuration first so no connection is opened when the call
    # cannot succeed anyway.
    model_name = os.getenv("AIDB_MODEL_NAME")
    if not model_name:
        raise RuntimeError(
            "AIDB_MODEL_NAME environment variable must be set to create the retriever"
        )

    conn = get_connection()
    cursor = conn.cursor()
    try:
        # The model name is bound as a query parameter instead of being
        # interpolated into the SQL text, so quotes in the value cannot break
        # or alter the statement.
        # NOTE(review): the %s placeholder assumes the driver behind
        # get_connection() uses the "format"/"pyformat" paramstyle
        # (e.g. psycopg) - confirm against db.py.
        cursor.execute(
            """
            SELECT aidb.create_pg_retriever(
                'documents_embeddings',
                'public',
                'id',
                %s,
                'text',
                'documents',
                ARRAY['filename', 'doc_fragment'],
                TRUE);""",
            (model_name,),
        )
        conn.commit()
    finally:
        # Release the cursor even when execute() or commit() raises. The
        # connection is left open, as before; who owns it is not visible here.
        cursor.close()
    return None
def read_pdf_file(pdf_path):
    """Return the text of a PDF as a flat list of lines.

    Opens ``pdf_path`` with ``PyPDF2.PdfReader``, extracts the text of each
    page in page order, and splits it with ``str.splitlines()``.

    Args:
        pdf_path: Value passed straight to ``PyPDF2.PdfReader``.

    Returns:
        A list of strings, one per extracted line, with the lines of all
        pages concatenated in page order.
    """
    pdf_document = PyPDF2.PdfReader(pdf_path)
    lines = []
    # Iterate the pages directly rather than indexing through range(len(...)).
    for page in pdf_document.pages:
        lines.extend(page.extract_text().splitlines())
    return lines