-
Notifications
You must be signed in to change notification settings - Fork 7
/
requirements.txt
73 lines (65 loc) · 1.9 KB
/
requirements.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
Flask==3.0.0
flask-cors==4.0.0
Flask-Injector==0.15.0
gunicorn==22.0.0
protobuf==4.25.0
aiohttp==3.8.6
wheel==0.41.3
click==8.1.7
MarkupSafe==2.1.3
Werkzeug==3.0.1
mkdocstrings[python]==0.23.0
mkdocs-material==9.4.7
itsdangerous==2.1.2
Jinja2==3.1.2
mkdocs==1.5.3
SQLAlchemy==2.0.22
tabulate==0.9.0
typing-inspect==0.9.0
typing_extensions==4.8.0
cryptography==42.0.7
# Utils
tiktoken==0.5.1
python-dotenv==1.0.1
pydantic==1.10.13 # pydantic v1 works better for ray
flask-executor==1.0.0
retry==0.9.2
XlsxWriter==3.2.0
# AI & core services
nomic==2.0.14
openai==0.28.1
langchain==0.0.331
langchainhub==0.1.14
# Data
boto3==1.28.79
qdrant-client==1.7.3
supabase==2.5.3
# Logging
posthog==3.1.0
sentry-sdk==1.39.1
ollama==0.2.1
# Not currently supporting coursera ingest
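# cs-dlp @ git+https://github.com/raffaem/cs-dlp.git@<ref> # previously called coursera-dl. NOTE(review): the repo/ref token was garbled by e-mail obfuscation in this copy ("[email protected]"); the ".git@<ref>" form is a presumed reconstruction — restore the exact ref from version control before re-enabling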
# removed due to /ingest in Beam
# canvasapi==3.2.0
# GitPython==3.1.40
# pysrt==1.1.2
# docx2txt==0.8
# pydub==0.25.1
# ffmpeg-python==0.2.0
# ffprobe==0.5
# ffmpeg==1.4
# beautifulsoup4==4.12.2
# PyMuPDF==1.23.6
# pytesseract==0.3.10 # image OCR
# openpyxl==3.1.2 # excel
# networkx==3.2.1 # unused part of excel partitioning :(
# python-pptx==0.6.23
# unstructured==0.10.29 # pulls in a huge ~5.3 GB of installs. Probably from onnx: https://github.com/Unstructured-IO/unstructured/blob/ad14321016533dc03c1782f6ebea00bc9c804846/requirements/extra-pdf-image.in#L4
# pdf packages for unstructured
# pdf2image==1.16.3
# pdfminer.six==20221105
# opencv-python-headless==4.8.1.78
# unstructured.pytesseract==0.3.12
# unstructured-inference==0.7.11 # this is the real large one :(
# unstructured[xlsx,image,pptx]==0.10.29 # pulls in a huge ~5.3 GB of installs. Probably from onnx: https://github.com/Unstructured-IO/unstructured/blob/ad14321016533dc03c1782f6ebea00bc9c804846/requirements/extra-pdf-image.in#L4