Skip to content

Commit

Permalink
Add webdriver-manager and use Firefox for debugging
Browse files: browse the repository at this point in the history
  • Loading branch information
judtinzhang committed Mar 2, 2024
1 parent b9350e5 commit b8547c1
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 93 deletions.
1 change: 1 addition & 0 deletions backend/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ django-redis = "*"
redis = "*"
python-dateutil = "*"
selenium = "*"
webdriver-manager = "*"

[requires]
python_version = "3.9.14"
209 changes: 117 additions & 92 deletions backend/Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from django.utils import timezone
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.firefox import GeckoDriverManager

from penndata.models import Event

Expand All @@ -27,7 +29,7 @@ def handle(self, *args, **kwargs):

# Scrapes Penn Today
try:
driver = webdriver.Chrome()
driver = webdriver.Firefox(service=FirefoxService(GeckoDriverManager().install()))

driver.get(PENN_TODAY_WEBSITE)
events_list = WebDriverWait(driver, 10).until(
Expand All @@ -37,6 +39,7 @@ def handle(self, *args, **kwargs):
html_content = events_list.get_attribute("innerHTML")
driver.quit()
except ConnectionError:
print("Connection Error to webdriver")
return None

soup = BeautifulSoup(html_content, "html.parser")
Expand Down

0 comments on commit b8547c1

Please sign in to comment.