-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* initial changes from ashley * events type * change models * linting * fix linting * tests * tests * TESTS * format * quotation marks * black * tests * end time * formatting * Lint * :( * oops * Lint * Migration file * Lint and removed facebook field * all day * more pythonic --------- Co-authored-by: vcai122 <[email protected]> Co-authored-by: Justin Zhang <[email protected]>
- Loading branch information
1 parent
c5e7088
commit 90418f9
Showing
9 changed files
with
278 additions
and
36 deletions.
There are no files selected for viewing
141 changes: 141 additions & 0 deletions
141
backend/penndata/management/commands/get_penn_today_events.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
import datetime | ||
from urllib.parse import urljoin | ||
|
||
from bs4 import BeautifulSoup | ||
from django.core.management.base import BaseCommand | ||
from django.utils import timezone | ||
from selenium import webdriver | ||
from selenium.webdriver.common.by import By | ||
from selenium.webdriver.support import expected_conditions as EC | ||
from selenium.webdriver.support.ui import WebDriverWait | ||
|
||
from penndata.models import Event | ||
|
||
|
||
PENN_TODAY_WEBSITE = "https://penntoday.upenn.edu/events" | ||
ALL_DAY = "all day" | ||
|
||
|
||
class Command(BaseCommand): | ||
def handle(self, *args, **kwargs): | ||
now = timezone.localtime() | ||
current_month, current_year = now.month, now.year | ||
|
||
# Clears out previous Events | ||
# past_events = Event.objects.filter(end__lt=now.date()) | ||
# past_events.delete() | ||
|
||
# Scrapes Penn Today | ||
try: | ||
driver = webdriver.Chrome() | ||
|
||
driver.get(PENN_TODAY_WEBSITE) | ||
events_list = WebDriverWait(driver, 10).until( | ||
EC.presence_of_element_located((By.ID, "events-list")) | ||
) | ||
|
||
html_content = events_list.get_attribute("innerHTML") | ||
driver.quit() | ||
except ConnectionError: | ||
return None | ||
|
||
soup = BeautifulSoup(html_content, "html.parser") | ||
|
||
event_articles = soup.find_all("article", class_="tease") | ||
|
||
for article in event_articles: | ||
name = article.find("h3", class_="tease__head").text.strip() | ||
description = article.find("div", class_="tease__dek").text.strip() | ||
start_date_str = article.find("p", class_="tease__date").text.strip() | ||
meta_elements = article.find_all("p", class_="tease__meta--sm") | ||
if len(meta_elements) >= 2: | ||
start_time_str = meta_elements[0].text.strip().replace(".", "") | ||
location = meta_elements[1].text.strip() | ||
else: | ||
start_time_str = ALL_DAY | ||
location = None | ||
|
||
end_date_elem = article.find( | ||
"p", class_="tease__meta--sm", string=lambda x: "Through" in str(x) | ||
) | ||
|
||
if start_date_str == "02/29" or start_date_str == "2/29": | ||
# If it's February 29th | ||
start_date = datetime.datetime.strptime("02/28", "%m/%d").replace(year=current_year) | ||
if start_date.month < current_month: | ||
# If scraped month is before current month, increment year | ||
start_date = start_date.replace(year=current_year + 1) | ||
start_date = start_date + datetime.timedelta(days=1) | ||
else: | ||
start_date = datetime.datetime.strptime(start_date_str, "%m/%d").replace( | ||
year=current_year | ||
) | ||
if start_date.month < current_month: | ||
# If scraped month is before current month, increment year | ||
start_date = start_date.replace(year=current_year + 1) | ||
if start_time_str == ALL_DAY: | ||
start_time = datetime.time(0, 0) | ||
else: | ||
start_time = datetime.datetime.strptime(start_time_str, "%I:%M%p").time() | ||
start_date = datetime.datetime.combine(start_date, start_time) | ||
|
||
event_url = urljoin(PENN_TODAY_WEBSITE, article.find("a", class_="tease__link")["href"]) | ||
|
||
end_time = self.get_end_time(event_url) | ||
if end_time is not None: | ||
if end_date_elem: # end date and end time | ||
end_date_str = end_date_elem.text.strip().split(" ")[-1] | ||
end_date = datetime.datetime.strptime(end_date_str, "%m/%d/%Y") | ||
end_time = datetime.datetime.strptime(end_time, "%I:%M %p").time() | ||
end_date = datetime.datetime.combine(end_date, end_time) | ||
else: # no end date but end time | ||
end_time = datetime.datetime.strptime(end_time, "%I:%M %p").time() | ||
end_date = datetime.datetime.combine(start_date, end_time) | ||
else: | ||
end_of_day = datetime.time(23, 59, 59) | ||
if end_date_elem: # end date but no end time | ||
end_date_str = end_date_elem.text.strip().split(" ")[-1] | ||
end_date = datetime.combine( | ||
datetime.datetime.strptime(end_date_str, "%m/%d/%Y"), end_of_day | ||
) | ||
else: # no end date or end time | ||
end_date = datetime.combine(start_date, end_of_day) | ||
|
||
Event.objects.update_or_create( | ||
name=name, | ||
defaults={ | ||
"event_type": "", | ||
"image_url": "", | ||
"start": start_date, | ||
"end": end_date, | ||
"location": location, | ||
"website": event_url, | ||
"description": description, | ||
"email": "", | ||
}, | ||
) | ||
|
||
self.stdout.write("Uploaded Events!") | ||
|
||
def get_end_time(event_url): | ||
driver = webdriver.Chrome() | ||
driver.get(event_url) | ||
event_element = WebDriverWait(driver, 10).until( | ||
EC.presence_of_element_located((By.CLASS_NAME, "event__topper-content")) | ||
) | ||
end_time_soup = BeautifulSoup(event_element.get_attribute("innerHTML"), "html.parser") | ||
|
||
end_time_range_str = ( | ||
end_time_soup.find("p", class_="event__meta event__time").text.strip().replace(".", "") | ||
) | ||
print(end_time_range_str) | ||
if not end_time_range_str or ALL_DAY in end_time_range_str.lower(): | ||
driver.quit() | ||
return None # No end time if the event is all day | ||
times = end_time_range_str.split(" - ") | ||
if len(times) <= 1: | ||
driver.quit() | ||
return None | ||
end_time_str = times[1] | ||
driver.quit() | ||
return end_time_str |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Generated by Django 3.2.22 on 2024-02-23 23:20 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration): | ||
|
||
dependencies = [ | ||
("penndata", "0008_calendarevent"), | ||
] | ||
|
||
operations = [ | ||
migrations.RenameField(model_name="event", old_name="start_time", new_name="start",), | ||
migrations.RemoveField(model_name="event", name="end_time",), | ||
migrations.AddField(model_name="event", name="end", field=models.DateTimeField(null=True),), | ||
migrations.AddField( | ||
model_name="event", name="location", field=models.CharField(max_length=255, null=True), | ||
), | ||
migrations.AlterField( | ||
model_name="event", name="description", field=models.TextField(null=True), | ||
), | ||
migrations.AlterField( | ||
model_name="event", name="email", field=models.CharField(max_length=255, null=True), | ||
), | ||
migrations.AlterField( | ||
model_name="event", | ||
name="event_type", | ||
field=models.CharField(max_length=255, null=True), | ||
), | ||
migrations.AlterField( | ||
model_name="event", name="image_url", field=models.URLField(null=True), | ||
), | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Generated by Django 3.2.22 on 2024-02-28 06:50 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration): | ||
|
||
dependencies = [ | ||
("penndata", "0009_auto_20240223_1820"), | ||
] | ||
|
||
operations = [ | ||
migrations.RemoveField(model_name="event", name="facebook",), | ||
migrations.AlterField( | ||
model_name="event", name="description", field=models.TextField(blank=True, null=True), | ||
), | ||
migrations.AlterField( | ||
model_name="event", | ||
name="email", | ||
field=models.CharField(blank=True, max_length=255, null=True), | ||
), | ||
migrations.AlterField( | ||
model_name="event", name="end", field=models.DateTimeField(blank=True, null=True), | ||
), | ||
migrations.AlterField( | ||
model_name="event", | ||
name="event_type", | ||
field=models.CharField(blank=True, max_length=255, null=True), | ||
), | ||
migrations.AlterField( | ||
model_name="event", name="image_url", field=models.URLField(blank=True, null=True), | ||
), | ||
migrations.AlterField( | ||
model_name="event", | ||
name="location", | ||
field=models.CharField(blank=True, max_length=255, null=True), | ||
), | ||
migrations.AlterField( | ||
model_name="event", | ||
name="website", | ||
field=models.URLField(blank=True, max_length=255, null=True), | ||
), | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters