Skip to content

Commit

Permalink
VA: events fix DB import error on location name too long
Browse files Browse the repository at this point in the history
  • Loading branch information
jessemortenson committed Nov 6, 2024
1 parent f7ed0d3 commit 32ce8c6
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion scrapers/va/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import re


# Matches a single HTML-style tag, from "<" up to the nearest ">".
# A negated character class is used rather than ".*?" because "." does not
# match newlines, so a tag whose attributes wrap onto a following line
# would otherwise be left in the text.
simple_html_tag_regex = re.compile(r"<[^>]*>")


class VaEventScraper(Scraper):
_tz = pytz.timezone("America/New_York")

Expand Down Expand Up @@ -43,7 +46,10 @@ def scrape(self, start_date=None):
if "RoomDescription" in row:
location = row["RoomDescription"]
else:
location = row["Description"]
# The Description property is loosely formatted: it can repeat part of the title
# and sometimes embeds links to the agenda and livestream.
# Remove the HTML tag markup (the location itself never seems to be bolded or linked),
# then cap the result at 200 characters so an overlong location does not fail the DB import.
location = re.sub(simple_html_tag_regex, "", row["Description"])[:200]

if location == "":
location = "See Agenda"
Expand Down

0 comments on commit 32ce8c6

Please sign in to comment.