From 32ce8c6d8a7c68a254c0b9308ec82418098e3bc9 Mon Sep 17 00:00:00 2001 From: Jesse Mortenson Date: Wed, 6 Nov 2024 15:28:36 -0600 Subject: [PATCH] VA: events fix DB import error on location name too long --- scrapers/va/events.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scrapers/va/events.py b/scrapers/va/events.py index 5b5a704ab9..196a8337c7 100644 --- a/scrapers/va/events.py +++ b/scrapers/va/events.py @@ -6,6 +6,9 @@ import re +simple_html_tag_regex = re.compile("<.*?>") + + class VaEventScraper(Scraper): _tz = pytz.timezone("America/New_York") @@ -43,7 +46,10 @@ def scrape(self, start_date=None): if "RoomDescription" in row: location = row["RoomDescription"] else: - location = row["Description"] + # the Description property is kinda sloppy, it can have a little overlapping title + # and sometimes links to the agenda and livestream + # so need to strip: anything in HTML tags (location seems to never be bolded or in link) + location = re.sub(simple_html_tag_regex, "", row["Description"])[:200] if location == "": location = "See Agenda"