Skip to content

Commit

Permalink
Merge pull request #5086 from openstates/pa-fix-more-new-site-bugs
Browse files (browse the repository at this point in the history)
PA fix more new site bugs
  • Loading branch information
jessemortenson authored Nov 15, 2024
2 parents 9873941 + ef32cde commit b7a2fe5
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 9 deletions.
1 change: 1 addition & 0 deletions scrapers/pa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ class Pennsylvania(State):
"start_date": "2023-01-03",
"end_date": "2024-11-30",
"active": True,
"extras": {"session_year": "2023"},
},
{
"_scraped_name": "2023-2024 Special Session #1 (Victims of Sexual Abuse)",
Expand Down
23 changes: 14 additions & 9 deletions scrapers/pa/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,17 @@


class PABillScraper(Scraper):
session_year: str = ""

def scrape(self, chamber=None, session=None):
chambers = [chamber] if chamber is not None else ["upper", "lower"]

# Session Year code needed to fix committee vote URLs
# until new PA site fixes them
for i in self.jurisdiction.legislative_sessions:
if i["identifier"] == session:
self.session_year = i.get("extras", {}).get("session_year", "2023")

match = re.search(r"[S#](\d+)", session)
for chamber in chambers:
if match:
Expand Down Expand Up @@ -64,7 +72,7 @@ def parse_bill(self, chamber, session, special, link):
page = self.get_page(url)

xpath = (
'//div[contains(@class, "header ")]/following-sibling::*[1]'
'//div[contains(@class, "header")]/following-sibling::*[1]'
'/div[@class="col-md-9"]/div[1]'
)

Expand Down Expand Up @@ -267,14 +275,11 @@ def parse_votes(self, bill, page):
if "/roll-calls/" in url:
yield from self.parse_chamber_votes(bill, url)
elif "/roll-call-votes/" in url:
# TODO remove log message and uncomment self.parse_committee_votes()
# when committee vote URLs work again, for example:
# https://www.palegis.us/house/committees/roll-call-votes/vote-summary?committeecode=59&rollcallid=1
self.logger.warning(
"Temporarily disabling committee vote ingestion "
"due to systemic 500 HTTP errors"
)
# yield from self.parse_committee_votes(bill, url)
# As of Nov 2024, this URL in the new site is broken
# but works if we add a query param
if "sessyr" not in url:
url = f"{url}&sessyr={self.session_year}"
yield from self.parse_committee_votes(bill, url)
else:
msg = "Unexpected vote url: %r" % url
raise Exception(msg)
Expand Down

0 comments on commit b7a2fe5

Please sign in to comment.