Skip to content

Commit

Permalink
Merge pull request #952 from CodeForPhilly/staging
Browse files Browse the repository at this point in the history
Weekly PR from Staging to Main
  • Loading branch information
CodeWritingCow authored Oct 14, 2024
2 parents e18127b + c13fec8 commit 52ae6ca
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 42 deletions.
30 changes: 26 additions & 4 deletions data/src/data_utils/access_process.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
def access_process(dataset):
from typing import Any

def access_process(dataset: Any) -> Any:
"""
Process a dataset to determine the access process for each property based on
city ownership and market value. The result is added as a new column in the dataset.
Args:
dataset (Any): The dataset containing a GeoDataFrame named `gdf` with
columns "city_owner_agency" and "market_value".
Returns:
Any: The updated dataset with an additional "access_process" column.
Side Effects:
Prints the distribution of the "access_process" column.
"""
access_processes = []

for _, row in dataset.gdf.iterrows():
Expand All @@ -9,9 +25,9 @@ def access_process(dataset):
)

# Simplified decision logic
if city_owner_agency == "PLB":
access_process = "Land Bank"
elif city_owner_agency in ["PRA", "PHDC"]:
if city_owner_agency == "Land Bank (PHDC)":
access_process = "Go through Land Bank"
elif city_owner_agency == "PRA":
access_process = "Do Nothing"
else:
if market_value_over_1000:
Expand All @@ -22,4 +38,10 @@ def access_process(dataset):
access_processes.append(access_process)

dataset.gdf["access_process"] = access_processes

# Print the distribution of "access_process"
distribution = dataset.gdf["access_process"].value_counts()
print("Distribution of access process:")
print(distribution)

return dataset
20 changes: 19 additions & 1 deletion data/src/data_utils/city_owned_properties.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
from typing import Any
from classes.featurelayer import FeatureLayer
from constants.services import CITY_OWNED_PROPERTIES_TO_LOAD

def city_owned_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer:
"""
Processes city-owned property data by joining it with the primary feature layer,
renaming columns, and updating access information for properties based on ownership.
All instances where the "city_owner_agency" is "PLB" are changed to "Land Bank (PHDC)".
def city_owned_properties(primary_featurelayer):
Args:
primary_featurelayer (FeatureLayer): The primary feature layer to which city-owned
property data will be joined.
Returns:
FeatureLayer: The updated primary feature layer with processed city ownership
information.
"""
city_owned_properties = FeatureLayer(
name="City Owned Properties",
esri_rest_urls=CITY_OWNED_PROPERTIES_TO_LOAD,
Expand Down Expand Up @@ -60,4 +73,9 @@ def city_owned_properties(primary_featurelayer):
"side_yard_eligible"
].fillna("No")

# Update all instances where city_owner_agency is "PLB" to "Land Bank (PHDC)"
primary_featurelayer.gdf.loc[
primary_featurelayer.gdf["city_owner_agency"] == "PLB", "city_owner_agency"
] = "Land Bank (PHDC)"

return primary_featurelayer
2 changes: 1 addition & 1 deletion data/src/data_utils/conservatorship.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def conservatorship(primary_featurelayer):
sale_date_6_months_ago = False

# Simplified decision logic
if city_owner_agency == "PLB" or (
if city_owner_agency == "Land Bank (PHDC)" or (
not sale_date_6_months_ago and market_value_over_1000
):
conservatorship = "No"
Expand Down
65 changes: 52 additions & 13 deletions data/src/data_utils/l_and_i.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,43 @@
import pandas as pd
import geopandas as gpd
from typing import List
from classes.featurelayer import FeatureLayer
from constants.services import COMPLAINTS_SQL_QUERY, VIOLATIONS_SQL_QUERY

def l_and_i(primary_featurelayer: FeatureLayer) -> FeatureLayer:
"""
Process L&I (Licenses and Inspections) data for complaints and violations.
This function filters and processes L&I complaints and violations data,
joining it with the primary feature layer based on spatial relationships
and OPA (Office of Property Assessment) identifiers.
Args:
primary_featurelayer (FeatureLayer): The primary feature layer to join L&I data to.
Returns:
FeatureLayer: The primary feature layer updated with L&I data.
"""
keywords: List[str] = [
'dumping', 'blight', 'rubbish', 'weeds', 'graffiti',
'abandoned', 'sanitation', 'litter', 'vacant', 'trash',
'unsafe'
]

def l_and_i(primary_featurelayer):
# Load complaints data from L&I
l_and_i_complaints = FeatureLayer(
l_and_i_complaints: FeatureLayer = FeatureLayer(
name="LI Complaints",
carto_sql_queries=COMPLAINTS_SQL_QUERY
)

# filter for only Status = 'Open'
# Filter for rows where 'subject' contains any of the keywords
l_and_i_complaints.gdf = l_and_i_complaints.gdf[
l_and_i_complaints.gdf["status"] == "Open"
l_and_i_complaints.gdf["subject"].str.lower().str.contains('|'.join(keywords))
]

# Filter for only Status = 'Open'
l_and_i_complaints.gdf = l_and_i_complaints.gdf[
l_and_i_complaints.gdf["status"].str.lower() == "open"
]

# Group by geometry and concatenate the violationcodetitle values into a list with a semicolon separator
Expand All @@ -30,13 +55,18 @@ def l_and_i(primary_featurelayer):
)

# Load data for violations from L&I
l_and_i_violations = FeatureLayer(
l_and_i_violations: FeatureLayer = FeatureLayer(
name="LI Violations",
carto_sql_queries=VIOLATIONS_SQL_QUERY,
from_xy=True
)

all_violations_count_df = (
# Filter for rows where 'violationcodetitle' contains any of the keywords, handling NaN values
l_and_i_violations.gdf = l_and_i_violations.gdf[
l_and_i_violations.gdf["violationcodetitle"].fillna('').str.lower().str.contains('|'.join(keywords))
]

all_violations_count_df: pd.DataFrame = (
l_and_i_violations.gdf.groupby("opa_account_num")
.count()
.reset_index()[["opa_account_num", "violationnumber", "geometry"]]
Expand All @@ -45,11 +75,11 @@ def l_and_i(primary_featurelayer):
columns={"violationnumber": "all_violations_past_year"}
)
# Filter for only violations whose violationstatus is open
violations_gdf = l_and_i_violations.gdf[
(l_and_i_violations.gdf["violationstatus"] == "OPEN")
violations_gdf: gpd.GeoDataFrame = l_and_i_violations.gdf[
(l_and_i_violations.gdf["violationstatus"].str.lower() == "open")
]

open_violations_count_df = (
open_violations_count_df: pd.DataFrame = (
violations_gdf.groupby("opa_account_num")
.count()
.reset_index()[["opa_account_num", "violationnumber", "geometry"]]
Expand All @@ -58,7 +88,7 @@ def l_and_i(primary_featurelayer):
columns={"violationnumber": "open_violations_past_year"}
)
# join the all_violations_count_df and open_violations_count_df dataframes on opa_account_num
violations_count_gdf = all_violations_count_df.merge(
violations_count_gdf: gpd.GeoDataFrame = all_violations_count_df.merge(
open_violations_count_df, how="left", on="opa_account_num"
)

Expand Down Expand Up @@ -96,7 +126,7 @@ def l_and_i(primary_featurelayer):
)

# Complaints need a spatial join, but we need to take special care to merge on just the parcel geoms first to get opa_id
complaints_with_opa_id = primary_featurelayer.gdf.sjoin(
complaints_with_opa_id: gpd.GeoDataFrame = primary_featurelayer.gdf.sjoin(
l_and_i_complaints.gdf, how="left", predicate="contains"
)
complaints_with_opa_id.drop(columns=["index_right"], inplace=True)
Expand All @@ -109,7 +139,16 @@ def l_and_i(primary_featurelayer):
)

# Clean up the NaN values in the li_complaints column
def remove_nan_strings(x):
def remove_nan_strings(x: str) -> str | None:
"""
Remove 'nan' strings from the input.
Args:
x (str): Input string.
Returns:
str | None: Cleaned string or None if only 'nan' values.
"""
if x == "nan" or ("nan;" in x):
return None
else:
Expand All @@ -136,4 +175,4 @@ def remove_nan_strings(x):
.astype(int)
)

return primary_featurelayer
return primary_featurelayer
16 changes: 0 additions & 16 deletions data/src/data_utils/llc_owner.py

This file was deleted.

37 changes: 37 additions & 0 deletions data/src/data_utils/owner_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import pandas as pd
from classes.featurelayer import FeatureLayer

def owner_type(primary_featurelayer: FeatureLayer) -> FeatureLayer:
    """
    Classify each property's owner and store the label in an 'owner_type' column.

    The label is chosen per row, in this order of precedence:
        - "Public" if 'city_owner_agency' is not NA.
        - "Business (LLC)" if the lowercased text of 'owner_1' or 'owner_2'
          contains the substring " llc". The leading space is part of the
          match, so a name that merely starts with "LLC" is not counted.
        - "Individual" otherwise.

    Args:
        primary_featurelayer (FeatureLayer): Feature layer whose `gdf` holds the
            'owner_1', 'owner_2', and 'city_owner_agency' columns.

    Returns:
        FeatureLayer: The same feature layer, with `gdf["owner_type"]` set.
    """
    gdf = primary_featurelayer.gdf

    def mentions_llc(column: str) -> pd.Series:
        # Stringify first so missing or non-text owner values never raise;
        # na=False treats any value that is still missing as "no match".
        text = gdf[column].astype(str).str.lower()
        return text.str.contains(" llc", regex=False, na=False)

    # Plain boolean arrays keep the masking purely positional, so the result
    # does not depend on the GeoDataFrame's index labels.
    is_public = gdf["city_owner_agency"].notna().to_numpy()
    is_llc = (mentions_llc("owner_1") | mentions_llc("owner_2")).to_numpy()

    # Start from the fallback label, then overwrite in increasing priority so
    # that "Public" wins over "Business (LLC)" when both conditions hold.
    labels = pd.Series("Individual", index=gdf.index, dtype=object)
    labels = labels.mask(is_llc, "Business (LLC)").mask(is_public, "Public")

    # Assign as a plain list of labels, one per row in row order.
    gdf["owner_type"] = labels.tolist()

    return primary_featurelayer
4 changes: 2 additions & 2 deletions data/src/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from data_utils.gun_crimes import gun_crimes
from data_utils.imm_dang_buildings import imm_dang_buildings
from data_utils.l_and_i import l_and_i
from data_utils.llc_owner import llc_owner
from data_utils.owner_type import owner_type
from data_utils.nbhoods import nbhoods
from data_utils.negligent_devs import negligent_devs
from data_utils.opa_properties import opa_properties
Expand Down Expand Up @@ -50,7 +50,7 @@
imm_dang_buildings,
tactical_urbanism,
conservatorship,
llc_owner,
owner_type,
community_gardens,
park_priority,
ppr_properties,
Expand Down
4 changes: 2 additions & 2 deletions src/components/FilterView.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ const filters = [
type: 'buttonGroup',
},
{
property: 'llc_owner',
property: 'owner_type',
display: 'Owner',
options: ['Yes', 'No'],
options: ['Public', 'Business (LLC)', 'Individual'],
type: 'buttonGroup',
},
];
Expand Down
7 changes: 4 additions & 3 deletions src/components/Filters/DimensionFilter.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ type OptionDisplayMapping = {
};

const optionsDisplayMapping: OptionDisplayMapping = {
llc_owner: {
Yes: 'Business',
No: 'Individual',
owner_type: {
Public: 'Public',
'Business (LLC)': 'Business (LLC)',
Individual: 'Individual',
},
};

Expand Down

0 comments on commit 52ae6ca

Please sign in to comment.