Merge pull request #967 from CodeForPhilly/staging
Staging
nlebovits authored Oct 19, 2024
2 parents dd59156 + d66fc4f commit 293e3e5
Showing 10 changed files with 38 additions and 57 deletions.
30 changes: 1 addition & 29 deletions data/src/classes/featurelayer.py
@@ -314,35 +314,7 @@ def build_and_publish(self, tiles_file_id_prefix: str) -> None:
self.centroid_gdf["geometry"] = self.centroid_gdf["geometry"].centroid
self.centroid_gdf = self.centroid_gdf.to_crs(epsg=4326)
self.centroid_gdf.to_file(temp_geojson_points, driver="GeoJSON")

# Load the GeoJSON from the polygons, drop geometry, and save as Parquet
gdf_polygons = gpd.read_file(temp_geojson_polygons)
df_no_geom = gdf_polygons.drop(columns=["geometry"])

# Check if the DataFrame has fewer than 25,000 rows
num_rows, num_cols = df_no_geom.shape
if num_rows < 25000:
print(
f"Parquet file has {num_rows} rows, which is fewer than 25,000. Skipping upload."
)
return

# Save the DataFrame as Parquet
df_no_geom.to_parquet(temp_parquet)

# Upload Parquet to Google Cloud Storage
blob_parquet = bucket.blob(f"{tiles_file_id_prefix}.parquet")
try:
blob_parquet.upload_from_filename(temp_parquet)
parquet_size = os.stat(temp_parquet).st_size
parquet_size_mb = parquet_size / (1024 * 1024)
print(
f"Parquet upload successful! Size: {parquet_size} bytes ({parquet_size_mb:.2f} MB), Dimensions: {num_rows} rows, {num_cols} columns."
)
except Exception as e:
print(f"Parquet upload failed: {e}")
return


# Command for generating PMTiles for points up to zoom level zoom_threshold
points_command: list[str] = [
"tippecanoe",
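
The points_command list above is truncated in this view; it assembles a tippecanoe invocation that generates PMTiles for points up to zoom_threshold. A minimal sketch of that pattern, with illustrative flags, paths, and zoom values that are assumptions rather than the exact command used in build_and_publish:

import subprocess

# Illustrative only: output path, zoom values, and flag choices are assumptions.
zoom_threshold = 14
temp_geojson_points = "tmp/vacant_properties_points.geojson"  # written just above via to_file()

points_command = [
    "tippecanoe",
    "-o", "tmp/vacant_properties_points.pmtiles",  # output PMTiles archive
    "-Z", "10",                                    # minimum zoom to generate
    "-z", str(zoom_threshold),                     # maximum zoom for the points layer
    "--force",                                     # overwrite an existing output file
    temp_geojson_points,
]
subprocess.run(points_command, check=True)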
7 changes: 1 addition & 6 deletions data/src/data_utils/access_process.py
@@ -39,10 +39,5 @@ def access_process(dataset: Any) -> Any:
access_processes.append(access_process)

dataset.gdf["access_process"] = access_processes

# Print the distribution of "access_process"
distribution = dataset.gdf["access_process"].value_counts()
print("Distribution of access process:")
print(distribution)


return dataset
4 changes: 2 additions & 2 deletions data/src/data_utils/phs_properties.py
@@ -22,10 +22,10 @@ def phs_properties(primary_featurelayer: FeatureLayer) -> FeatureLayer:
primary_featurelayer.spatial_join(phs_properties)

# Initialize 'phs_care_program' column with default "no" for all rows
primary_featurelayer.gdf["phs_care_program"] = "no"
primary_featurelayer.gdf["phs_care_program"] = "No"

# Set 'phs_care_program' to "yes" for matched rows
primary_featurelayer.gdf.loc[primary_featurelayer.gdf["phs_care_program"] != "no", "phs_care_program"] = "yes"
primary_featurelayer.gdf.loc[primary_featurelayer.gdf["program"].notna(), "phs_care_program"] = "Yes"

# Rebuild the GeoDataFrame after updates
primary_featurelayer.rebuild_gdf()
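
Note on the change above: the earlier version set phs_care_program to "no" for every row and then tested phs_care_program != "no", which could never match, so no row was ever flagged. The fix keys off the program column contributed by the spatial join. A minimal sketch of the corrected pattern, using a toy GeoDataFrame and assuming the PHS layer supplies a program field:

import geopandas as gpd

# Toy stand-in for the result of the spatial join: matched rows carry a
# non-null "program" value from the PHS layer.
gdf = gpd.GeoDataFrame(
    {
        "opa_id": ["1", "2", "3"],
        "program": ["LandCare", None, "LandCare"],
    },
    geometry=gpd.points_from_xy([0, 1, 2], [0, 0, 0]),
)

gdf["phs_care_program"] = "No"                                # default for every row
gdf.loc[gdf["program"].notna(), "phs_care_program"] = "Yes"   # flag rows matched by the join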
1 change: 1 addition & 0 deletions data/src/data_utils/priority_level.py
@@ -47,4 +47,5 @@ def priority_level(dataset):
priority_levels.append(priority_level)

dataset.gdf["priority_level"] = priority_levels

return dataset
11 changes: 5 additions & 6 deletions data/src/data_utils/vacant_properties.py
@@ -145,13 +145,7 @@ def vacant_properties() -> FeatureLayer:
vacant_properties.gdf, geometry="geometry"
)

print(
f"Vacant properties data size before dropping NAs: {len(vacant_properties.gdf)} rows."
)
vacant_properties.gdf.dropna(subset=["opa_id"], inplace=True)
print(
f"Vacant properties data size after dropping NAs: {len(vacant_properties.gdf)} rows."
)

# Final null value check before returning
check_null_percentage(vacant_properties.gdf)
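
check_null_percentage is called here but not defined in this diff; an illustrative sketch of what such a check might do, purely an assumption about its behavior, is to warn when any column's null share exceeds a threshold:

import pandas as pd

def check_null_percentage(df: pd.DataFrame, threshold: float = 0.05) -> None:
    # Hypothetical implementation; the real function is not shown in this commit.
    null_share = df.isna().mean()
    for column, share in null_share.items():
        if share > threshold:
            print(f"Warning: {column} is {share:.1%} null (threshold {threshold:.0%}).")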
@@ -184,4 +178,9 @@ def vacant_properties() -> FeatureLayer:
# Ensure concatenated data is still a GeoDataFrame
vacant_properties.gdf = gpd.GeoDataFrame(vacant_properties.gdf, geometry="geometry")

before_drop = vacant_properties.gdf.shape[0]
vacant_properties.gdf = vacant_properties.gdf.drop_duplicates(subset="opa_id")
after_drop = vacant_properties.gdf.shape[0]
print(f"Duplicate vacant properties dropped: {before_drop - after_drop}")

return vacant_properties
20 changes: 20 additions & 0 deletions data/src/script.py
@@ -80,12 +80,32 @@
for service in services:
dataset = service(dataset)

before_drop = dataset.gdf.shape[0]
dataset.gdf = dataset.gdf.drop_duplicates(subset="opa_id")
after_drop = dataset.gdf.shape[0]
print(f"Duplicate dataset rows dropped after initial services: {before_drop - after_drop}")

# Add Priority Level
dataset = priority_level(dataset)

# Print the distribution of "priority_level"
distribution = dataset.gdf["priority_level"].value_counts()
print("Distribution of priority level:")
print(distribution)

# Add Access Process
dataset = access_process(dataset)

# Print the distribution of "access_process"
distribution = dataset.gdf["access_process"].value_counts()
print("Distribution of access process:")
print(distribution)

before_drop = dataset.gdf.shape[0]
dataset.gdf = dataset.gdf.drop_duplicates(subset="opa_id")
after_drop = dataset.gdf.shape[0]
print(f"Duplicate final dataset rows droppeds: {before_drop - after_drop}")

# back up old tiles file whether we are reloading data or not
if backup is None:
backup = BackupArchiveDatabase()
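
The dedupe-and-report block added above now appears twice in script.py and once more in vacant_properties.py. A small helper, shown here as a hypothetical refactor rather than part of this commit, could consolidate the pattern:

import geopandas as gpd

def drop_duplicate_opa_rows(gdf: gpd.GeoDataFrame, label: str) -> gpd.GeoDataFrame:
    # Hypothetical helper: deduplicate on opa_id and report how many rows were removed.
    before = gdf.shape[0]
    deduped = gdf.drop_duplicates(subset="opa_id")
    print(f"Duplicate {label} rows dropped: {before - deduped.shape[0]}")
    return deduped

# Usage (illustrative): dataset.gdf = drop_duplicate_opa_rows(dataset.gdf, "final dataset")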
6 changes: 0 additions & 6 deletions src/components/FilterView.tsx
@@ -50,12 +50,6 @@ const filters = [
options: ['Land', 'Building'],
type: 'buttonGroup',
},
{
property: 'owner_type',
display: 'Owner',
options: ['Public', 'Business (LLC)', 'Individual'],
type: 'buttonGroup',
},
];

interface FilterViewProps {
4 changes: 2 additions & 2 deletions src/components/PropertyCard.tsx
@@ -21,7 +21,7 @@ function getPriorityClass(priorityLevel: string) {
}

const PropertyCard = ({ feature, setSelectedProperty }: PropertyCardProps) => {
const { address, guncrime_density, priority_level, opa_id } =
const { address, gun_crimes_density_label, priority_level, opa_id } =
feature.properties;

const image = `https://storage.googleapis.com/cleanandgreenphl/${opa_id}.jpg`;
@@ -62,7 +62,7 @@ const PropertyCard = ({ feature, setSelectedProperty }: PropertyCardProps) => {
{formattedAddress}
</button>
<div className="text-gray-700 body-sm">
{guncrime_density} Gun Crime Rate
{gun_crimes_density_label} Gun Crime Rate
</div>
</div>
<Chip
2 changes: 1 addition & 1 deletion src/components/PropertyDetailSection.tsx
@@ -29,7 +29,7 @@ const tableCols = [
label: 'Address',
},
{
key: 'guncrime_density',
key: 'gun_crimes_density_label',
label: 'Crime Rate',
},
{
10 changes: 5 additions & 5 deletions src/components/SinglePropertyDetail.tsx
@@ -88,7 +88,7 @@ const SinglePropertyDetail = ({
const {
address,
council_district,
guncrime_density,
gun_crimes_density_label,
market_value,
neighborhood,
open_violations_past_year,
@@ -101,7 +101,7 @@
tree_canopy_gap,
zipcode,
opa_id,
phs_partner_agency,
phs_care_program,
} = properties;
const image = `https://storage.googleapis.com/cleanandgreenphl/${opa_id}.jpg`;
const atlasUrl = `https://atlas.phila.gov/${address}`;
@@ -310,7 +310,7 @@ const SinglePropertyDetail = ({
rows={[
{
label: 'Gun Crime Rate',
content: guncrime_density,
content: gun_crimes_density_label,
},
{
label: 'Tree Canopy Gap',
@@ -321,8 +321,8 @@
content: open_violations_past_year,
},
{
label: 'PHS LandCare',
content: phs_partner_agency,
label: 'In PHS LandCare?',
content: phs_care_program,
},
{
label: 'Suggested Priority',