Merge pull request #912 from CodeForPhilly/staging
Deploy Staging to Main branch
CodeWritingCow authored Sep 25, 2024
2 parents bc6a06e + b8e0886 commit 250cf3f
Showing 27 changed files with 5,632 additions and 9,592 deletions.
70 changes: 55 additions & 15 deletions .github/workflows/pr_checks_backend.yml
@@ -8,13 +8,12 @@ on:
- 'data/**'
- 'Dockerfile-pg'
- 'init_pg.sql'
- 'docker-compose.yml'
- 'docker compose.yml'
workflow_dispatch:

jobs:
build:
setup:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v4
@@ -30,49 +29,90 @@ jobs:
with:
python-version: '3.11.4'

run-formatter:
runs-on: ubuntu-latest
needs: setup
continue-on-error: true

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Run Ruff Formatter in Docker
run: |
cd data
docker-compose run --rm formatter
continue-on-error: true
docker compose run --rm formatter
run-linter:
runs-on: ubuntu-latest
needs: setup
continue-on-error: true

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Run Ruff Linter in Docker
run: |
cd data
docker-compose run --rm linter
continue-on-error: true
docker compose run --rm linter
build-project:
runs-on: ubuntu-latest
needs: setup

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Build vacant-lots-proj
run: |
cd data
docker-compose build vacant-lots-proj
docker compose build vacant-lots-proj
- name: Build data-fetcher
run: |
cd data
docker-compose build data-fetcher
run-services:
runs-on: ubuntu-latest
needs: build-project

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Run all services
run: |
cd data
docker-compose up -d
docker compose up -d
check-build-status:
runs-on: ubuntu-latest
needs: run-services

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Check build status
if: failure()
run: |
echo "One or more services failed to build and run."
exit 1
- name: Report success
if: success()
run: echo "All services built and ran successfully."

check-lint-format-status:
runs-on: ubuntu-latest
needs: [run-formatter, run-linter]

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Check formatter and linter status
if: failure()
run: |
echo "Formatting or linting issues found. Please fix the issues."
exit 1
- name: Formatter and linter success
if: success()
run: echo "Formatting and linting passed successfully."
2 changes: 1 addition & 1 deletion .gitignore
@@ -60,4 +60,4 @@ data/src/app/service-account-key.json

# awkde build files
data/src/awkde/build/
tmp/
tmp/
3 changes: 3 additions & 0 deletions .prettierignore
@@ -0,0 +1,3 @@
# Ignore Ruff cache
.ruff_cache
data/src/.ruff_cache
2 changes: 1 addition & 1 deletion README.md
@@ -76,7 +76,7 @@ Please see the documents in our [docs](/docs) folder for background on the proje
2. The [code of conduct](/docs/CODE_OF_CONDUCT.md)
3. The appropriate installation setup instructions ([front end](/docs/SETUP/FRONT_END.md) and/or [back end](/docs/SETUP/BACK_END.md))

### Contributors
### Point of Contact

<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
<!-- prettier-ignore-start -->
4 changes: 2 additions & 2 deletions data/docker-compose.yml
@@ -26,15 +26,15 @@ services:
context: .
volumes:
- ./src:/usr/src/app
command: sh -c "pip install ruff && ruff /usr/src/app --fix --exclude '/usr/src/app/data/src/awkde/'"
command: sh -c "pip install ruff && ruff format --exclude '/usr/src/app/awkde/'"
network_mode: 'host'

linter:
build:
context: .
volumes:
- ./src:/usr/src/app
command: sh -c "pip install ruff && ruff check /usr/src/app --exclude '/usr/src/app/data/src/awkde/'"
command: sh -c "pip install ruff && ruff check --fix --exclude '/usr/src/app/awkde/'"
network_mode: 'host'

streetview:
1 change: 1 addition & 0 deletions data/src/Pipfile
@@ -30,6 +30,7 @@ networkx = "*"
libpysal = "*"
jenkspy = "*"
pyarrow = "*"
tqdm = "*"

[dev-packages]

2,366 changes: 1,220 additions & 1,146 deletions data/src/Pipfile.lock

Large diffs are not rendered by default.

52 changes: 33 additions & 19 deletions data/src/classes/featurelayer.py
@@ -1,40 +1,49 @@
import logging as log
import os
import subprocess
import traceback
import sqlalchemy as sa
import logging as log

import geopandas as gpd
import pandas as pd
import requests
import sqlalchemy as sa
from config.config import (
FORCE_RELOAD,
USE_CRS,
log_level,
min_tiles_file_size_in_bytes,
write_production_tiles_file,
)
from config.psql import conn, local_engine
from esridump.dumper import EsriDumper
from google.cloud import storage
from google.cloud.storage.bucket import Bucket
from shapely import Point, wkb

from config.config import FORCE_RELOAD, USE_CRS, write_production_tiles_file, min_tiles_file_size_in_bytes, log_level

log.basicConfig(level=log_level)


def google_cloud_bucket() -> Bucket:
"""Build the google cloud bucket with name configured in your environ or default of cleanandgreenphl
Returns:
Bucket: the gcp bucket
"""
credentials_path = os.path.expanduser("/app/service-account-key.json")

if not os.path.exists(credentials_path):
raise FileNotFoundError(f"Credentials file not found at {credentials_path}")

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
bucket_name = os.getenv("GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl")

storage_client = storage.Client(project="clean-and-green-philly")
return storage_client.bucket(bucket_name)


bucket = google_cloud_bucket()


class FeatureLayer:
"""
FeatureLayer is a class to represent a GIS dataset. It can be initialized with a URL to an Esri Feature Service, a SQL query to Carto, or a GeoDataFrame.
@@ -50,7 +59,7 @@ def __init__(
force_reload=FORCE_RELOAD,
from_xy=False,
use_wkb_geom_field=None,
cols: list[str] = None
cols: list[str] = None,
):
self.name = name
self.esri_rest_urls = (
@@ -230,7 +239,7 @@ def spatial_join(self, other_layer, how="left", predicate="intersects"):
self.gdf.drop_duplicates(inplace=True)

# Coerce opa_id to integer and drop rows where opa_id is null or non-numeric
self.gdf["opa_id"] = pd.to_numeric(self.gdf["opa_id"], errors="coerce")
self.gdf.loc[:, "opa_id"] = pd.to_numeric(self.gdf["opa_id"], errors="coerce")
self.gdf = self.gdf.dropna(subset=["opa_id"])

def opa_join(self, other_df, opa_column):
@@ -239,11 +248,13 @@ def opa_join(self, other_df, opa_column):
"""

# Coerce opa_column to integer and drop rows where opa_column is null or non-numeric
other_df[opa_column] = pd.to_numeric(other_df[opa_column], errors="coerce")
other_df.loc[:, opa_column] = pd.to_numeric(
other_df[opa_column], errors="coerce"
)
other_df = other_df.dropna(subset=[opa_column])

# Coerce opa_id to integer and drop rows where opa_id is null or non-numeric
self.gdf["opa_id"] = pd.to_numeric(self.gdf["opa_id"], errors="coerce")
self.gdf.loc[:, "opa_id"] = pd.to_numeric(self.gdf["opa_id"], errors="coerce")
self.gdf = self.gdf.dropna(subset=["opa_id"])

# Perform the merge
@@ -253,13 +264,14 @@

# Check if 'geometry' column exists in both dataframes and clean up
if "geometry_x" in joined.columns and "geometry_y" in joined.columns:
joined = joined.drop(columns=["geometry_y"])
joined = joined.drop(columns=["geometry_y"]).copy() # Ensure a full copy
joined = joined.rename(columns={"geometry_x": "geometry"})

if opa_column != "opa_id":
joined = joined.drop(columns=[opa_column])

self.gdf = joined
# Assign the joined DataFrame to self.gdf as a full copy
self.gdf = joined.copy()
self.rebuild_gdf()

def rebuild_gdf(self):
@@ -270,7 +282,7 @@ def create_centroid_gdf(self):
Convert the geometry of the GeoDataFrame to centroids.
"""
self.centroid_gdf = self.gdf.copy()
self.centroid_gdf["geometry"] = self.gdf["geometry"].centroid
self.centroid_gdf.loc[:, "geometry"] = self.gdf["geometry"].centroid

def build_and_publish_pmtiles(self, tileset_id):
zoom_threshold = 13
@@ -336,17 +348,19 @@ def build_and_publish_pmtiles(self, tileset_id):
subprocess.run(command)

write_files = [f"{tileset_id}_staging.pmtiles"]

if write_production_tiles_file:
write_files.append(f"{tileset_id}.pmtiles")

# check whether the temp saved tiles files is big enough.
# If not then it might be corrupted so log error and don't upload to gcp.
file_size = os.stat(temp_merged_pmtiles).st_size
if file_size < min_tiles_file_size_in_bytes:
raise ValueError(f"{temp_merged_pmtiles} is {file_size} bytes in size but should be at least {min_tiles_file_size_in_bytes}. Therefore, we are not uploading any files to the GCP bucket. The file may be corrupt or incomplete.")

raise ValueError(
f"{temp_merged_pmtiles} is {file_size} bytes in size but should be at least {min_tiles_file_size_in_bytes}. Therefore, we are not uploading any files to the GCP bucket. The file may be corrupt or incomplete."
)

# Upload to Google Cloud Storage
for file in write_files:
blob = bucket.blob(file)
blob.upload_from_filename(temp_merged_pmtiles)
blob.upload_from_filename(temp_merged_pmtiles)
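A recurring pattern in this file's changes is replacing direct column assignment with `.loc`-based assignment before the OPA join, so the `opa_id` coercion and row-dropping act on the original frame rather than a possibly detached copy. A minimal sketch of that pattern under assumed data (column names mirror the diff; the sample values are hypothetical):

```python
import pandas as pd

# Hypothetical parcel data; opa_id arrives as strings, some of them invalid.
gdf = pd.DataFrame({"opa_id": ["351243200", "bad-id", None], "address": ["A", "B", "C"]})

# Coerce to numeric via .loc so the assignment targets the parent frame
# (avoids pandas' SettingWithCopyWarning on chained assignment).
gdf.loc[:, "opa_id"] = pd.to_numeric(gdf["opa_id"], errors="coerce")

# Rows whose opa_id could not be parsed become NaN and are dropped.
gdf = gdf.dropna(subset=["opa_id"])

print(gdf)  # only the row with a valid opa_id remains
```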
50 changes: 35 additions & 15 deletions data/src/data_utils/city_owned_properties.py
@@ -18,26 +18,46 @@ def city_owned_properties(primary_featurelayer):
"sideyardeligible": "side_yard_eligible",
}
primary_featurelayer.gdf.rename(columns=rename_columns, inplace=True)

primary_featurelayer.gdf.loc[primary_featurelayer.gdf['owner_1'].isin(["PHILADELPHIA HOUSING AUTH", "PHILADELPHIA LAND BANK", "REDEVELOPMENT AUTHORITY", "PHILA REDEVELOPMENT AUTH"]), 'city_owner_agency'] = primary_featurelayer.gdf["owner_1"].replace({
"PHILADELPHIA HOUSING AUTH": "PHA",
"PHILADELPHIA LAND BANK": "Land Bank (PHDC)",
"REDEVELOPMENT AUTHORITY": "PRA",
"PHILA REDEVELOPMENT AUTH": "PRA"
})

primary_featurelayer.gdf.loc[
(primary_featurelayer.gdf['owner_1'] == "CITY OF PHILA") &
(primary_featurelayer.gdf['owner_2'].str.contains("PUBLIC PROP|PUBLC PROP", na=False)),
'city_owner_agency'
primary_featurelayer.gdf["owner_1"].isin(
[
"PHILADELPHIA HOUSING AUTH",
"PHILADELPHIA LAND BANK",
"REDEVELOPMENT AUTHORITY",
"PHILA REDEVELOPMENT AUTH",
]
),
"city_owner_agency",
] = primary_featurelayer.gdf["owner_1"].replace(
{
"PHILADELPHIA HOUSING AUTH": "PHA",
"PHILADELPHIA LAND BANK": "Land Bank (PHDC)",
"REDEVELOPMENT AUTHORITY": "PRA",
"PHILA REDEVELOPMENT AUTH": "PRA",
}
)

primary_featurelayer.gdf.loc[
(primary_featurelayer.gdf["owner_1"] == "CITY OF PHILA")
& (
primary_featurelayer.gdf["owner_2"].str.contains(
"PUBLIC PROP|PUBLC PROP", na=False
)
),
"city_owner_agency",
] = "DPP"

primary_featurelayer.gdf.loc[
primary_featurelayer.gdf['owner_1'].isin(["CITY OF PHILADELPHIA", "CITY OF PHILA"]) &
primary_featurelayer.gdf['owner_2'].isna(),
'city_owner_agency'
primary_featurelayer.gdf["owner_1"].isin(
["CITY OF PHILADELPHIA", "CITY OF PHILA"]
)
& primary_featurelayer.gdf["owner_2"].isna(),
"city_owner_agency",
] = "City of Philadelphia"

primary_featurelayer.gdf["side_yard_eligible"].fillna("No", inplace=True)
primary_featurelayer.gdf.loc[:, "side_yard_eligible"] = primary_featurelayer.gdf[
"side_yard_eligible"
].fillna("No")

return primary_featurelayer
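The reformatted block above keeps the same logic as before: a boolean mask selects rows whose `owner_1` is a known public agency, and `.loc` writes a normalized agency label into `city_owner_agency`. A small illustrative sketch of the mask-plus-`replace` pattern; the owner strings come from the diff, while the rows themselves are made up:

```python
import pandas as pd

gdf = pd.DataFrame({
    "owner_1": ["PHILADELPHIA LAND BANK", "CITY OF PHILA", "PRIVATE OWNER LLC"],
    "owner_2": [None, "DEPT OF PUBLIC PROP", None],
})

agency_map = {
    "PHILADELPHIA HOUSING AUTH": "PHA",
    "PHILADELPHIA LAND BANK": "Land Bank (PHDC)",
    "REDEVELOPMENT AUTHORITY": "PRA",
    "PHILA REDEVELOPMENT AUTH": "PRA",
}

# Rows owned by a known agency get the normalized agency label.
mask = gdf["owner_1"].isin(agency_map)
gdf.loc[mask, "city_owner_agency"] = gdf["owner_1"].replace(agency_map)

# City-owned rows whose second owner mentions public property are tagged DPP.
dpp_mask = (gdf["owner_1"] == "CITY OF PHILA") & gdf["owner_2"].str.contains(
    "PUBLIC PROP|PUBLC PROP", na=False
)
gdf.loc[dpp_mask, "city_owner_agency"] = "DPP"

print(gdf[["owner_1", "city_owner_agency"]])
```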
4 changes: 3 additions & 1 deletion data/src/data_utils/deliquencies.py
@@ -24,6 +24,8 @@ def deliquencies(primary_featurelayer):
"opa_number",
)

primary_featurelayer.gdf["sheriff_sale"].fillna("N", inplace=True)
primary_featurelayer.gdf.loc[:, "sheriff_sale"] = primary_featurelayer.gdf[
"sheriff_sale"
].fillna("N")

return primary_featurelayer
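Both this file and `city_owned_properties.py` swap `Series.fillna(..., inplace=True)` for an explicit `.loc` assignment; the in-place form operates on a column slice and, under recent pandas copy-on-write behavior, may not propagate back to the parent DataFrame and can emit chained-assignment warnings. A minimal before/after sketch with hypothetical data:

```python
import pandas as pd

gdf = pd.DataFrame({"sheriff_sale": ["Y", None, None]})

# Discouraged: fillna on a column slice with inplace=True may silently
# operate on a temporary copy under copy-on-write pandas.
# gdf["sheriff_sale"].fillna("N", inplace=True)

# Preferred: assign the filled column back through .loc on the parent frame.
gdf.loc[:, "sheriff_sale"] = gdf["sheriff_sale"].fillna("N")

print(gdf["sheriff_sale"].tolist())  # ['Y', 'N', 'N']
```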