Added census block aggregation area in the exposure data when using setup_equity
Deltares · Dec 14, 2023 · 4bbfd75 · 4bbfd75
1 parent 1b002c4
commit 4bbfd75
Show file tree

Hide file tree

Showing 3 changed files with 54 additions and 25 deletions.
diff --git a/hydromt_fiat/fiat.py b/hydromt_fiat/fiat.py
@@ -10,7 +10,7 @@
 import pandas as pd
 from hydromt.models.model_grid import GridModel
 from shapely.geometry import box
-import os 
+import os
 import shutil
 
 from hydromt_fiat import DATADIR
@@ -71,7 +71,9 @@ def __init__(
         self.exposure = None
         self.vulnerability = None
         self.vf_ids_and_linking_df = pd.DataFrame()
-        self.additional_attributes_fn = ""  # Path or paths to the additional attributes dataset(s)
+        self.additional_attributes_fn = (
+            ""  # Path or paths to the additional attributes dataset(s)
+        )
         self.building_footprint_fn = ""  # Path to the building footprints dataset
 
     def setup_global_settings(
@@ -105,7 +107,6 @@ def setup_output(
         output_dir: str = "output",
         output_csv_name: str = "output.csv",
         output_vector_name: Union[str, List[str]] = "spatial.gpkg",
-
     ) -> None:
         """Setup Delft-FIAT output folder and files.
 
@@ -429,7 +430,7 @@ def update_max_potential_damage(
                 method=method,
                 max_dist=max_dist,
             )
-    
+
     def update_ground_elevation(
         self,
         ground_elevation: Union[int, float, None, str, Path],
@@ -505,7 +506,9 @@ def setup_hazard(
             # read maps and retrieve their attributes
             da_map_fn, da_name, da_type = read_maps(params, da_map_fn, idx)
 
-            da = self.data_catalog.get_rasterdataset(da_map_fn)  # removed geom=self.region because it is not always there
+            da = self.data_catalog.get_rasterdataset(
+                da_map_fn
+            )  # removed geom=self.region because it is not always there
 
             # Convert to units of the exposure data if required
             if (
@@ -761,37 +764,49 @@ def setup_equity_data(
 
         self.set_tables(df=equity.equity_data_shp, name="equity_data")
 
-        # Update (if necessary) the aggregation label: census block
         # Save the census block aggregation area data
+        block_groups = equity.get_block_groups()
+        self.set_geoms(block_groups, "aggregation_areas/block_groups")
+
+        # Update the aggregation label: census block
+        del self.exposure.exposure_db["Aggregation Label: Census Blockgroup"]
+        self.setup_aggregation_areas(
+            aggregation_area_fn=block_groups,
+            attribute_names="GEOID_short",
+            label_names="Aggregation Label: Census Blockgroup",
+        )
 
     def setup_aggregation_areas(
         self,
-        aggregation_area_fn: Union[List[str], List[Path], str, Path],
+        aggregation_area_fn: Union[List[str], List[Path], List[gpd.GeoDataFrame], str, Path, gpd.GeoDataFrame],
         attribute_names: Union[List[str], str],
         label_names: Union[List[str], str],
     ):
         """_summary_
 
         Parameters
         ----------
-        exposure_gdf : gpd.GeoDataFrame
-            Exposure data to join the aggregation areas to as `label_names`.
         aggregation_area_fn : Union[List[str], List[Path], str, Path]
             Path(s) to the aggregation area(s).
         attribute_names : Union[List[str], str]
             Name of the attribute(s) to join.
         label_names : Union[List[str], str]
             The name that the new attribute will get in the exposure data.
         """
-        # TODO: add a census block option to automatically download and save
-        # the data
         exposure_gdf = self.exposure.get_full_gdf(self.exposure.exposure_db)
         self.exposure.exposure_db = join_exposure_aggregation_areas(
             exposure_gdf, aggregation_area_fn, attribute_names, label_names
         )
 
         # Set the additional_attributes_fn property to save the additional datasets
-        self.additional_attributes_fn = aggregation_area_fn
+        if isinstance(aggregation_area_fn, list):
+            the_type = type(aggregation_area_fn[0])
+        else:
+            the_type = type(aggregation_area_fn)
+        if the_type != gpd.GeoDataFrame:
+            # write() copies the datasets referenced here into the FIAT model's root
+            # folder, so only store user-supplied paths (not the census block groups)
+            self.additional_attributes_fn = aggregation_area_fn
 
     def setup_building_footprint(
         self,
@@ -818,7 +833,7 @@ def setup_building_footprint(
             building_footprint_fn,
             attribute_name,
         )
-        
+
         # Set the building_footprint_fn property to save the building footprints
         self.building_footprint_fn = building_footprint_fn
 
@@ -953,10 +968,12 @@ def write(self):
             folder = Path(self.root).joinpath("additional_attributes")
             self.copy_datasets(self.additional_attributes_fn, folder)
         if self.building_footprint_fn:
-            folder = Path(self.root).joinpath("exposure" , "building_footprints")
+            folder = Path(self.root).joinpath("exposure", "building_footprints")
             self.copy_datasets(self.building_footprint_fn, folder)
 
-    def copy_datasets(self, data: Union[list, str, Path], folder: Union[Path, str]) -> None:
+    def copy_datasets(
+        self, data: Union[list, str, Path], folder: Union[Path, str]
+    ) -> None:
         """Copies datasets to another folder
 
         Parameters
@@ -966,7 +983,7 @@ def copy_datasets(self, data: Union[list, str, Path], folder: Union[Path, str])
         folder : Union[Path, str]
             _description_
         """
-        # Create additional attributes folder in root 
+        # Create additional attributes folder in root
         if not os.path.exists(folder):
             os.makedirs(folder)
 

diff --git a/hydromt_fiat/workflows/aggregation_areas.py b/hydromt_fiat/workflows/aggregation_areas.py
@@ -12,7 +12,7 @@ def process_value(value):
 
 def join_exposure_aggregation_multiple_areas(
     exposure_gdf: gpd.GeoDataFrame,
-    aggregation_area_fn: Union[List[str], List[Path]],
+    aggregation_area_fn: Union[List[str], List[Path], List[gpd.GeoDataFrame]],
     attribute_names: List[str],
     label_names: List[str],
 ) -> gpd.GeoDataFrame:
@@ -35,7 +35,10 @@ def join_exposure_aggregation_multiple_areas(
         _description_
     """
     for file_path, attribute_name, label_name in zip(aggregation_area_fn, attribute_names, label_names):
-        aggregation_gdf = gpd.read_file(file_path)
+        if isinstance(file_path, str) or isinstance(file_path, Path):
+            aggregation_gdf = gpd.read_file(file_path)
+        else:
+            aggregation_gdf = file_path
 
         ## check the projection of both gdf and if not match, reproject
         if exposure_gdf.crs != aggregation_gdf.crs:
@@ -50,7 +53,7 @@ def join_exposure_aggregation_multiple_areas(
         exposure_gdf = gpd.sjoin(
             exposure_gdf,
             aggregation_gdf[["geometry", attribute_name]],
-            op="intersects",
+            predicate="intersects",
             how="left",
         )
 
@@ -69,7 +72,7 @@ def join_exposure_aggregation_multiple_areas(
 
 def join_exposure_aggregation_areas(
     exposure_gdf: gpd.GeoDataFrame,
-    aggregation_area_fn: Union[List[str], List[Path], str, Path],
+    aggregation_area_fn: Union[List[str], List[Path], List[gpd.GeoDataFrame], str, Path, gpd.GeoDataFrame],
     attribute_names: Union[List[str], str],
     label_names: Union[List[str], str],
 ) -> gpd.GeoDataFrame:
@@ -86,7 +89,7 @@ def join_exposure_aggregation_areas(
     label_names : Union[List[str], str]
         Name of the label(s) to join.
     """
-    if isinstance(aggregation_area_fn, str) or isinstance(aggregation_area_fn, Path):
+    if isinstance(aggregation_area_fn, str) or isinstance(aggregation_area_fn, Path) or isinstance(aggregation_area_fn, gpd.GeoDataFrame):
         aggregation_area_fn = [aggregation_area_fn]
     if isinstance(attribute_names, str):
         attribute_names = [attribute_names]

diff --git a/hydromt_fiat/workflows/equity_data.py b/hydromt_fiat/workflows/equity_data.py
@@ -9,6 +9,7 @@
 from zipfile import ZipFile
 from pathlib import Path
 from typing import List
+import shutil
 
 
 class EquityData:
@@ -25,7 +26,7 @@ def __init__(self, data_catalog: DataCatalog, logger: Logger, save_folder: str):
 
         self.pd_domain_scores_geo = pd.DataFrame()
         self.logger = logger
-        self.svi_data_shp = gpd.GeoDataFrame()
+        self.equity_data_shp = gpd.GeoDataFrame()
         self.block_groups = gpd.GeoDataFrame()
 
     def set_up_census_key(self, census_key: str):
@@ -177,12 +178,19 @@ def download_shp_geom(self, year_data: int, counties: List[str]):
 
             block_groups_shp = shp.dissolve(by=attrs, as_index=False)
             block_groups_shp = block_groups_shp[attrs + ["geometry"]]
-            block_groups_shp["GEOID_short"] = block_groups_shp['STATEFP' + code].astype(str) + block_groups_shp['COUNTYFP' + code].astype(str) + block_groups_shp['TRACTCE' + code].astype(str) + block_groups_shp['BLKGRPCE' + code].astype(str)
             block_groups_shp["GEO_ID"] = "1500000US" + block_groups_shp['STATEFP' + code].astype(str) + block_groups_shp['COUNTYFP' + code].astype(str) + block_groups_shp['TRACTCE' + code].astype(str) + block_groups_shp['BLKGRPCE' + code].astype(str)
+            block_groups_shp["GEOID_short"] = block_groups_shp["GEO_ID"].str.split("US").str[1]
             block_groups_list.append(block_groups_shp)
 
         self.block_groups = gpd.GeoDataFrame(pd.concat(block_groups_list))
 
+        # Delete the downloaded shapefiles folder, which is no longer needed
+        shp_folder = Path(self.save_folder) / "shapefiles"
+        try:
+            shutil.rmtree(shp_folder)
+        except Exception as e:
+            self.logger.warning(f"Folder {shp_folder} cannot be removed: {e}")
+
     def merge_equity_data_shp(self):
         """Merges the geometry data with the equity_data downloaded"""
         self.equity_data_shp = self.pd_domain_scores_geo.merge(self.block_groups[["GEO_ID", "geometry"]], on="GEO_ID", how="left")
@@ -196,8 +204,9 @@ def merge_equity_data_shp(self):
             "The geometry information was successfully added to the equity information"
         )
 
-        aggregation_areas = self.block_groups[["GEOID_short", "geometry"]]
+    def get_block_groups(self):
+        return self.block_groups[["GEOID_short", "geometry"]]
 
     def clean(self):
         """Removes unnecessary columns"""
-        self.svi_data_shp = self.svi_data_shp[["GEOID_short", "TotalPopulationBG", "PerCapitaIncomeBG"]]
+        self.equity_data_shp = self.equity_data_shp[["GEOID_short", "TotalPopulationBG", "PerCapitaIncomeBG"]]