minor bug fixes

mheberger · Sep 21, 2023 · ae8575b · ae8575b
1 parent 0a3e6ca
commit ae8575b
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 35 deletions.
diff --git a/config.py b/config.py
@@ -11,13 +11,13 @@
 # Path to your CSV file with the watershed outlet data
 OUTLETS_CSV = 'outlets_sample.csv'
 
-# Set to True for "high-resolution" mode or False for "low-resolution."
+# Set to True for "higher resolution" mode or False for "lower resolution."
 HIGH_RES = True
 
 # Directory containing the MERIT basin-scale flow direction rasters (.tif).
 # Download from 
 # For all paths, do not include a trailing slash.
-MERIT_FDIR_DIR =  "data/raster/flowdir_basins"
+MERIT_FDIR_DIR = "data/raster/flowdir_basins"
 
 # Directory containing the MERIT the flow accumulation rasters (.tif files).
 MERIT_ACCUM_DIR = "data/raster/accum_basins"
@@ -32,21 +32,25 @@
 # Folder where you have stored the Merit-BASINS catchment shapefiles.
 HIGHRES_CATCHMENTS_DIR = "data/shp/merit_catchments"
 
-# Folder where you have stored the MERIT-Basins rivers shapefiles
-RIVERS_DIR = "data/shp/merit_rivers"
-
 # Location of simplified catchment boundaries. Download the files from
 # https://mghydro.org/watersheds/share/catchments_simplified.zip
 LOWRES_CATCHMENTS_DIR = "data/shp/catchments_simplified"
 
+# Folder where you have stored the MERIT-Basins rivers shapefiles
+RIVERS_DIR = "data/shp/merit_rivers"
+
+
 # Folder where the script will write the output GeoJSON files or shapefiles
 OUTPUT_DIR = "output"
 
 # The file extension will determine the types of files the script creates.
-# "geojson" for GeoJSON files (recommended) or "shp" for shapefiles
-# Use a blank string "" if you don't want any output 
-# (for example, you are only making the map)
-OUTPUT_EXT = "shp"
+#   "geojson" for GeoJSON files
+#   "shp" for shapefile
+#   "gpkg" for GeoPackage (recommended)
+# Use a blank string "" if you DO NOT want any output (for example,
+# you are only making the interactive map).  Other file formats are available;
+# see: https://geopandas.org/en/stable/docs/user_guide/io.html#writing-spatial-data
+OUTPUT_EXT = "gpkg"
 
 # Set to True to ouput a summary of the delineation in OUTPUT.CSV
 OUTPUT_CSV = True
@@ -57,45 +61,49 @@
 
 # If the requested watershed outlet is not inside a catchment, how far away 
 # from the point should we look for the nearest catchment (in degrees)
-SEARCH_DIST = 0.01
+SEARCH_DIST = 0.025
 
 # Watersheds created with Merit-Hydro data tend to have many "donut holes"
 # ranging from one or two pixels to much larger.
 FILL = True
 
 # If FILL = True, you many choose to to fill donut holes that are below a
-# certain size. This is the number of pixels, on the 3 arcsecond grid.
-FILL_THRESHOLD = 0
+# certain size. This is the number of pixels, on the 3 arcsecond grid. 
+# Set to 0 to fill ALL holes.
+FILL_THRESHOLD = 100
 
-# Simplify the watershed boundary? This will remove some vertices from the watershed boundary and output smaller files. 
+# Simplify the watershed boundary? This will remove some vertices 
+# from the watershed boundary and output smaller files.
 SIMPLIFY = False
 
-# If SIMPLIFY is True, set SIMPLIFY_TOLERANCE to a value in decimal degrees. Note that the vector polygons
+# If SIMPLIFY is True, set SIMPLIFY_TOLERANCE to a value in decimal degrees. 
+# Note that the vector polygons
 SIMPLIFY_TOLERANCE = 0.0008
 
 # Set to TRUE if you want the script to create a local web page where you 
 # can review the results
 MAKE_MAP = True
 
-# Folder where the script should put the map files.
+# Folder where the script should put the map files. (MAKE sure it exists!)
 # The mapping routine will make _viewer.html and .js files for every watershed
 MAP_FOLDER = "map"
 
 # On the map, do you also want to include the rivers?
 MAP_RIVERS = True
 
 # If you mapped the rivers, how many stream orders to include?
-# I recommend 4. More than this and the browser may not display all the rivers in a large watershed.
-NUM_STREAM_ORDERS = 4
+# I recommend 4 or 5. More than this and the browser may not display all the rivers in a large watershed.
+NUM_STREAM_ORDERS = 3
 
 # Set to True to use the experimental match areas feature. 
 # You must include watershed areas in your outlets CSV file to use this feature. 
 MATCH_AREAS = False
 
 # If you set MATCH_AREAS = True, how close of a match should the script look for?
 # Enter 0.25 for 25%. If you have not entered areas in your CSV file, you can ignore this parameter.
-AREA_MATCHING_THRESHOLD = 0.35
+AREA_MATCHING_THRESHOLD = 0.25
 
 # If you set MATCH_AREAS = True, how far away from the original outlet point should the script look 
 # for a river reach that is a better match in terms of upstream area?
-MAX_DIST = 0.15
+# Units are decimal degrees (not a proper distance measurement!)
+MAX_DIST = 0.075
diff --git a/delineate.py b/delineate.py
@@ -217,17 +217,19 @@ def plot_polys(polygons: list, wid: int):
     plt.close(fig)
 
 
-
 def delineate():
     """
-    Main watershed delineation routine
-    Make sure to set up the variables in `config.py` before running.
+    THIS is the Main watershed delineation routine
+    Make sure to set the variables in `config.py` before running.
+
     Reads a list of outlet points from a .csv file,
     then finds their watersheds or drainage basins,
     using hybrid of vector- and raster-based methods.
+
     Outputs geodata (.shp, .gpkg, etc.) and optionally a CSV file with a summary of results
-    (shows the watershed id, names, and areas)
-    and optionally creates an HTML page with a handy map viewer to review the results.
+    (shows the watershed id, names, and areas).
+
+    Optionally creates an HTML page with a handy map viewer to review the results.
     """
 
     # Regular expression used to find numbers so I can round lat, lng coordinates in GeoJSON files to make them smaller
@@ -439,6 +441,10 @@ def plot_basins():
         else:
             catchments_dir = LOWRES_CATCHMENTS_DIR
 
+        # TODO: Use the approach here:
+        #   https://stackoverflow.com/questions/76804871/create-save-and-load-spatial-index-using-geopandas
+        #   to pickle the geodataframe for future use, and check if there is a pickled file already
+
         catchments_shp = "{}/cat_pfaf_{}_MERIT_Hydro_v07_Basins_v01.shp".format(catchments_dir, basin)
 
         if not os.path.isfile(catchments_shp):
@@ -448,6 +454,8 @@ def plot_basins():
         catchments_gdf.set_index('COMID', inplace=True)
         catchments_gdf.to_crs(crs, inplace=True)
         print("  Building spatial index for catchments geodata in basin {}".format(basin))
+        # TODO: I am not using this spatial index for anything. It is not clear to me that
+        #   it is making a difference. I should benchmark this. 
         catchments_index = catchments_gdf.sindex
 
         # The network data is in the RIVERS file rather than the CATCHMENTS file
@@ -460,7 +468,8 @@ def plot_basins():
         rivers_gdf.set_index('COMID', inplace=True)
         rivers_sindex = rivers_gdf.sindex
 
-        # Spatial join to find which unit catchment our gage falls inside.
+        # Performa a Spatial join on gages (points) and unit catchments (polygons)
+        # to find the corresponding unit catchment for each gage
         # Adds the fields COMID and unitarea
         if VERBOSE: print("Performing spatial join on {} outlet points in basin #{}".format(num_gages_in_basin, basin))
         gages_in_basin.drop(['index_right'], axis=1, inplace=True)

diff --git a/py/fast_dissolve.py b/py/fast_dissolve.py
@@ -4,10 +4,10 @@
 For a layer with many polygons, it can be slow to dissolve to get the "outer boundary" or "outer perimeter"
 using GeoPandas
 
-I found a method that works a little more quickly.
+I found a method that works a little bit more quickly.
 
 (1) create a new rectangle out of the bounding box around all the features. 
-(2) clip the rectangle using the layer. 
+(2) clip the rectangle using the input layer (containing polygons).
 
 
 input: a geopandas dataframe with multiple polygons.
@@ -23,16 +23,20 @@
 
 
 def buffer(poly: Polygon) -> Polygon:
-    """ Little trick that works wonders to remove slivers, dangles
-    and other weird errors in a shapely polygon"""
+    """
+    Little trick that works wonders to remove slivers, dangles
+    and other weird errors in a shapely polygon. We do a series of
+    2 buffers, out and then in, and it magically fixes issues.
+
+    """
     dist = 0.00001
     return poly.buffer(dist, join_style=2).buffer(-dist, join_style=2)
 
 
-def close_holes(poly: Polygon or MultiPolygon, area_max: float) -> Polygon:
+def close_holes(poly: Polygon or MultiPolygon, area_max: float) -> Polygon or MultiPolygon:
     """
     Close polygon holes by limitation to the exterior ring.
-    Updated to accept MultiPolygon
+    Updated to accept a MultiPolygon as input
     Args:
         poly: Input shapely Polygon
         area_max: keep holes that are larger than this.
@@ -73,7 +77,7 @@ def close_holes(poly: Polygon or MultiPolygon, area_max: float) -> Polygon:
 
 
 
-def dissolve_shp(shp):
+def dissolve_shp(shp: str) -> gpd.GeoDataFrame:
     """
     input is the path to a shapefile on disk. 
     
@@ -84,12 +88,12 @@ def dissolve_shp(shp):
     return dissolve_geopandas(df)
 
 
-def fill_geopandas(df, area_max):
-    filled = df.geometry.apply(lambda p: close_holes(p, area_max))
+def fill_geopandas(gdf: gpd.GeoDataFrame, area_max: float) -> gpd.GeoDataFrame:
+    filled = gdf.geometry.apply(lambda p: close_holes(p, area_max))
     return filled
 
 
-def dissolve_geopandas(df):
+def dissolve_geopandas(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     """
     input is a Geopandas dataframe with multiple polygons that you want 
       to merge and dissolve into a single polygon