Skip to content

Commit

Permalink
minor bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
mheberger committed Sep 21, 2023
1 parent 0a3e6ca commit ae8575b
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 35 deletions.
46 changes: 27 additions & 19 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
# Path to your CSV file with the watershed outlet data
OUTLETS_CSV = 'outlets_sample.csv'

# Set to True for "high-resolution" mode or False for "low-resolution."
# Set to True for "higher resolution" mode or False for "lower resolution."
HIGH_RES = True

# Directory containing the MERIT basin-scale flow direction rasters (.tif).
# Download from
# For all paths, do not include a trailing slash.
MERIT_FDIR_DIR = "data/raster/flowdir_basins"
MERIT_FDIR_DIR = "data/raster/flowdir_basins"

# Directory containing the MERIT the flow accumulation rasters (.tif files).
MERIT_ACCUM_DIR = "data/raster/accum_basins"
Expand All @@ -32,21 +32,25 @@
# Folder where you have stored the Merit-BASINS catchment shapefiles.
HIGHRES_CATCHMENTS_DIR = "data/shp/merit_catchments"

# Folder where you have stored the MERIT-Basins rivers shapefiles
RIVERS_DIR = "data/shp/merit_rivers"

# Location of simplified catchment boundaries. Download the files from
# https://mghydro.org/watersheds/share/catchments_simplified.zip
LOWRES_CATCHMENTS_DIR = "data/shp/catchments_simplified"

# Folder where you have stored the MERIT-Basins rivers shapefiles
RIVERS_DIR = "data/shp/merit_rivers"


# Folder where the script will write the output GeoJSON files or shapefiles
OUTPUT_DIR = "output"

# The file extension will determine the types of files the script creates.
# "geojson" for GeoJSON files (recommended) or "shp" for shapefiles
# Use a blank string "" if you don't want any output
# (for example, you are only making the map)
OUTPUT_EXT = "shp"
# "geojson" for GeoJSON files
# "shp" for shapefile
# "gpkg" for GeoPackage (recommended)
# Use a blank string "" if you DO NOT want any output (for example,
# you are only making the interactive map). Other file formats are available;
# see: https://geopandas.org/en/stable/docs/user_guide/io.html#writing-spatial-data
OUTPUT_EXT = "gpkg"

# Set to True to ouput a summary of the delineation in OUTPUT.CSV
OUTPUT_CSV = True
Expand All @@ -57,45 +61,49 @@

# If the requested watershed outlet is not inside a catchment, how far away
# from the point should we look for the nearest catchment (in degrees)
SEARCH_DIST = 0.01
SEARCH_DIST = 0.025

# Watersheds created with Merit-Hydro data tend to have many "donut holes"
# ranging from one or two pixels to much larger.
FILL = True

# If FILL = True, you many choose to to fill donut holes that are below a
# certain size. This is the number of pixels, on the 3 arcsecond grid.
FILL_THRESHOLD = 0
# certain size. This is the number of pixels, on the 3 arcsecond grid.
# Set to 0 to fill ALL holes.
FILL_THRESHOLD = 100

# Simplify the watershed boundary? This will remove some vertices from the watershed boundary and output smaller files.
# Simplify the watershed boundary? This will remove some vertices
# from the watershed boundary and output smaller files.
SIMPLIFY = False

# If SIMPLIFY is True, set SIMPLIFY_TOLERANCE to a value in decimal degrees. Note that the vector polygons
# If SIMPLIFY is True, set SIMPLIFY_TOLERANCE to a value in decimal degrees.
# Note that the vector polygons
SIMPLIFY_TOLERANCE = 0.0008

# Set to TRUE if you want the script to create a local web page where you
# can review the results
MAKE_MAP = True

# Folder where the script should put the map files.
# Folder where the script should put the map files. (MAKE sure it exists!)
# The mapping routine will make _viewer.html and .js files for every watershed
MAP_FOLDER = "map"

# On the map, do you also want to include the rivers?
MAP_RIVERS = True

# If you mapped the rivers, how many stream orders to include?
# I recommend 4. More than this and the browser may not display all the rivers in a large watershed.
NUM_STREAM_ORDERS = 4
# I recommend 4 or 5. More than this and the browser may not display all the rivers in a large watershed.
NUM_STREAM_ORDERS = 3

# Set to True to use the experimental match areas feature.
# You must include watershed areas in your outlets CSV file to use this feature.
MATCH_AREAS = False

# If you set MATCH_AREAS = True, how close of a match should the script look for?
# Enter 0.25 for 25%. If you have not entered areas in your CSV file, you can ignore this parameter.
AREA_MATCHING_THRESHOLD = 0.35
AREA_MATCHING_THRESHOLD = 0.25

# If you set MATCH_AREAS = True, how far away from the original outlet point should the script look
# for a river reach that is a better match in terms of upstream area?
MAX_DIST = 0.15
# Units are decimal degrees (not a proper distance measurement!)
MAX_DIST = 0.075
21 changes: 15 additions & 6 deletions delineate.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,17 +217,19 @@ def plot_polys(polygons: list, wid: int):
plt.close(fig)



def delineate():
"""
Main watershed delineation routine
Make sure to set up the variables in `config.py` before running.
THIS is the Main watershed delineation routine
Make sure to set the variables in `config.py` before running.
Reads a list of outlet points from a .csv file,
then finds their watersheds or drainage basins,
using hybrid of vector- and raster-based methods.
Outputs geodata (.shp, .gpkg, etc.) and optionally a CSV file with a summary of results
(shows the watershed id, names, and areas)
and optionally creates an HTML page with a handy map viewer to review the results.
(shows the watershed id, names, and areas).
Optionally creates an HTML page with a handy map viewer to review the results.
"""

# Regular expression used to find numbers so I can round lat, lng coordinates in GeoJSON files to make them smaller
Expand Down Expand Up @@ -439,6 +441,10 @@ def plot_basins():
else:
catchments_dir = LOWRES_CATCHMENTS_DIR

# TODO: Use the approach here:
# https://stackoverflow.com/questions/76804871/create-save-and-load-spatial-index-using-geopandas
# to pickle the geodataframe for future use, and check if there is a pickled file already

catchments_shp = "{}/cat_pfaf_{}_MERIT_Hydro_v07_Basins_v01.shp".format(catchments_dir, basin)

if not os.path.isfile(catchments_shp):
Expand All @@ -448,6 +454,8 @@ def plot_basins():
catchments_gdf.set_index('COMID', inplace=True)
catchments_gdf.to_crs(crs, inplace=True)
print(" Building spatial index for catchments geodata in basin {}".format(basin))
# TODO: I am not using this spatial index for anything. It is not clear to me that
# it is making a difference. I should benchmark this.
catchments_index = catchments_gdf.sindex

# The network data is in the RIVERS file rather than the CATCHMENTS file
Expand All @@ -460,7 +468,8 @@ def plot_basins():
rivers_gdf.set_index('COMID', inplace=True)
rivers_sindex = rivers_gdf.sindex

# Spatial join to find which unit catchment our gage falls inside.
# Performa a Spatial join on gages (points) and unit catchments (polygons)
# to find the corresponding unit catchment for each gage
# Adds the fields COMID and unitarea
if VERBOSE: print("Performing spatial join on {} outlet points in basin #{}".format(num_gages_in_basin, basin))
gages_in_basin.drop(['index_right'], axis=1, inplace=True)
Expand Down
24 changes: 14 additions & 10 deletions py/fast_dissolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
For a layer with many polygons, it can be slow to dissolve to get the "outer boundary" or "outer perimeter"
using GeoPandas
I found a method that works a little more quickly.
I found a method that works a little bit more quickly.
(1) create a new rectangle out of the bounding box around all the features.
(2) clip the rectangle using the layer.
(2) clip the rectangle using the input layer (containing polygons).
input: a geopandas dataframe with multiple polygons.
Expand All @@ -23,16 +23,20 @@


def buffer(poly: Polygon) -> Polygon:
""" Little trick that works wonders to remove slivers, dangles
and other weird errors in a shapely polygon"""
"""
Little trick that works wonders to remove slivers, dangles
and other weird errors in a shapely polygon. We do a series of
2 buffers, out and then in, and it magically fixes issues.
"""
dist = 0.00001
return poly.buffer(dist, join_style=2).buffer(-dist, join_style=2)


def close_holes(poly: Polygon or MultiPolygon, area_max: float) -> Polygon:
def close_holes(poly: Polygon or MultiPolygon, area_max: float) -> Polygon or MultiPolygon:
"""
Close polygon holes by limitation to the exterior ring.
Updated to accept MultiPolygon
Updated to accept a MultiPolygon as input
Args:
poly: Input shapely Polygon
area_max: keep holes that are larger than this.
Expand Down Expand Up @@ -73,7 +77,7 @@ def close_holes(poly: Polygon or MultiPolygon, area_max: float) -> Polygon:



def dissolve_shp(shp):
def dissolve_shp(shp: str) -> gpd.GeoDataFrame:
"""
input is the path to a shapefile on disk.
Expand All @@ -84,12 +88,12 @@ def dissolve_shp(shp):
return dissolve_geopandas(df)


def fill_geopandas(df, area_max):
filled = df.geometry.apply(lambda p: close_holes(p, area_max))
def fill_geopandas(gdf: gpd.GeoDataFrame, area_max: float) -> gpd.GeoDataFrame:
filled = gdf.geometry.apply(lambda p: close_holes(p, area_max))
return filled


def dissolve_geopandas(df):
def dissolve_geopandas(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
"""
input is a Geopandas dataframe with multiple polygons that you want
to merge and dissolve into a single polygon
Expand Down

0 comments on commit ae8575b

Please sign in to comment.