Skip to content

Commit

Permalink
Ignore ruff less (#119)
Browse files Browse the repository at this point in the history
This removes a bunch of ruff ignores, with partly automatic and partly
manual fixes.
  • Loading branch information
visr authored Jul 30, 2024
1 parent 9c5af32 commit 6ea29f2
Show file tree
Hide file tree
Showing 75 changed files with 258 additions and 551 deletions.
18 changes: 17 additions & 1 deletion ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,23 @@ line-length = 120
[lint]
# See https://docs.astral.sh/ruff/rules/
select = ["D", "E", "F", "NPY", "PD", "C4", "I", "UP"]
ignore = ["D1", "D202", "D205", "D400", "D401", "D404", "E402", "E501", "E712", "E721", "F401", "F403", "F405", "PD002", "PD003", "PD008", "PD009", "PD010", "PD011", "PD013", "PD015", "PD901"]
ignore = [
"D1",
"D400",
"D401",
"E501", # TODO https://docs.astral.sh/ruff/rules/line-too-long/
"F403", # TODO https://docs.astral.sh/ruff/rules/undefined-local-with-import-star/
"F405", # TODO https://docs.astral.sh/ruff/rules/undefined-local-with-import-star-usage/
"PD002",
"PD003",
"PD008",
"PD009",
"PD010",
"PD011",
"PD013",
"PD015",
"PD901",
]
fixable = ["ALL"]

[lint.pydocstyle]
Expand Down
3 changes: 0 additions & 3 deletions scripts/notebooks/hydamo_0_analyse_data_waterboard.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,12 @@
"source": [
"from pathlib import Path\n",
"\n",
"import geopandas as gpd\n",
"import numpy as np\n",
"import pandas as pd\n",
"from hydamo.datamodel import HyDAMO\n",
"from pandas_xlsx_tables import xlsx_tables_to_dfs\n",
"from ribasim_lumping_tools.LHM_data_bewerking_analyse_utils import (\n",
" check_ids_hydamo_data,\n",
" check_if_object_on_hydroobject,\n",
" export_to_geopackage,\n",
" read_original_data,\n",
" translate_data_to_hydamo_format,\n",
")"
Expand Down
5 changes: 2 additions & 3 deletions scripts/notebooks/hydamo_1_process_hydamo_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@
"import geopandas as gpd\n",
"import numpy as np\n",
"import pandas as pd\n",
"from hydamo_preprocessing.preprocessing import preprocess_hydamo_hydroobjects\n",
"\n",
"warnings.simplefilter(action=\"ignore\", category=UserWarning)\n",
"warnings.simplefilter(action=\"ignore\", category=FutureWarning)\n",
"\n",
"from hydamo_preprocessing.preprocessing import preprocess_hydamo_hydroobjects"
"warnings.simplefilter(action=\"ignore\", category=FutureWarning)"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,9 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import warnings\n",
"from pathlib import Path\n",
"\n",
"import geopandas as gpd\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from ribasim_lumping_tools.run_ribasim_lumping_waterboard import run_ribasim_lumping_for_waterboard\n",
"\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@
"from pathlib import Path\n",
"\n",
"import geopandas as gpd\n",
"import matplotlib.pyplot as plt\n",
"import networkx as nx\n",
"import numpy as np\n",
"import pandas as pd\n",
"import ribasim\n",
"\n",
"warnings.simplefilter(action=\"ignore\", category=UserWarning)\n",
"warnings.simplefilter(action=\"ignore\", category=FutureWarning)"
Expand Down
4 changes: 0 additions & 4 deletions scripts/notebooks/hydamo_4_create_dummy_ribasim_models.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,7 @@
"\n",
"import fiona\n",
"import geopandas as gpd\n",
"import matplotlib.pyplot as plt\n",
"import networkx as nx\n",
"import numpy as np\n",
"import pandas as pd\n",
"import ribasim\n",
"from ribasim_lumping_tools.default_model import DEFAULTS, default_model\n",
"\n",
"warnings.simplefilter(action=\"ignore\", category=UserWarning)\n",
Expand Down
60 changes: 26 additions & 34 deletions scripts/notebooks/hydamo_preprocessing/general.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import datetime
import itertools
import logging
import os
import time
import warnings

import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
from shapely.geometry import LineString, Point, Polygon
from shapely.ops import snap, split
from shapely.geometry import LineString, Point

# %% Monitoring

Expand Down Expand Up @@ -76,9 +72,7 @@ def get_endpoints_from_lines(lines: gpd.GeoDataFrame) -> gpd.GeoDataFrame:

def add_point_to_linestring(point: Point, linestring: LineString) -> LineString:
"""
Inserts point into a linestring, placing the point next to its
nearest neighboring point in a way that minimizes the total length
of the linestring.
Inserts point into a linestring, placing the point next to its nearest neighboring point in a way that minimizes the total length of the linestring.
Args:
point (Point): point
Expand Down Expand Up @@ -108,8 +102,7 @@ def add_point_to_linestring(point: Point, linestring: LineString) -> LineString:

def split_linestring_by_indices(linestring: LineString, split_indices: list) -> list:
"""
Divides a linestring into multiple linestrings based on a list that contains
the indices of the split points within the linestring
Divides a linestring into multiple linestrings based on a list that contains the indices of the split points within the linestring.
Args:
linestring (LineString): Linestring
Expand All @@ -129,8 +122,9 @@ def split_linestring_by_indices(linestring: LineString, split_indices: list) ->

def remove_duplicate_split_lines(lines: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
"""
Removes duplicates of split lines from line feature vector dataset. Duplicates
are removed from a subselection from the line feature dataset that contains
Removes duplicates of split lines from line feature vector dataset.
Duplicates are removed from a subselection from the line feature dataset that contains
all line features that have been split.
Args:
Expand Down Expand Up @@ -160,8 +154,9 @@ def remove_duplicate_split_lines(lines: gpd.GeoDataFrame) -> gpd.GeoDataFrame:

def connect_lines_by_endpoints(split_endpoints: gpd.GeoDataFrame, lines: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
"""
Connects boundary lines to other lines, based on instructions for each line endpoint. Connections are
created by inserting the endpoints into their target lines. The target line features are
Connects boundary lines to other lines, based on instructions for each line endpoint.
Connections are created by inserting the endpoints into their target lines. The target line features are
split afterwards in order to create nodes at the intersection of the connected linestrings.
Args:
Expand All @@ -172,7 +167,6 @@ def connect_lines_by_endpoints(split_endpoints: gpd.GeoDataFrame, lines: gpd.Geo
-------
gpd.GeoDataFrame: line feature dataframe
"""

listed_lines = list(itertools.chain.from_iterable(split_endpoints["target_lines"]))
listed_points = list(itertools.chain.from_iterable(split_endpoints["points_to_target_lines"]))
connections_to_create = pd.DataFrame({"lines": listed_lines, "point": listed_points})
Expand All @@ -193,12 +187,12 @@ def connect_lines_by_endpoints(split_endpoints: gpd.GeoDataFrame, lines: gpd.Geo
(modified_linestring, index_nearest_neighbour) = add_point_to_linestring(Point(node), linestring)

if index_nearest_neighbour == 0 and linestring.coords[0] in list(
connections_to_create.loc[connections_to_create["inserted"] == True, "point"].values
connections_to_create.loc[connections_to_create["inserted"], "point"].values
):
continue

elif index_nearest_neighbour == len(linestring.coords) - 1 and linestring.coords[-1] in list(
connections_to_create.loc[connections_to_create["inserted"] == True, "point"].values
connections_to_create.loc[connections_to_create["inserted"], "point"].values
):
continue

Expand Down Expand Up @@ -231,9 +225,9 @@ def connect_lines_by_endpoints(split_endpoints: gpd.GeoDataFrame, lines: gpd.Geo

def connect_endpoints_by_buffer(lines: gpd.GeoDataFrame, buffer_distance: float = 0.5) -> gpd.GeoDataFrame:
"""
Connects boundary line endpoints within a vector dataset to neighbouring lines that pass
within a specified buffer distance with respect to the the boundary endpoints. The boundary
endpoints are inserted into the passing linestrings
Connects boundary line endpoints to neighbouring lines that pass within a specified buffer distance with respect to the boundary endpoints.
The boundary endpoints are inserted into the passing linestrings.
Args:
lines (gpd.GeoDataFrame): Line vector data
Expand Down Expand Up @@ -281,9 +275,9 @@ def connect_endpoints_by_buffer(lines: gpd.GeoDataFrame, buffer_distance: float
boundary_endpoints["crossed_by_unconnected_lines"] = boundary_endpoints.apply(
lambda x: True in [True not in y for y in x["start_or_endpoint_overlaying_line_buffers"]], axis=1
)
unconnected_endpoints = boundary_endpoints[
boundary_endpoints["crossed_by_unconnected_lines"] == True
].reset_index(drop=True)
unconnected_endpoints = boundary_endpoints[boundary_endpoints["crossed_by_unconnected_lines"]].reset_index(
drop=True
)
unconnected_endpoints["target_lines"] = unconnected_endpoints.apply(
lambda x: [
x["overlaying_line_buffers"][i]
Expand Down Expand Up @@ -323,14 +317,14 @@ def connect_endpoints_by_buffer(lines: gpd.GeoDataFrame, buffer_distance: float


def add_overlapping_polygons(
left_geodataframe: gpd.GeoDataFrame(),
right_geodataframe: gpd.GeoDataFrame(),
left_geodataframe: gpd.GeoDataFrame,
right_geodataframe: gpd.GeoDataFrame,
left_id_column: str,
right_id_column: str,
):
"""
creates a column in a left geodataframe where it lists the overlapping
polygons from the right geodataframe for each polygon in the left geodataframe.
Create a column in a left geodataframe where it lists the overlapping polygons from the right geodataframe for each polygon in the left geodataframe.
The id columns of the right and left dataframe have to be defined.
Parameters
Expand All @@ -352,7 +346,6 @@ def add_overlapping_polygons(
insights in the overlapping polygons from the right dataframe
"""

# Calculate total areas of left and right polygons
left_geodataframe["left_area"] = left_geodataframe["geometry"].apply(lambda x: x.area)
right_geodataframe["surface_area"] = right_geodataframe.area
Expand Down Expand Up @@ -409,8 +402,8 @@ def get_most_overlapping_polygon(
right_id_column: str,
):
"""
creates a column in a left geodataframe that contains IDs of the most overlapping
polygon from the right geodataframe based on their geometries.
Create a column in a left geodataframe that contains IDs of the most overlapping polygon from the right geodataframe based on their geometries.
The id columns of the left and right dataframe have to be defined.
Parameters
Expand All @@ -431,7 +424,6 @@ def get_most_overlapping_polygon(
the updated left geodataframe
"""

left_geodataframe = add_overlapping_polygons(left_geodataframe, right_geodataframe, left_id_column, right_id_column)

left_geodataframe["overlapping_areas"] = left_geodataframe["overlapping_areas"].apply(lambda x: pd.DataFrame(x))
Expand Down Expand Up @@ -511,15 +503,15 @@ def get_most_intersecting(gdf, polygon, left_id):
return None

left_gdf = get_touching_polygons_from_within_gdf(left_gdf, left_id)
if type(right_gdf) == gpd.GeoDataFrame:
if isinstance(right_gdf, gpd.GeoDataFrame):
left_gdf = get_most_overlapping_polygon_from_other_gdf(left_gdf, right_gdf, left_id, right_id)
left_gdf["right_id"][left_gdf[left_id] == left_gdf["right_id"]] = None
left_gdf["touching_polygons"] = left_gdf["touching_polygons"].apply(
lambda x: pd.DataFrame(left_gdf[left_gdf[left_id].isin(x)])
)
left_gdf["touching_polygons"] = left_gdf["touching_polygons"].apply(lambda x: x[x["basin"] is not None])
left_gdf["touching_polygons"] = left_gdf["touching_polygons"].apply(lambda x: x[x["basin"].isna() == False])
if type(right_gdf) == gpd.GeoDataFrame:
left_gdf["touching_polygons"] = left_gdf["touching_polygons"].apply(lambda x: x[x["basin"].notna()])
if isinstance(right_gdf, gpd.GeoDataFrame):
left_gdf["touching_polygons"] = left_gdf.apply(
lambda x: x["touching_polygons"][x["touching_polygons"]["right_id"] == x["right_id"]]
if x["right_id"] is not None
Expand Down
23 changes: 5 additions & 18 deletions scripts/notebooks/hydamo_preprocessing/preprocessing.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,14 @@
import datetime
import itertools
import logging
import os
import time
import warnings

import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
from shapely.geometry import LineString, Point, Polygon
from shapely.ops import snap, split

from .general import (
add_overlapping_polygons,
add_point_to_linestring,
connect_endpoints_by_buffer,
connect_lines_by_endpoints,
get_endpoints_from_lines,
get_most_overlapping_polygon,
remove_duplicate_split_lines,
report_time_interval,
split_linestring_by_indices,
)

# %% wfd
Expand All @@ -36,7 +23,7 @@ def add_wfd_id_to_hydroobjects(
overlap_ratio: float = 0.9,
) -> gpd.GeoDataFrame:
"""
Assign Water Framework Directive (WFD) waterbody ID to hydroobjects based on overlap ratio.
Parameters
----------
Expand All @@ -58,7 +45,6 @@ def add_wfd_id_to_hydroobjects(
GeoDataFrame that contains hydroobjects with their assigned wfd body id
"""

warnings.filterwarnings("ignore")
start_time = time.time()

Expand Down Expand Up @@ -94,7 +80,7 @@ def add_wfd_id_to_hydroobjects(
lambda x: x["most_overlapping_polygon_id"]
if x["most_overlapping_polygon_area"] > overlap_ratio * x["left_area"]
else None
if type(x["most_overlapping_polygon_area"]) == float
if isinstance(x["most_overlapping_polygon_area"], float)
else None,
axis=1,
)
Expand All @@ -105,8 +91,8 @@ def add_wfd_id_to_hydroobjects(
end_time = time.time()
passed_time = report_time_interval(start_time, end_time)

nr_hydroobjects_wfd = len(hydroobjects[hydroobjects[wfd_id_column].isna() == False][wfd_id_column])
nr_unique_wfd_ids = len(hydroobjects[hydroobjects[wfd_id_column].isna() == False][wfd_id_column].unique())
nr_hydroobjects_wfd = len(hydroobjects[hydroobjects[wfd_id_column].notna()][wfd_id_column])
nr_unique_wfd_ids = len(hydroobjects[hydroobjects[wfd_id_column].notna()][wfd_id_column].unique())

hydroobjects = hydroobjects.drop(
columns=[
Expand Down Expand Up @@ -143,6 +129,7 @@ def preprocess_hydamo_hydroobjects(
overlap_ratio_wfd: float = 0.9,
) -> gpd.GeoDataFrame:
"""
Preprocess hydamo hydroobjects.
Parameters
----------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@
import warnings
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ribasim_lumping import create_ribasim_lumping_network

Expand Down
2 changes: 2 additions & 0 deletions scripts/ribasim_lumping/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from ribasim_lumping.ribasim_lumping import create_ribasim_lumping_network

__all__ = ["create_ribasim_lumping_network"]
6 changes: 4 additions & 2 deletions scripts/ribasim_lumping/dhydro/read_dhydro_network.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
Read network locations from a D-Hydro simulation.
Harm Nomden (Sweco)
"""

Expand Down Expand Up @@ -202,7 +203,9 @@ def get_dhydro_structures_locations(structures_file: Path, branches_gdf: gpd.Geo


def check_number_of_pumps_at_pumping_station(pumps_gdf: gpd.GeoDataFrame, set_name: str):
"""Check number of pumps at pumping station and combine them into one representative pump
"""
Check number of pumps at pumping station and combine them into one representative pump.
Input: Geodataframe with pumps with multiple per location
Output: Geodataframe with one pump per location.
Total capacity (sum), Max start level, Min stop level
Expand Down Expand Up @@ -241,7 +244,6 @@ def check_number_of_pumps_at_pumping_station(pumps_gdf: gpd.GeoDataFrame, set_na

def split_dhydro_structures(structures_gdf: gpd.GeoDataFrame, set_name: str):
"""Get all DHydro structures dataframes"""

list_structure_types = list(structures_gdf["object_type"].unique())
structures_gdf_dict = {}
for structure_type in list_structure_types:
Expand Down
4 changes: 3 additions & 1 deletion scripts/ribasim_lumping/dhydro/read_dhydro_simulations.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,9 @@ def add_dhydro_simulation_data(
his_data: xr.Dataset = None,
map_data: xu.UgridDataset = None,
):
"""Receives his- and map-data. calculations should be placed in dhydro_results_dir
"""
Receives his- and map-data. Calculations should be placed in dhydro_results_dir.
- set_name
- within directory: simulations_dir
- at timestamps: simulations_ts
Expand Down
Loading

0 comments on commit 6ea29f2

Please sign in to comment.