linted

BasisResearch · Oct 24, 2023 · 7185df8
1 parent 713022f
commit 7185df8
Show file tree

Hide file tree

Showing 6 changed files with 28 additions and 26 deletions.
diff --git a/cities/utils/clean_spending_HHS.py b/cities/utils/clean_spending_HHS.py
@@ -109,10 +109,9 @@ def clean_spending_HHS():
 
     assert spending_HHS["GeoFIPS"].nunique() == spending_HHS["GeoName"].nunique()
     assert spending_HHS["GeoFIPS"].nunique() == gdp["GeoFIPS"].nunique()
-
-    # Assuming you have a DataFrame named 'your_dataframe'
-    spending_HHS = spending_HHS.rename(columns={'year': 'Year'})
 
+    # Assuming you have a DataFrame named 'your_dataframe'
+    spending_HHS = spending_HHS.rename(columns={"year": "Year"})
 
     # standardizing and saving
     spending_HHS_long = spending_HHS.copy()

diff --git a/cities/utils/clean_spending_commerce.py b/cities/utils/clean_spending_commerce.py
@@ -116,7 +116,7 @@ def clean_spending_commerce():
     )
     assert spending_commerce["GeoFIPS"].nunique() == gdp["GeoFIPS"].nunique()
 
-    spending_commerce = spending_commerce.rename(columns={'year': 'Year'})
+    spending_commerce = spending_commerce.rename(columns={"year": "Year"})
 
     # standardizing and saving
     spending_commerce_long = spending_commerce.copy()

diff --git a/cities/utils/clean_spending_transportation.py b/cities/utils/clean_spending_transportation.py
@@ -143,8 +143,8 @@ def clean_spending_transportation():
         == spending_transportation["GeoName"].nunique()
     )
     assert spending_transportation["GeoFIPS"].nunique() == gdp["GeoFIPS"].nunique()
-    
-    spending_transportation = spending_transportation.rename(columns={'year': 'Year'})
+
+    spending_transportation = spending_transportation.rename(columns={"year": "Year"})
 
     # standardizing and saving
     spending_transportation_long = spending_transportation.copy()

diff --git a/cities/utils/cleaning_utils.py b/cities/utils/cleaning_utils.py
@@ -1,11 +1,10 @@
 import os
+import re
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
 from sklearn.preprocessing import StandardScaler
-from pathlib import Path
-import re
-
 
 
 def find_repo_root() -> Path:
@@ -51,18 +50,17 @@ def standardize_and_scale(data: pd.DataFrame) -> pd.DataFrame:
 
 
 def list_available_features():
-
     root = find_repo_root()
     folder_path = f"{root}/data/processed"
     file_names = [f for f in os.listdir(folder_path) if f != ".gitkeep"]
     processed_file_names = []
 
     for file_name in file_names:
         # Use regular expressions to find the patterns and split accordingly
-        matches = re.split(r'_wide|_long|_std', file_name)
+        matches = re.split(r"_wide|_long|_std", file_name)
         if matches:
             processed_file_names.append(matches[0])
-        
+
     feature_names = list(set(processed_file_names))
 
     return feature_names
diff --git a/tests/test_cleaning_utils.py b/tests/test_cleaning_utils.py
@@ -4,8 +4,11 @@
 import numpy as np
 import pandas as pd
 
-from cities.utils.cleaning_utils import (standardize_and_scale,
-                                         list_available_features, find_repo_root)
+from cities.utils.cleaning_utils import (
+    find_repo_root,
+    list_available_features,
+    standardize_and_scale,
+)
 
 sys.path.insert(0, os.path.dirname(os.getcwd()))
 
@@ -19,13 +22,21 @@ def test_data_folder():
 
     for file_name in file_names:
         if file_name != ".gitkeep":
-            ends_with_allowed_extension = any(file_name.endswith(ext) for ext in allowed_extensions)
-            assert ends_with_allowed_extension, f"File '{file_name}' does not have an allowed extension."
-
+            ends_with_allowed_extension = any(
+                file_name.endswith(ext) for ext in allowed_extensions
+            )
+            assert (
+                ends_with_allowed_extension
+            ), f"File '{file_name}' does not have an allowed extension."
+
     all_features = list_available_features()
     for feature in all_features:
-        valid_files = [feature + ext for ext in allowed_extensions if feature + ext in file_names]
-        assert len(valid_files) == 4,  f"For feature '{feature}' some data formats are missing."
+        valid_files = [
+            feature + ext for ext in allowed_extensions if feature + ext in file_names
+        ]
+        assert (
+            len(valid_files) == 4
+        ), f"For feature '{feature}' some data formats are missing."
 
 
 # set up gdp data
@@ -52,7 +63,6 @@ def test_standardize_and_scale():
     assert gdp.shape == gdp_scaled.shape
 
 
-
 all_features = list_available_features()
 assert "spending_commerce" in all_features
 assert ".gitkeep" not in all_features
@@ -61,4 +71,4 @@ def test_standardize_and_scale():
     if item not in unique_features:
         unique_features.append(item)
 
-assert len(unique_features) == len(all_features)
\ No newline at end of file
+assert len(unique_features) == len(all_features)
diff --git a/tests/test_data_grabber.py b/tests/test_data_grabber.py
@@ -5,11 +5,6 @@
 from cities.utils.cleaning_utils import list_available_features
 from cities.utils.data_grabber import DataGrabber
 
-
-
-
-
-
 # features = [
 #     "gdp",
 #     "population",