Skip to content

Commit

Permalink
linted
Browse files Browse the repository at this point in the history
  • Loading branch information
riadas committed Oct 24, 2023
1 parent 713022f commit 7185df8
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 26 deletions.
5 changes: 2 additions & 3 deletions cities/utils/clean_spending_HHS.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,9 @@ def clean_spending_HHS():

assert spending_HHS["GeoFIPS"].nunique() == spending_HHS["GeoName"].nunique()
assert spending_HHS["GeoFIPS"].nunique() == gdp["GeoFIPS"].nunique()

# Rename the 'year' column to 'Year'
spending_HHS = spending_HHS.rename(columns={'year': 'Year'})

# Rename the 'year' column to 'Year'
spending_HHS = spending_HHS.rename(columns={"year": "Year"})

# standardizing and saving
spending_HHS_long = spending_HHS.copy()
Expand Down
2 changes: 1 addition & 1 deletion cities/utils/clean_spending_commerce.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def clean_spending_commerce():
)
assert spending_commerce["GeoFIPS"].nunique() == gdp["GeoFIPS"].nunique()

spending_commerce = spending_commerce.rename(columns={'year': 'Year'})
spending_commerce = spending_commerce.rename(columns={"year": "Year"})

# standardizing and saving
spending_commerce_long = spending_commerce.copy()
Expand Down
4 changes: 2 additions & 2 deletions cities/utils/clean_spending_transportation.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ def clean_spending_transportation():
== spending_transportation["GeoName"].nunique()
)
assert spending_transportation["GeoFIPS"].nunique() == gdp["GeoFIPS"].nunique()
spending_transportation = spending_transportation.rename(columns={'year': 'Year'})

spending_transportation = spending_transportation.rename(columns={"year": "Year"})

# standardizing and saving
spending_transportation_long = spending_transportation.copy()
Expand Down
10 changes: 4 additions & 6 deletions cities/utils/cleaning_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import os
import re
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from pathlib import Path
import re



def find_repo_root() -> Path:
Expand Down Expand Up @@ -51,18 +50,17 @@ def standardize_and_scale(data: pd.DataFrame) -> pd.DataFrame:


def list_available_features():
    """Return the feature names that have files in ``data/processed``.

    Every entry of ``<repo root>/data/processed`` except ``.gitkeep`` is
    reduced to the text preceding its first ``_wide``, ``_long`` or ``_std``
    marker, and the distinct results are returned.

    Returns:
        list[str]: unique feature names, sorted alphabetically so the result
        is stable from one call to the next.
    """
    root = find_repo_root()
    folder_path = f"{root}/data/processed"

    # re.split always yields at least one element, so index 0 (the text before
    # the first marker, or the whole name when no marker occurs) is always
    # available and needs no emptiness check.
    feature_names = {
        re.split(r"_wide|_long|_std", file_name)[0]
        for file_name in os.listdir(folder_path)
        if file_name != ".gitkeep"
    }

    return sorted(feature_names)
28 changes: 19 additions & 9 deletions tests/test_cleaning_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@
import numpy as np
import pandas as pd

from cities.utils.cleaning_utils import (standardize_and_scale,
list_available_features, find_repo_root)
from cities.utils.cleaning_utils import (
find_repo_root,
list_available_features,
standardize_and_scale,
)

sys.path.insert(0, os.path.dirname(os.getcwd()))

Expand All @@ -19,13 +22,21 @@ def test_data_folder():

for file_name in file_names:
if file_name != ".gitkeep":
ends_with_allowed_extension = any(file_name.endswith(ext) for ext in allowed_extensions)
assert ends_with_allowed_extension, f"File '{file_name}' does not have an allowed extension."

ends_with_allowed_extension = any(
file_name.endswith(ext) for ext in allowed_extensions
)
assert (
ends_with_allowed_extension
), f"File '{file_name}' does not have an allowed extension."

all_features = list_available_features()
for feature in all_features:
valid_files = [feature + ext for ext in allowed_extensions if feature + ext in file_names]
assert len(valid_files) == 4, f"For feature '{feature}' some data formats are missing."
valid_files = [
feature + ext for ext in allowed_extensions if feature + ext in file_names
]
assert (
len(valid_files) == 4
), f"For feature '{feature}' some data formats are missing."


# set up gdp data
Expand All @@ -52,7 +63,6 @@ def test_standardize_and_scale():
assert gdp.shape == gdp_scaled.shape



all_features = list_available_features()
assert "spending_commerce" in all_features
assert ".gitkeep" not in all_features
Expand All @@ -61,4 +71,4 @@ def test_standardize_and_scale():
if item not in unique_features:
unique_features.append(item)

assert len(unique_features) == len(all_features)
assert len(unique_features) == len(all_features)
5 changes: 0 additions & 5 deletions tests/test_data_grabber.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@
from cities.utils.cleaning_utils import list_available_features
from cities.utils.data_grabber import DataGrabber






# features = [
# "gdp",
# "population",
Expand Down

0 comments on commit 7185df8

Please sign in to comment.