General cleanup (#244)
* Remove mutable defaults

* Remove unnecessary else

* Use getattr instead of __getattribute__

* Rename for improved clarity

* Use dict comprehension

* Update test data for indent

* Use error instead of e

* Use getattr instead of __getattribute__
phackstock authored May 17, 2023
1 parent d9de678 commit b76b28f
Showing 8 changed files with 65 additions and 63 deletions.
4 changes: 3 additions & 1 deletion nomenclature/__init__.py
@@ -30,7 +30,7 @@
__version__ = version("nomenclature-iamc")


def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=[]):
def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=None):
"""Parses an xlsx file with a codelist and writes a yaml file
Parameters
@@ -46,6 +46,8 @@ def create_yaml_from_xlsx(source, target, sheet_name, col, attrs=[]):
attrs : list, optional
Columns from `sheet_name` to use as attributes.
"""
if attrs is None:
attrs = []
SPECIAL_CODELIST.get(col.lower(), CodeList).read_excel(
name="", source=source, sheet_name=sheet_name, col=col, attrs=attrs
).to_yaml(target)
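
The `attrs=None` change above is the standard fix for Python's mutable default argument pitfall: a default such as `attrs=[]` is evaluated once, when the function is defined, and the same list object is then shared by every call that omits the argument. A minimal sketch of the problem and the fix (hypothetical function, not part of nomenclature):

def collect(item, items=[]):        # one shared list for all calls
    items.append(item)
    return items

collect("a")   # ["a"]
collect("b")   # ["a", "b"]  <- the default list remembers the previous call

def collect_fixed(item, items=None):
    if items is None:               # a fresh list on every call
        items = []
    items.append(item)
    return items

collect_fixed("a")   # ["a"]
collect_fixed("b")   # ["b"]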
28 changes: 14 additions & 14 deletions nomenclature/codelist.py
@@ -1,10 +1,10 @@
import logging
from pathlib import Path
from typing import ClassVar, Dict, List

import pandas as pd
import numpy as np
import pandas as pd
import yaml
import logging
from jsonschema import validate
from pyam.utils import write_sheet
from pydantic import BaseModel, validator
@@ -17,18 +17,17 @@
VariableRenameTargetError,
)


here = Path(__file__).parent.absolute()


def read_validation_schema(i):
with open(here / "validation_schemas" / f"{i}_schema.yaml", "r") as f:
def read_validation_schema(schema):
with open(here / "validation_schemas" / f"{schema}_schema.yaml", "r") as f:
schema = yaml.safe_load(f)
return schema


SCHEMA_TYPES = ("variable", "tag", "region", "generic")
SCHEMA_MAPPING = dict([(i, read_validation_schema(i)) for i in SCHEMA_TYPES])
SCHEMA_MAPPING = {schema: read_validation_schema(schema) for schema in SCHEMA_TYPES}


class CodeList(BaseModel):
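
The `SCHEMA_MAPPING` change above swaps `dict()` over a list of tuples for a dict comprehension, which avoids the intermediate list and states the key-value relationship directly. A minimal sketch, with a trivial stand-in for `read_validation_schema`:

SCHEMA_TYPES = ("variable", "tag", "region", "generic")

def read_schema(schema):                 # stand-in for read_validation_schema
    return {"title": schema}

# old: build a list of (key, value) tuples, then hand it to dict()
mapping_old = dict([(s, read_schema(s)) for s in SCHEMA_TYPES])

# new: dict comprehension, same result, no intermediate list
mapping_new = {schema: read_schema(schema) for schema in SCHEMA_TYPES}

assert mapping_old == mapping_new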
@@ -223,7 +222,7 @@ def from_directory(cls, name: str, path: Path, file_glob_pattern: str = "**/*"):
return cls(name=name, mapping=mapping)

@classmethod
def read_excel(cls, name, source, sheet_name, col, attrs=[]):
def read_excel(cls, name, source, sheet_name, col, attrs=None):
"""Parses an xlsx file with a codelist
Parameters
@@ -239,6 +238,8 @@ def read_excel(cls, name, source, sheet_name, col, attrs=[]):
attrs : list, optional
Columns from `sheet_name` to use as attributes.
"""
if attrs is None:
attrs = []
codelist = pd.read_excel(source, sheet_name=sheet_name, usecols=[col] + attrs)

# replace nan with None
@@ -274,8 +275,8 @@ def to_yaml(self, path=None):
"""

class Dumper(yaml.Dumper):
def increase_indent(self, flow=False, *args, **kwargs):
return super().increase_indent(flow=flow, indentless=False)
def increase_indent(self, flow: bool = False, indentless: bool = False):
return super().increase_indent(flow=flow, indentless=indentless)

# translate to list of nested dicts, replace None by empty field, write to file
stream = (
@@ -288,11 +289,10 @@ def increase_indent(self, flow=False, *args, **kwargs):
.replace(": nan\n", ":\n")
)

if path is not None:
with open(path, "w") as file:
file.write(stream)
else:
if path is None:
return stream
with open(path, "w") as file:
file.write(stream)
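
Two things change in `to_yaml`: the `increase_indent` override now takes an explicit `indentless` parameter instead of `*args, **kwargs`, and the `if/else` is flipped into an early return. Overriding `increase_indent` is the usual PyYAML hook for controlling how block sequences are indented under a mapping key, which is what the updated test data at the bottom of this commit reflects. A self-contained sketch of the mechanism (sample data made up; this forces indented list items rather than reproducing the exact behaviour of the code above):

import yaml

class IndentedDumper(yaml.Dumper):
    # force block sequences to be indented under their parent key
    def increase_indent(self, flow=False, indentless=False):
        return super().increase_indent(flow=flow, indentless=False)

data = {"region-aggregation": [{"Primary Energy (mean)": {"method": "mean"}}]}

print(yaml.dump(data))                         # default: "- " sits flush with the key
print(yaml.dump(data, Dumper=IndentedDumper))  # list items indented below the key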

def to_pandas(self, sort_by_code: bool = False) -> pd.DataFrame:
"""Export the CodeList to a :class:`pandas.DataFrame`
@@ -585,7 +585,7 @@ def hierarchy(self) -> List[str]:
List[str]
"""
return sorted(list(set(v.hierarchy for v in self.mapping.values())))
return sorted(list({v.hierarchy for v in self.mapping.values()}))
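
Same idea as the dict comprehension earlier in this file: a set comprehension replaces `set()` wrapped around a generator. Note that `sorted()` already returns a list, so the surrounding `list()` call is redundant in both the old and the new version. A tiny sketch with made-up hierarchy values:

hierarchies = ["Primary Energy", "Final Energy", "Primary Energy"]

old = sorted(list(set(h for h in hierarchies)))
new = sorted({h for h in hierarchies})      # sorted() already returns a list

assert old == new == ["Final Energy", "Primary Energy"]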


class MetaCodeList(CodeList):
2 changes: 1 addition & 1 deletion nomenclature/core.py
@@ -10,7 +10,7 @@
logger = logging.getLogger(__name__)


@validate_arguments(config=dict(arbitrary_types_allowed=True))
@validate_arguments(config={"arbitrary_types_allowed": True})
def process(
df: pyam.IamDataFrame,
dsd: DataStructureDefinition,
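
A dict literal is equivalent to the `dict(...)` keyword form, but it also allows keys that are not valid identifiers and skips the name lookup and call. A trivial sketch:

config_old = dict(arbitrary_types_allowed=True)
config_new = {"arbitrary_types_allowed": True}

assert config_old == config_new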
68 changes: 35 additions & 33 deletions nomenclature/processor/region.py
@@ -82,10 +82,9 @@ def is_single_constituent_region(self):
def rename_dict(self):
if self.is_single_constituent_region:
return {self.constituent_regions[0]: self.name}
else:
raise AttributeError(
"rename_dict is only available for single constituent regions"
)
raise AttributeError(
"rename_dict is only available for single constituent regions"
)
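
As in `to_yaml` above, the `else` after the early `return` is dropped: once the single-constituent case has returned, the `raise` can only be reached in the other case, so the extra indentation adds nothing. A standalone sketch of the pattern (hypothetical function):

def rename_dict(constituent_regions, name):
    if len(constituent_regions) == 1:
        return {constituent_regions[0]: name}
    # no else needed: this line is only reached when the condition above is False
    raise AttributeError("rename_dict is only available for single constituent regions")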


class RegionAggregationMapping(BaseModel):
@@ -223,20 +222,20 @@ def from_file(cls, file: Union[Path, str]):
This function is used to convert a model mapping yaml file into a dictionary
which is used to initialize a RegionAggregationMapping.
"""
SCHEMA_FILE = here / "../validation_schemas" / "region_mapping_schema.yaml"
schema_file = here / "../validation_schemas" / "region_mapping_schema.yaml"
file = Path(file) if isinstance(file, str) else file
with open(file, "r") as f:
mapping_input = yaml.safe_load(f)
with open(SCHEMA_FILE, "r") as f:
with open(schema_file, "r") as f:
schema = yaml.safe_load(f)

# Validate the input data using jsonschema
try:
jsonschema.validate(mapping_input, schema)
except jsonschema.ValidationError as e:
except jsonschema.ValidationError as error:
# Add file information in case of error
raise jsonschema.ValidationError(
f"{e.message} in {get_relative_path(file)}"
f"{error.message} in {get_relative_path(file)}"
)

# Add the file name to mapping_input
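
Renaming the caught exception from `e` to `error` is cosmetic, but the surrounding pattern is worth spelling out: the jsonschema error is caught and re-raised with the offending file name attached, so the user knows which mapping file failed. A minimal, self-contained sketch (schema and file name are made up):

import jsonschema

schema = {"type": "object", "required": ["model"]}

def validate_mapping(mapping_input, file_name):
    try:
        jsonschema.validate(mapping_input, schema)
    except jsonschema.ValidationError as error:
        # add the file name so the message points at the offending mapping
        raise jsonschema.ValidationError(f"{error.message} in {file_name}")

validate_mapping({"model": "MESSAGE"}, "mapping.yaml")   # passes silently
# validate_mapping({}, "mapping.yaml")                   # raises with the file name appended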
@@ -245,24 +244,27 @@ def from_file(cls, file: Union[Path, str]):
# Reformat the "native_regions"
if "native_regions" in mapping_input:
native_region_list: List[Dict] = []
for nr in mapping_input["native_regions"]:
if isinstance(nr, str):
native_region_list.append({"name": nr})
elif isinstance(nr, dict):
for native_region in mapping_input["native_regions"]:
if isinstance(native_region, str):
native_region_list.append({"name": native_region})
elif isinstance(native_region, dict):
native_region_list.append(
{"name": list(nr)[0], "rename": list(nr.values())[0]}
{
"name": list(native_region)[0],
"rename": list(native_region.values())[0],
}
)
mapping_input["native_regions"] = native_region_list

# Reformat the "common_regions"
if "common_regions" in mapping_input:
common_region_list: List[Dict[str, List[Dict[str, str]]]] = []
for cr in mapping_input["common_regions"]:
cr_name = list(cr)[0]
for common_region in mapping_input["common_regions"]:
common_region_name = list(common_region)[0]
common_region_list.append(
{
"name": cr_name,
"constituent_regions": cr[cr_name],
"name": common_region_name,
"constituent_regions": common_region[common_region_name],
}
)
mapping_input["common_regions"] = common_region_list
@@ -349,22 +351,22 @@ def from_directory(cls, path: DirectoryPath, dsd: DataStructureDefinition):
for file in (f for f in path.glob("**/*") if f.suffix in {".yaml", ".yml"}):
try:
mapping = RegionAggregationMapping.from_file(file)
for m in mapping.model:
if m not in mapping_dict:
mapping_dict[m] = mapping
for model in mapping.model:
if model not in mapping_dict:
mapping_dict[model] = mapping
else:
errors.append(
ErrorWrapper(
ModelMappingCollisionError(
model=m,
model=model,
file1=mapping.file,
file2=mapping_dict[m].file,
file2=mapping_dict[model].file,
),
"__root__",
)
)
except (pydantic.ValidationError, jsonschema.ValidationError) as e:
errors.append(ErrorWrapper(e, "__root__"))
except (pydantic.ValidationError, jsonschema.ValidationError) as error:
errors.append(ErrorWrapper(error, "__root__"))

if errors:
raise pydantic.ValidationError(errors, model=RegionProcessor)
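
Beyond the `m` to `model` rename, the structure here is collect-then-raise: every mapping file is parsed, collisions and validation failures are accumulated, and a single error is raised at the end so the user sees all problems at once. A simplified, library-agnostic sketch of that idea (not the pydantic `ErrorWrapper` machinery used above):

def load_all(paths, parse):
    results, errors = {}, []
    for path in paths:
        try:
            results[path] = parse(path)
        except ValueError as error:
            errors.append(f"{path}: {error}")    # remember the failure, keep going
    if errors:
        raise ValueError("Found errors in input files:\n" + "\n".join(errors))
    return results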
@@ -457,18 +459,18 @@ def _apply_region_processing(self, model_df: IamDataFrame) -> IamDataFrame:

# aggregate common regions
if self.mappings[model].common_regions is not None:
for cr in self.mappings[model].common_regions:
for common_region in self.mappings[model].common_regions:
# if a common region consists of a single native region, rename
if cr.is_single_constituent_region:
_df = model_df.filter(region=cr.constituent_regions[0]).rename(
region=cr.rename_dict
)
if common_region.is_single_constituent_region:
_df = model_df.filter(
region=common_region.constituent_regions[0]
).rename(region=common_region.rename_dict)
if not _df.empty:
_processed_data.append(_df._data)
continue

# if there are multiple constituent regions, aggregate
regions = [cr.name, cr.constituent_regions]
regions = [common_region.name, common_region.constituent_regions]

# first, perform 'simple' aggregation (no arguments)
simple_vars = [
@@ -540,13 +542,13 @@ def _aggregate_region(df, var, *regions, **kwargs):
"""Perform region aggregation with kwargs catching inconsistent-index errors"""
try:
return df.aggregate_region(var, *regions, **kwargs)
except ValueError as e:
if str(e) == "Inconsistent index between variable and weight!":
except ValueError as error:
if str(error) == "Inconsistent index between variable and weight!":
logger.info(
f"Could not aggregate '{var}' for region '{regions[0]}' ({kwargs})"
)
else:
raise e
raise error
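
`_aggregate_region` swallows exactly one known `ValueError` (the inconsistent-index message) and logs it; anything else propagates. Re-raising the caught object, whether as `raise error` or a bare `raise`, keeps the original traceback. A minimal sketch of the pattern with a hypothetical aggregation callable:

import logging

logger = logging.getLogger(__name__)

def aggregate_or_skip(aggregate, var, region):
    """Run an aggregation, skipping only the known weight-mismatch error."""
    try:
        return aggregate(var, region)
    except ValueError as error:
        if str(error) == "Inconsistent index between variable and weight!":
            logger.info(f"Could not aggregate '{var}' for region '{region}'")
            return None
        raise    # any other ValueError propagates with its original traceback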


def _compare_and_merge(original: pd.Series, aggregated: pd.Series) -> IamDataFrame:
4 changes: 2 additions & 2 deletions nomenclature/processor/required_data.py
@@ -63,8 +63,8 @@ def validate_with_definition(self, dsd: DataStructureDefinition) -> None:
("region", "region"),
("variable", "variables"),
):
if invalid := dsd.__getattribute__(dimension).validate_items(
self.__getattribute__(attribute_name) or []
if invalid := getattr(dsd, dimension).validate_items(
getattr(self, attribute_name) or []
):
error_msg += (
f"The following {dimension}(s) were not found in the "
14 changes: 7 additions & 7 deletions nomenclature/testing.py
@@ -63,11 +63,11 @@ def _check_mappings(
raise FileNotFoundError(f"Mappings directory not found: {path / mappings}")


def _collect_requiredData_errors(
requiredDatadir: Path, dsd: DataStructureDefinition
def _collect_RequiredData_errors(
required_data_dir: Path, dsd: DataStructureDefinition
) -> None:
errors: List[str] = []
for file in (requiredDatadir).iterdir():
for file in required_data_dir.iterdir():
try:
RequiredDataValidator.from_file(file).validate_with_definition(dsd)
except pydantic.ValidationError as pve:
@@ -77,7 +77,7 @@ def _collect_requiredData_errors(
raise ValueError(f"Found error(s) in required data files: {all_errors}")


def _check_requiredData(
def _check_RequiredData(
path: Path,
definitions: str = "definitions",
dimensions: Optional[List[str]] = None,
@@ -86,10 +86,10 @@ def _check_requiredData(
dsd = DataStructureDefinition(path / definitions, dimensions)
if required_data is None:
if (path / "requiredData").is_dir():
_collect_requiredData_errors(path / "required_data", dsd)
_collect_RequiredData_errors(path / "required_data", dsd)

elif (path / required_data).is_dir():
_collect_requiredData_errors(path / required_data, dsd)
_collect_RequiredData_errors(path / required_data, dsd)
else:
raise FileNotFoundError(
f"Directory for required data not found at: {path / required_data}"
@@ -143,7 +143,7 @@ def assert_valid_structure(
f"`definitions` directory is empty: {path / definitions}"
)
_check_mappings(path, definitions, dimensions, mappings)
_check_requiredData(path, definitions, dimensions, required_data)
_check_RequiredData(path, definitions, dimensions, required_data)


# Todo: add function which runs `DataStructureDefinition(path).validate(scenario)`
4 changes: 1 addition & 3 deletions nomenclature/validation.py
@@ -25,9 +25,7 @@ def validate(dsd, df, dimensions):
error = False

for dim in dimensions:
if invalid := dsd.__getattribute__(dim).validate_items(
df.__getattribute__(dim)
):
if invalid := getattr(dsd, dim).validate_items(getattr(df, dim)):
log_error(dim, invalid)
error = True

4 changes: 2 additions & 2 deletions tests/data/excel_io/validation_nc_list_arg.yaml
@@ -2,8 +2,8 @@
description: Total primary energy consumption
unit: EJ/yr
region-aggregation:
- Primary Energy (mean):
method: mean
- Primary Energy (mean):
method: mean
- Primary Energy (mean):
description: Mean primary energy consumption
unit: EJ/yr
