diff --git a/dask-felleskomponenter/example_table_metadata_gold.json b/dask-felleskomponenter/example_table_metadata_gold.json index 8720d26..d921aa6 100644 --- a/dask-felleskomponenter/example_table_metadata_gold.json +++ b/dask-felleskomponenter/example_table_metadata_gold.json @@ -4,10 +4,10 @@ "table": "table", "beskrivelse": "beskrivelse", "tilgangsnivaa": "Begrenset", - "medaljongnivaa": "gull", + "medaljongnivaa": "gold", "tema": "Befolkningsfordeling", "begrep": "Høydedata", "epsg_koder": "25835", - "bruksomraade": "Transport - trafikk - navigasjon", - "emneord": "bruksomraade" + "emneord": "bruksomraade", + "bruksvilkaar": "Åpne data" } \ No newline at end of file diff --git a/dask-felleskomponenter/requirements.txt b/dask-felleskomponenter/requirements.txt index ca3d873..f97cbc4 100644 --- a/dask-felleskomponenter/requirements.txt +++ b/dask-felleskomponenter/requirements.txt @@ -2,5 +2,5 @@ build twine coverage requests -pyspark +databricks-connect setuptools \ No newline at end of file diff --git a/dask-felleskomponenter/setup.py b/dask-felleskomponenter/setup.py index 8b8dda0..b6ca4cb 100644 --- a/dask-felleskomponenter/setup.py +++ b/dask-felleskomponenter/setup.py @@ -7,7 +7,7 @@ setuptools.setup( name="dask-felleskomponenter", - version="0.0.23", + version="0.0.29", author="Dataplattform@Statens Kartverk", author_email="dataplattform@kartverket.no", description="Felleskomponenter på DASK", @@ -27,7 +27,6 @@ python_requires=">=3.7", install_requires=[ "requests", - "pyspark", "setuptools" ], ) diff --git a/dask-felleskomponenter/src/dask_felleskomponenter/governance/checks/common.py b/dask-felleskomponenter/src/dask_felleskomponenter/governance/checks/common.py index e149c75..2727144 100644 --- a/dask-felleskomponenter/src/dask_felleskomponenter/governance/checks/common.py +++ b/dask-felleskomponenter/src/dask_felleskomponenter/governance/checks/common.py @@ -1,9 +1,9 @@ from dataclasses import dataclass, field -from typing import Any, List, Optional +from typing import Any, Dict, List, Optional import requests -@dataclass +@dataclass(init=False) class TableMetadata: catalog: Optional[str] = field(default=None) schema: Optional[str] = field(default=None) @@ -12,10 +12,21 @@ class TableMetadata: tilgangsnivaa: Optional[str] = field(default=None) medaljongnivaa: Optional[str] = field(default=None) tema: Optional[str] = field(default=None) - emneord: Optional[str] = field(default=None) + emneord: Optional[str] = field(default=None) epsg_koder: Optional[str] = field(default=None) - bruksomraade: Optional[str] = field(default=None) begrep: Optional[str] = field(default=None) + bruksvilkaar: Optional[str] = field(default=None) + + optional_params: Dict[str, Any] = field(default_factory=dict) + + def __init__(self, **kwargs): + self.optional_params = {} + for field_name in self.__dataclass_fields__: + if field_name != 'optional_params': + setattr(self, field_name, kwargs.get(field_name, None)) + for key, value in kwargs.items(): + if key not in self.__dataclass_fields__ and "delta." not in key: + self.optional_params[key] = value @dataclass class MetadataError: @@ -25,6 +36,8 @@ class MetadataError: column: Optional[str] description: str solution: Optional[str] + for_field: str + valid_values: str | List[str] def get_valid_codelist_values(kodeliste_url: str, override_kodeliste_keyword: Optional[str] = None) -> List[str]: kodeliste_entry = "label" if override_kodeliste_keyword == None else override_kodeliste_keyword diff --git a/dask-felleskomponenter/src/dask_felleskomponenter/governance/checks/table.py b/dask-felleskomponenter/src/dask_felleskomponenter/governance/checks/table.py index c096774..008e63d 100644 --- a/dask-felleskomponenter/src/dask_felleskomponenter/governance/checks/table.py +++ b/dask-felleskomponenter/src/dask_felleskomponenter/governance/checks/table.py @@ -1,16 +1,16 @@ from typing import List, Optional from .common import MetadataError, check_codelist_value, TableMetadata, get_valid_codelist_values -def _generate_metadata_error(catalog: str, schema: str, table: str, field: str, type: str, is_missing: bool, valid_values: Optional[str] = None): +def _generate_metadata_error(catalog: str, schema: str, table: str, field: str, type: str, is_missing: bool, valid_values_description: Optional[str] = None, valid_values: str | List[str] = "string"): error_reason = "mangler" if is_missing else "er ugyldig" description = f"🔴 Feil: '{field}' {error_reason} i table properties. Type: <{type}>" - if valid_values != None: - description += f" - {valid_values}" + if valid_values_description != None: + description += f" - {valid_values_description}" if field == "beskrivelse": solution = f"COMMENT ON TABLE {catalog}.{schema}.{table} IS '<>'" else: solution = f"ALTER TABLE {catalog}.{schema}.{table} SET TAGS ( '{field}' = '<>')" - return MetadataError(catalog=catalog, schema=schema, table=table, column=None, description=description, solution=solution) + return MetadataError(catalog=catalog, schema=schema, table=table, column=None, description=description, solution=solution, for_field=field, valid_values=valid_values) def check_beskrivelse(metadata: TableMetadata, context: List[MetadataError]) -> List[MetadataError]: if not check_codelist_value(None, metadata.beskrivelse): @@ -23,14 +23,14 @@ def check_tilgangsnivaa(metadata: TableMetadata, context: List[MetadataError]) - if not check_codelist_value(kodeliste_url, metadata.tilgangsnivaa): valid_values = get_valid_codelist_values(kodeliste_url) - context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "tilgangsnivaa", "sikkerhetsnivaa", metadata.tilgangsnivaa == None, f"gyldige verdier: {valid_values}")) + context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "tilgangsnivaa", "sikkerhetsnivaa", metadata.tilgangsnivaa == None, f"gyldige verdier: {valid_values}", valid_values=valid_values)) return context def check_medaljongnivaa(metadata: TableMetadata, context: List[MetadataError]) -> List[MetadataError]: - valid_values = ["bronse", "sølv", "gull"] + valid_values = ["bronze", "silver", "gold"] if not check_codelist_value(None, metadata.medaljongnivaa, valid_values): - context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "medaljongnivaa", "valør", metadata.medaljongnivaa == None, f"gyldige verdier: {valid_values}")) + context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "medaljongnivaa", "valør", metadata.medaljongnivaa == None, f"gyldige verdier: {valid_values}", valid_values=valid_values)) return context @@ -39,7 +39,7 @@ def check_tema(metadata: TableMetadata, context: List[MetadataError]) -> List[Me if not check_codelist_value(kodeliste_url, metadata.tema): valid_values = get_valid_codelist_values(kodeliste_url) - context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "tema", "inspiretema", metadata.tema == None, f"gyldige verdier: {valid_values}")) + context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "tema", "inspiretema", metadata.tema == None, f"gyldige verdier: {valid_values}", valid_values=valid_values)) return context @@ -54,7 +54,7 @@ def check_Bruksvilkaar(metadata: TableMetadata, context: List[MetadataError]) -> if not check_codelist_value(kodeliste_url, metadata.bruksvilkaar): valid_values = get_valid_codelist_values(kodeliste_url) - context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "bruksvilkaar", "tilgangsrestriksjoner", metadata.bruksvilkaar == None, f"gyldige verdier: {valid_values}")) + context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "bruksvilkaar", "tilgangsrestriksjoner", metadata.bruksvilkaar == None, f"gyldige verdier: {valid_values}", valid_values=valid_values)) return context @@ -63,14 +63,14 @@ def check_begrep(metadata: TableMetadata, context: List[MetadataError]) -> List[ if not check_codelist_value(kodeliste_url, metadata.begrep): valid_values = get_valid_codelist_values(kodeliste_url) - context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "begrep", "nasjonal", metadata.begrep == None, f"gyldige verdier: {valid_values}")) + context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "begrep", "nasjonal", metadata.begrep == None, f"gyldige verdier: {valid_values}", valid_values=valid_values)) return context checks_for_valor = { - "bronse": [check_beskrivelse, check_tilgangsnivaa], - "sølv": [check_beskrivelse, check_tema, check_emneord, check_tilgangsnivaa, check_bruksomraade], - "gull": [check_beskrivelse, check_tema, check_emneord, check_begrep, check_tilgangsnivaa, check_bruksomraade], + "bronze": [check_beskrivelse, check_tilgangsnivaa], + "silver": [check_beskrivelse, check_tema, check_emneord, check_tilgangsnivaa, check_Bruksvilkaar], + "gold": [check_beskrivelse, check_tema, check_emneord, check_begrep, check_tilgangsnivaa, check_Bruksvilkaar], } @@ -84,3 +84,12 @@ def validate_table(metadata: TableMetadata) -> List[MetadataError]: validation_context = check(metadata, validation_context) return validation_context + +def get_mandatory_metadata_for_medaljongnivaa(medaljongnivaa: str): + metadata_dict = { } + + for check in checks_for_valor[medaljongnivaa]: + metadata_error = check(TableMetadata(), [])[0] + metadata_dict[metadata_error.for_field] = metadata_error + + return metadata_dict diff --git a/dask-felleskomponenter/src/dask_felleskomponenter/tests/test_table_validation.py b/dask-felleskomponenter/src/dask_felleskomponenter/tests/test_table_validation.py index 9916595..5438420 100644 --- a/dask-felleskomponenter/src/dask_felleskomponenter/tests/test_table_validation.py +++ b/dask-felleskomponenter/src/dask_felleskomponenter/tests/test_table_validation.py @@ -37,3 +37,15 @@ def test_validate_succeeds_with_correct_set_of_gold_metadata(self): # Assert self.assertListEqual(result, []) + def test_validate_succeeds_with_correct_set_of_gold_metadata_and_additional_optional_field(self): + # Arrange + gold_mock_json_data = read_file("example_table_metadata_gold.json") + gold_mock_json_data["optional"] = "value" + Metadata.get_table_metadata = MagicMock(return_value=TableMetadata(**gold_mock_json_data)) + metadata = Metadata("catalog", "schema", "table") + + # Act + result = metadata.validate() + + # Assert + self.assertListEqual(result, [])