Skip to content

Commit

Permalink
Merge pull request #111 from kartverket/tillat-valgrie-metadata
Browse files Browse the repository at this point in the history
Utvid tester og tillat valgfrie properties i TableMetadata
  • Loading branch information
jonasmw94 authored Oct 4, 2024
2 parents 7c6c3e8 + 097b87c commit 7a2b598
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 23 deletions.
6 changes: 3 additions & 3 deletions dask-felleskomponenter/example_table_metadata_gold.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
"table": "table",
"beskrivelse": "beskrivelse",
"tilgangsnivaa": "Begrenset",
"medaljongnivaa": "gull",
"medaljongnivaa": "gold",
"tema": "Befolkningsfordeling",
"begrep": "Høydedata",
"epsg_koder": "25835",
"bruksomraade": "Transport - trafikk - navigasjon",
"emneord": "bruksomraade"
"emneord": "bruksomraade",
"bruksvilkaar": "Åpne data"
}
2 changes: 1 addition & 1 deletion dask-felleskomponenter/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ build
twine
coverage
requests
pyspark
databricks-connect
setuptools
3 changes: 1 addition & 2 deletions dask-felleskomponenter/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setuptools.setup(
name="dask-felleskomponenter",
version="0.0.23",
version="0.0.29",
author="Dataplattform@Statens Kartverk",
author_email="[email protected]",
description="Felleskomponenter på DASK",
Expand All @@ -27,7 +27,6 @@
python_requires=">=3.7",
install_requires=[
"requests",
"pyspark",
"setuptools"
],
)
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from dataclasses import dataclass, field
from typing import Any, List, Optional
from typing import Any, Dict, List, Optional

import requests

@dataclass
@dataclass(init=False)
class TableMetadata:
catalog: Optional[str] = field(default=None)
schema: Optional[str] = field(default=None)
Expand All @@ -12,10 +12,21 @@ class TableMetadata:
tilgangsnivaa: Optional[str] = field(default=None)
medaljongnivaa: Optional[str] = field(default=None)
tema: Optional[str] = field(default=None)
emneord: Optional[str] = field(default=None)
emneord: Optional[str] = field(default=None)
epsg_koder: Optional[str] = field(default=None)
bruksomraade: Optional[str] = field(default=None)
begrep: Optional[str] = field(default=None)
bruksvilkaar: Optional[str] = field(default=None)

optional_params: Dict[str, Any] = field(default_factory=dict)

def __init__(self, **kwargs):
self.optional_params = {}
for field_name in self.__dataclass_fields__:
if field_name != 'optional_params':
setattr(self, field_name, kwargs.get(field_name, None))
for key, value in kwargs.items():
if key not in self.__dataclass_fields__ and "delta." not in key:
self.optional_params[key] = value

@dataclass
class MetadataError:
Expand All @@ -25,6 +36,8 @@ class MetadataError:
column: Optional[str]
description: str
solution: Optional[str]
for_field: str
valid_values: str | List[str]

def get_valid_codelist_values(kodeliste_url: str, override_kodeliste_keyword: Optional[str] = None) -> List[str]:
kodeliste_entry = "label" if override_kodeliste_keyword == None else override_kodeliste_keyword
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from typing import List, Optional
from .common import MetadataError, check_codelist_value, TableMetadata, get_valid_codelist_values

def _generate_metadata_error(catalog: str, schema: str, table: str, field: str, type: str, is_missing: bool, valid_values_description: Optional[str] = None, valid_values: "str | List[str]" = "string") -> "MetadataError":
    """Build the MetadataError for a table property that is missing or invalid.

    Args:
        catalog: Catalog part of the fully qualified table name.
        schema: Schema part of the fully qualified table name.
        table: Table part of the fully qualified table name.
        field: Name of the offending property; also stored as ``for_field``.
        type: Label shown inside ``<...>`` in the description.
        is_missing: True when the property is absent ("mangler"), False when
            it is present but invalid ("er ugyldig").
        valid_values_description: Optional human-readable text appended to
            the description after " - ".
        valid_values: Passed through unchanged to ``MetadataError.valid_values``.

    Returns:
        A MetadataError with ``column=None``, a description, and a suggested
        SQL statement as ``solution``.
    """
    # The union annotation above is quoted so it is not evaluated when the
    # function is defined; `str | List[str]` raises TypeError before Python 3.10.
    error_reason = "mangler" if is_missing else "er ugyldig"
    description = f"🔴 Feil: '{field}' {error_reason} i table properties. Type: <{type}>"
    if valid_values_description is not None:
        description += f" - {valid_values_description}"
    # "beskrivelse" is fixed with a table comment; every other field with a tag.
    if field == "beskrivelse":
        solution = f"COMMENT ON TABLE {catalog}.{schema}.{table} IS '<<SETT_{field.upper()}_HER>>'"
    else:
        solution = f"ALTER TABLE {catalog}.{schema}.{table} SET TAGS ( '{field}' = '<<SETT_{field.upper()}_HER>>')"
    return MetadataError(
        catalog=catalog,
        schema=schema,
        table=table,
        column=None,
        description=description,
        solution=solution,
        for_field=field,
        valid_values=valid_values,
    )

def check_beskrivelse(metadata: TableMetadata, context: List[MetadataError]) -> List[MetadataError]:
if not check_codelist_value(None, metadata.beskrivelse):
Expand All @@ -23,14 +23,14 @@ def check_tilgangsnivaa(metadata: TableMetadata, context: List[MetadataError]) -

if not check_codelist_value(kodeliste_url, metadata.tilgangsnivaa):
valid_values = get_valid_codelist_values(kodeliste_url)
context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "tilgangsnivaa", "sikkerhetsnivaa", metadata.tilgangsnivaa == None, f"gyldige verdier: {valid_values}"))
context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "tilgangsnivaa", "sikkerhetsnivaa", metadata.tilgangsnivaa == None, f"gyldige verdier: {valid_values}", valid_values=valid_values))

return context

def check_medaljongnivaa(metadata: TableMetadata, context: List[MetadataError]) -> List[MetadataError]:
    """Validate ``metadata.medaljongnivaa`` against the fixed medallion levels.

    Args:
        metadata: Table metadata whose ``medaljongnivaa`` is checked.
        context: List of errors collected so far; an error is appended to it
            in place when the value is rejected.

    Returns:
        The same ``context`` list that was passed in.
    """
    valid_values = ["bronze", "silver", "gold"]
    # No codelist URL is passed (None); the accepted values are supplied directly.
    if not check_codelist_value(None, metadata.medaljongnivaa, valid_values):
        context.append(
            _generate_metadata_error(
                metadata.catalog,
                metadata.schema,
                metadata.table,
                "medaljongnivaa",
                "valør",
                metadata.medaljongnivaa is None,
                f"gyldige verdier: {valid_values}",
                valid_values=valid_values,
            )
        )

    return context

Expand All @@ -39,7 +39,7 @@ def check_tema(metadata: TableMetadata, context: List[MetadataError]) -> List[Me

if not check_codelist_value(kodeliste_url, metadata.tema):
valid_values = get_valid_codelist_values(kodeliste_url)
context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "tema", "inspiretema", metadata.tema == None, f"gyldige verdier: {valid_values}"))
context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "tema", "inspiretema", metadata.tema == None, f"gyldige verdier: {valid_values}", valid_values=valid_values))

return context

Expand All @@ -54,7 +54,7 @@ def check_Bruksvilkaar(metadata: TableMetadata, context: List[MetadataError]) ->

if not check_codelist_value(kodeliste_url, metadata.bruksvilkaar):
valid_values = get_valid_codelist_values(kodeliste_url)
context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "bruksvilkaar", "tilgangsrestriksjoner", metadata.bruksvilkaar == None, f"gyldige verdier: {valid_values}"))
context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "bruksvilkaar", "tilgangsrestriksjoner", metadata.bruksvilkaar == None, f"gyldige verdier: {valid_values}", valid_values=valid_values))

return context

Expand All @@ -63,14 +63,14 @@ def check_begrep(metadata: TableMetadata, context: List[MetadataError]) -> List[

if not check_codelist_value(kodeliste_url, metadata.begrep):
valid_values = get_valid_codelist_values(kodeliste_url)
context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "begrep", "nasjonal", metadata.begrep == None, f"gyldige verdier: {valid_values}"))
context.append(_generate_metadata_error(metadata.catalog, metadata.schema, metadata.table, "begrep", "nasjonal", metadata.begrep == None, f"gyldige verdier: {valid_values}", valid_values=valid_values))

return context

# Check functions to run for each medaljongnivaa value. The keys match the
# values accepted by check_medaljongnivaa ("bronze", "silver", "gold"); each
# check takes (metadata, context) and returns the context list.
checks_for_valor = {
    "bronze": [check_beskrivelse, check_tilgangsnivaa],
    "silver": [check_beskrivelse, check_tema, check_emneord, check_tilgangsnivaa, check_Bruksvilkaar],
    "gold": [check_beskrivelse, check_tema, check_emneord, check_begrep, check_tilgangsnivaa, check_Bruksvilkaar],
}


Expand All @@ -84,3 +84,12 @@ def validate_table(metadata: TableMetadata) -> List[MetadataError]:
validation_context = check(metadata, validation_context)

return validation_context

def get_mandatory_metadata_for_medaljongnivaa(medaljongnivaa: str) -> dict:
    """Return the mandatory metadata fields for a medaljongnivaa.

    Every check registered for the level in ``checks_for_valor`` is run
    against an empty ``TableMetadata`` with a fresh error list. The first
    error a check reports is stored under that error's ``for_field``.

    Args:
        medaljongnivaa: Key into ``checks_for_valor``.

    Returns:
        A dict mapping field name to the MetadataError produced for it.

    Raises:
        KeyError: If ``medaljongnivaa`` is not a key of ``checks_for_valor``.
    """
    metadata_dict = {}

    for check in checks_for_valor[medaljongnivaa]:
        errors = check(TableMetadata(), [])
        # A check that reports nothing for empty metadata has no mandatory
        # field to contribute; indexing [0] unconditionally would raise
        # IndexError here.
        if not errors:
            continue
        metadata_error = errors[0]
        metadata_dict[metadata_error.for_field] = metadata_error

    return metadata_dict
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,15 @@ def test_validate_succeeds_with_correct_set_of_gold_metadata(self):
# Assert
self.assertListEqual(result, [])

def test_validate_succeeds_with_correct_set_of_gold_metadata_and_additional_optional_field(self):
    """A valid gold fixture with one extra, unknown key still validates cleanly."""
    # Arrange: load the gold fixture and add a key that is not a declared field
    fixture = read_file("example_table_metadata_gold.json")
    fixture["optional"] = "value"
    stubbed_table_metadata = TableMetadata(**fixture)
    Metadata.get_table_metadata = MagicMock(return_value=stubbed_table_metadata)
    subject = Metadata("catalog", "schema", "table")

    # Act
    errors = subject.validate()

    # Assert: no validation errors are reported
    self.assertListEqual(errors, [])

0 comments on commit 7a2b598

Please sign in to comment.