From 5567c9c27446fefcec56f5b2998adfb5ce0039aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludwig=20H=C3=BClk?= Date: Fri, 8 Nov 2024 14:52:31 +0100 Subject: [PATCH] Update scripts for example and tests #210 --- .../v20/v20/build_source/schemas/fields.json | 2 +- .../v20/v20/build_source/schemas/review.json | 2 +- .../scripts/generate_example_from_schema.py | 122 ++++++++++- .../scripts/generate_template_from_schema.py | 14 ++ metadata/v20/v20/example.json | 190 ++++++++++-------- metadata/v20/v20/schema.json | 5 - metadata/v20/v20/template.json | 21 +- 7 files changed, 240 insertions(+), 116 deletions(-) diff --git a/metadata/v20/v20/build_source/schemas/fields.json b/metadata/v20/v20/build_source/schemas/fields.json index c19f9d8..73f7add 100644 --- a/metadata/v20/v20/build_source/schemas/fields.json +++ b/metadata/v20/v20/build_source/schemas/fields.json @@ -16,7 +16,7 @@ "format": { "description": "A file extension format. Possible options are 'csv', 'xlsx', 'json', 'PostgreSQL', 'SQLite' and other standard file extensions.", "type": ["string", "null"], - "examples": ["PostgreSQL", "CSV"], + "examples": ["CSV"], "badge": "Gold", "title": "Format", "options": { diff --git a/metadata/v20/v20/build_source/schemas/review.json b/metadata/v20/v20/build_source/schemas/review.json index ce70a2b..6dad0d9 100644 --- a/metadata/v20/v20/build_source/schemas/review.json +++ b/metadata/v20/v20/build_source/schemas/review.json @@ -18,7 +18,7 @@ "badge": { "description": "A badge of either Iron, Bronze, Silver, Gold or Platinum is used to label the quality of the metadata.", "type": ["string", "null"], - "examples": ["Iron","Bronze","Silver","Gold","Platinum"], + "examples": ["Platinum"], "badge": null, "title": "Badge" } diff --git a/metadata/v20/v20/build_source/scripts/generate_example_from_schema.py b/metadata/v20/v20/build_source/scripts/generate_example_from_schema.py index 1627dec..9c15280 100644 --- a/metadata/v20/v20/build_source/scripts/generate_example_from_schema.py +++ b/metadata/v20/v20/build_source/scripts/generate_example_from_schema.py @@ -15,12 +15,10 @@ import json import logging +import os from typing import Any, Dict, Union, List - -# from datetime import datetime from pathlib import Path - from settings import RESOLVED_SCHEMA_FILE_NAME, EXAMPLE_PATH, LOG_FORMAT # Configuration @@ -32,7 +30,7 @@ def read_schema(filepath: str) -> Dict[str, Any]: """Read a JSON schema from a file. Args: - filename (str): The path to the JSON schema file. + filepath (str): The path to the JSON schema file. Returns: Dict[str, Any]: The JSON schema as a dictionary. @@ -40,10 +38,54 @@ def read_schema(filepath: str) -> Dict[str, Any]: with open(filepath, "r", encoding="utf-8") as file: schema = json.load(file) + print(f"Processing schema: {schema}") return schema -def generate_example( +def read_metadata_schema(filepath: str) -> Dict[str, Any]: + """Read a JSON schema from a file. + + Args: + filepath (str): The path to the JSON schema file. + + Returns: + Dict[str, Any]: The JSON schema as a dictionary. + """ + if not os.path.exists(filepath): + print(f"Error: File '{filepath}' does not exist.") + return {} + + try: + with open(filepath, "r", encoding="utf-8") as file: + schema = json.load(file) + + # Basic validation of schema structure + if not isinstance(schema, dict): + print("Error: Schema is not a dictionary. Check the schema format.") + return {} + + print(f"Schema loaded successfully from {filepath}") + print(f"Schema top-level keys: {list(schema.keys())}") + + # Additional debugging info: Check expected keys + if "$schema" not in schema or "type" not in schema: + print( + "Warning: Schema may be missing key fields like '$schema' or 'type'.") + + print( + f"Full schema content (trimmed for large files): {str(schema)[:500]}...") + + return schema + + except json.JSONDecodeError as e: + print(f"Error reading JSON: {e}") + return {} + except Exception as e: + print(f"An unexpected error occurred while reading the schema: {e}") + return {} + + +def generate_example_old( schema: Dict[str, Any] ) -> Union[Dict[str, Any], List[Any], str, None]: """Generate a JSON object from the schema using the @@ -91,7 +133,52 @@ def generate_example( return None -def generate_json_from_schema(schema_file: str) -> Dict[str, Any]: +def extract_examples_from_schema(schema: Dict[str, Any]) -> Union[ + Dict[str, Any], List[Any], str, None]: + """Generate a valid example from the schema using the provided example values.""" + + # If the schema has an "examples" field, handle it appropriately + if "examples" in schema: + examples = schema["examples"] + if isinstance(examples, list): + # Return a single value if the list contains one item + if len(examples) == 1: + return examples[0] + return examples # If multiple items, return the whole list + return examples # If it's a single item, return the value + + # If the schema type is an object, process each property recursively + schema_type = schema.get("type") + if isinstance(schema_type, list): + schema_type = schema_type[0] + + if schema_type == "object": + example_object = {} + properties = schema.get("properties", {}) + for key, value in properties.items(): + example_object[key] = extract_examples_from_schema(value) + return example_object + + # If the schema type is an array, process the items recursively + elif schema_type == "array": + items = schema.get("items", {}) + example = extract_examples_from_schema(items) + return [example] if not isinstance(example, list) else example + + # Handle basic types like string, number, boolean, null + elif schema_type == "string": + return "" # Example string + elif schema_type == "number": + return 0 # Example number + elif schema_type == "boolean": + return True # Example boolean + elif schema_type == "null": + return None # Example null + + return None # Default fallback + + +def create_json_from_schema(schema_file: str) -> Dict[str, Any]: """Generate a JSON object that conforms to the schema read from a file. Args: @@ -100,8 +187,10 @@ def generate_json_from_schema(schema_file: str) -> Dict[str, Any]: Returns: Dict[str, Any]: A JSON object generated from the schema. """ - schema = read_schema(schema_file) - return generate_example(schema) + schema = read_metadata_schema(schema_file) + print(f"Create JSON from schema: {schema_file}") + return extract_examples_from_schema(schema) + print(f"Create JSON object: {result}") def save_json(data: Dict[str, Any], filename: Path) -> None: @@ -116,10 +205,21 @@ def save_json(data: Dict[str, Any], filename: Path) -> None: logger.info(f"example JSON generated and saved to {filename}") +def test_oemetadata_schema_should_validate_oemetadata_example(example): + from jsonschema import validate, ValidationError + from metadata.v20.v20.schema import OEMETADATA_V20_SCHEMA + + try: + validate(example, OEMETADATA_V20_SCHEMA) + print("OEMetadata Example is valid OEMetadata Schema (v2.0).") + except ValidationError as e: + print("Cannot validate OEMetadata Example with Schema (v2.0)!", e) + if __name__ == "__main__": - logger.info("Generation started.") + logger.info("Create OEMetadata Example from Schema.") schema_filename = RESOLVED_SCHEMA_FILE_NAME - json_data = generate_json_from_schema(schema_filename) + json_data = create_json_from_schema(schema_filename) save_json(json_data, EXAMPLE_PATH) - logger.info("Generation ended.") + logger.info("OEMetadata Example created!") + test_oemetadata_schema_should_validate_oemetadata_example(json_data) diff --git a/metadata/v20/v20/build_source/scripts/generate_template_from_schema.py b/metadata/v20/v20/build_source/scripts/generate_template_from_schema.py index a8af48d..35f5ffc 100644 --- a/metadata/v20/v20/build_source/scripts/generate_template_from_schema.py +++ b/metadata/v20/v20/build_source/scripts/generate_template_from_schema.py @@ -85,8 +85,22 @@ def main(): logger.info(f"template JSON generated and saved to {template_file_path}") + # WARNING: The metaMetadata is missing and the boundingBox is wrong! + +def test_oemetadata_schema_should_validate_oemetadata_template(): + from jsonschema import validate, ValidationError + from metadata.v20.v20.template import OEMETADATA_V20_TEMPLATE + from metadata.v20.v20.schema import OEMETADATA_V20_SCHEMA + + try: + validate(OEMETADATA_V20_TEMPLATE, OEMETADATA_V20_SCHEMA) + print("OEMetadata Template is valid OEMetadata Schema (v2.0).") + except ValidationError as e: + print("Cannot validate OEMetadata Template with Schema (v2.0)!", e) + if __name__ == "__main__": logger.info("Generation started.") main() + test_oemetadata_schema_should_validate_oemetadata_template() logger.info("Generation ended.") diff --git a/metadata/v20/v20/example.json b/metadata/v20/v20/example.json index c72c7dd..4686857 100644 --- a/metadata/v20/v20/example.json +++ b/metadata/v20/v20/example.json @@ -1,172 +1,184 @@ { - "name": "", - "title": "", - "description": "", - "id": "", + "name": "oep_oemetadata", + "title": "OEP OEMetadata", + "description": "A collection of tables for the OEMetadata examples.", + "id": "https://databus.openenergyplatform.org/oeplatform/reference", "resources": [ { - "@id": "", - "@context": "", - "name": "", + "@id": "https://databus.openenergyplatform.org/oeplatform/supply/wri_global_power_plant_database/2022-11-07", + "@context": "https://raw.githubusercontent.com/OpenEnergyPlatform/oemetadata/production/metadata/latest/context.json", + "name": "oep_oemetadata_table_example", "topics": [ - "" + "model_draft", + "reference" ], - "title": "", - "path": "", - "description": "", + "title": "OEP OEMetadata Example Table", + "path": "http://openenergyplatform.org/dataedit/view/model_draft/oep_oemetadata_table_example", + "description": "Example table used to illustrate the OEMetadata structure and meaning.", "languages": [ - "" + "en-GB", + "de-DE" ], "subject": [ { - "name": "", - "path": "" + "name": "energy", + "path": "https://openenergy-platform.org/ontology/oeo/OEO_00000150" } ], "keywords": [ - "" + "example", + "ODbL-1.0", + "NFDI4Energy" ], - "publicationDate": "", + "publicationDate": "2024-10-15", "embargoPeriod": { - "start": "", - "end": "", - "isActive": null + "start": "2024-10-11", + "end": "2025-01-01", + "isActive": true }, "context": { - "title": "", - "homepage": "", - "documentation": "", - "sourceCode": "", - "publisher": "", - "publisherLogo": "", - "contact": "", - "fundingAgency": "", - "fundingAgencyLogo": "", - "grantNo": "" + "title": "NFDI4Energy", + "homepage": "https://nfdi4energy.uol.de/", + "documentation": "https://nfdi4energy.uol.de/sites/about_us/", + "sourceCode": "https://github.com/NFDI4Energy", + "publisher": "Open Energy Platform (OEP)", + "publisherLogo": "https://github.com/OpenEnergyPlatform/organisation/blob/production/logo/OpenEnergyFamily_Logo_OpenEnergyPlatform.svg", + "contact": "contact@example.com", + "fundingAgency": " Deutsche Forschungsgemeinschaft (DFG)", + "fundingAgencyLogo": "https://upload.wikimedia.org/wikipedia/commons/8/86/DFG-logo-blau.svg", + "grantNo": "501865131" }, "spatial": { "location": { - "address": "", - "@id": "", - "latitude": "", - "longitude": "" + "address": "Rudower Chaussee 12, 12489 Berlin", + "@id": "https://www.wikidata.org/wiki/Q77077223", + "latitude": "52.432822", + "longitude": "13.5351004" }, "extent": { - "name": "", - "@id": "", - "resolutionValue": "", - "resolutionUnit": "", + "name": "Berlin", + "@id": "https://www.wikidata.org/wiki/Q64", + "resolutionValue": "100", + "resolutionUnit": "m", "boundingBox": [ - null + 13.08825, + 52.33859, + 13.76104, + 52.6754 ], - "crs": "" + "crs": "EPSG:4326" } }, "temporal": { - "referenceDate": "", + "referenceDate": "2020-01-01", "timeseries": [ { - "start": "", - "end": "", - "resolution": "", - "alignment": "", - "aggregationType": "" + "start": "2020-01-01T00:00:00+00:00", + "end": "2020-01-01T23:59:30+00:00", + "resolution": "30 s", + "alignment": "left", + "aggregationType": "current" } ] }, "sources": [ { - "title": "", + "title": "IPCC Sixth Assessment Report (AR6) - Climate Change 2023 - Synthesis Report", "authors": [ - "" + "Hoesung Lee", + "José Romero", + "The Core Writing Team" ], - "description": "", - "publicationYear": "", - "path": "", + "description": "A Report of the Intergovernmental Panel on Climate Change.", + "publicationYear": "2023", + "path": "https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_FullVolume.pdf", "licenses": [ { - "name": "", - "title": "", - "path": "", - "instruction": "", - "attribution": "", - "copyrightStatement": "" + "name": "ODbL-1.0", + "title": "Open Data Commons Open Database License 1.0", + "path": "https://opendatacommons.org/licenses/odbl/1-0/index.html", + "instruction": "You are free to share and change, but you must attribute, and share derivations under the same license. See https://tldrlegal.com/license/odc-open-database-license-odbl for further information.", + "attribution": "© Intergovernmental Panel on Climate Change 2023", + "copyrightStatement": "https://www.ipcc.ch/copyright/" } ] } ], "licenses": [ { - "name": "ODbL-1.0", - "title": "Open Data Commons Open Database License 1.0", - "path": "https://opendatacommons.org/licenses/odbl/1-0/index.html", - "instruction": "You are free to share and change, but you must attribute, and share derivations under the same license. See https://tldrlegal.com/license/odc-open-database-license-(odbl) for further information.", - "attribution": "© Reiner Lemoine Institut" + "name": "", + "title": "", + "path": "", + "instruction": "", + "attribution": "" } ], "contributors": [ { - "title": "", - "path": "", - "organization": "", + "title": "Ludwig Hülk", + "path": "https://github.com/Ludee", + "organization": "Reiner Lemoine Institut", "roles": [ - "" + "creator", + "dataCurator" ], - "date": "", - "object": "", - "comment": "" + "date": "2024-10-21", + "object": "data and metadata", + "comment": "Add general context." } ], - "type": "", - "format": "", - "encoding": "", + "type": "table", + "format": "CSV", + "encoding": "UTF-8", "schema": { "fields": [ { - "name": "", - "description": "", - "type": "", - "nullable": null, - "unit": "", + "name": "year", + "description": "Reference year for which the data was collected.", + "type": "geometry(Point, 4326)", + "nullable": true, + "unit": "MW", "isAbout": [ { - "name": "", - "path": "" + "name": "wind energy converting unit", + "path": "https://openenergyplatform.org/ontology/oeo/OEO_00000044" } ], "valueReference": [ { - "value": "", - "name": "", - "path": "" + "value": "onshore", + "name": "onshore wind farm", + "path": "https://openenergyplatform.org/ontology/oeo/OEO_00000311" } ] } ], "primaryKey": [ - "" + "id" ], "foreignKeys": [ { "fields": [ - "" + "id", + "version" ], "reference": { - "resource": "", + "resource": "model_draft.oep_oemetadata_table_example_version", "fields": [ - "" + "id", + "version" ] } } ] }, "dialect": { - "delimiter": "", - "decimalSeparator": "" + "delimiter": ";", + "decimalSeparator": "." }, "review": { - "path": "", - "badge": "" + "path": "https://www.example.com", + "badge": "Platinum" } } ], diff --git a/metadata/v20/v20/schema.json b/metadata/v20/v20/schema.json index 0e67ea0..66ca378 100644 --- a/metadata/v20/v20/schema.json +++ b/metadata/v20/v20/schema.json @@ -1030,7 +1030,6 @@ "null" ], "examples": [ - "PostgreSQL", "CSV" ], "badge": "Gold", @@ -1386,10 +1385,6 @@ "null" ], "examples": [ - "Iron", - "Bronze", - "Silver", - "Gold", "Platinum" ], "badge": null, diff --git a/metadata/v20/v20/template.json b/metadata/v20/v20/template.json index a122c84..90295c0 100644 --- a/metadata/v20/v20/template.json +++ b/metadata/v20/v20/template.json @@ -57,8 +57,11 @@ "resolutionValue": "", "resolutionUnit": "", "boundingBox": [ - 0 - ], + 13.08825, + 52.33859, + 13.76104, + 52.6754 + ], "crs": "" } }, @@ -170,12 +173,12 @@ } } ], - "metaMetadata": { - "metadataVersion": "", - "metadataLicense": { - "name": "", - "title": "", - "path": "" + "metaMetadata": { + "metadataVersion": "OEMetadata-2.0.1", + "metadataLicense": { + "name": "CC0-1.0", + "title": "Creative Commons Zero v1.0 Universal", + "path": "https://creativecommons.org/publicdomain/zero/1.0" + } } - } } \ No newline at end of file