From 5567c9c27446fefcec56f5b2998adfb5ce0039aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ludwig=20H=C3=BClk?= <ludwig.huelk@rl-institut.de>
Date: Fri, 8 Nov 2024 14:52:31 +0100
Subject: [PATCH] Update scripts for example and tests #210

---
 .../v20/v20/build_source/schemas/fields.json  |   2 +-
 .../v20/v20/build_source/schemas/review.json  |   2 +-
 .../scripts/generate_example_from_schema.py   | 122 ++++++++++-
 .../scripts/generate_template_from_schema.py  |  14 ++
 metadata/v20/v20/example.json                 | 190 ++++++++++--------
 metadata/v20/v20/schema.json                  |   5 -
 metadata/v20/v20/template.json                |  21 +-
 7 files changed, 240 insertions(+), 116 deletions(-)

diff --git a/metadata/v20/v20/build_source/schemas/fields.json b/metadata/v20/v20/build_source/schemas/fields.json
index c19f9d8..73f7add 100644
--- a/metadata/v20/v20/build_source/schemas/fields.json
+++ b/metadata/v20/v20/build_source/schemas/fields.json
@@ -16,7 +16,7 @@
             "format": {
                 "description": "A file extension format. Possible options are 'csv', 'xlsx', 'json', 'PostgreSQL', 'SQLite' and other standard file extensions.",
                 "type": ["string", "null"],
-                "examples": ["PostgreSQL", "CSV"],
+                "examples": ["CSV"],
                 "badge": "Gold",
                 "title": "Format",
                 "options": {
diff --git a/metadata/v20/v20/build_source/schemas/review.json b/metadata/v20/v20/build_source/schemas/review.json
index ce70a2b..6dad0d9 100644
--- a/metadata/v20/v20/build_source/schemas/review.json
+++ b/metadata/v20/v20/build_source/schemas/review.json
@@ -18,7 +18,7 @@
                 "badge": {
                     "description": "A badge of either Iron, Bronze, Silver, Gold or Platinum is used to label the quality of the metadata.",
                     "type": ["string", "null"],
-                    "examples": ["Iron","Bronze","Silver","Gold","Platinum"],
+                    "examples": ["Platinum"],
                     "badge": null,
                     "title": "Badge"
                 }
diff --git a/metadata/v20/v20/build_source/scripts/generate_example_from_schema.py b/metadata/v20/v20/build_source/scripts/generate_example_from_schema.py
index 1627dec..9c15280 100644
--- a/metadata/v20/v20/build_source/scripts/generate_example_from_schema.py
+++ b/metadata/v20/v20/build_source/scripts/generate_example_from_schema.py
@@ -15,12 +15,10 @@
 
 import json
 import logging
+import os
 
 from typing import Any, Dict, Union, List
-
-# from datetime import datetime
 from pathlib import Path
-
 from settings import RESOLVED_SCHEMA_FILE_NAME, EXAMPLE_PATH, LOG_FORMAT
 
 # Configuration
@@ -32,7 +30,7 @@ def read_schema(filepath: str) -> Dict[str, Any]:
     """Read a JSON schema from a file.
 
     Args:
-        filename (str): The path to the JSON schema file.
+        filepath (str): The path to the JSON schema file.
 
     Returns:
         Dict[str, Any]: The JSON schema as a dictionary.
@@ -40,10 +38,54 @@ def read_schema(filepath: str) -> Dict[str, Any]:
 
     with open(filepath, "r", encoding="utf-8") as file:
         schema = json.load(file)
+    print(f"Processing schema: {schema}")
     return schema
 
 
-def generate_example(
+def read_metadata_schema(filepath: str) -> Dict[str, Any]:
+    """Read the metadata JSON schema from a file, tolerating failures.
+
+    Problems are reported through the module logger instead of being
+    raised; in every failure case an empty dict is returned.
+
+    Args:
+        filepath (str): The path to the JSON schema file.
+
+    Returns:
+        Dict[str, Any]: The JSON schema as a dictionary, or {} when the file
+            is missing, cannot be read or parsed, or is not a JSON object.
+    """
+    if not os.path.exists(filepath):
+        logger.error("Error: File '%s' does not exist.", filepath)
+        return {}
+
+    try:
+        with open(filepath, "r", encoding="utf-8") as file:
+            schema = json.load(file)
+    except json.JSONDecodeError as e:
+        logger.error("Error reading JSON: %s", e)
+        return {}
+    except Exception as e:  # deliberate best-effort: report and return {}
+        logger.error(
+            "An unexpected error occurred while reading the schema: %s", e)
+        return {}
+
+    if not isinstance(schema, dict):
+        logger.error("Error: Schema is not a dictionary. Check the schema format.")
+        return {}
+
+    logger.info("Schema loaded successfully from %s", filepath)
+    logger.info("Schema top-level keys: %s", list(schema.keys()))
+    if "$schema" not in schema or "type" not in schema:
+        logger.warning(
+            "Warning: Schema may be missing key fields like '$schema' or 'type'.")
+    # Trimmed dump at debug level only, so large schemas do not flood the log.
+    logger.debug(
+        "Full schema content (trimmed for large files): %s...", str(schema)[:500])
+    return schema
+
+
+def generate_example_old(
     schema: Dict[str, Any]
 ) -> Union[Dict[str, Any], List[Any], str, None]:
     """Generate a JSON object from the schema using the
@@ -91,7 +133,52 @@ def generate_example(
     return None
 
 
-def generate_json_from_schema(schema_file: str) -> Dict[str, Any]:
+def extract_examples_from_schema(schema: Dict[str, Any]) -> Union[
+    Dict[str, Any], List[Any], str, None]:
+    """Build an example value for a (sub)schema, preferring its "examples".
+
+    Args:
+        schema (Dict[str, Any]): A JSON schema or one of its subschemas.
+
+    Returns:
+        The schema's "examples" value when present (a one-item list is
+        unwrapped to that item). Otherwise a value derived from "type": a dict
+        for objects, a list for arrays, or a placeholder for scalar types.
+        None when the type is "null", missing or not recognised.
+    """
+    # Explicit examples take precedence over anything derived from "type".
+    if "examples" in schema:
+        examples = schema["examples"]
+        # A one-item list stands for a single value; longer lists (and any
+        # non-list value) are passed through unchanged.
+        if isinstance(examples, list) and len(examples) == 1:
+            return examples[0]
+        return examples
+
+    # "type" may be a list such as ["string", "null"]; the first entry wins.
+    schema_type = schema.get("type")
+    if isinstance(schema_type, list):
+        # An empty type list carries no information; treat it as untyped.
+        schema_type = schema_type[0] if schema_type else None
+
+    if schema_type == "object":
+        # Recurse into every declared property.
+        return {
+            key: extract_examples_from_schema(value)
+            for key, value in schema.get("properties", {}).items()
+        }
+
+    if schema_type == "array":
+        # Item examples that already form a list are used as the array itself.
+        example = extract_examples_from_schema(schema.get("items", {}))
+        return example if isinstance(example, list) else [example]
+
+    # Placeholders for scalar types; "null" and unknown types yield None.
+    scalar_defaults = {"string": "", "number": 0, "integer": 0, "boolean": True}
+    return scalar_defaults.get(schema_type) if isinstance(schema_type, str) else None
+
+
+def create_json_from_schema(schema_file: str) -> Dict[str, Any]:
     """Generate a JSON object that conforms to the schema read from a file.
 
     Args:
@@ -100,8 +187,10 @@ def generate_json_from_schema(schema_file: str) -> Dict[str, Any]:
     Returns:
         Dict[str, Any]: A JSON object generated from the schema.
     """
-    schema = read_schema(schema_file)
-    return generate_example(schema)
+    print(f"Create JSON from schema: {schema_file}")
+    result = extract_examples_from_schema(read_metadata_schema(schema_file))
+    print(f"Create JSON object: {result}")
+    return result
 
 
 def save_json(data: Dict[str, Any], filename: Path) -> None:
@@ -116,10 +205,21 @@ def save_json(data: Dict[str, Any], filename: Path) -> None:
 
     logger.info(f"example JSON generated and saved to {filename}")
 
+def test_oemetadata_schema_should_validate_oemetadata_example(example):
+    """Check ``example`` against the OEMetadata v2.0 schema and print the outcome."""
+    import jsonschema
+    from metadata.v20.v20.schema import OEMETADATA_V20_SCHEMA as schema_v20
+    try:
+        jsonschema.validate(example, schema_v20)
+        print("OEMetadata Example is valid OEMetadata Schema (v2.0).")
+    except jsonschema.ValidationError as error:
+        print("Cannot validate OEMetadata Example with Schema (v2.0)!", error)
+
 
 if __name__ == "__main__":
-    logger.info("Generation started.")
+    logger.info("Create OEMetadata Example from Schema.")
     schema_filename = RESOLVED_SCHEMA_FILE_NAME
-    json_data = generate_json_from_schema(schema_filename)
+    json_data = create_json_from_schema(schema_filename)
     save_json(json_data, EXAMPLE_PATH)
-    logger.info("Generation ended.")
+    logger.info("OEMetadata Example created!")
+    test_oemetadata_schema_should_validate_oemetadata_example(json_data)
diff --git a/metadata/v20/v20/build_source/scripts/generate_template_from_schema.py b/metadata/v20/v20/build_source/scripts/generate_template_from_schema.py
index a8af48d..35f5ffc 100644
--- a/metadata/v20/v20/build_source/scripts/generate_template_from_schema.py
+++ b/metadata/v20/v20/build_source/scripts/generate_template_from_schema.py
@@ -85,8 +85,22 @@ def main():
 
     logger.info(f"template JSON generated and saved to {template_file_path}")
 
+    # WARNING: The metaMetadata is missing and the boundingBox is wrong!
+
+def test_oemetadata_schema_should_validate_oemetadata_template():
+    """Check the OEMetadata v2.0 template against the schema; print the outcome."""
+    import jsonschema
+    from metadata.v20.v20.template import OEMETADATA_V20_TEMPLATE as template_v20
+    from metadata.v20.v20.schema import OEMETADATA_V20_SCHEMA as schema_v20
+    try:
+        jsonschema.validate(template_v20, schema_v20)
+        print("OEMetadata Template is valid OEMetadata Schema (v2.0).")
+    except jsonschema.ValidationError as error:
+        print("Cannot validate OEMetadata Template with Schema (v2.0)!", error)
+
 
 if __name__ == "__main__":
     logger.info("Generation started.")
     main()
+    test_oemetadata_schema_should_validate_oemetadata_template()
     logger.info("Generation ended.")
diff --git a/metadata/v20/v20/example.json b/metadata/v20/v20/example.json
index c72c7dd..4686857 100644
--- a/metadata/v20/v20/example.json
+++ b/metadata/v20/v20/example.json
@@ -1,172 +1,184 @@
 {
-    "name": "",
-    "title": "",
-    "description": "",
-    "id": "",
+    "name": "oep_oemetadata",
+    "title": "OEP OEMetadata",
+    "description": "A collection of tables for the OEMetadata examples.",
+    "id": "https://databus.openenergyplatform.org/oeplatform/reference",
     "resources": [
         {
-            "@id": "",
-            "@context": "",
-            "name": "",
+            "@id": "https://databus.openenergyplatform.org/oeplatform/supply/wri_global_power_plant_database/2022-11-07",
+            "@context": "https://raw.githubusercontent.com/OpenEnergyPlatform/oemetadata/production/metadata/latest/context.json",
+            "name": "oep_oemetadata_table_example",
             "topics": [
-                ""
+                "model_draft",
+                "reference"
             ],
-            "title": "",
-            "path": "",
-            "description": "",
+            "title": "OEP OEMetadata Example Table",
+            "path": "http://openenergyplatform.org/dataedit/view/model_draft/oep_oemetadata_table_example",
+            "description": "Example table used to illustrate the OEMetadata structure and meaning.",
             "languages": [
-                ""
+                "en-GB",
+                "de-DE"
             ],
             "subject": [
                 {
-                    "name": "",
-                    "path": ""
+                    "name": "energy",
+                    "path": "https://openenergy-platform.org/ontology/oeo/OEO_00000150"
                 }
             ],
             "keywords": [
-                ""
+                "example",
+                "ODbL-1.0",
+                "NFDI4Energy"
             ],
-            "publicationDate": "",
+            "publicationDate": "2024-10-15",
             "embargoPeriod": {
-                "start": "",
-                "end": "",
-                "isActive": null
+                "start": "2024-10-11",
+                "end": "2025-01-01",
+                "isActive": true
             },
             "context": {
-                "title": "",
-                "homepage": "",
-                "documentation": "",
-                "sourceCode": "",
-                "publisher": "",
-                "publisherLogo": "",
-                "contact": "",
-                "fundingAgency": "",
-                "fundingAgencyLogo": "",
-                "grantNo": ""
+                "title": "NFDI4Energy",
+                "homepage": "https://nfdi4energy.uol.de/",
+                "documentation": "https://nfdi4energy.uol.de/sites/about_us/",
+                "sourceCode": "https://github.com/NFDI4Energy",
+                "publisher": "Open Energy Platform (OEP)",
+                "publisherLogo": "https://github.com/OpenEnergyPlatform/organisation/blob/production/logo/OpenEnergyFamily_Logo_OpenEnergyPlatform.svg",
+                "contact": "contact@example.com",
+                "fundingAgency": "Deutsche Forschungsgemeinschaft (DFG)",
+                "fundingAgencyLogo": "https://upload.wikimedia.org/wikipedia/commons/8/86/DFG-logo-blau.svg",
+                "grantNo": "501865131"
             },
             "spatial": {
                 "location": {
-                    "address": "",
-                    "@id": "",
-                    "latitude": "",
-                    "longitude": ""
+                    "address": "Rudower Chaussee 12, 12489 Berlin",
+                    "@id": "https://www.wikidata.org/wiki/Q77077223",
+                    "latitude": "52.432822",
+                    "longitude": "13.5351004"
                 },
                 "extent": {
-                    "name": "",
-                    "@id": "",
-                    "resolutionValue": "",
-                    "resolutionUnit": "",
+                    "name": "Berlin",
+                    "@id": "https://www.wikidata.org/wiki/Q64",
+                    "resolutionValue": "100",
+                    "resolutionUnit": "m",
                     "boundingBox": [
-                        null
+                        13.08825,
+                        52.33859,
+                        13.76104,
+                        52.6754
                     ],
-                    "crs": ""
+                    "crs": "EPSG:4326"
                 }
             },
             "temporal": {
-                "referenceDate": "",
+                "referenceDate": "2020-01-01",
                 "timeseries": [
                     {
-                        "start": "",
-                        "end": "",
-                        "resolution": "",
-                        "alignment": "",
-                        "aggregationType": ""
+                        "start": "2020-01-01T00:00:00+00:00",
+                        "end": "2020-01-01T23:59:30+00:00",
+                        "resolution": "30 s",
+                        "alignment": "left",
+                        "aggregationType": "current"
                     }
                 ]
             },
             "sources": [
                 {
-                    "title": "",
+                    "title": "IPCC Sixth Assessment Report (AR6) - Climate Change 2023 - Synthesis Report",
                     "authors": [
-                        ""
+                        "Hoesung Lee",
+                        "José Romero",
+                        "The Core Writing Team"
                     ],
-                    "description": "",
-                    "publicationYear": "",
-                    "path": "",
+                    "description": "A Report of the Intergovernmental Panel on Climate Change.",
+                    "publicationYear": "2023",
+                    "path": "https://www.ipcc.ch/report/ar6/syr/downloads/report/IPCC_AR6_SYR_FullVolume.pdf",
                     "licenses": [
                         {
-                            "name": "",
-                            "title": "",
-                            "path": "",
-                            "instruction": "",
-                            "attribution": "",
-                            "copyrightStatement": ""
+                            "name": "ODbL-1.0",
+                            "title": "Open Data Commons Open Database License 1.0",
+                            "path": "https://opendatacommons.org/licenses/odbl/1-0/index.html",
+                            "instruction": "You are free to share and change, but you must attribute, and share derivations under the same license. See https://tldrlegal.com/license/odc-open-database-license-odbl for further information.",
+                            "attribution": "© Intergovernmental Panel on Climate Change 2023",
+                            "copyrightStatement": "https://www.ipcc.ch/copyright/"
                         }
                     ]
                 }
             ],
             "licenses": [
                 {
-                    "name": "ODbL-1.0",
-                    "title": "Open Data Commons Open Database License 1.0",
-                    "path": "https://opendatacommons.org/licenses/odbl/1-0/index.html",
-                    "instruction": "You are free to share and change, but you must attribute, and share derivations under the same license. See https://tldrlegal.com/license/odc-open-database-license-(odbl) for further information.",
-                    "attribution": "© Reiner Lemoine Institut"
+                    "name": "",
+                    "title": "",
+                    "path": "",
+                    "instruction": "",
+                    "attribution": ""
                 }
             ],
             "contributors": [
                 {
-                    "title": "",
-                    "path": "",
-                    "organization": "",
+                    "title": "Ludwig Hülk",
+                    "path": "https://github.com/Ludee",
+                    "organization": "Reiner Lemoine Institut",
                     "roles": [
-                        ""
+                        "creator",
+                        "dataCurator"
                     ],
-                    "date": "",
-                    "object": "",
-                    "comment": ""
+                    "date": "2024-10-21",
+                    "object": "data and metadata",
+                    "comment": "Add general context."
                 }
             ],
-            "type": "",
-            "format": "",
-            "encoding": "",
+            "type": "table",
+            "format": "CSV",
+            "encoding": "UTF-8",
             "schema": {
                 "fields": [
                     {
-                        "name": "",
-                        "description": "",
-                        "type": "",
-                        "nullable": null,
-                        "unit": "",
+                        "name": "year",
+                        "description": "Reference year for which the data was collected.",
+                        "type": "geometry(Point, 4326)",
+                        "nullable": true,
+                        "unit": "MW",
                         "isAbout": [
                             {
-                                "name": "",
-                                "path": ""
+                                "name": "wind energy converting unit",
+                                "path": "https://openenergyplatform.org/ontology/oeo/OEO_00000044"
                             }
                         ],
                         "valueReference": [
                             {
-                                "value": "",
-                                "name": "",
-                                "path": ""
+                                "value": "onshore",
+                                "name": "onshore wind farm",
+                                "path": "https://openenergyplatform.org/ontology/oeo/OEO_00000311"
                             }
                         ]
                     }
                 ],
                 "primaryKey": [
-                    ""
+                    "id"
                 ],
                 "foreignKeys": [
                     {
                         "fields": [
-                            ""
+                            "id",
+                            "version"
                         ],
                         "reference": {
-                            "resource": "",
+                            "resource": "model_draft.oep_oemetadata_table_example_version",
                             "fields": [
-                                ""
+                                "id",
+                                "version"
                             ]
                         }
                     }
                 ]
             },
             "dialect": {
-                "delimiter": "",
-                "decimalSeparator": ""
+                "delimiter": ";",
+                "decimalSeparator": "."
             },
             "review": {
-                "path": "",
-                "badge": ""
+                "path": "https://www.example.com",
+                "badge": "Platinum"
             }
         }
     ],
diff --git a/metadata/v20/v20/schema.json b/metadata/v20/v20/schema.json
index 0e67ea0..66ca378 100644
--- a/metadata/v20/v20/schema.json
+++ b/metadata/v20/v20/schema.json
@@ -1030,7 +1030,6 @@
               "null"
             ],
             "examples": [
-              "PostgreSQL",
               "CSV"
             ],
             "badge": "Gold",
@@ -1386,10 +1385,6 @@
                   "null"
                 ],
                 "examples": [
-                  "Iron",
-                  "Bronze",
-                  "Silver",
-                  "Gold",
                   "Platinum"
                 ],
                 "badge": null,
diff --git a/metadata/v20/v20/template.json b/metadata/v20/v20/template.json
index a122c84..90295c0 100644
--- a/metadata/v20/v20/template.json
+++ b/metadata/v20/v20/template.json
@@ -57,8 +57,11 @@
           "resolutionValue": "",
           "resolutionUnit": "",
           "boundingBox": [
-            0
-          ],
+                13.08825,
+                52.33859,
+                13.76104,
+                52.6754
+            ],
           "crs": ""
         }
       },
@@ -170,12 +173,12 @@
       }
     }
   ],
-  "metaMetadata": {
-    "metadataVersion": "",
-    "metadataLicense": {
-      "name": "",
-      "title": "",
-      "path": ""
+    "metaMetadata": {
+        "metadataVersion": "OEMetadata-2.0.1",
+        "metadataLicense": {
+            "name": "CC0-1.0",
+            "title": "Creative Commons Zero v1.0 Universal",
+            "path": "https://creativecommons.org/publicdomain/zero/1.0"
+        }
     }
-  }
 }
\ No newline at end of file