Skip to content

Commit

Permalink
Update scripts for example and tests #210
Browse files Browse the repository at this point in the history
  • Loading branch information
Ludee committed Nov 8, 2024
1 parent e6959dc commit 5567c9c
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 116 deletions.
2 changes: 1 addition & 1 deletion metadata/v20/v20/build_source/schemas/fields.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"format": {
"description": "A file extension format. Possible options are 'csv', 'xlsx', 'json', 'PostgreSQL', 'SQLite' and other standard file extensions.",
"type": ["string", "null"],
"examples": ["PostgreSQL", "CSV"],
"examples": ["CSV"],
"badge": "Gold",
"title": "Format",
"options": {
Expand Down
2 changes: 1 addition & 1 deletion metadata/v20/v20/build_source/schemas/review.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"badge": {
"description": "A badge of either Iron, Bronze, Silver, Gold or Platinum is used to label the quality of the metadata.",
"type": ["string", "null"],
"examples": ["Iron","Bronze","Silver","Gold","Platinum"],
"examples": ["Platinum"],
"badge": null,
"title": "Badge"
}
Expand Down
122 changes: 111 additions & 11 deletions metadata/v20/v20/build_source/scripts/generate_example_from_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,10 @@

import json
import logging
import os

from typing import Any, Dict, Union, List

# from datetime import datetime
from pathlib import Path

from settings import RESOLVED_SCHEMA_FILE_NAME, EXAMPLE_PATH, LOG_FORMAT

# Configuration
Expand All @@ -32,18 +30,62 @@ def read_schema(filepath: str) -> Dict[str, Any]:
"""Read a JSON schema from a file.
Args:
filename (str): The path to the JSON schema file.
filepath (str): The path to the JSON schema file.
Returns:
Dict[str, Any]: The JSON schema as a dictionary.
"""

with open(filepath, "r", encoding="utf-8") as file:
schema = json.load(file)
print(f"Processing schema: {schema}")
return schema


def generate_example(
def read_metadata_schema(filepath: str) -> Dict[str, Any]:
    """Read a JSON schema from a file without raising on failure.

    Progress and error messages are written to stdout with ``print``.

    Args:
        filepath (str): The path to the JSON schema file.

    Returns:
        Dict[str, Any]: The JSON schema as a dictionary. An empty dictionary
        is returned when the file does not exist, cannot be read, is not
        valid JSON, or its top-level value is not a JSON object.
    """
    # Open first and handle the failure (EAFP) instead of checking for
    # existence beforehand: the file could disappear between the check and
    # the open, and the check does not catch unreadable paths anyway.
    try:
        with open(filepath, "r", encoding="utf-8") as file:
            schema = json.load(file)
    except FileNotFoundError:
        print(f"Error: File '{filepath}' does not exist.")
        return {}
    except json.JSONDecodeError as e:
        print(f"Error reading JSON: {e}")
        return {}
    except Exception as e:
        # Deliberately broad: callers rely on this function never raising
        # (e.g. directory paths, permission or decoding errors).
        print(f"An unexpected error occurred while reading the schema: {e}")
        return {}

    # Basic validation of schema structure
    if not isinstance(schema, dict):
        print("Error: Schema is not a dictionary. Check the schema format.")
        return {}

    print(f"Schema loaded successfully from {filepath}")
    print(f"Schema top-level keys: {list(schema.keys())}")

    # Additional debugging info: Check expected keys
    if "$schema" not in schema or "type" not in schema:
        print(
            "Warning: Schema may be missing key fields like '$schema' or 'type'.")

    # Only the first 500 characters of the repr are shown to keep the
    # output readable for large schemas.
    print(
        f"Full schema content (trimmed for large files): {str(schema)[:500]}...")

    return schema


def generate_example_old(
schema: Dict[str, Any]
) -> Union[Dict[str, Any], List[Any], str, None]:
"""Generate a JSON object from the schema using the
Expand Down Expand Up @@ -91,7 +133,52 @@ def generate_example(
return None


def generate_json_from_schema(schema_file: str) -> Dict[str, Any]:
def extract_examples_from_schema(schema: Dict[str, Any]) -> Union[
        Dict[str, Any], List[Any], str, None]:
    """Generate an example value from a schema using its ``examples`` values.

    The schema is walked recursively. A node that has an ``examples`` key
    contributes that value directly; otherwise a placeholder is produced
    from the node's ``type``.

    Args:
        schema (Dict[str, Any]): A (sub-)schema, typically with ``type`` and
            optionally ``examples``, ``properties`` or ``items``.

    Returns:
        The example value for this node:
        - ``examples`` with exactly one list item: that item;
        - any other ``examples`` value: returned unchanged;
        - ``object``: a dict with one entry per key in ``properties``;
        - ``array``: the example of ``items``, wrapped in a list unless it
          already is one;
        - ``string``: ``""``; ``number``/``integer``: ``0``;
          ``boolean``: ``True``;
        - ``null``, a missing/empty/unknown type: ``None``.
    """
    # If the schema has an "examples" field it takes precedence over "type".
    if "examples" in schema:
        examples = schema["examples"]
        # A one-element list is unwrapped to its single value; longer lists
        # and non-list values are passed through as they are.
        if isinstance(examples, list) and len(examples) == 1:
            return examples[0]
        return examples

    schema_type = schema.get("type")
    if isinstance(schema_type, list):
        # For a union such as ["string", "null"] the first entry decides.
        # An empty list has no first entry and is treated as "no type".
        schema_type = schema_type[0] if schema_type else None

    # Objects: build the example property by property.
    if schema_type == "object":
        return {
            key: extract_examples_from_schema(value)
            for key, value in schema.get("properties", {}).items()
        }

    # Arrays: the example of the item schema becomes the array content.
    if schema_type == "array":
        example = extract_examples_from_schema(schema.get("items", {}))
        return example if isinstance(example, list) else [example]

    # Basic types get a fixed placeholder.
    if schema_type == "string":
        return ""
    if schema_type in ("number", "integer"):
        # "integer" is a distinct JSON Schema type; without this it would
        # fall through to None, which is not a valid integer example.
        return 0
    if schema_type == "boolean":
        return True

    # "null", missing or unrecognised types.
    return None


def create_json_from_schema(schema_file: str) -> Dict[str, Any]:
    """Generate a JSON object that conforms to the schema read from a file.

    Args:
        schema_file (str): The path to the JSON schema file.

    Returns:
        Dict[str, Any]: A JSON object generated from the schema.
    """
    schema = read_metadata_schema(schema_file)
    print(f"Create JSON from schema: {schema_file}")
    # Bind the result before printing: the debug print used to sit after the
    # return statement, where it was unreachable and referenced a name that
    # was never assigned.
    result = extract_examples_from_schema(schema)
    print(f"Create JSON object: {result}")
    return result


def save_json(data: Dict[str, Any], filename: Path) -> None:
Expand All @@ -116,10 +205,21 @@ def save_json(data: Dict[str, Any], filename: Path) -> None:

logger.info(f"example JSON generated and saved to {filename}")

def test_oemetadata_schema_should_validate_oemetadata_example(example):
    """Validate ``example`` against the OEMetadata v2.0 schema and print the result.

    A ``ValidationError`` is reported on stdout rather than raised; any
    other exception propagates to the caller.

    Args:
        example: The generated example object to validate.
    """
    # Imports are local to this function, as in the rest of this script.
    from jsonschema import ValidationError, validate
    from metadata.v20.v20.schema import OEMETADATA_V20_SCHEMA

    try:
        validate(example, OEMETADATA_V20_SCHEMA)
    except ValidationError as error:
        print("Cannot validate OEMetadata Example with Schema (v2.0)!", error)
    else:
        print("OEMetadata Example is valid OEMetadata Schema (v2.0).")


if __name__ == "__main__":
logger.info("Generation started.")
logger.info("Create OEMetadata Example from Schema.")
schema_filename = RESOLVED_SCHEMA_FILE_NAME
json_data = generate_json_from_schema(schema_filename)
json_data = create_json_from_schema(schema_filename)
save_json(json_data, EXAMPLE_PATH)
logger.info("Generation ended.")
logger.info("OEMetadata Example created!")
test_oemetadata_schema_should_validate_oemetadata_example(json_data)
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,22 @@ def main():

logger.info(f"template JSON generated and saved to {template_file_path}")

# WARNING: The metaMetadata is missing and the boundingBox is wrong!

def test_oemetadata_schema_should_validate_oemetadata_template():
    """Validate the OEMetadata v2.0 template against the v2.0 schema and print the result.

    A ``ValidationError`` is reported on stdout rather than raised; any
    other exception propagates to the caller.
    """
    # Imports are local to this function, as in the rest of this script.
    from jsonschema import ValidationError, validate
    from metadata.v20.v20.template import OEMETADATA_V20_TEMPLATE
    from metadata.v20.v20.schema import OEMETADATA_V20_SCHEMA

    try:
        validate(OEMETADATA_V20_TEMPLATE, OEMETADATA_V20_SCHEMA)
    except ValidationError as error:
        print("Cannot validate OEMetadata Template with Schema (v2.0)!", error)
    else:
        print("OEMetadata Template is valid OEMetadata Schema (v2.0).")


if __name__ == "__main__":
    # Script entry point: runs only when this file is executed directly.
    logger.info("Generation started.")
    main()
    # Validate the template imported from metadata.v20.v20.template against
    # the v2.0 schema. A ValidationError is printed, not raised, so it does
    # not stop the script.
    # NOTE(review): presumably the imported template is the file main() just
    # wrote — confirm.
    test_oemetadata_schema_should_validate_oemetadata_template()
    logger.info("Generation ended.")
Loading

0 comments on commit 5567c9c

Please sign in to comment.