Merge pull request #155 from linkml/dmbl_importer
add stub of a dbml importer
sierra-moxon authored Jan 15, 2025
2 parents 2ae9ba2 + 3b952f0 commit 3d1ecfd
Showing 5 changed files with 193 additions and 8 deletions.
7 changes: 7 additions & 0 deletions docs/packages/importers.rst
@@ -77,6 +77,13 @@ NCI implements a JSON serialization of ISO 11179. You can import this JSON and c
schemauto import-cadsr "cdes/*.json"
Importing from DBML
--------------------

DBML (Database Markup Language) is a simple DSL for defining database schemas, with a syntax modeled on a simplified form of SQL DDL.
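
As a hedged illustration of usage (this stub does not include CLI wiring, so the sketch drives the importer class added in this PR directly; the file name is a placeholder):

from schema_automator.importers.dbml_import_engine import DbmlImportEngine

# Convert a DBML file into a LinkML SchemaDefinition (illustrative sketch)
engine = DbmlImportEngine()
schema = engine.convert(file="my_tables.dbml", name="MyDatabaseSchema")
print(list(schema.classes))  # DBML tables become LinkML classes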



Packages for importing
----------------------

26 changes: 18 additions & 8 deletions poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -53,6 +53,7 @@ click-default-group = "^1.2.4"
linkml-runtime = "^1.7.2"
duckdb = "^0.10.1"
numpy = "<2.0"
pydbml = "^1.1.2"

[tool.poetry.dev-dependencies]
pytest = ">=7.1.1"
95 changes: 95 additions & 0 deletions schema_automator/importers/dbml_import_engine.py
@@ -0,0 +1,95 @@
from schema_automator.importers.import_engine import ImportEngine
from pydbml import PyDBML
from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition, SlotDefinition
from dataclasses import dataclass


def _map_dbml_type_to_linkml(dbml_type: str) -> str:
"""
Maps DBML data types to LinkML types.
:param dbml_type: The DBML column type.
:return: Corresponding LinkML type.
"""
type_mapping = {
"int": "integer",
"varchar": "string",
"text": "string",
"float": "float",
"boolean": "boolean",
"date": "date",
"datetime": "datetime",
}
return type_mapping.get(dbml_type.lower(), "string")
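# Illustrative examples (annotation added for clarity, not part of the committed module):
#   _map_dbml_type_to_linkml("VARCHAR")  -> "string"
#   _map_dbml_type_to_linkml("jsonb")    -> "string"  (unmapped types fall back to "string")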


@dataclass
class DbmlImportEngine(ImportEngine):
"""
An ImportEngine that introspects a DBML schema to determine a corresponding LinkML schema.
"""

def convert(
self,
file: str,
name: str = None,
model_uri: str = None,
identifier: str = None,
**kwargs
) -> SchemaDefinition:
"""
Converts a DBML schema file into a LinkML SchemaDefinition.
:param file: Path to the DBML schema file.
:param name: Optional name for the generated LinkML schema.
:param model_uri: Optional URI for the schema.
:param identifier: Identifier field for the schema.
:return: SchemaDefinition object representing the DBML schema.
"""
# Initialize the schema definition
schema_name = name or "GeneratedSchema"
schema = SchemaDefinition(name=schema_name, id=model_uri or f"https://example.org/{schema_name}")

# Parse the DBML file
with open(file, 'r', encoding='utf-8') as f:
dbml_content = f.read()
parsed_dbml = PyDBML(dbml_content)

# Process tables
for table in parsed_dbml.tables:
class_def = ClassDefinition(
name=table.name,
description=table.note or f"Auto-generated class for table '{table.name}'",
slots=[],
unique_keys=[], # Initialize unique keys property
)
processed_slots = set() # Track processed slot names to avoid duplicates

# Handle primary key and unique constraints
primary_key_columns = [col for col in table.columns if col.pk]
unique_columns = [col for col in table.columns if col.unique and not col.pk]

# Process columns
for column in table.columns:

                slot_name = column.name
                if slot_name in processed_slots:
                    continue  # skip duplicate column names within this table
slot_def = SlotDefinition(
name=slot_name,
range=_map_dbml_type_to_linkml(column.type),
description=column.note or f"Column '{slot_name}'",
required=column in primary_key_columns or column.unique,
identifier=column in primary_key_columns, # Mark primary key columns as identifiers
)
schema.slots[slot_name] = slot_def
class_def.slots.append(slot_name)
processed_slots.add(slot_name)

# Handle single unique column as primary key if no explicit primary key exists
if not primary_key_columns and len(unique_columns) == 1:
unique_column = unique_columns[0]
schema.slots[unique_column.name].identifier = True
schema.slots[unique_column.name].required = True

schema.classes[table.name] = class_def

return schema
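
For orientation, a minimal sketch (not part of the committed module; the input path is a placeholder) of converting a file with the engine above and serializing the result with linkml-runtime's YAML dumper:

from linkml_runtime.dumpers import yaml_dumper
from schema_automator.importers.dbml_import_engine import DbmlImportEngine

# Convert a DBML file and print the generated LinkML schema as YAML (sketch)
schema = DbmlImportEngine().convert(file="tables.dbml", name="TablesSchema")
print(yaml_dumper.dumps(schema))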
72 changes: 72 additions & 0 deletions tests/test_importers/test_dbml_importer.py
@@ -0,0 +1,72 @@
import pytest
from linkml_runtime.linkml_model import SchemaDefinition
from schema_automator.importers.dbml_import_engine import DbmlImportEngine

# Sample DBML content for testing
DBML_SAMPLE = """
Table Users {
id int [primary key, not null]
email varchar [unique, not null]
username varchar
}
Table Orders {
order_id int [not null]
user_id int [not null]
product_id int [not null]
quantity int
}
Table Countries {
code varchar [primary key, not null]
name varchar [not null]
}
"""

@pytest.fixture
def dbml_file(tmp_path):
"""
Fixture to create a temporary DBML file.
"""
dbml_path = tmp_path / "test.dbml"
dbml_path.write_text(DBML_SAMPLE)
return dbml_path

@pytest.fixture
def importer():
"""
Fixture to initialize the DbmlImportEngine.
"""
return DbmlImportEngine()

def test_dbml_to_linkml_conversion(dbml_file, importer):
"""
Test the basic conversion of DBML to a LinkML schema.
"""
schema = importer.convert(file=str(dbml_file), name="TestSchema")

# Assert the schema object is created
assert isinstance(schema, SchemaDefinition)

# Check that expected classes are present
assert "Users" in schema.classes
assert "Orders" in schema.classes

# Check that expected slots are present
assert "id" in schema.slots
assert schema.slots["id"].identifier
assert schema.slots["id"].required


def test_primary_key_handling(dbml_file, importer):
"""
Test correct handling of primary keys and required attributes.
"""
schema = importer.convert(file=str(dbml_file), name="TestSchema")

# Check that primary keys are marked as required and identifiers
users_class = schema.classes["Users"]
assert "id" in users_class.slots
assert schema.slots["id"].identifier
assert schema.slots["id"].required
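
The engine's fallback that promotes a single unique, non-primary-key column to the identifier is not exercised by DBML_SAMPLE above; a hedged sketch of an additional test for that path, using a hypothetical table:

# Hypothetical table: no primary key, exactly one unique column
DBML_UNIQUE_ONLY = """
Table Tokens {
  token varchar [unique, not null]
  issued_at datetime
}
"""

def test_unique_column_promoted_to_identifier(tmp_path, importer):
    """
    Sketch: the lone unique column should be marked as identifier and required.
    """
    dbml_path = tmp_path / "unique_only.dbml"
    dbml_path.write_text(DBML_UNIQUE_ONLY)
    schema = importer.convert(file=str(dbml_path), name="UniqueOnlySchema")
    assert schema.slots["token"].identifier
    assert schema.slots["token"].required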
