diff --git a/docs/Earthfile b/docs/Earthfile index 2947909ec..9a8d79276 100644 --- a/docs/Earthfile +++ b/docs/Earthfile @@ -1,6 +1,7 @@ VERSION 0.8 IMPORT ../earthly/docs AS docs-ci +IMPORT ../utilities/cql-to-d2 AS cql-to-d2-ci IMPORT .. AS cat-ci IMPORT ../examples/postgresql AS postgresql-ci @@ -12,6 +13,10 @@ src: # Now copy into that any artifacts we pull from the builds. COPY --dir cat-ci+repo-docs/repo includes + # Copy D2 contents to display in the docs. + COPY cql-to-d2-ci+src/tests/input/test_1.cql src/appendix/examples/diagrams/sample_d2.cql + COPY cql-to-d2-ci+src/tests/expected_output/test_1.d2 src/appendix/examples/diagrams/sample_d2.d2 + # Copy docs we build in the postgres example. COPY --dir postgresql-ci+build/docs src/appendix/examples/built_docs/postgresql diff --git a/docs/src/appendix/examples/d2-diagrams.md b/docs/src/appendix/examples/d2-diagrams.md new file mode 100644 index 000000000..fbf98168c --- /dev/null +++ b/docs/src/appendix/examples/d2-diagrams.md @@ -0,0 +1,39 @@ +--- +icon: material/draw +--- + +# Converting CQL to D2 + +This guide shows how to use the Earthly target +that converts a CQL schema file into a D2 diagram entity. + +The following is a sample of how to use the target: + +```earthly +VERSION 0.8 + +IMPORT utilities/cql-to-d2 AS cql-to-d2-utils + +example: + FROM scratch + + COPY . . 
+ + COPY (+cql-to-d2/diagrams --input="./input") ./output + + RUN ls ./output +``` + +## Converting result sample + +This is a sample of a valid CQL schema: + +```cql +{{ include_file('src/appendix/examples/diagrams/sample_d2.cql') }} +``` + +It is converted into the following D2 entity: + +```d2 +{{ include_file('src/appendix/examples/diagrams/sample_d2.d2') }} +``` diff --git a/earthly/cassandra/Earthfile b/earthly/cassandra/Earthfile new file mode 100644 index 000000000..8faa64c8b --- /dev/null +++ b/earthly/cassandra/Earthfile @@ -0,0 +1,37 @@ +# cspell: words scylladb ensurepath + +VERSION 0.8 + +IMPORT ../../utilities/cql-to-d2 AS cql-to-d2-utils + +scylladb-base: + FROM scylladb/scylla:6.1.1 + + WORKDIR /root + + RUN apt-get update && apt-get install -y python3 pipx + + RUN pipx ensurepath + RUN pipx install poetry + +# cql-to-d2 - converts cql files into d2 diagram entity files +cql-to-d2: + FROM +scylladb-base + + ARG --required input + + COPY cql-to-d2-utils+src/main.py . + + COPY $input ./src + + RUN python3 main.py ./src ./diagrams + + SAVE ARTIFACT ./diagrams + +CQL_TO_D2: + FUNCTION + + ARG --required input + ARG --required output + + COPY (+cql-to-d2/diagrams --input=$input) $output \ No newline at end of file diff --git a/utilities/cql-to-d2/Earthfile b/utilities/cql-to-d2/Earthfile new file mode 100644 index 000000000..256346b46 --- /dev/null +++ b/utilities/cql-to-d2/Earthfile @@ -0,0 +1,26 @@ +VERSION 0.8 + +IMPORT github.com/input-output-hk/catalyst-ci/earthly/python:v3.1.7 AS python-ci + +check: + FROM python-ci+python-base + + COPY . . + + DO python-ci+CHECK + +test: + FROM python-ci+python-base + + COPY . . + + RUN python3 main.py tests/input tests/output + RUN cmp -s tests/expected_output/test_1.d2 tests/output/test_1.d2 && echo "Results are identical." || { echo "Results are different."; exit 1; } + +src: + FROM scratch + + COPY . . 
+ + SAVE ARTIFACT tests + SAVE ARTIFACT main.py \ No newline at end of file diff --git a/utilities/cql-to-d2/README.md b/utilities/cql-to-d2/README.md new file mode 100644 index 000000000..f06f84e95 --- /dev/null +++ b/utilities/cql-to-d2/README.md @@ -0,0 +1,39 @@ +# Cassandra Schema to D2 Diagram Converter + +Converts Cassandra schemas to D2 diagram entity `sql_table`. +The program accepts two arguments, `<src_dir>` and `<out_dir>`. +It reads the whole `<src_dir>` directory. +Only the files with the `.cql` extension are read. +Each one is converted individually into a D2 diagram entity file with the `.d2` extension. +If `<out_dir>` does not exist, +the directory is created automatically. + +## How to use it as a CLI + +```bash +python3 main.py <src_dir> <out_dir> +``` + +## How to use it as an Earthly target + +You can simply reference the target in `earthly/cassandra` in this repository. +The target is `cql-to-d2`. +Make sure you include the required arguments. +After using the target, +you can save the artifact (output) according to your output path. + +```earthly +COPY (+cql-to-d2/ --input="./" --output="./") ./ +``` + +Include this line in your target. + +## A valid CQL file and limitations + +* Make sure that a CQL file is fundamentally syntactically correct. +* Only unquoted names are supported. +* Secondary indexes are not supported. +* User-defined types (UDT) are not supported. +* Only one table per CQL file. +* Items inside `PRIMARY KEY` must not be empty. +* In-line primary keys are not supported. 
diff --git a/utilities/cql-to-d2/main.py b/utilities/cql-to-d2/main.py
new file mode 100644
index 000000000..1fbd3c122
--- /dev/null
+++ b/utilities/cql-to-d2/main.py
@@ -0,0 +1,448 @@
+# cspell: words timeuuid tinyint
+
+"""Convert Cassandra CQL table schemas into D2 `sql_table` entities.
+
+Usage: `python3 main.py <src_dir> <out_dir>`. Every `.cql` file found
+directly inside `<src_dir>` is parsed and written to `<out_dir>` as a
+`.d2` file with the same base name.
+"""
+
+import os
+import re
+import sys
+from enum import Enum
+from pathlib import Path
+
+RE_PARENS = r"\((.*?)\)"
+RE_GENERIC = r"<(.*)>"
+RE_COMMAS = r",\s*"
+RE_SPACES = r"\s+"
+
+# Captures the table name of a `CREATE TABLE [IF NOT EXISTS] name` line.
+RE_CREATE_TABLE = re.compile(
+    r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?([^\s(]+)", re.IGNORECASE
+)
+# Captures the column list of a `CLUSTERING ORDER BY (...)` table option.
+RE_CLUSTERING_ORDER = re.compile(
+    r"CLUSTERING\s+ORDER\s+BY\s*\((.*?)\)", re.IGNORECASE
+)
+
+PRIMITIVE_TYPES = [
+    "ascii",
+    "bigint",
+    "blob",
+    "boolean",
+    "date",
+    "decimal",
+    "double",
+    "float",
+    "inet",
+    "int",
+    "smallint",
+    "text",
+    "time",
+    "timestamp",
+    "timeuuid",
+    "tinyint",
+    "uuid",
+    "varchar",
+    "varint",
+]
+
+DataContainerType = Enum(
+    "DataContainerType", ["NONE", "LIST", "MAP", "SET", "TUPLE", "UDT"]
+)
+
+
+class Table:
+    """Represents a single table object, typically for a single CQL file.
+
+    Attributes:
+        file_name: Base name of the source file, reused for the output.
+        name: Table name taken from the `CREATE TABLE` statement.
+        desc: Text of the `--` comment lines found above the table.
+        fields: Parsed column and comment-only lines, in file order.
+        clustering_keys: Columns rendered with the `K` constraint. The
+            parser stores the partition key columns here.
+        asc_keys: Columns rendered with the `P↑` constraint.
+        desc_keys: Columns rendered with the `P↓` constraint.
+    """
+
+    def __init__(self, file_name: str):
+        self.file_name = file_name
+        self.name = ""
+        self.desc = ""
+        self.fields: list[Field] = []
+        self.clustering_keys: list[str] = []
+        self.asc_keys: list[str] = []
+        self.desc_keys: list[str] = []
+
+    def alter_clustering_order(self, col_name: str, desc: bool):
+        """Moves a column between the ascending and descending key lists.
+
+        A column is only moved when it is in the list it has to leave, so
+        columns that are not clustering keys are never added.
+        """
+        if desc and col_name in self.asc_keys:
+            self.asc_keys.remove(col_name)
+            self.desc_keys.append(col_name)
+        if not desc and col_name in self.desc_keys:
+            self.desc_keys.remove(col_name)
+            self.asc_keys.append(col_name)
+
+    def to_d2_format(self) -> str:
+        """Renders the table as a D2 `sql_table` entity."""
+        # format tooltip
+        f_tooltip_lines: list[str] = []
+        if self.desc:
+            f_tooltip_lines.append(f"-- {self.desc}\n")
+
+        # format fields
+        f_field_lines: list[str] = []
+        for field in self.fields:
+            if field.is_only_comment():
+                f_tooltip_lines.append(f"-- {field.comment}")
+                continue
+
+            constraint_keys: list[str] = []
+
+            if field.name in self.clustering_keys:
+                constraint_keys.append("K")
+            if field.name in self.asc_keys:
+                constraint_keys.append("P↑")
+            if field.name in self.desc_keys:
+                constraint_keys.append("P↓")
+
+            f_field_lines.append(field.to_d2_format(constraint_keys))
+
+            if field.comment != "":
+                f_tooltip_lines.append(f"{field.name} -- {field.comment}")
+
+        return "\n".join(
+            [
+                self.name + ": {",
+                "\tshape: sql_table",
+                "\ttooltip: |md",
+                "\n".join([f"\t\t{li}" for li in f_tooltip_lines]),
+                "\t|",
+                "",
+                "\n".join(f_field_lines),
+                "}",
+            ]
+        )
+
+
+class Field:
+    """Represents a field inside a table.
+
+    Attributes:
+        name: Column name; empty for a comment-only line.
+        types: Type names; the inner types for a container column.
+        container_type: Kind of container wrapping `types`, if any.
+        comment: Text of the `--` comment on the line, if any.
+        is_static: Whether the column is declared `static`.
+        is_counter: Whether the column is declared as `counter`.
+    """
+
+    def __init__(self) -> None:
+        self.name = ""
+        self.types: list[str] = []
+        self.container_type = DataContainerType.NONE
+        self.comment = ""
+        self.is_static = False
+        self.is_counter = False
+
+    def is_only_comment(self):
+        """Tells whether the line holds no column definition."""
+        return self.name == "" or (
+            len(self.types) == 0 and not self.is_counter
+        )
+
+    def to_d2_format(self, constraint_keys: list[str]) -> str:
+        """Renders the field as one row of a D2 `sql_table`.
+
+        Neither the field nor `constraint_keys` is modified, so rendering
+        the same field again gives the same row.
+
+        Args:
+            constraint_keys: Key constraints already known for the column.
+
+        Returns:
+            The tab-indented D2 row.
+        """
+        # work on copies: appending to the originals changed later renders
+        constraints = list(constraint_keys)
+        types = list(self.types)
+
+        if self.is_static:
+            constraints.append("S")
+        if self.is_counter:
+            constraints.append("++")
+            types.append("bigint")
+
+        # format constraints
+        f_constraints = (
+            " {constraint: [" + "; ".join(constraints) + "]}"
+            if constraints
+            else ""
+        )
+
+        # check for udt
+        container_type = self.container_type
+        if (
+            len(types) == 1
+            and container_type == DataContainerType.NONE
+            and types[0] not in PRIMITIVE_TYPES
+        ):
+            container_type = DataContainerType.UDT
+
+        # format col name according to its type container
+        f_name = self.name
+        if container_type == DataContainerType.LIST:
+            f_name = f'"[{self.name}]"'
+        elif container_type == DataContainerType.SET:
+            f_name = '"{' + self.name + '}"'
+        elif container_type == DataContainerType.MAP:
+            f_name = f'"<{self.name}>"'
+        elif container_type == DataContainerType.TUPLE:
+            f_name = f'"({self.name})"'
+        elif container_type == DataContainerType.UDT:
+            f_name = f'"*{self.name}*"'
+
+        return f"\t{f_name}: ({', '.join(types)})" + f_constraints
+
+
+def str_to_container_type(s: str) -> DataContainerType:
+    """Maps a CQL container keyword to its `DataContainerType`.
+
+    Names that are not members of the enum give `DataContainerType.NONE`.
+    """
+    try:
+        return DataContainerType[s.upper()]
+    except KeyError:
+        return DataContainerType.NONE
+
+
+def parse_src(src_dir: str) -> list[Table]:
+    """Reads the target directory and parses all the CQL files.
+
+    Only regular files ending in `.cql` directly inside `src_dir` are
+    parsed, in sorted file name order.
+
+    Raises:
+        Exception: If `src_dir` is not a directory.
+    """
+
+    if not os.path.isdir(src_dir):
+        raise Exception(f"'{src_dir}' is not a directory.")
+
+    return [
+        parse_file(os.path.join(src_dir, f))
+        for f in sorted(os.listdir(src_dir))
+        if os.path.isfile(os.path.join(src_dir, f)) and f.endswith(".cql")
+    ]
+
+
+def _split_names(text: str) -> list[str]:
+    """Splits a comma separated list into trimmed, non-empty names."""
+    return [name.strip() for name in text.split(",") if name.strip()]
+
+
+def _parse_primary_key(line: str) -> tuple[list[str], list[str]]:
+    """Splits a `PRIMARY KEY (...)` line into its two column groups.
+
+    Both `PRIMARY KEY (a, b)` and the composite partition key form
+    `PRIMARY KEY ((a, b), c)` are handled.
+
+    Returns:
+        The partition key columns and the columns listed after them.
+
+    Raises:
+        ValueError: If the parentheses are missing or unbalanced.
+    """
+    start = line.find("(")
+    end = -1
+    depth = 0
+    if start != -1:
+        # find the parenthesis that closes the first one
+        for idx, char in enumerate(line[start:], start):
+            if char == "(":
+                depth += 1
+            elif char == ")":
+                depth -= 1
+                if depth == 0:
+                    end = idx
+                    break
+    if end == -1:
+        raise ValueError(f"Malformed primary key definition: '{line}'")
+
+    body = line[start + 1 : end].strip()
+    if body.startswith("("):
+        # composite partition key: the names inside the inner parentheses
+        inner_end = body.index(")")
+        return (
+            _split_names(body[1:inner_end]),
+            _split_names(body[inner_end + 1 :]),
+        )
+
+    names = _split_names(body)
+    return names[:1], names[1:]
+
+
+def _parse_field(line: str) -> Field:
+    """Parses one stripped line of the table body into a `Field`.
+
+    Text after `--` becomes the comment. A line without a column
+    definition gives a comment-only field.
+    """
+    field = Field()
+
+    definition, _, comment = line.partition("--")
+    field.comment = " ".join(comment.split())
+
+    tokens = definition.split()
+    if not tokens:
+        return field
+
+    field.name = tokens[0]
+    type_str = " ".join(tokens[1:]).removesuffix(",").strip()
+
+    if type_str.lower().endswith(" static"):
+        field.is_static = True
+        type_str = type_str[: -len(" static")].strip()
+    # exact match, so a type that only starts with `counter` is kept
+    if type_str.lower() == "counter":
+        field.is_counter = True
+        type_str = ""
+
+    generics_items: list[str] = re.findall(RE_GENERIC, type_str)
+    if generics_items:
+        field.container_type = str_to_container_type(
+            type_str.split("<")[0].strip()
+        )
+        field.types = re.split(RE_COMMAS, generics_items[0].strip())
+    elif type_str:
+        field.types = [type_str]
+
+    return field
+
+
+def _apply_clustering_order(table: Table, line: str) -> None:
+    """Applies a `CLUSTERING ORDER BY (...)` option found in `line`.
+
+    Lines without that option are ignored.
+
+    Raises:
+        ValueError: If an entry is not a `<column> <ASC|DESC>` pair.
+    """
+    match = RE_CLUSTERING_ORDER.search(line)
+    if match is None:
+        return
+
+    for item in _split_names(match.group(1)):
+        parts = item.split()
+        if len(parts) != 2:
+            raise ValueError(f"Malformed clustering order: '{item}'")
+
+        col_name, ordering_type = parts
+        if ordering_type.upper() == "ASC":
+            table.alter_clustering_order(col_name, False)
+        elif ordering_type.upper() == "DESC":
+            table.alter_clustering_order(col_name, True)
+
+
+def parse_file(file_path: str) -> Table:
+    """Reads a CQL file and parses the file.
+
+    The file is read line by line: `--` comments above `CREATE TABLE`
+    form the table description, lines up to the one starting with `)`
+    are columns or the `PRIMARY KEY` definition, and the lines from
+    there on are table options.
+
+    Raises:
+        ValueError: If the primary key or the clustering order is
+            malformed.
+    """
+
+    table = Table(extract_filename_without_ext(file_path))
+    # set once the `)` closing the column list is seen; option lines
+    # such as `AND comment = ...` must not be read as columns
+    in_options = False
+
+    with open(file_path, encoding="utf-8") as f:
+        for raw_line in f:
+            line = raw_line.strip()
+            if line == "":
+                continue
+
+            if table.name == "":
+                if line.startswith("--"):
+                    # table description; lines are joined with a space
+                    text = line[2:].strip()
+                    table.desc = f"{table.desc} {text}".strip()
+                else:
+                    # table name
+                    match = RE_CREATE_TABLE.search(line)
+                    if match:
+                        table.name = match.group(1)
+            elif in_options or line.startswith(")"):
+                # table options
+                in_options = True
+                _apply_clustering_order(table, line)
+            elif line == "(":
+                # opening parenthesis on its own line
+                continue
+            elif line.split()[0].upper() == "PRIMARY":
+                # primary definition line
+                table.clustering_keys, table.asc_keys = _parse_primary_key(
+                    line
+                )
+            else:
+                # data column definition line
+                table.fields.append(_parse_field(line))
+
+    return table
+
+
+def extract_filename_without_ext(path: str) -> str:
+    """Returns the base name of `path` without its last extension."""
+    base_name = os.path.basename(path)
+    file_name, _ = os.path.splitext(base_name)
+    return file_name
+
+
+def write_to_file(dir_path: str, file_name: str, content: str):
+    """Writes `content` to `<file_name>.d2` inside `dir_path`.
+
+    The directory, including missing parents, is created when needed.
+    """
+    out_dir = Path(dir_path)
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # the content holds non-ASCII arrows, so the encoding is explicit
+    with open(out_dir / f"{file_name}.d2", "w", encoding="utf-8") as file:
+        file.write(content)
+
+
+def main():
+    """Converts the CQL files of `<src_dir>` into D2 files in `<out_dir>`.
+
+    Raises:
+        Exception: If the two directory arguments are not given.
+    """
+    if len(sys.argv) != 3:
+        raise Exception("Requires <src_dir> and <out_dir> to execute.")
+
+    [_, src_dir, out_dir] = sys.argv
+
+    abs_src_dir = os.path.abspath(src_dir)
+    abs_out_dir = os.path.abspath(out_dir)
+
+    tables = parse_src(abs_src_dir)
+
+    for table in tables:
+        write_to_file(abs_out_dir, table.file_name, table.to_d2_format())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/utilities/cql-to-d2/poetry.lock b/utilities/cql-to-d2/poetry.lock
new file mode 100644
index 000000000..e267e37a3
--- /dev/null
+++ b/utilities/cql-to-d2/poetry.lock
@@ -0,0 +1,32 @@ +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. + +[[package]] +name = "ruff" +version = "0.1.15" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5fe8d54df166ecc24106db7dd6a68d44852d14eb0729ea4672bb4d96c320b7df"}, + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f0bfbb53c4b4de117ac4d6ddfd33aa5fc31beeaa21d23c45c6dd249faf9126f"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0d432aec35bfc0d800d4f70eba26e23a352386be3a6cf157083d18f6f5881c8"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9405fa9ac0e97f35aaddf185a1be194a589424b8713e3b97b762336ec79ff807"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c66ec24fe36841636e814b8f90f572a8c0cb0e54d8b5c2d0e300d28a0d7bffec"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:6f8ad828f01e8dd32cc58bc28375150171d198491fc901f6f98d2a39ba8e3ff5"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86811954eec63e9ea162af0ffa9f8d09088bab51b7438e8b6488b9401863c25e"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd4025ac5e87d9b80e1f300207eb2fd099ff8200fa2320d7dc066a3f4622dc6b"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b17b93c02cdb6aeb696effecea1095ac93f3884a49a554a9afa76bb125c114c1"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ddb87643be40f034e97e97f5bc2ef7ce39de20e34608f3f829db727a93fb82c5"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_armv7l.whl", hash = 
"sha256:abf4822129ed3a5ce54383d5f0e964e7fef74a41e48eb1dfad404151efc130a2"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6c629cf64bacfd136c07c78ac10a54578ec9d1bd2a9d395efbee0935868bf852"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1bab866aafb53da39c2cadfb8e1c4550ac5340bb40300083eb8967ba25481447"}, + {file = "ruff-0.1.15-py3-none-win32.whl", hash = "sha256:2417e1cb6e2068389b07e6fa74c306b2810fe3ee3476d5b8a96616633f40d14f"}, + {file = "ruff-0.1.15-py3-none-win_amd64.whl", hash = "sha256:3837ac73d869efc4182d9036b1405ef4c73d9b1f88da2413875e34e0d6919587"}, + {file = "ruff-0.1.15-py3-none-win_arm64.whl", hash = "sha256:9a933dfb1c14ec7a33cceb1e49ec4a16b51ce3c20fd42663198746efc0427360"}, + {file = "ruff-0.1.15.tar.gz", hash = "sha256:f6dfa8c1b21c913c326919056c390966648b680966febcb796cc9d1aaab8564e"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.12.4" +content-hash = "b53ca02d26931e705c6098bde02af032f8e4637caf817a9b26f6ecad688c7eb6" diff --git a/utilities/cql-to-d2/pyproject.toml b/utilities/cql-to-d2/pyproject.toml new file mode 100644 index 000000000..839cdbeef --- /dev/null +++ b/utilities/cql-to-d2/pyproject.toml @@ -0,0 +1,34 @@ +[tool.poetry] +name = "cql-to-d2" +version = "0.1.0" +description = "A CQL schema coverter to D2 diagram" +authors = ["Catalyst Team"] +license = "MIT" +readme = "README.md" + + +[tool.poetry.dependencies] +python = "^3.12.4" + +[tool.poetry.group.dev.dependencies] +ruff = "^0.1.14" + +[tool.ruff] +line-length = 79 + +[tool.ruff.lint] +select = [ + "E", # pycodestyle + "F", # Pyflakes + "UP", # pyupgrade + "B", # flake8-bugbear + "SIM", # flake8-simplify + "I", # isort +] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/utilities/cql-to-d2/tests/expected_output/test_1.d2 b/utilities/cql-to-d2/tests/expected_output/test_1.d2 new file mode 100644 index 
000000000..ca8bad2f5 --- /dev/null +++ b/utilities/cql-to-d2/tests/expected_output/test_1.d2 @@ -0,0 +1,19 @@ +sample_table: { + shape: sql_table + tooltip: |md + + | + + column_name_1: (int) {constraint: [K]} + column_name_2: (int) {constraint: [P↑]} + column_name_3: (int) {constraint: [P↓]} + column_name_4: (text) {constraint: [S]} + column_name_5: (int) + column_name_6: (bigint) {constraint: [++]} + "[column_name_7]": (int) + "{column_name_8}": (int) + "": (int) + "*column_name_10*": (custom_int) + "(column_name_11)": (int, set) + column_name_12: (int) +} \ No newline at end of file diff --git a/utilities/cql-to-d2/tests/input/test_1.cql b/utilities/cql-to-d2/tests/input/test_1.cql new file mode 100644 index 000000000..bf89d226b --- /dev/null +++ b/utilities/cql-to-d2/tests/input/test_1.cql @@ -0,0 +1,16 @@ +CREATE TABLE IF NOT EXISTS sample_table ( + column_name_1 int, + column_name_2 int, + column_name_3 int, + column_name_4 text static, + column_name_5 int, + column_name_6 counter, + column_name_7 list, + column_name_8 set, + column_name_9 map, + column_name_10 custom_int, + column_name_11 tuple>, + column_name_12 int, + + PRIMARY KEY (column_name_1, column_name_2, column_name_3) +) WITH CLUSTERING ORDER BY (column_name_3 DESC); \ No newline at end of file