Skip to content

Commit

Permalink
feat(create-cart-diagram): add create-cart-diagram pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
yannforget committed Nov 3, 2024
1 parent 73deb2e commit 1ee941e
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 0 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/push-create-cart-diagram-meg.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Pushes the `create-cart-diagram` pipeline to the `pathways-meghalaya-558459`
# OpenHEXA workspace. Runs on every push that touches this workflow file or
# anything under `create-cart-diagram/`.
name: Push create-cart-diagram (meghalaya)

on:
  push:
    paths:
      - ".github/workflows/push-create-cart-diagram-meg.yml"
      - "create-cart-diagram/**"

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      # v2 of checkout/setup-python targets an end-of-life Node runtime;
      # use the currently maintained major versions instead.
      - name: Checkout
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Configure OpenHEXA CLI
        uses: blsq/openhexa-cli-action@v1
        with:
          workspace: "pathways-meghalaya-558459"
          token: ${{ secrets.OH_TOKEN_MEG }}

      # The pipeline version is named after the commit SHA and linked back to
      # the commit; `--yes` skips the interactive confirmation prompt.
      - name: Push pipeline to OpenHEXA
        run: |
          openhexa pipelines push create-cart-diagram \
            -n ${{ github.sha }} \
            -l "https://github.com/BLSQ/pathways-typing-pipelines/commit/${{ github.sha }}" \
            --yes
151 changes: 151 additions & 0 deletions create-cart-diagram/pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import json
from datetime import datetime
from pathlib import Path
from typing import Optional

from openhexa.sdk import Dataset, current_run, parameter, pipeline, workspace
from pathways.typing.mermaid import cart_diagram
from pathways.typing.tree import build_binary_tree, merge_trees


@pipeline("create-cart-diagram", name="Create CART diagram")
@parameter(
    "cart_outputs",
    name="CART outputs",
    help="OpenHEXA dataset containing JSON CART outputs",
    type=Dataset,
    required=True,
)
@parameter(
    "version_name",
    name="Dataset version",
    help="You can optionally specify the dataset version to use. If not specified, the latest version will be used.",
    type=str,
    required=False,
)
@parameter(
    "output_dir",
    name="Output directory",
    help="If not specified, outputs will be saved into `workspace/typing/data/output/cart_diagram`",
    type=str,
    required=False,
)
def create_cart_diagram(
    cart_outputs: Dataset, version_name: Optional[str], output_dir: Optional[str]
):
    """Build a mermaid CART diagram from the CART outputs stored in a dataset.

    Parameters
    ----------
    cart_outputs : Dataset
        Dataset holding the urban and rural CART JSON outputs.
    version_name : str, optional
        Name of the dataset version to read. When empty, the latest version
        of the dataset is used.
    output_dir : str, optional
        Directory, joined onto `workspace.files_path`, where the diagram is
        written. When empty, the diagram goes to
        `typing/data/output/cart_diagram/<dataset version>/<timestamp>` under
        `workspace.files_path`.
    """
    # NOTE(review): `data` is subscripted directly in this function; this
    # assumes the task call hands back the dict itself rather than a deferred
    # task handle -- confirm against the OpenHEXA SDK.
    data = load_dataset(dataset=cart_outputs, version_name=version_name)

    files_root = workspace.files_path
    if output_dir:
        target_dir = Path(files_root, output_dir)
    else:
        # No explicit directory: use a per-version, per-run default location.
        dataset_version = data["version"]
        run_stamp = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
        default_parts = (
            "typing",
            "data",
            "output",
            "cart_diagram",
            dataset_version,
            run_stamp,
        )
        target_dir = Path(files_root, *default_parts)

    generate_diagram(
        urban_cart=data["urban"],
        rural_cart=data["rural"],
        output_dir=target_dir,
        version_name=data["version"],
    )


@create_cart_diagram.task
def load_dataset(dataset: Dataset, version_name: str | None = None) -> dict:
    """Load the urban and rural CART JSON files from a dataset version.

    Parameters
    ----------
    dataset : Dataset
        The dataset containing the urban and rural JSON files.
    version_name : str, optional
        The name of the dataset version to use. If not specified, the latest
        version is used.

    Returns
    -------
    dict
        A dictionary with three keys: `urban` and `rural` (the parsed content
        of `urban_frame.json` and `rural_frame.json`) and `version` (the name
        of the dataset version that was read).

    Raises
    ------
    FileNotFoundError
        If the requested version does not exist, if the dataset has no
        version at all, or if one of the two JSON files is missing from the
        selected version.
    """
    # if a dataset version has been specified, use it
    # use the latest dataset version by default
    if version_name:
        ds = next(
            (version for version in dataset.versions if version.name == version_name),
            None,
        )
        if ds is None:
            msg = f"Dataset version `{version_name}` not found"
            current_run.log_error(msg)
            raise FileNotFoundError(msg)
    else:
        ds = dataset.latest_version
        # Fail with an explicit message instead of an AttributeError on
        # `ds.files` when there is no latest version to read from.
        if ds is None:
            msg = "Dataset does not have any version"
            current_run.log_error(msg)
            raise FileNotFoundError(msg)

    # load urban & rural json files from dataset
    urban = None
    rural = None
    for f in ds.files:
        if f.filename == "urban_frame.json":
            urban = json.loads(f.read().decode())
        elif f.filename == "rural_frame.json":
            rural = json.loads(f.read().decode())

    if urban is None:
        msg = "Urban JSON file not found in dataset"
        current_run.log_error(msg)
        raise FileNotFoundError(msg)
    if rural is None:
        msg = "Rural JSON file not found in dataset"
        current_run.log_error(msg)
        raise FileNotFoundError(msg)

    return {"urban": urban, "rural": rural, "version": ds.name}


@create_cart_diagram.task
def generate_diagram(
    urban_cart: list[dict], rural_cart: list[dict], output_dir: Path, version_name: str
):
    """Generate a mermaid diagram from urban and rural CART outputs.

    Both trees are merged into a single tree before generating the diagram.
    The diagram is written to `<output_dir>/<version_name>_diagram.txt` and
    registered as a file output of the current run.

    Parameters
    ----------
    urban_cart : list[dict]
        The urban CART output (nodes as list of dicts)
    rural_cart : list[dict]
        The rural CART output (nodes as list of dicts)
    output_dir : Path
        The output directory to save the diagram. It is created (including
        missing parents) if it does not exist yet.
    version_name : str
        The name of the dataset version
    """
    urban = build_binary_tree(urban_cart, strata="urban")
    current_run.log_info(f"Loaded urban CART ({len(urban)} nodes)")
    rural = build_binary_tree(rural_cart, strata="rural")
    current_run.log_info(f"Loaded rural CART ({len(rural)} nodes)")
    root = merge_trees(urban, rural)
    current_run.log_info("Merged urban and rural CARTs")

    mermaid = cart_diagram(root)
    # Count lines outside the f-string: a backslash inside an f-string
    # replacement field is a SyntaxError before Python 3.12.
    n_lines = len(mermaid.split("\n"))
    current_run.log_info(f"Generated CART mermaid diagram ({n_lines} lines)")

    # The output directory may not exist yet, so create it before writing;
    # open() does not create missing directories.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    fp = output_dir / f"{version_name}_diagram.txt"
    # Explicit encoding so the written file does not depend on the locale.
    with open(fp, "w", encoding="utf-8") as f:
        f.write(mermaid)

    current_run.add_file_output(fp.absolute().as_posix())
1 change: 1 addition & 0 deletions create-cart-diagram/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pathways-typing @ git+https://github.com/BLSQ/pathways-typing@main

0 comments on commit 1ee941e

Please sign in to comment.