Add code and on-demand workflow to manage Across bridge metadata from a gsheet source of truth #921

Open · wants to merge 3 commits into main
24 changes: 24 additions & 0 deletions .github/workflows/chain_metadata.yml
@@ -0,0 +1,24 @@
name: On-Demand Chain Metadata Updates
run-name: ${{ github.event.created_at }}
on:
  workflow_dispatch:
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  run-daily-tasks:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v2
      - name: Set up Python
        run: uv python install
      - name: Install the project
        run: uv sync --all-extras --dev
      - name: Chain Metadata
        run: uv run opdata chains chain_metadata_updates
        env:
          OPLABS_ENV: prod
          OP_ANALYTICS_VAULT: ${{ secrets.OP_ANALYTICS_VAULT }}
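The workflow only exposes a workflow_dispatch trigger, so runs are kicked off from the Actions tab or through the GitHub API. A rough sketch of the API route, using the requests library and assuming a token with workflow permissions; the OWNER/REPO slug below is a placeholder:

import os

import requests

# POST to the workflow_dispatch endpoint for this workflow file; GitHub answers 204 on success.
resp = requests.post(
    "https://api.github.com/repos/OWNER/REPO/actions/workflows/chain_metadata.yml/dispatches",
    headers={
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
        "Accept": "application/vnd.github+json",
    },
    json={"ref": "main"},
)
resp.raise_for_status()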

10 changes: 6 additions & 4 deletions .github/workflows/uploads_api_daily.yml
@@ -18,9 +18,11 @@ jobs:
        run: uv python install
      - name: Install the project
        run: uv sync --all-extras --dev
      - name: Run tasks
        run: |
          OPLABS_ENV=prod uv run opdata pulls l2beat
          OPLABS_ENV=prod uv run opdata pulls dfl_stables
      - name: L2Beat
        run: OPLABS_ENV=prod uv run opdata pulls l2beat
        env:
          OP_ANALYTICS_VAULT: ${{ secrets.OP_ANALYTICS_VAULT }}
      - name: DefiLlama
        run: OPLABS_ENV=prod uv run opdata pulls dfl_stables
        env:
          OP_ANALYTICS_VAULT: ${{ secrets.OP_ANALYTICS_VAULT }}
8 changes: 8 additions & 0 deletions ddl/clickhouse_goldsky/default.across_bridge_metadata.sql
@@ -0,0 +1,8 @@
CREATE TABLE IF NOT EXISTS default.across_bridge_metadata_v2 (
    chain_name String,
    display_name String,
    mainnet_chain_id String,
    spokepool_address String
)
ENGINE = SharedMergeTree
ORDER BY chain_name
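The DDL returns no result set, so it can be applied with the run_goldsky_statement helper added further down in this change. A minimal sketch, assuming the SQL file is read from the repo root (how the DDL actually gets applied is not shown in this PR):

from pathlib import Path

from op_coreutils.clickhouse import run_goldsky_statement

# Execute the CREATE TABLE statement against the Goldsky ClickHouse instance.
ddl = Path("ddl/clickhouse_goldsky/default.across_bridge_metadata.sql").read_text()
run_goldsky_statement(ddl)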
packages/op-coreutils/src/op_coreutils/clickhouse/__init__.py
@@ -1 +1 @@
from .client import insert_arrow, run_goldsky_query, run_oplabs_query
from .client import insert_arrow, run_goldsky_query, run_goldsky_statement, run_oplabs_query
6 changes: 6 additions & 0 deletions packages/op-coreutils/src/op_coreutils/clickhouse/client.py
@@ -65,6 +65,12 @@ def init_client(instance: ClickHouseInstance):
    raise NotImplementedError()


def run_goldsky_statement(statement):
    """A statement does not return results."""
    client = init_client("GOLDSKY")
    client.query(statement)


def run_goldsky_query(
    query: str,
    parameters: dict[str, Any] | None = None,
3 changes: 1 addition & 2 deletions packages/op-coreutils/src/op_coreutils/gsheets.py
@@ -47,10 +47,9 @@ def get_worksheet(location_name: str, worksheet_name: str):
    locations, client = init_client()

    if location_name not in locations:
        log.warn(
        raise ValueError(
            f"Location {location_name} is not present in _GSHEETS_LOCATIONS. Will skip writing."
        )
        return

    sh = client.open_by_url(locations[location_name])
    worksheet = sh.worksheet(worksheet_name)
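With this change get_worksheet fails fast instead of warning and skipping. Callers that still want skip semantics would need to handle the error themselves; a hypothetical caller-side sketch:

from op_coreutils.gsheets import get_worksheet
from op_coreutils.logger import structlog

log = structlog.get_logger()

try:
    worksheet = get_worksheet("across_bridge", "[INPUT -ADMIN MANAGED]")
except ValueError:
    # Reproduce the old warn-and-skip behavior where a missing location is acceptable.
    log.warning("gsheet location is not configured, skipping")
    worksheet = None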
79 changes: 79 additions & 0 deletions packages/op-datasets/src/op_datasets/chains/across_bridge.py
@@ -0,0 +1,79 @@
import polars as pl

from op_coreutils.gsheets import read_gsheet
from op_coreutils.clickhouse import insert_arrow, run_goldsky_statement

DATABASE = "default"
TABLE = "across_bridge_metadata"


def upload_across_bridge_addresses(chains_df: pl.DataFrame):
    """Upload across bridge metadata to ClickHouse.

    - Load the data from the gsheet source of truth.
    - Verify it is consistent with Chain Metadata.
    - Upload to ClickHouse.
    """
    # Load and verify that the data is consistent with our Chain Metadata source of truth.
    df = load_across_bridge_addresses(chains_df)

    # In ClickHouse we store the mainnet_chain_id as a string.
    clickhouse_df = df.select(
        pl.col("chain_name"),
        pl.col("display_name"),
        pl.col("mainnet_chain_id").cast(pl.String),
        pl.col("spokepool_address"),
    )

    # Truncate is necessary so we avoid duplicates when inserting values.
    run_goldsky_statement(f"TRUNCATE TABLE {DATABASE}.{TABLE}")

    insert_arrow(
        instance="GOLDSKY",
        database=DATABASE,
        table=TABLE,
        df_arrow=clickhouse_df.to_arrow(),
    )


def load_across_bridge_addresses(chains_df: pl.DataFrame) -> pl.DataFrame:
    # Read CSV from Google Sheets Input
    raw_records = read_gsheet(
        location_name="across_bridge",
        worksheet_name="[INPUT -ADMIN MANAGED]",
    )
    raw_df = pl.DataFrame(raw_records, infer_schema_length=len(raw_records))

    # Ensure the dataframe is as we expect.
    assert raw_df.schema == {
        "chain_name": pl.String,
        "display_name": pl.String,
        "mainnet_chain_id": pl.Int64,
        "spokepool_address": pl.String,
    }

    # Ensure the information matches the chain metadata for goldsky_chains.
    joined_df = raw_df.join(
        chains_df.rename(
            dict(
                display_name="chain_metadata_display_name",
                mainnet_chain_id="chain_metadata_mainnet_chain_id",
            )
        ),
        left_on="chain_name",
        right_on="chain_name",
        validate="1:1",
    )

    filtered_df = joined_df.filter(
        (pl.col("display_name") != pl.col("chain_metadata_display_name"))
        | (pl.col("mainnet_chain_id") != pl.col("chain_metadata_mainnet_chain_id"))
    )

    if len(filtered_df) > 0:
        print(filtered_df)
        raise ValueError(
            "Across Bridge Addresses gsheet is inconsistent with chain metadata source of truth."
        )

    return raw_df
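For reference, read_gsheet is assumed here to return one record per worksheet row, which the schema assertion above then checks. A made-up example of the expected shape (the spokepool address is a placeholder, not a real deployment):

raw_records = [
    {
        "chain_name": "base",
        "display_name": "Base",
        "mainnet_chain_id": 8453,
        "spokepool_address": "0x0000000000000000000000000000000000000000",  # placeholder
    },
]

Any row whose display_name or mainnet_chain_id disagrees with the chain metadata for the same chain_name trips the ValueError above, so the gsheet cannot silently drift from the source of truth.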
13 changes: 9 additions & 4 deletions packages/op-datasets/src/op_datasets/chains/chain_metadata.py
@@ -88,10 +88,15 @@ def goldsky_chains(path: str | None = None):


def filter_to_goldsky_chains(clean_df: pl.DataFrame) -> pl.DataFrame:
    return clean_df.filter(pl.col("oplabs_db_schema").is_not_null()).select(
        "chain_name",
        "mainnet_chain_id",
        "oplabs_db_schema",
    return (
        clean_df.filter(pl.col("oplabs_db_schema").is_not_null())
        .select(
            "chain_name",
            "display_name",
            "mainnet_chain_id",
            "oplabs_db_schema",
        )
        .sort("chain_name")
    )
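A small illustration of the new behavior (the third chain is made up): rows without an oplabs_db_schema are dropped, display_name is now carried through, and the result comes back sorted by chain_name.

import polars as pl

from op_datasets.chains.chain_metadata import filter_to_goldsky_chains

clean_df = pl.DataFrame(
    {
        "chain_name": ["zora", "base", "newchain"],
        "display_name": ["Zora", "Base", "New Chain"],
        "mainnet_chain_id": [7777777, 8453, 999],
        "oplabs_db_schema": ["zora", "base", None],
    }
)
print(filter_to_goldsky_chains(clean_df))  # two rows: base first, then zora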


@@ -8,6 +8,7 @@

def register(func):
    REGISTERED_AUDITS[func.__name__] = func
    return func


VALID_HASH = r"^0x[\da-f]{64}$"
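The added return func matters because these registration helpers are used as decorators; the same one-line fix appears in register_model below. A self-contained sketch of the failure mode, using a made-up audit function:

REGISTERED_AUDITS: dict = {}


def register(func):
    REGISTERED_AUDITS[func.__name__] = func
    return func  # without this line, the decorated name would be rebound to None


@register
def dataset_is_complete(dataframes):  # hypothetical audit
    return all(len(df) > 0 for df in dataframes)


assert callable(dataset_is_complete)  # would fail if register returned None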
@@ -10,6 +10,7 @@

def register_model(func):
    REGISTERED_INTERMEDIATE_MODELS[func.__name__] = func
    return func


def load_model_definitions():
17 changes: 11 additions & 6 deletions src/op_analytics/cli/subcommands/chains/app.py
@@ -5,6 +5,7 @@
from op_coreutils.clickhouse import run_goldsky_query
from op_coreutils.gsheets import update_gsheet
from op_coreutils.logger import structlog
from op_datasets.chains.across_bridge import upload_across_bridge_addresses
from op_datasets.chains.chain_metadata import (
    filter_to_goldsky_chains,
    load_chain_metadata,
@@ -13,8 +14,8 @@
from op_datasets.etl.ingestion import ingest
from op_datasets.etl.ingestion.batches import split_block_range
from op_datasets.etl.intermediate import compute_intermediate
from op_datasets.utils.blockrange import BlockRange
from op_datasets.schemas import ONCHAIN_CURRENT_VERSION
from op_datasets.utils.blockrange import BlockRange
from rich import print
from typing_extensions import Annotated

@@ -72,12 +73,12 @@ def goldsky_sql(


@app.command()
def update_chain_metadata_gsheet():
    """Upload chain_metadata_raw.csv to Google Sheets.
def chain_metadata_updates():
    """Run various chain metadata related updates.

    The chain_metadata_raw.csv file is maintained manually by the OP Labs team. This function
    accepts a local CSV file with raw chain metadata. It loads the data, cleans it up and uploads
    it to Google Sheets.
    - Upload chain_metadata_raw.csv to Google Sheets.
    - Update the OP Analytics Chain Metadata [ADMIN MANAGED] google sheet.
    - Update the Across Superchain Bridge Addresses [ADMIN MANAGED] google sheet.

    TODO: Decide if we want to upload to Dune, Clickhouse, BigQuery, or op-analytics-static repo.
    """
@@ -99,6 +100,10 @@ def update_chain_metadata_gsheet():
        dataframe=to_pandas(goldsky_df),
    )

    # Upload the across bridge addresses.
    # Makes sure they are consistent with Chain Metadata.
    upload_across_bridge_addresses(goldsky_df)


@app.command()
def verify_goldsky_tables():