From 1ecaca87642d9ab57320116e99c1eb7085ff56a8 Mon Sep 17 00:00:00 2001
From: Pedro M Duarte
Date: Wed, 23 Oct 2024 16:15:36 -0400
Subject: [PATCH 1/3] Add code and on-demand workflow to manage across bridge
 metadata from a gsheet source of truth

---
 .github/workflows/chain_metadata.yml | 24 +++++++
 .github/workflows/uploads_api_daily.yml | 10 +--
 .../default.across_bridge_metadata.sql | 8 +++
 .../op-coreutils/src/op_coreutils/gsheets.py | 3 +-
 .../src/op_datasets/chains/across_bridge.py | 65 +++++++++++++++++++
 .../src/op_datasets/chains/chain_metadata.py | 13 ++--
 .../cli/subcommands/chains/app.py | 17 +++--
 7 files changed, 124 insertions(+), 16 deletions(-)
 create mode 100644 .github/workflows/chain_metadata.yml
 create mode 100644 ddl/clickhouse_goldsky/default.across_bridge_metadata.sql
 create mode 100644 packages/op-datasets/src/op_datasets/chains/across_bridge.py

diff --git a/.github/workflows/chain_metadata.yml b/.github/workflows/chain_metadata.yml
new file mode 100644
index 0000000000..4e5f5edd5d
--- /dev/null
+++ b/.github/workflows/chain_metadata.yml
@@ -0,0 +1,24 @@
+name: On-Demand Chain Metadata Updates
+run-name: ${{ github.event.created_at }}
+on:
+  workflow_dispatch:
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+jobs:
+  run-daily-tasks:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v2
+      - name: Set up Python
+        run: uv python install
+      - name: Install the project
+        run: uv sync --all-extras --dev
+      - name: Chain Metadata
+        run: uv run opdata chains chain_metadata_updates
+        env:
+          OPLABS_ENV: prod
+          OP_ANALYTICS_VAULT: ${{ secrets.OP_ANALYTICS_VAULT }}
+
diff --git a/.github/workflows/uploads_api_daily.yml b/.github/workflows/uploads_api_daily.yml
index 4079efcf20..8e5ceeae00 100644
--- a/.github/workflows/uploads_api_daily.yml
+++ b/.github/workflows/uploads_api_daily.yml
@@ -18,9 +18,11 @@ jobs:
         run: uv python install
       - name: Install the project
         run: uv sync --all-extras --dev
-      - name: Run tasks
-        run: |
-          OPLABS_ENV=prod uv run opdata pulls l2beat
-          OPLABS_ENV=prod uv run opdata pulls dfl_stables
+      - name: L2Beat
+        run: OPLABS_ENV=prod uv run opdata pulls l2beat
+        env:
+          OP_ANALYTICS_VAULT: ${{ secrets.OP_ANALYTICS_VAULT }}
+      - name: DefiLlama
+        run: OPLABS_ENV=prod uv run opdata pulls dfl_stables
         env:
           OP_ANALYTICS_VAULT: ${{ secrets.OP_ANALYTICS_VAULT }}
diff --git a/ddl/clickhouse_goldsky/default.across_bridge_metadata.sql b/ddl/clickhouse_goldsky/default.across_bridge_metadata.sql
new file mode 100644
index 0000000000..b3b20a1ad1
--- /dev/null
+++ b/ddl/clickhouse_goldsky/default.across_bridge_metadata.sql
@@ -0,0 +1,8 @@
+CREATE TABLE IF NOT EXISTS default.across_bridge_metadata_v2 (
+    chain_name String,
+    display_name String,
+    mainnet_chain_id UInt32,
+    spokepool_address String
+)
+ENGINE = SharedMergeTree
+ORDER BY chain_name
\ No newline at end of file
diff --git a/packages/op-coreutils/src/op_coreutils/gsheets.py b/packages/op-coreutils/src/op_coreutils/gsheets.py
index 2aea6cf1f3..113704ba82 100644
--- a/packages/op-coreutils/src/op_coreutils/gsheets.py
+++ b/packages/op-coreutils/src/op_coreutils/gsheets.py
@@ -47,10 +47,9 @@ def get_worksheet(location_name: str, worksheet_name: str):
     locations, client = init_client()

     if location_name not in locations:
-        log.warn(
+        raise ValueError(
             f"Location {location_name} is not present in _GSHEETS_LOCATIONS. Will skip writing."
         )
-        return

     sh = client.open_by_url(locations[location_name])
     worksheet = sh.worksheet(worksheet_name)
diff --git a/packages/op-datasets/src/op_datasets/chains/across_bridge.py b/packages/op-datasets/src/op_datasets/chains/across_bridge.py
new file mode 100644
index 0000000000..deb63c1711
--- /dev/null
+++ b/packages/op-datasets/src/op_datasets/chains/across_bridge.py
@@ -0,0 +1,65 @@
+import polars as pl
+
+from op_coreutils.gsheets import read_gsheet
+from op_coreutils.clickhouse import insert_arrow
+
+
+def upload_across_bridge_addresses(chains_df: pl.DataFrame):
+    """Upload Across bridge metadata to ClickHouse.
+
+    - Load the data from the gsheet source of truth.
+    - Verify it is consistent with Chain Metadata.
+    - Upload to ClickHouse.
+    """
+    # Load and verify that the data is consistent with our Chain Metadata source of truth.
+    df = load_across_bridge_addresses(chains_df)
+
+    insert_arrow(
+        instance="GOLDSKY",
+        database="default",
+        table="across_bridge_metadata_v2",
+        df_arrow=df.to_arrow(),
+    )
+
+
+def load_across_bridge_addresses(chains_df: pl.DataFrame) -> pl.DataFrame:
+    # Read CSV from Google Sheets Input
+    raw_records = read_gsheet(
+        location_name="across_bridge",
+        worksheet_name="[INPUT -ADMIN MANAGED]",
+    )
+    raw_df = pl.DataFrame(raw_records, infer_schema_length=len(raw_records))
+
+    # Ensure the dataframe is as we expect.
+    assert raw_df.schema == {
+        "chain_name": pl.String,
+        "display_name": pl.String,
+        "mainnet_chain_id": pl.Int64,
+        "spokepool_address": pl.String,
+    }
+
+    # Ensure the information matches the chain metadata for goldsky_chains.
+    joined_df = raw_df.join(
+        chains_df.rename(
+            dict(
+                display_name="chain_metadata_display_name",
+                mainnet_chain_id="chain_metadata_mainnet_chain_id",
+            )
+        ),
+        left_on="chain_name",
+        right_on="chain_name",
+        validate="1:1",
+    )
+
+    filtered_df = joined_df.filter(
+        (pl.col("display_name") != pl.col("chain_metadata_display_name"))
+        | (pl.col("mainnet_chain_id") != pl.col("chain_metadata_mainnet_chain_id"))
+    )
+
+    if len(filtered_df) > 0:
+        print(filtered_df)
+        raise ValueError(
+            "Across Bridge Addresses gsheet is inconsistent with chain metadata source of truth."
+        )
+
+    return raw_df
diff --git a/packages/op-datasets/src/op_datasets/chains/chain_metadata.py b/packages/op-datasets/src/op_datasets/chains/chain_metadata.py
index a720b0c276..f4a0fbcec1 100644
--- a/packages/op-datasets/src/op_datasets/chains/chain_metadata.py
+++ b/packages/op-datasets/src/op_datasets/chains/chain_metadata.py
@@ -88,10 +88,15 @@ def goldsky_chains(path: str | None = None):


 def filter_to_goldsky_chains(clean_df: pl.DataFrame) -> pl.DataFrame:
-    return clean_df.filter(pl.col("oplabs_db_schema").is_not_null()).select(
-        "chain_name",
-        "mainnet_chain_id",
-        "oplabs_db_schema",
+    return (
+        clean_df.filter(pl.col("oplabs_db_schema").is_not_null())
+        .select(
+            "chain_name",
+            "display_name",
+            "mainnet_chain_id",
+            "oplabs_db_schema",
+        )
+        .sort("chain_name")
     )


diff --git a/src/op_analytics/cli/subcommands/chains/app.py b/src/op_analytics/cli/subcommands/chains/app.py
index bec6e7a0b2..f812b2ea7b 100644
--- a/src/op_analytics/cli/subcommands/chains/app.py
+++ b/src/op_analytics/cli/subcommands/chains/app.py
@@ -5,6 +5,7 @@
 from op_coreutils.clickhouse import run_goldsky_query
 from op_coreutils.gsheets import update_gsheet
 from op_coreutils.logger import structlog
+from op_datasets.chains.across_bridge import upload_across_bridge_addresses
 from op_datasets.chains.chain_metadata import (
     filter_to_goldsky_chains,
     load_chain_metadata,
@@ -13,8 +14,8 @@
 from op_datasets.etl.ingestion import ingest
 from op_datasets.etl.ingestion.batches import split_block_range
 from op_datasets.etl.intermediate import compute_intermediate
-from op_datasets.utils.blockrange import BlockRange
 from op_datasets.schemas import ONCHAIN_CURRENT_VERSION
+from op_datasets.utils.blockrange import BlockRange
 from rich import print
 from typing_extensions import Annotated

@@ -72,12 +73,12 @@ def goldsky_sql(


 @app.command()
-def update_chain_metadata_gsheet():
-    """Upload chain_metadata_raw.csv to Google Sheets.
+def chain_metadata_updates():
+    """Run various chain metadata related updates.

-    The chain_metadata_raw.csv file is maintained manually by the OP Labs team. This function
-    accepts a local CSV file with raw chain metadata. It loads the data, cleans it up and uploads
-    it to Google Sheets.
+    - Upload chain_metadata_raw.csv to Google Sheets.
+    - Update the OP Analytics Chain Metadata [ADMIN MANAGED] Google Sheet.
+    - Update the Across Superchain Bridge Addresses [ADMIN MANAGED] Google Sheet.

     TODO: Decide if we want to upload to Dune, Clickhouse, BigQuery, or op-analytics-static repo.
     """
@@ -99,6 +100,10 @@ def update_chain_metadata_gsheet():
         dataframe=to_pandas(goldsky_df),
     )

+    # Upload the across bridge addresses.
+    # Makes sure they are consistent with Chain Metadata.
+    upload_across_bridge_addresses(goldsky_df)
+

 @app.command()
 def verify_goldsky_tables():

From 91864906730f337e3256cddd2fc44a444f842281 Mon Sep 17 00:00:00 2001
From: Pedro M Duarte
Date: Wed, 23 Oct 2024 17:01:26 -0400
Subject: [PATCH 2/3] Fix remaining issues with across bridge table

---
 .../default.across_bridge_metadata.sql | 2 +-
 .../src/op_coreutils/clickhouse/__init__.py | 2 +-
 .../src/op_coreutils/clickhouse/client.py | 6 +++++
 .../src/op_datasets/chains/across_bridge.py | 22 +++++++++++++++----
 .../etl/ingestion/audits/audits.py | 1 +
 .../op_datasets/etl/intermediate/registry.py | 1 +
 6 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/ddl/clickhouse_goldsky/default.across_bridge_metadata.sql b/ddl/clickhouse_goldsky/default.across_bridge_metadata.sql
index b3b20a1ad1..809aeb829b 100644
--- a/ddl/clickhouse_goldsky/default.across_bridge_metadata.sql
+++ b/ddl/clickhouse_goldsky/default.across_bridge_metadata.sql
@@ -1,7 +1,7 @@
 CREATE TABLE IF NOT EXISTS default.across_bridge_metadata_v2 (
     chain_name String,
     display_name String,
-    mainnet_chain_id UInt32,
+    mainnet_chain_id String,
     spokepool_address String
 )
 ENGINE = SharedMergeTree
diff --git a/packages/op-coreutils/src/op_coreutils/clickhouse/__init__.py b/packages/op-coreutils/src/op_coreutils/clickhouse/__init__.py
index 6b936c88b0..f49f762c0e 100644
--- a/packages/op-coreutils/src/op_coreutils/clickhouse/__init__.py
+++ b/packages/op-coreutils/src/op_coreutils/clickhouse/__init__.py
@@ -1 +1 @@
-from .client import insert_arrow, run_goldsky_query, run_oplabs_query
+from .client import insert_arrow, run_goldsky_query, run_goldsky_statement, run_oplabs_query
diff --git a/packages/op-coreutils/src/op_coreutils/clickhouse/client.py b/packages/op-coreutils/src/op_coreutils/clickhouse/client.py
index 567d716d23..56c73864f4 100644
--- a/packages/op-coreutils/src/op_coreutils/clickhouse/client.py
+++ b/packages/op-coreutils/src/op_coreutils/clickhouse/client.py
@@ -65,6 +65,12 @@ def init_client(instance: ClickHouseInstance):
     raise NotImplementedError()


+def run_goldsky_statement(statement):
+    """Run a statement that does not return results."""
+    client = init_client("GOLDSKY")
+    client.query(statement)
+
+
 def run_goldsky_query(
     query: str,
     parameters: dict[str, Any] | None = None,
diff --git a/packages/op-datasets/src/op_datasets/chains/across_bridge.py b/packages/op-datasets/src/op_datasets/chains/across_bridge.py
index deb63c1711..668e52e4ee 100644
--- a/packages/op-datasets/src/op_datasets/chains/across_bridge.py
+++ b/packages/op-datasets/src/op_datasets/chains/across_bridge.py
@@ -1,7 +1,10 @@
 import polars as pl

 from op_coreutils.gsheets import read_gsheet
-from op_coreutils.clickhouse import insert_arrow
+from op_coreutils.clickhouse import insert_arrow, run_goldsky_statement
+
+DATABASE = "default"
+TABLE = "across_bridge_metadata_v2"


 def upload_across_bridge_addresses(chains_df: pl.DataFrame):
@@ -14,11 +17,22 @@ def upload_across_bridge_addresses(chains_df: pl.DataFrame):
     # Load and verify that the data is consistent with our Chain Metadata source of truth.
     df = load_across_bridge_addresses(chains_df)

+    # In ClickHouse we store the mainnet_chain_id as a string.
+    clickhouse_df = df.select(
+        pl.col("chain_name"),
+        pl.col("display_name"),
+        pl.col("mainnet_chain_id").cast(pl.String),
+        pl.col("spokepool_address"),
+    )
+
+    # Truncate is necessary so we avoid duplicates when inserting values.
+    run_goldsky_statement(f"TRUNCATE TABLE {DATABASE}.{TABLE}")
+
     insert_arrow(
         instance="GOLDSKY",
-        database="default",
-        table="across_bridge_metadata_v2",
-        df_arrow=df.to_arrow(),
+        database=DATABASE,
+        table=TABLE,
+        df_arrow=clickhouse_df.to_arrow(),
     )


diff --git a/packages/op-datasets/src/op_datasets/etl/ingestion/audits/audits.py b/packages/op-datasets/src/op_datasets/etl/ingestion/audits/audits.py
index 27ed6ff8f7..3f533394ee 100644
--- a/packages/op-datasets/src/op_datasets/etl/ingestion/audits/audits.py
+++ b/packages/op-datasets/src/op_datasets/etl/ingestion/audits/audits.py
@@ -8,6 +8,7 @@

 def register(func):
     REGISTERED_AUDITS[func.__name__] = func
+    return func


 VALID_HASH = r"^0x[\da-f]{64}$"
diff --git a/packages/op-datasets/src/op_datasets/etl/intermediate/registry.py b/packages/op-datasets/src/op_datasets/etl/intermediate/registry.py
index c7177bbe2d..bb3c16c962 100644
--- a/packages/op-datasets/src/op_datasets/etl/intermediate/registry.py
+++ b/packages/op-datasets/src/op_datasets/etl/intermediate/registry.py
@@ -10,6 +10,7 @@

 def register_model(func):
     REGISTERED_INTERMEDIATE_MODELS[func.__name__] = func
+    return func


 def load_model_definitions():

From 927589b73834368416eb174fe4f40d5005698387 Mon Sep 17 00:00:00 2001
From: Pedro M Duarte
Date: Wed, 23 Oct 2024 17:09:47 -0400
Subject: [PATCH 3/3] Remove the v2 suffix; we can use this going forward

---
 packages/op-datasets/src/op_datasets/chains/across_bridge.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/op-datasets/src/op_datasets/chains/across_bridge.py b/packages/op-datasets/src/op_datasets/chains/across_bridge.py
index 668e52e4ee..6555c8db7e 100644
--- a/packages/op-datasets/src/op_datasets/chains/across_bridge.py
+++ b/packages/op-datasets/src/op_datasets/chains/across_bridge.py
@@ -4,7 +4,7 @@
 from op_coreutils.clickhouse import insert_arrow, run_goldsky_statement

 DATABASE = "default"
-TABLE = "across_bridge_metadata_v2"
+TABLE = "across_bridge_metadata"


 def upload_across_bridge_addresses(chains_df: pl.DataFrame):
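
For reference, a minimal sketch of how the uploaded table could be spot-checked once this series is applied and `uv run opdata chains chain_metadata_updates` has run. It is not part of the patches above; it only assumes the `run_goldsky_query` and `goldsky_chains` helpers that appear in the diffs, and the return type of `run_goldsky_query` is not shown here, so treat the comparison as illustrative.

# Illustrative spot check (not part of this series): confirm the TRUNCATE +
# insert left one row per Goldsky chain in the across bridge metadata table.
from op_coreutils.clickhouse import run_goldsky_query
from op_datasets.chains.chain_metadata import goldsky_chains

chains_df = goldsky_chains()
result = run_goldsky_query(
    "SELECT chain_name, spokepool_address FROM default.across_bridge_metadata ORDER BY chain_name"
)

# load_across_bridge_addresses enforces a 1:1 join against chain metadata, so
# the row count returned here should match the number of Goldsky chains.
print(len(chains_df), result)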