From a688a123202c1d503ce4e86b767f2646dfefbea9 Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Tue, 28 May 2024 09:21:38 -0400 Subject: [PATCH] rebase to main (#1549) * Ensures factory deployers and deployers are separated (#1511) * Dagster: Fixes dag associations (#1510) * Update dag associations with production dbt * Adds all dbt targets as well * Dagster-Dbt: Fix handling of target paths (#1514) Dagster: Fix handling of target paths * minor fixes to mart models (#1509) * add: artifact_source to artifacts_by_project_v1 * fix: hide rows with no metrics * rf4: updates to eigentrust models * rf4: updates to eigentrust models (#1515) * feat: deploy docusaurus docs via GitHub Actions to Cloudflare (#1516) * Testing the GitHub actions workflow instead of Vercel * fix: install pnpm before node in deploy-docusaurus-docs (#1518) * feat: deploy job for dbt generated docs (#1519) * fix: use real BQ credentials in deploy-dbt-docs (#1520) * fix: use the `indexer` environment for dbt docs deploy (#1523) * fix: ignore workspace warning in deploy-dbt-docs (#1524) * add: optimist sbt and refactor trust model (#1521) * add optimist sbt and refactor trust model * refactor: grab fid : address mappings direct from farcaster * add: whether address is passport user or not * fix: modify non-farcaster verifications * fix: simplify user checks * fix: artifact name to address * fix: external-prs better error messages (#1527) * Adds an .env.example so folks know how to configure this for local testing * Reports in an error message what the valid commands are if you specified an invalid one * Adds a bunch of comments in places * add: deployer_v1 to API (#1525) * add optimist sbt and refactor trust model * refactor: grab fid : address mappings direct from farcaster * add: whether address is passport user or not * fix: modify non-farcaster verifications * fix: simplify user checks * feat: deployers_v1 * contracts model that includes deployments from factories * add: artifact source field * fix: union statement * fix: roll back hasura (#1528) * fix: roll back hasura * set sync_to_db to false * feat: pretty print results from external-prs (#1529) * Add a summary message to ossd validation * Separate messages by address/name * Provide a way to share informational messages, not just errors * Fix the status title for the GitHub app * fix: update to actions/checkout@v4 (#1530) * add: contracts model to hasura (#1531) * add: contracts model to hasura * add: contract query * reduce contracts_v1 dimensions (#1532) * reduce contracts_v1 dimensions * make distinct * fix: misaligned union * filter null deployers from mart * add: power user metric (#1534) * add: power user metric * chore: add metric to consolidate model * fix: linting in new metric * chore: update rf4 metrics blog post * feat: additional rf4 repo eligibility checks (#1535) * feat: additional rf4 repo eligibility checks * chore: add repo checks to overall project checks * fix: clean up todos * fix: handling empty arrays * feat: add ANY_EVM semantics to deployer crawling (#1540) * Checks whether the oss-directory network is any_evm, includes if so * Fix the join to join on both source and namespace * fix: bump oss-directory 0.0.13 in external-prs (#1542) * fix: removes artifact_type from artifacts, moves to a separate table (#1543) * Previously, we were DISTINCT on source, namespace, name, AND type. 
* This means that the same address was shown multiple times per artifact_type * This refactor creates an intermediate model called int_all_artifacts that we can use * artifact_type is removed from artifacts_v1 and artifacts_by_project_v1 * there's a new int_artifact_types table that we can use to get types * fix: update to v4 of GitHub actions setup (#1544) * fix: Hasura deploy needs Python (#1547) --------- Co-authored-by: Reuven Gonzales Co-authored-by: Raymond Cheng --- .github/workflows/ci-default.yml | 16 +- .github/workflows/deploy-dbt-docs.yml | 71 ++++++ .github/workflows/deploy-docusaurus-docs.yml | 56 +++++ .github/workflows/deploy-hasura.yml | 13 +- .../workflows/external-prs-handle-comment.yml | 2 +- .../workflows/refresh-test-credentials.yml | 2 +- .../setup-external-pr-tools/action.yml | 4 +- .github/workflows/test-deploy-clean.yml | 2 +- .github/workflows/test-deploy-owners.yml | 8 +- .github/workflows/test-deploy/action.yml | 2 +- .../warehouse-meltano-ecosystems-ost.yml | 2 +- .../warehouse-publish-docker-containers.yml | 2 +- .../workflows/warehouse-run-data-pipeline.yml | 2 +- .../2024-05-16-impact-metrics-rf4/index.md | 8 +- .../cloudsql/tables/contracts_v1.yaml | 22 ++ .../databases/cloudsql/tables/tables.yaml | 1 + dbt_project.yml | 8 + ops/external-prs/.env.example | 7 + ops/external-prs/package.json | 4 +- ops/external-prs/src/base.ts | 19 +- ops/external-prs/src/cli.ts | 18 +- ops/external-prs/src/ossd/index.ts | 140 ++++++----- .../src/ossd/messages/validation-errors.md | 6 - .../src/ossd/messages/validation-message.md | 23 ++ ops/k8s-apps/base/dagster/dagster.yaml | 6 + pnpm-lock.yaml | 96 ++++---- .../contract_invocation_events_with_l1.sql | 2 +- .../models/filtered_blockchain_events.sql | 2 +- .../blockchain/int_derived_contracts.sql | 5 +- ...nt_optimism_contract_invocation_events.sql | 2 +- .../blockchain/int_optimism_transactions.sql | 2 +- .../directory/int_all_artifacts.sql | 218 ++++++++++++++++++ .../directory/int_artifact_types.sql | 8 + .../intermediate/directory/int_artifacts.sql | 4 - .../directory/int_artifacts_by_project.sql | 197 +--------------- .../directory/int_artifacts_by_user.sql | 10 +- .../intermediate/directory/int_contracts.sql | 114 +++++++++ .../metrics/int_code_metrics_by_project.sql | 1 + .../int_onchain_metrics_by_project.sql | 1 + .../metrics/int_repo_metrics_by_project.sql | 26 ++- .../directory/artifacts_by_project_v1.sql | 2 +- .../models/marts/directory/artifacts_v1.sql | 1 - .../models/marts/directory/contracts_v1.sql | 13 ++ .../metrics/rf4_monthly_active_addresses.sql | 5 +- .../metrics/rf4_power_user_addresses.sql | 43 ++++ .../rf4_trusted_monthly_active_users.sql | 5 +- .../rf4_events_daily_to_project.sql | 9 +- .../rf4_impact_metrics_by_project.sql | 10 +- .../rf4_impact_metrics_by_project__schema.yml | 7 +- .../superchain/rf4_project_verification.sql | 59 +++-- .../superchain/rf4_repo_stats_by_project.sql | 96 +++++--- .../marts/superchain/rf4_trusted_users.sql | 132 ++++++++--- warehouse/dbt/models/playground_sources.yml | 4 + .../karma3/stg_karma3__globaltrust.sql | 13 +- .../staging/karma3/stg_karma3__localtrust.sql | 20 +- warehouse/dbt/models/static_data_sources.yml | 7 + warehouse/dbt/models/superchain_sources.yml | 3 + warehouse/oso_dagster/assets.py | 138 ++++++++--- warehouse/oso_dagster/constants.py | 67 +++--- warehouse/oso_dagster/definitions.py | 20 +- warehouse/oso_dagster/factories/gcs.py | 22 +- warehouse/oso_dagster/goldsky.py | 8 +- warehouse/oso_dagster/schedules.py | 2 - 63 files changed, 1299 
insertions(+), 519 deletions(-) create mode 100644 .github/workflows/deploy-dbt-docs.yml create mode 100644 .github/workflows/deploy-docusaurus-docs.yml create mode 100644 apps/hasura/metadata/databases/cloudsql/tables/contracts_v1.yaml create mode 100644 ops/external-prs/.env.example delete mode 100644 ops/external-prs/src/ossd/messages/validation-errors.md create mode 100644 ops/external-prs/src/ossd/messages/validation-message.md create mode 100644 warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql create mode 100644 warehouse/dbt/models/intermediate/directory/int_artifact_types.sql create mode 100644 warehouse/dbt/models/intermediate/directory/int_contracts.sql create mode 100644 warehouse/dbt/models/marts/directory/contracts_v1.sql create mode 100644 warehouse/dbt/models/marts/superchain/metrics/rf4_power_user_addresses.sql create mode 100644 warehouse/dbt/models/static_data_sources.yml diff --git a/.github/workflows/ci-default.yml b/.github/workflows/ci-default.yml index 52d8cb855..eaed2b3ff 100644 --- a/.github/workflows/ci-default.yml +++ b/.github/workflows/ci-default.yml @@ -9,7 +9,7 @@ env: NODE_ENV: ${{ vars.NODE_ENV }} PLASMIC_PROJECT_ID: ${{ vars.PLASMIC_PROJECT_ID }} PLASMIC_PROJECT_API_TOKEN: ${{ vars.PLASMIC_PROJECT_API_TOKEN }} - NEXT_PUBLIC_DOMAIN: ${{ vars.NEXT_PUBLIC_DOMAIN }} + NEXT_PUBLIC_DOMAIN: "www.opensource.observer" NEXT_PUBLIC_DB_GRAPHQL_URL: ${{ vars.NEXT_PUBLIC_DB_GRAPHQL_URL }} OSO_API_KEY: ${{ secrets.OSO_API_KEY }} NEXT_PUBLIC_ALGOLIA_APPLICATION_ID: "test" @@ -60,7 +60,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # Check out pull request's HEAD commit instead of the merge commit to # prevent gitlint from failing due to too long commit message titles, @@ -83,7 +83,7 @@ jobs: PUBLIC_VARS_TEST - name: Setup pnpm - uses: pnpm/action-setup@v2 + uses: pnpm/action-setup@v4 with: version: 9 run_install: | @@ -91,7 +91,7 @@ jobs: args: [--frozen-lockfile, --strict-peer-dependencies] - name: Set up Node.js - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 with: node-version: "20.x" cache: "pnpm" @@ -108,9 +108,9 @@ jobs: version: nightly-87bc53fc6c874bd4c92d97ed180b949e3a36d78c - name: Authenticate to google with a test-dummy user - uses: 'google-github-actions/auth@v2' + uses: "google-github-actions/auth@v2" with: - credentials_json: '${{ env.GOOGLE_TEST_DUMMY_CREDENTIALS_JSON }}' + credentials_json: "${{ env.GOOGLE_TEST_DUMMY_CREDENTIALS_JSON }}" create_credentials_file: true - name: Setup dbt profile @@ -119,7 +119,7 @@ jobs: - name: Run supabase local run: | - bash .github/scripts/run-supabase-local.sh apps/frontend + bash .github/scripts/run-supabase-local.sh apps/frontend - name: Check if algolia is empty, set the variable to some dummy value if it is shell: bash @@ -139,7 +139,7 @@ jobs: - name: Lint run: | - pnpm lint + pnpm lint # Always run this step so that all linting errors can be seen at once. if: always() diff --git a/.github/workflows/deploy-dbt-docs.yml b/.github/workflows/deploy-dbt-docs.yml new file mode 100644 index 000000000..a77d2da22 --- /dev/null +++ b/.github/workflows/deploy-dbt-docs.yml @@ -0,0 +1,71 @@ +# NOTE: This name appears in GitHub's Checks API and in workflow's status badge. +name: deploy-dbt-docs +env: + CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} + # Google variables + GOOGLE_PROJECT_ID: "opensource-observer" + +# Trigger the workflow when: +on: + # A push occurs to one of the matched branches. 
+ push: + branches: + - main + paths: + - warehouse/dbt/** + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + build-deploy: + # NOTE: This name appears in GitHub's Checks API. + name: build-deploy + environment: indexer + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Setup pnpm + uses: pnpm/action-setup@v4 + with: + version: 9 + run_install: | + - recursive: true + args: [--frozen-lockfile, --strict-peer-dependencies] + + - name: Set up Node.js 20 + uses: actions/setup-node@v4 + with: + cache: "pnpm" + node-version: "20.x" + + - name: "Setup Python, Poetry and Dependencies" + uses: packetcoders/action-setup-cache-python-poetry@main + with: + python-version: 3.12 + poetry-version: 1.7.1 + + - name: Login to google + uses: "google-github-actions/auth@v2" + with: + credentials_json: "${{ secrets.GOOGLE_CREDENTIALS_JSON }}" + create_credentials_file: true + + - name: Setup dbt profile + run: | + bash .github/scripts/create-dbt-profile.sh ${GOOGLE_APPLICATION_CREDENTIALS} + + - name: Build + run: pnpm build:dbt:docs + + # This is necessary because wrangler tries to install at monorepo root + - run: echo "ignore-workspace-root-check=true" >> .npmrc + + - name: Publish + uses: cloudflare/wrangler-action@v3 + with: + apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} + command: pages deploy target --project-name=models-opensource-observer diff --git a/.github/workflows/deploy-docusaurus-docs.yml b/.github/workflows/deploy-docusaurus-docs.yml new file mode 100644 index 000000000..2bbaba3ef --- /dev/null +++ b/.github/workflows/deploy-docusaurus-docs.yml @@ -0,0 +1,56 @@ +# NOTE: This name appears in GitHub's Checks API and in workflow's status badge. +name: deploy-docusaurus-docs +env: + CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} + DOCS_URL: ${{ vars.DOCS_URL }} + DOCS_ALGOLIA_APP_ID: ${{ vars.DOCS_ALGOLIA_APP_ID }} + DOCS_ALGOLIA_API_KEY: ${{ vars.DOCS_ALGOLIA_API_KEY }} + DOCS_ALGOLIA_INDEX: ${{ vars.DOCS_ALGOLIA_INDEX }} + DOCS_SEGMENT_WRITE_KEY: ${{ vars.DOCS_SEGMENT_WRITE_KEY }} + +# Trigger the workflow when: +on: + # A push occurs to one of the matched branches. + push: + branches: + - main + paths: + - apps/docs/** + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + build-deploy: + # NOTE: This name appears in GitHub's Checks API. 
+ name: build-deploy + environment: deploy + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Setup pnpm + uses: pnpm/action-setup@v4 + with: + version: 9 + run_install: | + - recursive: true + args: [--frozen-lockfile, --strict-peer-dependencies] + + - name: Set up Node.js 20 + uses: actions/setup-node@v4 + with: + cache: "pnpm" + node-version: "20.x" + + - name: Build + run: pnpm build:docs + + - name: Publish + uses: cloudflare/wrangler-action@v3 + with: + apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} + workingDirectory: "apps/docs" + command: pages deploy build --project-name=docs-opensource-observer diff --git a/.github/workflows/deploy-hasura.yml b/.github/workflows/deploy-hasura.yml index 5615db552..da2b68af7 100644 --- a/.github/workflows/deploy-hasura.yml +++ b/.github/workflows/deploy-hasura.yml @@ -27,12 +27,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Setup pnpm - uses: pnpm/action-setup@v2 + uses: pnpm/action-setup@v4 with: version: 9 run_install: | @@ -40,11 +40,17 @@ jobs: args: [--frozen-lockfile, --strict-peer-dependencies] - name: Set up Node.js 20 - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 with: cache: "pnpm" node-version: "20.x" + - name: "Setup Python, Poetry and Dependencies" + uses: packetcoders/action-setup-cache-python-poetry@main + with: + python-version: 3.12 + poetry-version: 1.7.1 + - name: Login to google uses: 'google-github-actions/auth@v2' with: @@ -57,5 +63,6 @@ jobs: - name: Build run: pnpm build:hasura + - name: Deploy run: pnpm deploy:hasura diff --git a/.github/workflows/external-prs-handle-comment.yml b/.github/workflows/external-prs-handle-comment.yml index 0c47f9f73..06846246e 100644 --- a/.github/workflows/external-prs-handle-comment.yml +++ b/.github/workflows/external-prs-handle-comment.yml @@ -20,7 +20,7 @@ jobs: if: github.event.issue.pull_request && contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "CONTRIBUTOR"]'), github.event.comment.author_association) steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 diff --git a/.github/workflows/refresh-test-credentials.yml b/.github/workflows/refresh-test-credentials.yml index 01781d31b..e344b9d21 100644 --- a/.github/workflows/refresh-test-credentials.yml +++ b/.github/workflows/refresh-test-credentials.yml @@ -30,7 +30,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 diff --git a/.github/workflows/setup-external-pr-tools/action.yml b/.github/workflows/setup-external-pr-tools/action.yml index 75b63468b..504cb80d7 100644 --- a/.github/workflows/setup-external-pr-tools/action.yml +++ b/.github/workflows/setup-external-pr-tools/action.yml @@ -7,7 +7,7 @@ runs: using: "composite" steps: - name: Setup pnpm - uses: pnpm/action-setup@v2 + uses: pnpm/action-setup@v4 with: version: 9 run_install: | @@ -15,7 +15,7 @@ runs: args: [--frozen-lockfile, --strict-peer-dependencies] - name: Set up Node.js 20 - uses: actions/setup-node@v3 + uses: actions/setup-node@v4 with: cache: "pnpm" node-version: "20.x" diff --git a/.github/workflows/test-deploy-clean.yml b/.github/workflows/test-deploy-clean.yml index af5250793..82bb045f7 100644 --- a/.github/workflows/test-deploy-clean.yml +++ b/.github/workflows/test-deploy-clean.yml @@ -24,7 +24,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - 
uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 diff --git a/.github/workflows/test-deploy-owners.yml b/.github/workflows/test-deploy-owners.yml index 20d21e64c..95aa5d23e 100644 --- a/.github/workflows/test-deploy-owners.yml +++ b/.github/workflows/test-deploy-owners.yml @@ -31,13 +31,13 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Setup external pr tools uses: ./.github/workflows/setup-external-pr-tools - + - name: Initialize check run: | cd ops/external-prs && @@ -48,9 +48,9 @@ jobs: echo "${{ github.event.pull_request.author_association }}" - name: Login to google - uses: 'google-github-actions/auth@v2' + uses: "google-github-actions/auth@v2" with: - credentials_json: '${{ secrets.GOOGLE_BQ_ADMIN_CREDENTIALS_JSON }}' + credentials_json: "${{ secrets.GOOGLE_BQ_ADMIN_CREDENTIALS_JSON }}" create_credentials_file: true if: ${{ contains(fromJson('["OWNER", "MEMBER", "COLLABORATOR", "CONTRIBUTOR"]'), github.event.pull_request.author_association) }} diff --git a/.github/workflows/test-deploy/action.yml b/.github/workflows/test-deploy/action.yml index 66a5df73c..d273b4d32 100644 --- a/.github/workflows/test-deploy/action.yml +++ b/.github/workflows/test-deploy/action.yml @@ -46,7 +46,7 @@ runs: # cd ops/external-prs && pnpm tools oso test-deploy is-eligible $GITHUB_OUTPUT - name: checkout the PR - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # Check out pull request's HEAD commit instead of the merge commit to # prevent gitlint from failing due to too long commit message titles, diff --git a/.github/workflows/warehouse-meltano-ecosystems-ost.yml b/.github/workflows/warehouse-meltano-ecosystems-ost.yml index 04f52efa5..a18855117 100644 --- a/.github/workflows/warehouse-meltano-ecosystems-ost.yml +++ b/.github/workflows/warehouse-meltano-ecosystems-ost.yml @@ -28,7 +28,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 diff --git a/.github/workflows/warehouse-publish-docker-containers.yml b/.github/workflows/warehouse-publish-docker-containers.yml index b499e2672..e1c694b04 100644 --- a/.github/workflows/warehouse-publish-docker-containers.yml +++ b/.github/workflows/warehouse-publish-docker-containers.yml @@ -21,7 +21,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 diff --git a/.github/workflows/warehouse-run-data-pipeline.yml b/.github/workflows/warehouse-run-data-pipeline.yml index e342bdf18..2a49ea99d 100644 --- a/.github/workflows/warehouse-run-data-pipeline.yml +++ b/.github/workflows/warehouse-run-data-pipeline.yml @@ -48,7 +48,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 diff --git a/apps/docs/blog/2024-05-16-impact-metrics-rf4/index.md b/apps/docs/blog/2024-05-16-impact-metrics-rf4/index.md index b6e8352cf..2b2b71d7e 100644 --- a/apps/docs/blog/2024-05-16-impact-metrics-rf4/index.md +++ b/apps/docs/blog/2024-05-16-impact-metrics-rf4/index.md @@ -26,7 +26,7 @@ The round is expected to receive applications from hundreds of projects building At Open Source Observer, our objective is to help the Optimism community arrive at up to 20 credible impact metrics that can be applied to projects with contracts on the Superchain. -This page explains where the metrics come from and includes a working list of all metrics under consideration for badgeholders. 
We will update it regularly, at least until the start of voting (June 23), to reflect the evolution of metrics. The first version metrics was released on 2024-05-16 and the most recent version (below) was released on 2024-05-22. +This page explains where the metrics come from and includes a working list of all metrics under consideration for badgeholders. We will update it regularly, at least until the start of voting (June 23), to reflect the evolution of metrics. The first version of the metrics was released on 2024-05-16 and the most recent version (below) was released on 2024-05-26. @@ -180,3 +180,9 @@ Recurring addresses are a proxy for recurring users. It is especially relevant t Count of trusted users who have interacted with the project in at least 3 separate months over the RF4 scope period (October 2023 - June 2024). Many crypto natives are curious to try out new protocols. But churn and user retention are major issues. Recurring users represent the most loyal and committed segment of a project's user base. This metric considers users who have interacted with a project over the course of at least three distinct calendar months during the RF4 scope period. Thus, it is intended to reflect sustained interest and ongoing engagement over time. A high count of recurring users signals strong project loyalty and a good user experience, and helps separate the fads from the future. + +### Power User Addresses + +Count of 'power user' addresses that have interacted with the project over the RF4 scope period (October 2023 - June 2024). + +This metric reflects the degree to which a project has attracted attention from the most active and engaged users on the Superchain. A `power user` is defined as an address that has made at least 100 transactions, across at least 10 different projects, on at least 30 days, over the RF4 scope period. A project is counted by this metric if it has at least one interaction from a power user. Power users are critical early adopters for the ecosystem.
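To make the thresholds above concrete, the filter they imply can be written as a short dbt-style query. This is an illustrative sketch only, not the `rf4_power_user_addresses` model added in this PR; the column names and event type below are assumptions for illustration.

```sql
-- Hypothetical sketch of the power-user filter (column and event-type
-- names are assumed, not taken from the actual model in this PR).
with address_activity as (
  select
    from_artifact_name as address,
    count(distinct project_id) as project_count,
    count(distinct bucket_day) as active_days,
    sum(amount) as transaction_count
  from {{ ref('rf4_events_daily_to_project') }}
  where
    event_type = 'CONTRACT_INVOCATION_SUCCESS_DAILY_COUNT'
    and bucket_day between '2023-10-01' and '2024-06-01'
  group by from_artifact_name
)

select address
from address_activity
where
  transaction_count >= 100  -- at least 100 transactions
  and project_count >= 10   -- across at least 10 projects
  and active_days >= 30     -- on at least 30 distinct days
```

A project would then qualify for the metric if at least one of these addresses interacted with it during the scope period.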
diff --git a/apps/hasura/metadata/databases/cloudsql/tables/contracts_v1.yaml b/apps/hasura/metadata/databases/cloudsql/tables/contracts_v1.yaml new file mode 100644 index 000000000..8df40878c --- /dev/null +++ b/apps/hasura/metadata/databases/cloudsql/tables/contracts_v1.yaml @@ -0,0 +1,22 @@ +table: + name: contracts_v1 + schema: public +select_permissions: + - role: anonymous + permission: + columns: "*" + filter: {} + allow_aggregations: false + comment: "" + - role: user + permission: + columns: "*" + filter: {} + allow_aggregations: false + comment: "" + - role: developer + permission: + columns: "*" + filter: {} + allow_aggregations: true + comment: "" diff --git a/apps/hasura/metadata/databases/cloudsql/tables/tables.yaml b/apps/hasura/metadata/databases/cloudsql/tables/tables.yaml index 8d3c4689d..08131ab61 100644 --- a/apps/hasura/metadata/databases/cloudsql/tables/tables.yaml +++ b/apps/hasura/metadata/databases/cloudsql/tables/tables.yaml @@ -2,6 +2,7 @@ - "!include artifacts_v1.yaml" - "!include code_metrics_by_project_v1.yaml" - "!include collections_v1.yaml" +- "!include contracts_v1.yaml" - "!include event_indexing_status_by_project_v1.yaml" - "!include event_types_v1.yaml" - "!include events_daily_to_artifact.yaml" diff --git a/dbt_project.yml b/dbt_project.yml index 38c1bed9e..0a524da02 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -28,3 +28,11 @@ models: +materialized: view marts: +materialized: table + + +sources: + opensource_observer: + playground: + +enabled: "{{ target.name in ['playground', 'dev'] | as_bool }}" + base_playground: + +enabled: "{{ target.name in ['base_playground'] | as_bool }}" diff --git a/ops/external-prs/.env.example b/ops/external-prs/.env.example new file mode 100644 index 000000000..0419ec087 --- /dev/null +++ b/ops/external-prs/.env.example @@ -0,0 +1,7 @@ +# .env + +# GitHub App ID, which you can find here +# https://github.com/organizations/opensource-observer/settings/apps/oso-prs +PR_TOOLS_GITHUB_APP_ID= +# Base64 encoded private key for the GitHub App +PR_TOOLS_GITHUB_APP_PRIVATE_KEY= \ No newline at end of file diff --git a/ops/external-prs/package.json b/ops/external-prs/package.json index 21c643565..48d8195ab 100644 --- a/ops/external-prs/package.json +++ b/ops/external-prs/package.json @@ -27,6 +27,7 @@ }, "keywords": [], "devDependencies": { + "@types/lodash": "^4.17.4", "@types/node": "^20.11.17", "dotenv": "^16.4.1", "ts-node": "^10.9.1", @@ -45,9 +46,10 @@ "duckdb": "^0.10.1", "envfile": "^7.1.0", "libsodium-wrappers": "^0.7.13", + "lodash": "^4.17.21", "mustache": "^4.2.0", "octokit": "^3.1.0", - "oss-directory": "^0.0.12", + "oss-directory": "^0.0.13", "tmp-promise": "^3.0.3", "ts-dedent": "^2.2.0", "winston": "^3.11.0", diff --git a/ops/external-prs/src/base.ts b/ops/external-prs/src/base.ts index 6e4d32f31..0ebd14007 100644 --- a/ops/external-prs/src/base.ts +++ b/ops/external-prs/src/base.ts @@ -70,6 +70,11 @@ export class GHAppUtils { }); } + /** + * Set a status comment on a PR + * This will try to keep updating the same comment if it exists + * You can have multiple comments by setting a `messageId` + **/ async setStatusComment(pr: number, message: string, messageId?: string) { messageId = messageId || "external-pr-status-comment"; const messageIdText = ``; @@ -92,6 +97,7 @@ export class GHAppUtils { }); console.log(appCommentRefs); + // If this app has never commented on this PR, just create it if (appCommentRefs.length === 0) { await this.octo.rest.issues.createComment({ owner: this.repo.owner, @@ -116,11 
+122,13 @@ export class GHAppUtils { comments.push(appComment); } + // Look for the messageIdText const matchingComments = comments.filter((c) => { const body = c.data.body || ""; return body.trimStart().indexOf(messageIdText) === 0; }); + // Just create it if it doesn't exist yet if (matchingComments.length === 0) { await this.octo.rest.issues.createComment({ owner: this.repo.owner, @@ -130,6 +138,8 @@ export class GHAppUtils { }); return; } + + // Delete any duplicate comments with the same messageIdText if (matchingComments.length > 1) { logger.warn( "multiple matching comments found. This isn't treated as an error. Deleting extra comments", @@ -143,6 +153,7 @@ export class GHAppUtils { }); } + // Update the existing comment await this.octo.rest.issues.updateComment({ owner: this.repo.owner, repo: this.repo.name, @@ -169,6 +180,7 @@ export class GHAppUtils { commentId: comment.data.id, authorAssosication: comment.data.author_association, author: comment.data.user?.login, + content: comment.data.body, }); const login = comment.data.user?.login; @@ -200,7 +212,12 @@ export class GHAppUtils { const handler = handlers[match[1]]; if (!handler) { - throw new NoCommandError(`invalid command "${match[1]}`); + logger.warn( + `Valid commands include ${Object.keys(handlers) + .map((x) => `'${x}'`) + .join(", ")}`, + ); + throw new NoCommandError(`invalid command /${match[1]}`); } const issueUrl = comment.data.issue_url; const url = new URL(issueUrl); diff --git a/ops/external-prs/src/cli.ts b/ops/external-prs/src/cli.ts index ad65523ff..1a88aafd0 100644 --- a/ops/external-prs/src/cli.ts +++ b/ops/external-prs/src/cli.ts @@ -20,11 +20,19 @@ type BeforeClientArgs = ArgumentsCamelCase<{ }>; interface InitializePRCheck extends BaseArgs { + // Commit SHA sha: string; + // GitHub user login: string; checkName: string; } +/** + * Checks if the user has write access. + * If yes, we signal that we've already queued a job. + * Otherwise, signal that we need admin approval. + * This is typically run with the initiator of the pull request + **/ async function initializePrCheck(args: InitializePRCheck) { logger.info({ message: "initializing the PR check", @@ -49,8 +57,9 @@ async function initializePrCheck(args: InitializePRCheck) { head_sha: args.sha, status: CheckStatus.Queued, output: { - title: "Test deployment queued", - summary: "Test deployment queued", + title: "Test workflow has been queued", + summary: + "Test workflow has been queued. Please check the corresponding owners workflow for the latest job status.", }, }); } else { @@ -62,7 +71,7 @@ async function initializePrCheck(args: InitializePRCheck) { conclusion: CheckConclusion.ActionRequired, output: { title: `Approval required for ${args.checkName}`, - summary: `Approval required for the ${args.checkName} check.`, + summary: `Approval required for the ${args.checkName} check. Repo admins can run '/${args.checkName} LATEST_COMMIT_SHA'. 
Remember to use the latest commit SHA, or validation will fail.`, }, }); } @@ -96,8 +105,9 @@ const cli = yargs(hideBin(process.argv)) demandOption: true, }) .middleware(async (args: BeforeClientArgs) => { + // Get base64-encoded private key from the environment const buf = Buffer.from(args.githubAppPrivateKey as string, "base64"); // Ta-da - + // Log into GitHub Octokit const app = new App({ appId: args.githubAppId as string, privateKey: buf.toString("utf-8"), diff --git a/ops/external-prs/src/ossd/index.ts b/ops/external-prs/src/ossd/index.ts index ec25b4cc1..7417c3fe0 100644 --- a/ops/external-prs/src/ossd/index.ts +++ b/ops/external-prs/src/ossd/index.ts @@ -6,6 +6,7 @@ import { logger } from "../utils/logger.js"; import { BaseArgs, CommmentCommandHandler } from "../base.js"; import { loadData, Project, Collection } from "oss-directory"; import duckdb from "duckdb"; +import _ from "lodash"; import * as util from "util"; import * as fs from "fs"; import * as fsPromise from "fs/promises"; @@ -43,7 +44,9 @@ function jsonlExport(path: string, arr: Array): Promise { } interface ParseCommentArgs extends BaseArgs { + // Comment ID comment: number; + // Output filename output: string; login: string; } @@ -588,8 +591,38 @@ class OSSDirectoryPullRequest { }); await this.loadValidators(urls); - const validationErrors: { address: string; error: string }[] = []; + // Embedded data structure for storing validation results + type ValidationItem = { + name: string; + messages: string[]; + errors: string[]; + }; + const results: Record = {}; + // Add a name to the results + const ensureNameInResult = (name: string) => { + const item = results[name]; + if (!item) { + results[name] = { + name, + messages: [], + errors: [], + }; + } + }; + // Add an informational message + /** + const addMessageToResult = (name: string, message: string) => { + ensureNameInResult(name); + results[name].messages.push(message); + }; + */ + // Add an error + const addErrorToResult = (name: string, message: string) => { + ensureNameInResult(name); + results[name].errors.push(message); + }; + // Run on-chain validations for (const item of this.changes.artifacts.toValidate.blockchain) { const address = item.address; for (const network of item.networks) { @@ -609,70 +642,58 @@ class OSSDirectoryPullRequest { }); if (item.tags.indexOf("eoa") !== -1) { if (!(await validator.isEOA(address))) { - validationErrors.push({ - address: address, - error: "is not an EOA", - }); + addErrorToResult(address, "is not an EOA"); } } if (item.tags.indexOf("contract") !== -1) { if (!(await validator.isContract(address))) { - validationErrors.push({ - address: address, - error: "is not a Contract", - }); + addErrorToResult(address, "is not a Contract"); } } if (item.tags.indexOf("deployer") !== -1) { if (!(await validator.isDeployer(address))) { - validationErrors.push({ - address: address, - error: "is not a Deployer", - }); + addErrorToResult(address, "is not a Deployer"); } } } } - if (validationErrors.length !== 0) { - logger.info({ - message: "found validation errors", - count: validationErrors.length, - }); + // Summarize results + const items: ValidationItem[] = _.values(results); + const numErrors = _.sumBy( + items, + (item: ValidationItem) => item.errors.length, + ); + const summaryMessage = + numErrors > 0 + ? `⛔ Found ${numErrors} errors ⛔` + : items.length > 0 + ? 
"⚠️ Please review validation items before approving ⚠️" + : "✅ Good to go as long as status checks pass"; + const commentBody = await renderMustacheFromFile( + relativeDir("messages", "validation-message.md"), + { + sha: args.sha, + summaryMessage, + validationItems: items, + }, + ); - await args.appUtils.setStatusComment( - args.pr, - await renderMustacheFromFile( - relativeDir("messages", "validation-errors.md"), - { - validationErrors: validationErrors, - sha: args.sha, - }, - ), - ); - - await args.appUtils.setCheckStatus({ - conclusion: CheckConclusion.Failure, - name: "validate", - head_sha: args.sha, - status: CheckStatus.Completed, - output: { - title: "PR Validation", - summary: `Failed to validate with ${validationErrors.length} errors`, - }, - }); - } else { - await args.appUtils.setCheckStatus({ - conclusion: CheckConclusion.Success, - name: "validate", - head_sha: args.sha, - status: CheckStatus.Completed, - output: { - title: "PR Validation", - summary: "Successfully validated", - }, - }); - } + // Update the PR comment + await args.appUtils.setStatusComment(args.pr, commentBody); + // Update the PR status + await args.appUtils.setCheckStatus({ + conclusion: + numErrors > 0 ? CheckConclusion.Failure : CheckConclusion.Success, + name: "validate", + head_sha: args.sha, + status: CheckStatus.Completed, + output: { + title: + numErrors > 0 ? summaryMessage : "Successfully validated all items", + summary: commentBody, + }, + }); } } @@ -686,6 +707,11 @@ async function validatePR(args: ValidatePRArgs) { await pr.validate(args); } +/** + * This command is called by external-prs-handle-comment as a check + * for whether we should run the validation logic, + * based on whether a valid command was called. + **/ async function parseOSSDirectoryComments(args: ParseCommentArgs) { const enableValidation: CommmentCommandHandler = async ( command, @@ -712,15 +738,19 @@ async function parseOSSDirectoryComments(args: ParseCommentArgs) { }); }; + const commandHandlers = { + // /validate + validate: enableValidation, + }; + try { const output = await args.appUtils.parseCommentForCommand( args.comment, - { - validate: enableValidation, - }, + commandHandlers, ); await output.commit(args.output); - } catch (_e) { + } catch (e) { + logger.debug("Error", e); await GithubOutput.write(args.output, { deploy: "false", }); diff --git a/ops/external-prs/src/ossd/messages/validation-errors.md b/ops/external-prs/src/ossd/messages/validation-errors.md deleted file mode 100644 index 645335799..000000000 --- a/ops/external-prs/src/ossd/messages/validation-errors.md +++ /dev/null @@ -1,6 +0,0 @@ -Validation of commit `{{sha}}` failed with the following errors: - -{{#validationErrors}} - -- `{{address}}` {{error}} - {{/validationErrors}} diff --git a/ops/external-prs/src/ossd/messages/validation-message.md b/ops/external-prs/src/ossd/messages/validation-message.md new file mode 100644 index 000000000..a4b4fec34 --- /dev/null +++ b/ops/external-prs/src/ossd/messages/validation-message.md @@ -0,0 +1,23 @@ +## Validation Results + +{{summaryMessage}} + +commit `{{sha}}` + +--- + +{{#validationItems}} + +### {{name}} + +{{#errors}} + +- ❌ {{.}} + {{/errors}} + +{{#messages}} + +- 👉 {{.}} + {{/messages}} + +{{/validationItems}} diff --git a/ops/k8s-apps/base/dagster/dagster.yaml b/ops/k8s-apps/base/dagster/dagster.yaml index 1aa6b75ff..91deee4cb 100644 --- a/ops/k8s-apps/base/dagster/dagster.yaml +++ b/ops/k8s-apps/base/dagster/dagster.yaml @@ -60,6 +60,8 @@ spec: env: - name: DAGSTER_DBT_GENERATE_AND_AUTH_GCP value: 
"1" + - name: DAGSTER_DBT_TARGET_BASE_DIR + value: /tmp/dbt-targets envConfigMaps: - name: dagster-oso-extra-env resources: @@ -73,6 +75,8 @@ spec: env: - name: DAGSTER_DBT_GENERATE_AND_AUTH_GCP value: "1" + - name: DAGSTER_DBT_TARGET_BASE_DIR + value: /tmp/dbt-targets envConfigMaps: - name: dagster-oso-extra-env resources: @@ -92,6 +96,8 @@ spec: env: - name: DAGSTER_DBT_GENERATE_AND_AUTH_GCP value: "1" + - name: DAGSTER_DBT_TARGET_BASE_DIR + value: /tmp/dbt-targets envConfigMaps: - name: dagster-oso-extra-env port: 3030 diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ecd8d16e4..40d4d2e64 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -31,7 +31,7 @@ importers: version: 3.1.1(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0))(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) '@docusaurus/preset-classic': specifier: 3.1.1 - version: 3.1.1(@algolia/client-search@4.23.3)(@types/react@18.2.79)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0)(typescript@5.3.3) + version: 3.1.1(@algolia/client-search@4.23.3)(@types/react@18.3.3)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0)(typescript@5.3.3) '@docusaurus/theme-common': specifier: 3.1.1 version: 3.1.1(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0))(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) @@ -40,10 +40,10 @@ importers: version: 1.0.6 '@mdx-js/react': specifier: ^3.0.0 - version: 3.0.0(@types/react@18.2.79)(react@18.2.0) + version: 3.0.0(@types/react@18.3.3)(react@18.2.0) '@plasmicapp/react-web': specifier: ^0.2.337 - version: 0.2.337(@types/react@18.2.79)(react-dom@18.2.0(react@18.2.0))(react@18.2.0) + version: 0.2.337(@types/react@18.3.3)(react-dom@18.2.0(react@18.2.0))(react@18.2.0) clsx: specifier: ^2.1.0 version: 2.1.0 @@ -378,6 +378,9 @@ importers: libsodium-wrappers: specifier: ^0.7.13 version: 0.7.13 + lodash: + specifier: ^4.17.21 + version: 4.17.21 mustache: specifier: ^4.2.0 version: 4.2.0 @@ -385,8 +388,8 @@ importers: specifier: ^3.1.0 version: 3.1.0 oss-directory: - specifier: ^0.0.12 - version: 0.0.12(ts-node@10.9.1(@types/node@20.11.17)(typescript@5.3.3))(typescript@5.3.3) + specifier: ^0.0.13 + version: 0.0.13(ts-node@10.9.1(@types/node@20.11.17)(typescript@5.3.3))(typescript@5.3.3) tmp-promise: specifier: ^3.0.3 version: 3.0.3 @@ -403,6 +406,9 @@ importers: specifier: ^17.7.2 version: 17.7.2 devDependencies: + '@types/lodash': + specifier: ^4.17.4 + version: 4.17.4 '@types/node': specifier: ^20.11.17 version: 20.11.17 @@ -1392,8 +1398,8 @@ packages: resolution: {integrity: sha512-Chk32uHMg6TnQdvw2e9IlqPpFX/6NLuK0Ys2PqLb7/gL5uFn9mXvK715FGLlOLQrcO4qIkNHkvPGktzzXexsFw==} engines: {node: '>=6.9.0'} - '@babel/runtime@7.24.5': - resolution: {integrity: sha512-Nms86NXrsaeU9vbBJKni6gXiEXZ4CVpYVzEjDH9Sb8vmZ3UljyA1GSOJl/6LGPO8EHLuSF9H+IxNXHPX8QHJ4g==} + '@babel/runtime@7.24.6': + resolution: {integrity: sha512-Ja18XcETdEl5mzzACGd+DKgaGJzPTCow7EglgwTmHdwokzDFYh/MHua6lU6DV/hjF2IaOJ4oX2nqnjG7RElKOw==} engines: {node: '>=6.9.0'} '@babel/template@7.22.15': @@ -4235,6 +4241,9 @@ packages: '@types/lodash@4.17.0': resolution: {integrity: sha512-t7dhREVv6dbNj0q17X12j7yDG4bD/DHYX7o5/DbDxobP0HnGPgpRz2Ej77aL7TZT3DSw13fqUTj8J4mMnqa7WA==} + '@types/lodash@4.17.4': + resolution: {integrity: sha512-wYCP26ZLxaT3R39kiN2+HcJ4kTd3U1waI/cY7ivWYqFP6pW3ZNpvi6Wd6PHZx7T/t8z0vlkXMg3QYLa7DZ/IJQ==} + '@types/lru-cache@5.1.1': resolution: {integrity: 
sha512-ssE3Vlrys7sdIzs5LOxCzTVMsU7i9oa/IaW92wF32JFb3CVczqOkru2xspuKczHEbG3nvmPY7IFqVmGGHdNbYw==} @@ -4337,8 +4346,8 @@ packages: '@types/react@18.2.64': resolution: {integrity: sha512-MlmPvHgjj2p3vZaxbQgFUQFvD8QiZwACfGqEdDSWou5yISWxDQ4/74nCAwsUiX7UFLKZz3BbVSPj+YxeoGGCfg==} - '@types/react@18.2.79': - resolution: {integrity: sha512-RwGAGXPl9kSXwdNTafkOEuFrTBD5SA2B3iEB96xi8+xu5ddUa/cpvyVCSNn+asgLCTHkb5ZxN8gbuibYJi4s1w==} + '@types/react@18.3.3': + resolution: {integrity: sha512-hti/R0pS0q1/xx+TsI73XIqk26eBsISZ2R0wUijXIngRK9R/e7Xw/cXVxQK7R5JjW+SV4zGcn5hXjudkN/pLIw==} '@types/readable-stream@2.3.15': resolution: {integrity: sha512-oM5JSKQCcICF1wvGgmecmHldZ48OZamtMxcGGVICOJA8o8cahXC1zEVAif8iwoc5j8etxFaRFnf095+CDsuoFQ==} @@ -6864,6 +6873,7 @@ packages: glob@7.2.0: resolution: {integrity: sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q==} + deprecated: Glob versions prior to v9 are no longer supported glob@7.2.3: resolution: {integrity: sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==} @@ -6871,6 +6881,7 @@ packages: glob@8.1.0: resolution: {integrity: sha512-r8hpEjiQEYlF2QU0df3dS+nxxSIreXQS1qRhMJM0Q5NDdR386C7jb7Hwwod8Fgiuex+k0GFjgft18yvxm5XoCQ==} engines: {node: '>=12'} + deprecated: Glob versions prior to v9 are no longer supported global-dirs@3.0.1: resolution: {integrity: sha512-NBcGGFbBA9s1VzD41QXDG+3++t9Mn5t1FpLdhESY6oKY4gYTFpX4wO3sqGUa0Srjtbfj3szX0RnemmrVRUdULA==} @@ -8918,6 +8929,11 @@ packages: engines: {node: '>=16'} hasBin: true + oss-directory@0.0.13: + resolution: {integrity: sha512-jSAGOAq2m9HcnpL1v0Wk5WKLQe4ZsIAFHWpbbdrfbsEG/M2cSsMzTkxVXSHKI8ex7eHYMie8LPnhR9Um27w7pQ==} + engines: {node: '>=16'} + hasBin: true + oss-directory@0.0.7: resolution: {integrity: sha512-xIgkdK8IiI2ho5BX12HnspYDYO+c1CluASJdlIXGyaIWPHVftu6U3l3CeyZq+mMbPQFpvGgJCHCSoC+Ajo8dbw==} engines: {node: '>=16'} @@ -12935,7 +12951,7 @@ snapshots: dependencies: regenerator-runtime: 0.14.1 - '@babel/runtime@7.24.5': + '@babel/runtime@7.24.6': dependencies: regenerator-runtime: 0.14.1 @@ -13070,14 +13086,14 @@ snapshots: '@docsearch/css@3.5.2': {} - '@docsearch/react@3.5.2(@algolia/client-search@4.23.3)(@types/react@18.2.79)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0)': + '@docsearch/react@3.5.2(@algolia/client-search@4.23.3)(@types/react@18.3.3)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0)': dependencies: '@algolia/autocomplete-core': 1.9.3(@algolia/client-search@4.23.3)(algoliasearch@4.22.1)(search-insights@2.13.0) '@algolia/autocomplete-preset-algolia': 1.9.3(@algolia/client-search@4.23.3)(algoliasearch@4.22.1) '@docsearch/css': 3.5.2 algoliasearch: 4.22.1 optionalDependencies: - '@types/react': 18.2.79 + '@types/react': 18.3.3 react: 18.2.0 react-dom: 18.2.0(react@18.2.0) search-insights: 2.13.0 @@ -13480,7 +13496,7 @@ snapshots: - vue-template-compiler - webpack-cli - '@docusaurus/preset-classic@3.1.1(@algolia/client-search@4.23.3)(@types/react@18.2.79)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0)(typescript@5.3.3)': + '@docusaurus/preset-classic@3.1.1(@algolia/client-search@4.23.3)(@types/react@18.3.3)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0)(typescript@5.3.3)': dependencies: '@docusaurus/core': 3.1.1(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0))(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) '@docusaurus/plugin-content-blog': 
3.1.1(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) @@ -13491,9 +13507,9 @@ snapshots: '@docusaurus/plugin-google-gtag': 3.1.1(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) '@docusaurus/plugin-google-tag-manager': 3.1.1(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) '@docusaurus/plugin-sitemap': 3.1.1(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) - '@docusaurus/theme-classic': 3.1.1(@types/react@18.2.79)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) + '@docusaurus/theme-classic': 3.1.1(@types/react@18.3.3)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) '@docusaurus/theme-common': 3.1.1(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0))(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) - '@docusaurus/theme-search-algolia': 3.1.1(@algolia/client-search@4.23.3)(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0))(@types/react@18.2.79)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0)(typescript@5.3.3) + '@docusaurus/theme-search-algolia': 3.1.1(@algolia/client-search@4.23.3)(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0))(@types/react@18.3.3)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0)(typescript@5.3.3) '@docusaurus/types': 3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0) react: 18.2.0 react-dom: 18.2.0(react@18.2.0) @@ -13519,11 +13535,11 @@ snapshots: '@docusaurus/react-loadable@5.5.2(react@18.2.0)': dependencies: - '@types/react': 18.2.64 + '@types/react': 18.3.3 prop-types: 15.8.1 react: 18.2.0 - '@docusaurus/theme-classic@3.1.1(@types/react@18.2.79)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3)': + '@docusaurus/theme-classic@3.1.1(@types/react@18.3.3)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3)': dependencies: '@docusaurus/core': 3.1.1(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0))(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) '@docusaurus/mdx-loader': 3.1.1(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0))(react-dom@18.2.0(react@18.2.0))(react@18.2.0) @@ -13537,7 +13553,7 @@ snapshots: '@docusaurus/utils': 3.1.1(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0)) '@docusaurus/utils-common': 3.1.1(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0)) '@docusaurus/utils-validation': 3.1.1(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0)) - '@mdx-js/react': 3.0.0(@types/react@18.2.79)(react@18.2.0) + '@mdx-js/react': 3.0.0(@types/react@18.3.3)(react@18.2.0) clsx: 2.1.0 copy-text-to-clipboard: 3.2.0 infima: 0.2.0-alpha.43 @@ -13607,9 +13623,9 @@ snapshots: - vue-template-compiler - webpack-cli - '@docusaurus/theme-search-algolia@3.1.1(@algolia/client-search@4.23.3)(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0))(@types/react@18.2.79)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0)(typescript@5.3.3)': + '@docusaurus/theme-search-algolia@3.1.1(@algolia/client-search@4.23.3)(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0))(@types/react@18.3.3)(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0)(typescript@5.3.3)': 
dependencies: - '@docsearch/react': 3.5.2(@algolia/client-search@4.23.3)(@types/react@18.2.79)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0) + '@docsearch/react': 3.5.2(@algolia/client-search@4.23.3)(@types/react@18.3.3)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(search-insights@2.13.0) '@docusaurus/core': 3.1.1(@docusaurus/types@3.1.1(react-dom@18.2.0(react@18.2.0))(react@18.2.0))(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) '@docusaurus/logger': 3.1.1 '@docusaurus/plugin-content-docs': 3.1.1(eslint@8.56.0)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)(typescript@5.3.3) @@ -15004,10 +15020,10 @@ snapshots: transitivePeerDependencies: - supports-color - '@mdx-js/react@3.0.0(@types/react@18.2.79)(react@18.2.0)': + '@mdx-js/react@3.0.0(@types/react@18.3.3)(react@18.2.0)': dependencies: '@types/mdx': 2.0.10 - '@types/react': 18.2.79 + '@types/react': 18.3.3 react: 18.2.0 '@metamask/eth-sig-util@4.0.1': @@ -15065,7 +15081,7 @@ snapshots: '@mui/private-theming@5.15.14(@types/react@18.2.48)(react@18.2.0)': dependencies: - '@babel/runtime': 7.24.5 + '@babel/runtime': 7.24.6 '@mui/utils': 5.15.14(@types/react@18.2.48)(react@18.2.0) prop-types: 15.8.1 react: 18.2.0 @@ -15083,7 +15099,7 @@ snapshots: '@mui/styled-engine@5.15.14(@emotion/react@11.11.3(@types/react@18.2.48)(react@18.2.0))(@emotion/styled@11.11.0(@emotion/react@11.11.3(@types/react@18.2.48)(react@18.2.0))(@types/react@18.2.48)(react@18.2.0))(react@18.2.0)': dependencies: - '@babel/runtime': 7.24.5 + '@babel/runtime': 7.24.6 '@emotion/cache': 11.11.0 csstype: 3.1.3 prop-types: 15.8.1 @@ -15105,7 +15121,7 @@ snapshots: '@mui/system@5.15.15(@emotion/react@11.11.3(@types/react@18.2.48)(react@18.2.0))(@emotion/styled@11.11.0(@emotion/react@11.11.3(@types/react@18.2.48)(react@18.2.0))(@types/react@18.2.48)(react@18.2.0))(@types/react@18.2.48)(react@18.2.0)': dependencies: - '@babel/runtime': 7.24.5 + '@babel/runtime': 7.24.6 '@mui/private-theming': 5.15.14(@types/react@18.2.48)(react@18.2.0) '@mui/styled-engine': 5.15.14(@emotion/react@11.11.3(@types/react@18.2.48)(react@18.2.0))(@emotion/styled@11.11.0(@emotion/react@11.11.3(@types/react@18.2.48)(react@18.2.0))(@types/react@18.2.48)(react@18.2.0))(react@18.2.0) '@mui/types': 7.2.14(@types/react@18.2.48) @@ -15145,7 +15161,7 @@ snapshots: '@mui/utils@5.15.14(@types/react@18.2.48)(react@18.2.0)': dependencies: - '@babel/runtime': 7.24.5 + '@babel/runtime': 7.24.6 '@types/prop-types': 15.7.12 prop-types: 15.8.1 react: 18.2.0 @@ -15774,7 +15790,7 @@ snapshots: dependencies: react: 18.2.0 - '@plasmicapp/react-web@0.2.337(@types/react@18.2.79)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)': + '@plasmicapp/react-web@0.2.337(@types/react@18.3.3)(react-dom@18.2.0(react@18.2.0))(react@18.2.0)': dependencies: '@plasmicapp/auth-react': 0.0.21(react@18.2.0) '@plasmicapp/data-sources': 0.1.155(react-dom@18.2.0(react@18.2.0))(react@18.2.0) @@ -15808,7 +15824,7 @@ snapshots: fast-deep-equal: 3.1.3 react: 18.2.0 react-dom: 18.2.0(react@18.2.0) - valtio: 1.13.2(@types/react@18.2.79)(react@18.2.0) + valtio: 1.13.2(@types/react@18.3.3)(react@18.2.0) transitivePeerDependencies: - '@types/react' @@ -17411,6 +17427,8 @@ snapshots: '@types/lodash@4.17.0': {} + '@types/lodash@4.17.4': {} + '@types/lru-cache@5.1.1': {} '@types/luxon@3.3.1': {} @@ -17496,7 +17514,7 @@ snapshots: '@types/react-router@5.1.20': dependencies: '@types/history': 4.7.11 - '@types/react': 18.2.64 + '@types/react': 18.3.3 '@types/react-transition-group@4.4.10': dependencies: @@ 
-17514,7 +17532,7 @@ snapshots: '@types/scheduler': 0.16.8 csstype: 3.1.3 - '@types/react@18.2.79': + '@types/react@18.3.3': dependencies: '@types/prop-types': 15.7.12 csstype: 3.1.3 @@ -19351,9 +19369,9 @@ snapshots: dequal@2.0.3: {} - derive-valtio@0.1.0(valtio@1.13.2(@types/react@18.2.79)(react@18.2.0)): + derive-valtio@0.1.0(valtio@1.13.2(@types/react@18.3.3)(react@18.2.0)): dependencies: - valtio: 1.13.2(@types/react@18.2.79)(react@18.2.0) + valtio: 1.13.2(@types/react@18.3.3)(react@18.2.0) destroy@1.2.0: {} @@ -23539,9 +23557,9 @@ snapshots: os-tmpdir@1.0.2: {} - oss-directory@0.0.12(ts-node@10.9.1(@types/node@20.11.17)(typescript@5.3.3))(typescript@5.3.3): + oss-directory@0.0.12(ts-node@10.9.1(@types/node@20.12.7)(typescript@5.3.3))(typescript@5.3.3): dependencies: - '@ethereum-attestation-service/eas-sdk': 1.4.0(ts-node@10.9.1(@types/node@20.11.17)(typescript@5.3.3))(typescript@5.3.3) + '@ethereum-attestation-service/eas-sdk': 1.4.0(ts-node@10.9.1(@types/node@20.12.7)(typescript@5.3.3))(typescript@5.3.3) ajv: 8.12.0 ajv-formats: 2.1.1(ajv@8.12.0) chalk: 5.3.0 @@ -23563,9 +23581,9 @@ snapshots: - typescript - utf-8-validate - oss-directory@0.0.12(ts-node@10.9.1(@types/node@20.12.7)(typescript@5.3.3))(typescript@5.3.3): + oss-directory@0.0.13(ts-node@10.9.1(@types/node@20.11.17)(typescript@5.3.3))(typescript@5.3.3): dependencies: - '@ethereum-attestation-service/eas-sdk': 1.4.0(ts-node@10.9.1(@types/node@20.12.7)(typescript@5.3.3))(typescript@5.3.3) + '@ethereum-attestation-service/eas-sdk': 1.4.0(ts-node@10.9.1(@types/node@20.11.17)(typescript@5.3.3))(typescript@5.3.3) ajv: 8.12.0 ajv-formats: 2.1.1(ajv@8.12.0) chalk: 5.3.0 @@ -26059,13 +26077,13 @@ snapshots: spdx-correct: 3.2.0 spdx-expression-parse: 3.0.1 - valtio@1.13.2(@types/react@18.2.79)(react@18.2.0): + valtio@1.13.2(@types/react@18.3.3)(react@18.2.0): dependencies: - derive-valtio: 0.1.0(valtio@1.13.2(@types/react@18.2.79)(react@18.2.0)) + derive-valtio: 0.1.0(valtio@1.13.2(@types/react@18.3.3)(react@18.2.0)) proxy-compare: 2.6.0 use-sync-external-store: 1.2.0(react@18.2.0) optionalDependencies: - '@types/react': 18.2.79 + '@types/react': 18.3.3 react: 18.2.0 value-equal@1.0.1: {} diff --git a/warehouse/dbt/macros/models/contract_invocation_events_with_l1.sql b/warehouse/dbt/macros/models/contract_invocation_events_with_l1.sql index aa825d3dd..70248334c 100644 --- a/warehouse/dbt/macros/models/contract_invocation_events_with_l1.sql +++ b/warehouse/dbt/macros/models/contract_invocation_events_with_l1.sql @@ -18,7 +18,7 @@ with blockchain_artifacts as ( when artifact_type = 'EOA' then 1 else 0 end as artifact_rank - from {{ ref('int_artifacts_by_project') }} + from {{ ref('int_all_artifacts') }} where artifact_source = "{{ upper_network_name }}" ) group by artifact_source_id diff --git a/warehouse/dbt/macros/models/filtered_blockchain_events.sql b/warehouse/dbt/macros/models/filtered_blockchain_events.sql index 01196d3e5..6544a8bb1 100644 --- a/warehouse/dbt/macros/models/filtered_blockchain_events.sql +++ b/warehouse/dbt/macros/models/filtered_blockchain_events.sql @@ -1,7 +1,7 @@ {% macro filtered_blockchain_events(artifact_source, source_name, source_table) %} with known_addresses as ( select distinct `artifact_source_id` as `address` - from {{ ref("int_artifacts_by_project") }} + from {{ ref("int_all_artifacts") }} where LOWER(artifact_source) = LOWER('{{ artifact_source }}') ), known_to as ( select events.* diff --git a/warehouse/dbt/models/intermediate/blockchain/int_derived_contracts.sql 
b/warehouse/dbt/models/intermediate/blockchain/int_derived_contracts.sql index 81f599c15..73c743d22 100644 --- a/warehouse/dbt/models/intermediate/blockchain/int_derived_contracts.sql +++ b/warehouse/dbt/models/intermediate/blockchain/int_derived_contracts.sql @@ -9,7 +9,8 @@ with {% for network in networks %} select factories.block_timestamp as block_timestamp, factories.transaction_hash as transaction_hash, - deployers.deployer_address as deployer_address, + factories.originating_address as deployer_address, + deployers.deployer_address as factory_deployer_address, factories.contract_address as contract_address from {{ ref("stg_%s__factories" % network) }} as factories inner join {{ ref("stg_%s__deployers" % network) }} as deployers @@ -19,6 +20,7 @@ with {% for network in networks %} block_timestamp, transaction_hash, deployer_address, + null as factory_deployer_address, contract_address from {{ ref("stg_%s__deployers" % network) }} ){% if not loop.last %},{% endif %} @@ -32,6 +34,7 @@ with {% for network in networks %} transaction_hash, "{{ network.upper() }}" as network, deployer_address, + factory_deployer_address, contract_address from {{ network }}_factories_and_deployers {% endfor %} diff --git a/warehouse/dbt/models/intermediate/blockchain/int_optimism_contract_invocation_events.sql b/warehouse/dbt/models/intermediate/blockchain/int_optimism_contract_invocation_events.sql index f23a715a4..273c89c11 100644 --- a/warehouse/dbt/models/intermediate/blockchain/int_optimism_contract_invocation_events.sql +++ b/warehouse/dbt/models/intermediate/blockchain/int_optimism_contract_invocation_events.sql @@ -33,7 +33,7 @@ with blockchain_artifacts as ( when artifact_type = 'EOA' then 1 else 0 end as artifact_rank - from {{ ref('int_artifacts_by_project') }} + from {{ ref('int_all_artifacts') }} where LOWER(artifact_source) = LOWER('OPTIMISM') ) group by artifact_source_id diff --git a/warehouse/dbt/models/intermediate/blockchain/int_optimism_transactions.sql b/warehouse/dbt/models/intermediate/blockchain/int_optimism_transactions.sql index 9a127a445..900935705 100644 --- a/warehouse/dbt/models/intermediate/blockchain/int_optimism_transactions.sql +++ b/warehouse/dbt/models/intermediate/blockchain/int_optimism_transactions.sql @@ -13,7 +13,7 @@ }} with known_addresses as ( select distinct `artifact_source_id` as `address` - from {{ ref("int_artifacts_by_project") }} + from {{ ref("int_all_artifacts") }} where `artifact_source` = 'OPTIMISM' ), {% if target.name == 'production' %} diff --git a/warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql b/warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql new file mode 100644 index 000000000..f88c9c63e --- /dev/null +++ b/warehouse/dbt/models/intermediate/directory/int_all_artifacts.sql @@ -0,0 +1,218 @@ +{# + This model is responsible for generating a list of all artifacts associated with a project. + This includes repositories, npm packages, blockchain addresses, and contracts. + + Note: This will create a separate row for each artifact_type, which is de-duplicated + in int_artifacts_by_project + Note: Currently, the source and namespace for blockchain artifacts are the same. This may change + in the future. +#} + +with all_repos as ( + {# + Currently this is just Github. 
+ oss-directory needs some refactoring to support multiple repository providers + #} + select + "GITHUB" as artifact_source, + "REPOSITORY" as artifact_type, + projects.project_id, + repos.owner as artifact_namespace, + repos.name as artifact_name, + repos.url as artifact_url, + CAST(repos.id as STRING) as artifact_source_id + from + {{ ref('stg_ossd__current_projects') }} as projects + cross join + UNNEST(JSON_QUERY_ARRAY(projects.github)) as github + inner join + {{ ref('stg_ossd__current_repositories') }} as repos + on + LOWER(CONCAT("https://github.com/", repos.owner)) + = LOWER(JSON_VALUE(github.url)) + or LOWER(repos.url) = LOWER(JSON_VALUE(github.url)) +), + +all_npm_raw as ( + select + "NPM" as artifact_source, + "PACKAGE" as artifact_type, + projects.project_id, + JSON_VALUE(npm.url) as artifact_source_id, + case + when + JSON_VALUE(npm.url) like "https://npmjs.com/package/%" + then SUBSTR(JSON_VALUE(npm.url), 28) + when + JSON_VALUE(npm.url) like "https://www.npmjs.com/package/%" + then SUBSTR(JSON_VALUE(npm.url), 31) + end as artifact_name, + JSON_VALUE(npm.url) as artifact_url + from + {{ ref('stg_ossd__current_projects') }} as projects + cross join + UNNEST(JSON_QUERY_ARRAY(projects.npm)) as npm +), + +all_npm as ( + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_name, + artifact_url, + SPLIT(REPLACE(artifact_name, "@", ""), "/")[SAFE_OFFSET(0)] + as artifact_namespace + from all_npm_raw +), + +ossd_blockchain as ( + select + projects.project_id, + tag as artifact_type, + network as artifact_namespace, + network as artifact_source, + JSON_VALUE(blockchains.address) as artifact_source_id, + JSON_VALUE(blockchains.address) as artifact_name, + JSON_VALUE(blockchains.address) as artifact_url + from + {{ ref('stg_ossd__current_projects') }} as projects + cross join + UNNEST(JSON_QUERY_ARRAY(projects.blockchain)) as blockchains + cross join + UNNEST(JSON_VALUE_ARRAY(blockchains.networks)) as network + cross join + UNNEST(JSON_VALUE_ARRAY(blockchains.tags)) as tag +), + +all_deployers as ( + select + *, + "MAINNET" as artifact_namespace, + "ETHEREUM" as artifact_source + from {{ ref("stg_ethereum__deployers") }} + union all + select + *, + "ARBITRUM_ONE" as artifact_namespace, + "ARBITRUM_ONE" as artifact_source + from {{ ref("stg_arbitrum__deployers") }} + union all + {# Includes all deployers of a contract #} + select + block_timestamp, + transaction_hash, + deployer_address, + contract_address, + UPPER(network) as artifact_namespace, + UPPER(network) as artifact_source + from {{ ref("int_derived_contracts") }} + union all + {# Includes all factory deployers of a contract #} + select + block_timestamp, + transaction_hash, + factory_deployer_address as deployer_address, + contract_address, + UPPER(network) as artifact_namespace, + UPPER(network) as artifact_source + from {{ ref("int_derived_contracts") }} +), + +discovered_contracts as ( + select + "CONTRACT" as artifact_type, + ob.project_id, + ad.contract_address as artifact_source_id, + ob.artifact_source, + ob.artifact_namespace, + ad.contract_address as artifact_name, + ad.contract_address as artifact_url + from ossd_blockchain as ob + inner join all_deployers as ad + on + ob.artifact_source_id = ad.deployer_address + {# + We currently do not really have a notion of namespace in + oss-directory. 
We may need to change this when that time comes + #} + and UPPER(ob.artifact_source) in (UPPER(ad.artifact_source), "ANY_EVM") + and UPPER(ob.artifact_namespace) in ( + UPPER(ad.artifact_namespace), "ANY_EVM" + ) + and UPPER(ob.artifact_type) in ("EOA", "DEPLOYER", "FACTORY") +), + +all_artifacts as ( + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + all_repos + union all + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + ossd_blockchain + union all + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + discovered_contracts + union all + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + all_npm +), + +all_normalized_artifacts as ( + select distinct + project_id, + LOWER(artifact_source_id) as artifact_source_id, + {# + artifact_source and artifact_type are considered internal constants hence + we apply an UPPER transform + #} + UPPER(artifact_source) as artifact_source, + UPPER(artifact_type) as artifact_type, + LOWER(artifact_namespace) as artifact_namespace, + LOWER(artifact_name) as artifact_name, + LOWER(artifact_url) as artifact_url + from all_artifacts +) + +select + project_id, + {{ oso_id("a.artifact_source", "a.artifact_source_id") }} as `artifact_id`, + artifact_source_id, + artifact_source, + artifact_namespace, + artifact_name, + artifact_url, + artifact_type +from all_normalized_artifacts as a diff --git a/warehouse/dbt/models/intermediate/directory/int_artifact_types.sql b/warehouse/dbt/models/intermediate/directory/int_artifact_types.sql new file mode 100644 index 000000000..9bb025e1f --- /dev/null +++ b/warehouse/dbt/models/intermediate/directory/int_artifact_types.sql @@ -0,0 +1,8 @@ +select distinct + artifact_id, + artifact_source_id, + artifact_source, + artifact_namespace, + artifact_name, + artifact_type +from {{ ref('int_all_artifacts') }} diff --git a/warehouse/dbt/models/intermediate/directory/int_artifacts.sql b/warehouse/dbt/models/intermediate/directory/int_artifacts.sql index ccf77d345..623729c0d 100644 --- a/warehouse/dbt/models/intermediate/directory/int_artifacts.sql +++ b/warehouse/dbt/models/intermediate/directory/int_artifacts.sql @@ -7,7 +7,6 @@ with all_artifacts as ( select artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_url, artifact_name @@ -16,7 +15,6 @@ with all_artifacts as ( select artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_url, MAX_BY(artifact_name, last_used) as artifact_name @@ -24,7 +22,6 @@ with all_artifacts as ( group by artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_url ) @@ -33,7 +30,6 @@ select distinct {{ oso_id("artifact_source", "artifact_source_id") }} as artifact_id, artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_name, artifact_url diff --git a/warehouse/dbt/models/intermediate/directory/int_artifacts_by_project.sql b/warehouse/dbt/models/intermediate/directory/int_artifacts_by_project.sql index 14fc78b43..e76433c40 100644 --- a/warehouse/dbt/models/intermediate/directory/int_artifacts_by_project.sql +++ b/warehouse/dbt/models/intermediate/directory/int_artifacts_by_project.sql @@ -1,198 +1,9 @@ -{# - This model is responsible 
for generating a list of all artifacts associated with a project. - This includes repositories, npm packages, blockchain addresses, and contracts. - - Currently, the source and namespace for blockchain artifacts are the same. This may change - in the future. -#} - -with all_repos as ( - {# - Currently this is just Github. - oss-directory needs some refactoring to support multiple repository providers - #} - select - "GITHUB" as artifact_source, - "REPOSITORY" as artifact_type, - projects.project_id, - repos.owner as artifact_namespace, - repos.name as artifact_name, - repos.url as artifact_url, - CAST(repos.id as STRING) as artifact_source_id - from - {{ ref('stg_ossd__current_projects') }} as projects - cross join - UNNEST(JSON_QUERY_ARRAY(projects.github)) as github - inner join - {{ ref('stg_ossd__current_repositories') }} as repos - on - LOWER(CONCAT("https://github.com/", repos.owner)) - = LOWER(JSON_VALUE(github.url)) - or LOWER(repos.url) = LOWER(JSON_VALUE(github.url)) -), - -all_npm_raw as ( - select - "NPM" as artifact_source, - "PACKAGE" as artifact_type, - projects.project_id, - JSON_VALUE(npm.url) as artifact_source_id, - case - when - JSON_VALUE(npm.url) like "https://npmjs.com/package/%" - then SUBSTR(JSON_VALUE(npm.url), 28) - when - JSON_VALUE(npm.url) like "https://www.npmjs.com/package/%" - then SUBSTR(JSON_VALUE(npm.url), 31) - end as artifact_name, - JSON_VALUE(npm.url) as artifact_url - from - {{ ref('stg_ossd__current_projects') }} as projects - cross join - UNNEST(JSON_QUERY_ARRAY(projects.npm)) as npm -), - -all_npm as ( - select - project_id, - artifact_source_id, - artifact_source, - artifact_type, - artifact_name, - artifact_url, - SPLIT(REPLACE(artifact_name, "@", ""), "/")[SAFE_OFFSET(0)] - as artifact_namespace - from all_npm_raw -), - -ossd_blockchain as ( - select - projects.project_id, - tag as artifact_type, - network as artifact_namespace, - network as artifact_source, - JSON_VALUE(blockchains.address) as artifact_source_id, - JSON_VALUE(blockchains.address) as artifact_name, - JSON_VALUE(blockchains.address) as artifact_url - from - {{ ref('stg_ossd__current_projects') }} as projects - cross join - UNNEST(JSON_QUERY_ARRAY(projects.blockchain)) as blockchains - cross join - UNNEST(JSON_VALUE_ARRAY(blockchains.networks)) as network - cross join - UNNEST(JSON_VALUE_ARRAY(blockchains.tags)) as tag -), - -all_deployers as ( - select - *, - "MAINNET" as artifact_namespace, - "ETHEREUM" as artifact_source - from {{ ref("stg_ethereum__deployers") }} - union all - select - *, - "ARBITRUM_ONE" as artifact_namespace, - "ARBITRUM_ONE" as artifact_source - from {{ ref("stg_arbitrum__deployers") }} - union all - select - block_timestamp, - transaction_hash, - deployer_address, - contract_address, - UPPER(network) as artifact_namespace, - UPPER(network) as artifact_source - from {{ ref("int_derived_contracts") }} -), - -discovered_contracts as ( - select - "CONTRACT" as artifact_type, - ob.project_id, - ad.contract_address as artifact_source_id, - ob.artifact_namespace, - ob.artifact_namespace as artifact_source, - ad.contract_address as artifact_name, - ad.contract_address as artifact_url - from ossd_blockchain as ob - inner join all_deployers as ad - on - ob.artifact_source_id = ad.deployer_address - and UPPER(ob.artifact_namespace) = UPPER(ad.artifact_namespace) - and UPPER(ob.artifact_type) in ("EOA", "DEPLOYER", "FACTORY") -), - -all_artifacts as ( - select - project_id, - artifact_source_id, - artifact_source, - artifact_type, - artifact_namespace, - 
artifact_name, - artifact_url - from - all_repos - union all - select - project_id, - artifact_source_id, - artifact_source, - artifact_type, - artifact_namespace, - artifact_name, - artifact_url - from - ossd_blockchain - union all - select - project_id, - artifact_source_id, - artifact_source, - artifact_type, - artifact_namespace, - artifact_name, - artifact_url - from - discovered_contracts - union all - select - project_id, - artifact_source_id, - artifact_source, - artifact_type, - artifact_namespace, - artifact_name, - artifact_url - from - all_npm -), - -all_unique_artifacts as ( - select distinct - project_id, - LOWER(artifact_source_id) as artifact_source_id, - {# - artifact_source and artifact_type are considered internal constants hence - we apply an UPPER transform - #} - UPPER(artifact_source) as artifact_source, - UPPER(artifact_type) as artifact_type, - LOWER(artifact_namespace) as artifact_namespace, - LOWER(artifact_name) as artifact_name, - LOWER(artifact_url) as artifact_url - from all_artifacts -) - -select +select distinct project_id, + artifact_id, artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_name, - artifact_url, - {{ oso_id("a.artifact_source", "a.artifact_source_id") }} as `artifact_id` -from all_unique_artifacts as a + artifact_url +from {{ ref('int_all_artifacts') }} diff --git a/warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql b/warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql index 69836a48b..1cbe14b83 100644 --- a/warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql +++ b/warehouse/dbt/models/intermediate/directory/int_artifacts_by_user.sql @@ -11,9 +11,10 @@ with farcaster_users as ( int_users.user_source, int_users.user_source_id, int_users.display_name, - stg_farcaster__addresses.address as artifact_name, int_artifacts.artifact_id, - int_artifacts.artifact_source + int_artifacts.artifact_source, + int_artifacts.artifact_namespace, + stg_farcaster__addresses.address as artifact_name from {{ ref('int_users') }} inner join {{ ref('stg_farcaster__addresses') }} on int_users.user_source_id = stg_farcaster__addresses.fid @@ -27,7 +28,8 @@ select user_source, user_source_id, display_name, - artifact_name, artifact_id, - artifact_source + artifact_source, + artifact_namespace, + artifact_name from farcaster_users diff --git a/warehouse/dbt/models/intermediate/directory/int_contracts.sql b/warehouse/dbt/models/intermediate/directory/int_contracts.sql new file mode 100644 index 000000000..5fa83bf1a --- /dev/null +++ b/warehouse/dbt/models/intermediate/directory/int_contracts.sql @@ -0,0 +1,114 @@ +with deployers as ( + select + *, + 'OPTIMISM' as artifact_source + from {{ ref('stg_optimism__deployers') }} + union all + select + *, + 'BASE' as artifact_source + from {{ ref('stg_base__deployers') }} + union all + select + *, + 'FRAX' as artifact_source + from {{ ref('stg_frax__deployers') }} + union all + select + *, + 'MODE' as artifact_source + from {{ ref('stg_mode__deployers') }} + union all + select + *, + 'ZORA' as artifact_source + from {{ ref('stg_zora__deployers') }} +), + +factories as ( + select + *, + 'OPTIMISM' as artifact_source + from {{ ref('stg_optimism__factories') }} + union all + select + *, + 'BASE' as artifact_source + from {{ ref('stg_base__factories') }} + union all + select + *, + 'FRAX' as artifact_source + from {{ ref('stg_frax__factories') }} + union all + select + *, + 'MODE' as artifact_source + from {{ ref('stg_mode__factories') }} + union 
all + select + *, + 'ZORA' as artifact_source + from {{ ref('stg_zora__factories') }} +), + +contract_deployments as ( + select + artifact_source, + transaction_hash, + block_timestamp, + deployer_address as root_deployer_address, + deployer_address as created_by_address, + contract_address, + deployer_address as originating_eoa_address, + 'EOA' as creator_type, + case + when contract_address in ( + select distinct factory_address + from factories + ) then 'FACTORY' + else 'CONTRACT' + end as contract_type + from deployers +), + +factory_deployments as ( + select + factories.artifact_source, + factories.transaction_hash, + factories.block_timestamp, + deployers.deployer_address as root_deployer_address, + factories.factory_address as created_by_address, + factories.contract_address, + 'FACTORY' as creator_type, + 'CONTRACT' as contract_type, + COALESCE(factories.originating_address, deployers.deployer_address) + as originating_eoa_address + from factories + inner join deployers + on factories.factory_address = deployers.contract_address +) + +select + artifact_source, + root_deployer_address, + contract_address, + contract_type, + created_by_address, + creator_type, + originating_eoa_address, + transaction_hash, + block_timestamp +from contract_deployments +union all +select + artifact_source, + root_deployer_address, + contract_address, + contract_type, + created_by_address, + creator_type, + originating_eoa_address, + transaction_hash, + block_timestamp +from factory_deployments diff --git a/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql b/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql index 3bf82bba1..4d02f91fe 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql @@ -177,3 +177,4 @@ left join code_metrics on project_metadata.project_id = code_metrics.project_id and project_metadata.event_source = code_metrics.event_source +where code_metrics.event_source is not null diff --git a/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql b/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql index 221a7be56..af038687d 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql @@ -176,3 +176,4 @@ from {{ ref('int_projects') }} left join aggs on int_projects.project_id = aggs.project_id +where aggs.event_source is not null diff --git a/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql b/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql index 458cf7634..0be828205 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/int_repo_metrics_by_project.sql @@ -34,7 +34,8 @@ repo_stats as ( MIN(time) as first_commit_time, MAX(time) as last_commit_time, COUNT(distinct TIMESTAMP_TRUNC(time, day)) as days_with_commits_count, - COUNT(distinct from_artifact_id) as contributors_to_repo_count + COUNT(distinct from_artifact_id) as contributors_to_repo_count, + SUM(amount) as commit_count from {{ ref('int_events_to_project') }} where event_type = 'COMMIT_CODE' group by @@ -44,11 +45,11 @@ repo_stats as ( select - int_artifacts_by_project.project_id, - int_artifacts_by_project.artifact_id, - int_artifacts_by_project.artifact_namespace, - 
int_artifacts_by_project.artifact_name, - int_artifacts_by_project.artifact_source, + int_all_artifacts.project_id, + int_all_artifacts.artifact_id, + int_all_artifacts.artifact_namespace, + int_all_artifacts.artifact_name, + int_all_artifacts.artifact_source, repo_snapshot.is_fork, repo_snapshot.fork_count, repo_snapshot.star_count, @@ -58,12 +59,13 @@ select repo_stats.first_commit_time, repo_stats.last_commit_time, repo_stats.days_with_commits_count, - repo_stats.contributors_to_repo_count -from {{ ref('int_artifacts_by_project') }} + repo_stats.contributors_to_repo_count, + repo_stats.commit_count +from {{ ref('int_all_artifacts') }} left join repo_snapshot - on int_artifacts_by_project.artifact_id = repo_snapshot.artifact_id + on int_all_artifacts.artifact_id = repo_snapshot.artifact_id left join repo_stats - on int_artifacts_by_project.artifact_id = repo_stats.artifact_id + on int_all_artifacts.artifact_id = repo_stats.artifact_id where - int_artifacts_by_project.artifact_source = 'GITHUB' - and UPPER(int_artifacts_by_project.artifact_type) = 'REPOSITORY' + int_all_artifacts.artifact_source = 'GITHUB' + and UPPER(int_all_artifacts.artifact_type) = 'REPOSITORY' diff --git a/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql b/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql index 93c7b6726..a3576c8a5 100644 --- a/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql +++ b/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql @@ -7,9 +7,9 @@ select artifacts_by_project.artifact_id, artifacts_by_project.artifact_source_id, + artifacts_by_project.artifact_source, artifacts_by_project.artifact_namespace, artifacts_by_project.artifact_name, - artifacts_by_project.artifact_type, projects.project_id, projects.project_source, projects.project_namespace, diff --git a/warehouse/dbt/models/marts/directory/artifacts_v1.sql b/warehouse/dbt/models/marts/directory/artifacts_v1.sql index 74c59827b..b58ef983c 100644 --- a/warehouse/dbt/models/marts/directory/artifacts_v1.sql +++ b/warehouse/dbt/models/marts/directory/artifacts_v1.sql @@ -10,7 +10,6 @@ select artifact_id, artifact_source_id, artifact_source, - artifact_type, artifact_namespace, artifact_name, artifact_url diff --git a/warehouse/dbt/models/marts/directory/contracts_v1.sql b/warehouse/dbt/models/marts/directory/contracts_v1.sql new file mode 100644 index 000000000..c22f748c2 --- /dev/null +++ b/warehouse/dbt/models/marts/directory/contracts_v1.sql @@ -0,0 +1,13 @@ +{{ + config(meta = { + 'sync_to_db': True + }) +}} + +select distinct + artifact_source, + root_deployer_address, + contract_address, + contract_type +from {{ ref('int_contracts') }} +where root_deployer_address is not null diff --git a/warehouse/dbt/models/marts/superchain/metrics/rf4_monthly_active_addresses.sql b/warehouse/dbt/models/marts/superchain/metrics/rf4_monthly_active_addresses.sql index 1b06de34d..689682872 100644 --- a/warehouse/dbt/models/marts/superchain/metrics/rf4_monthly_active_addresses.sql +++ b/warehouse/dbt/models/marts/superchain/metrics/rf4_monthly_active_addresses.sql @@ -1,3 +1,4 @@ +{# TODO: double check the math on total_months #} with txns as ( select project_id, @@ -21,9 +22,7 @@ maas as ( ), total_months as ( - select - {# TODO: double check this math #} - (DATE_DIFF(max_month, min_month, day) + 30) / 30 as months + select (DATE_DIFF(max_month, min_month, day) + 30) / 30 as months from ( select MIN(bucket_month) as min_month, diff --git 
a/warehouse/dbt/models/marts/superchain/metrics/rf4_power_user_addresses.sql b/warehouse/dbt/models/marts/superchain/metrics/rf4_power_user_addresses.sql new file mode 100644 index 000000000..5d557dcf6 --- /dev/null +++ b/warehouse/dbt/models/marts/superchain/metrics/rf4_power_user_addresses.sql @@ -0,0 +1,43 @@ +with txns as ( + select + project_id, + from_artifact_name, + bucket_day, + amount + from {{ ref('rf4_events_daily_to_project') }} + where + event_type = 'CONTRACT_INVOCATION_SUCCESS_DAILY_COUNT' + and bucket_day >= '2023-10-01' +), + +address_stats as ( + select + from_artifact_name, + COUNT(distinct bucket_day) as days_count, + COUNT(distinct project_id) as project_count, + SUM(amount) as txns_count + from txns + group by + from_artifact_name +), + +power_users as ( + select from_artifact_name + from address_stats + where + days_count >= 30 + and project_count >= 10 + and txns_count >= 100 +) + +select + txns.project_id, + 'power_user_addresses' as metric, + COUNT(distinct txns.from_artifact_name) as amount +from txns +left join power_users + on txns.from_artifact_name = power_users.from_artifact_name +where + power_users.from_artifact_name is not null +group by + txns.project_id diff --git a/warehouse/dbt/models/marts/superchain/metrics/rf4_trusted_monthly_active_users.sql b/warehouse/dbt/models/marts/superchain/metrics/rf4_trusted_monthly_active_users.sql index 88071a43c..b61b742c2 100644 --- a/warehouse/dbt/models/marts/superchain/metrics/rf4_trusted_monthly_active_users.sql +++ b/warehouse/dbt/models/marts/superchain/metrics/rf4_trusted_monthly_active_users.sql @@ -1,3 +1,4 @@ +{# TODO: double check the math on total_months #} with txns as ( select project_id, @@ -22,9 +23,7 @@ maus as ( ), total_months as ( - select - {# TODO: double check this math #} - (DATE_DIFF(max_month, min_month, day) + 30) / 30 as months + select (DATE_DIFF(max_month, min_month, day) + 30) / 30 as months from ( select MIN(bucket_month) as min_month, diff --git a/warehouse/dbt/models/marts/superchain/rf4_events_daily_to_project.sql b/warehouse/dbt/models/marts/superchain/rf4_events_daily_to_project.sql index 7676c93fc..3b070926e 100644 --- a/warehouse/dbt/models/marts/superchain/rf4_events_daily_to_project.sql +++ b/warehouse/dbt/models/marts/superchain/rf4_events_daily_to_project.sql @@ -40,15 +40,18 @@ select events.event_source, events.event_type, events.amount, - rf4_trusted_users.user_id as trusted_user_id + case + when rf4_trusted_users.is_trusted_user is true + then rf4_trusted_users.address + end as trusted_user_id from events -left join {{ ref('artifacts_v1') }} as to_artifacts +left join {{ ref('int_artifact_types') }} as to_artifacts on events.to_artifact_id = to_artifacts.artifact_id left join {{ ref('artifacts_v1') }} as from_artifacts on events.from_artifact_id = from_artifacts.artifact_id left join {{ ref('projects_v1') }} on events.project_id = projects_v1.project_id left join {{ ref('rf4_trusted_users') }} - on from_artifacts.artifact_name = rf4_trusted_users.artifact_name + on from_artifacts.artifact_name = rf4_trusted_users.address where to_artifacts.artifact_type = 'CONTRACT' diff --git a/warehouse/dbt/models/marts/superchain/rf4_impact_metrics_by_project.sql b/warehouse/dbt/models/marts/superchain/rf4_impact_metrics_by_project.sql index a9668fb0b..33410d64b 100644 --- a/warehouse/dbt/models/marts/superchain/rf4_impact_metrics_by_project.sql +++ b/warehouse/dbt/models/marts/superchain/rf4_impact_metrics_by_project.sql @@ -24,6 +24,8 @@ with metrics as ( select * from {{ 
ref('rf4_trusted_recurring_users') }} union all select * from {{ ref('rf4_recurring_addresses') }} + union all + select * from {{ ref('rf4_power_user_addresses') }} ), pivot_metrics as ( @@ -61,7 +63,10 @@ pivot_metrics as ( ) as trusted_recurring_users, MAX( case when metric = 'recurring_addresses' then amount else 0 end - ) as recurring_addresses + ) as recurring_addresses, + MAX( + case when metric = 'power_user_addresses' then amount else 0 end + ) as power_user_addresses from metrics group by project_id ) @@ -83,7 +88,8 @@ select pivot_metrics.monthly_active_addresses, pivot_metrics.trusted_monthly_active_users, pivot_metrics.recurring_addresses, - pivot_metrics.trusted_recurring_users + pivot_metrics.trusted_recurring_users, + pivot_metrics.power_user_addresses from pivot_metrics left join {{ ref('projects_v1') }} on pivot_metrics.project_id = projects_v1.project_id diff --git a/warehouse/dbt/models/marts/superchain/rf4_impact_metrics_by_project__schema.yml b/warehouse/dbt/models/marts/superchain/rf4_impact_metrics_by_project__schema.yml index 393dc4f35..3aaddde5d 100644 --- a/warehouse/dbt/models/marts/superchain/rf4_impact_metrics_by_project__schema.yml +++ b/warehouse/dbt/models/marts/superchain/rf4_impact_metrics_by_project__schema.yml @@ -32,7 +32,7 @@ models: **Event Type**: A label for the type of event that has been aggregated. Examples include: `CONTRACT_INVOCATION_DAILY_COUNT`, `CONTRACT_INVOCATION_SUCCESS_DAILY_COUNT`, `CONTRACT_INVOCATION_DAILY_L2_GAS_USED`. - name: amount description: > - **Amount**: The amount or value associated with teh event. This is a count or sum depending on the event type. + **Amount**: The amount or value associated with the event. This is a count or sum depending on the event type. - name: trusted_user_id description: > **Trusted User ID**: A unique identifier for a trusted user (generated by OSO). If the `from_artifact_name` is not a trusted user, this field will be null. @@ -106,3 +106,8 @@ models: **Trusted Recurring Users**: Count of trusted users who have interacted with the project in at least 3 separate months over the RF4 scope period (October 2023 - June 2024). Many crypto natives are curious to try out new protocols. But churn and user retention are major issues. Recurring users represent the most loyal and committed segment of a project's user base. This metric considers users who have interacted with a project over the course of at least three distinct calendar months during the RF4 scope period. Thus, it is intended to reflect sustained interest and ongoing engagement over time. A high count of recurring users signals strong project loyalty and a good user experience, and helps separate the fads from the future. + - name: power_user_addresses + description: > + **Power User Addresses**: Count of 'power user' addresses that have interacted with the project over the RF4 scope period (October 2023 - June 2024). + + This metric reflects the degree to which a project has attracted attention from the most active and engaged users on the Superchain. A `power user` is defined as an address that has made at least 100 transactions, across at least 10 different projects, on at least 30 days, over the RF4 scope period. A project is counted by this metric if it has at least one interaction from a power user. Power users are critical early adopters for the ecosystem.
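A minimal sketch of how the new metric could be consumed downstream once the mart is synced; the table qualifier below is a placeholder and not defined by this patch, only the column names (project_id, power_user_addresses) come from rf4_impact_metrics_by_project:

-- Hypothetical consumer query: rank RF4 projects by power-user reach.
-- Replace the placeholder qualifier with wherever the mart is synced.
select
  project_id,
  power_user_addresses
from `your_dataset.rf4_impact_metrics_by_project`  -- placeholder qualifier
order by power_user_addresses desc
limit 20;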
diff --git a/warehouse/dbt/models/marts/superchain/rf4_project_verification.sql b/warehouse/dbt/models/marts/superchain/rf4_project_verification.sql index e8728fdfe..0ac2f649b 100644 --- a/warehouse/dbt/models/marts/superchain/rf4_project_verification.sql +++ b/warehouse/dbt/models/marts/superchain/rf4_project_verification.sql @@ -5,10 +5,23 @@ - Review thresholds for unique_addresses, date_first_transaction, and days_with_onchain_activity_in_range. - Filter on contracts that are linked to Retro Funding applications (from Agora data) - - Integrate with repo_stats_by_project to check licensing and other repo requirements. #} -with project_stats as ( +with repo_stats as ( + select + project_id, + ARRAY_AGG(repo_name) as eligible_repos + from ( + select + project_id, + CONCAT(artifact_namespace, '/', artifact_name) as repo_name + from {{ ref('rf4_repo_stats_by_project') }} + where approval_status = 'approved' + ) + group by project_id +), + +onchain_stats as ( select project_id, project_name, @@ -23,23 +36,38 @@ with project_stats as ( from {{ ref('rf4_events_daily_to_project') }} where event_type = 'CONTRACT_INVOCATION_SUCCESS_DAILY_COUNT' - and bucket_day >= '2024-01-01' + and bucket_day >= '2023-10-01' group by project_id, project_name ), -tests as ( +project_stats as ( + select + onchain_stats.*, + COALESCE(repo_stats.eligible_repos, array[]) as eligible_repos + from onchain_stats + left join repo_stats + on onchain_stats.project_id = repo_stats.project_id + left join {{ ref('projects_by_collection_v1') }} + on onchain_stats.project_id = projects_by_collection_v1.project_id + where + projects_by_collection_v1.collection_name = 'op-onchain' +), + +checks as ( select project_id, project_name, + eligible_repos, unique_addresses, date_first_transaction, days_with_onchain_activity_in_range, - unique_addresses >= 420 as test_unique_addresses, - date_first_transaction < '2024-03-01' as test_date_first_transaction, + ARRAY_LENGTH(eligible_repos) >= 1 as check_eligible_repos, + unique_addresses >= 420 as check_unique_addresses, + date_first_transaction < '2024-03-01' as check_date_first_transaction, days_with_onchain_activity_in_range >= 10 - as test_days_with_onchain_activity_in_range + as check_days_with_onchain_activity_in_range from project_stats ) @@ -49,9 +77,14 @@ select unique_addresses, date_first_transaction, days_with_onchain_activity_in_range, - test_unique_addresses, - test_date_first_transaction, - test_days_with_onchain_activity_in_range, - test_unique_addresses and test_date_first_transaction - and test_days_with_onchain_activity_in_range as test_all -from tests + check_eligible_repos, + check_unique_addresses, + check_date_first_transaction, + check_days_with_onchain_activity_in_range, + ( + check_eligible_repos + and check_unique_addresses + and check_date_first_transaction + and check_days_with_onchain_activity_in_range + ) as meets_all_requirements +from checks diff --git a/warehouse/dbt/models/marts/superchain/rf4_repo_stats_by_project.sql b/warehouse/dbt/models/marts/superchain/rf4_repo_stats_by_project.sql index 12f3b7685..66baee2fe 100644 --- a/warehouse/dbt/models/marts/superchain/rf4_repo_stats_by_project.sql +++ b/warehouse/dbt/models/marts/superchain/rf4_repo_stats_by_project.sql @@ -1,42 +1,68 @@ +{# TODO: Review licenses https://spdx.org/licenses/ for OSI Approved #} +{# TODO: update with actual collection for RF4 #} +with repo_snapshot as ( + select + project_id, + artifact_namespace, + artifact_name, + is_fork, + fork_count, + star_count, + first_commit_time, + 
last_commit_time, + days_with_commits_count, + commit_count, + language, + license_spdx_id, + case + when license_spdx_id in ( + 'MIT', 'MIT-0', 'Apache-2.0', 'Unlicense', + 'BSD-2-Clause', 'BSD-3-Clause', 'BSD-3-Clause-Clear', + 'AGPL-3.0', 'GPL-3.0', 'LGPL-3.0', 'GPL-2.0', 'MPL-2.0', 'LGPL-2.1', + 'OFL-1.1', 'EPL-1.0', 'EPL-2.0', 'OFL-1.1', 'EUPL-1.2', 'OSL-3.0', + 'ISC', '0BSD', 'NCSA', 'Zlib' + ) then 'Permissive' + when license_spdx_id in ( + 'BSD-4-Clause', 'WTFPL', + 'CC0-1.0', 'CC-BY-SA-4.0', 'CC-BY-4.0' + ) then 'Restrictive' + when license_spdx_id = 'NOASSERTION' + then 'Custom' + else 'Unspecified' + end as license_type + from {{ ref('int_repo_metrics_by_project') }} +) + select - int_repo_metrics_by_project.project_id, - int_repo_metrics_by_project.artifact_id, + repo_snapshot.project_id, projects_v1.project_name, - int_repo_metrics_by_project.artifact_namespace, - int_repo_metrics_by_project.artifact_name, - int_repo_metrics_by_project.is_fork, - int_repo_metrics_by_project.fork_count, - int_repo_metrics_by_project.star_count, - --int_repo_metrics_by_project.first_commit_time, - --int_repo_metrics_by_project.last_commit_time, - --int_repo_metrics_by_project.days_with_commits_count, - --int_repo_metrics_by_project.contributors_to_repo_count, - int_repo_metrics_by_project.language, - int_repo_metrics_by_project.license_spdx_id, + repo_snapshot.artifact_namespace, + repo_snapshot.artifact_name, + repo_snapshot.is_fork, + repo_snapshot.fork_count, + repo_snapshot.star_count, + repo_snapshot.first_commit_time, + repo_snapshot.last_commit_time, + repo_snapshot.days_with_commits_count, + repo_snapshot.commit_count, + repo_snapshot.language, + repo_snapshot.license_spdx_id, + repo_snapshot.license_type, case - {# TODO: Review licenses https://spdx.org/licenses/ for OSI Approved #} - when int_repo_metrics_by_project.license_spdx_id in ( - 'MIT', 'MIT-0', 'Apache-2.0', 'Unlicense', - 'BSD-2-Clause', 'BSD-3-Clause', 'BSD-3-Clause-Clear', - 'AGPL-3.0', 'GPL-3.0', 'LGPL-3.0', 'GPL-2.0', 'MPL-2.0', 'LGPL-2.1', - 'OFL-1.1', 'EPL-1.0', 'EPL-2.0', 'OFL-1.1', 'EUPL-1.2', 'OSL-3.0', - 'ISC', '0BSD', 'NCSA', 'Zlib' - ) then 'Permissive' - when int_repo_metrics_by_project.license_spdx_id in ( - 'BSD-4-Clause', 'WTFPL', - 'CC0-1.0', 'CC-BY-SA-4.0', 'CC-BY-4.0' - ) then 'Restrictive' - when int_repo_metrics_by_project.license_spdx_id = 'NOASSERTION' - then 'Custom' - else 'Unspecified' - end as license_type -from {{ ref('int_repo_metrics_by_project') }} + when ( + repo_snapshot.commit_count >= 10 + and repo_snapshot.days_with_commits_count >= 3 + and repo_snapshot.first_commit_time < '2024-05-01' + and repo_snapshot.star_count >= 10 + and repo_snapshot.language in ('Solidity', 'JavaScript', 'TypeScript') + ) then 'approved' + else 'review' + end as approval_status +from repo_snapshot left join {{ ref('projects_v1') }} - on int_repo_metrics_by_project.project_id = projects_v1.project_id + on repo_snapshot.project_id = projects_v1.project_id left join {{ ref('projects_by_collection_v1') }} - on - int_repo_metrics_by_project.project_id - = projects_by_collection_v1.project_id + on repo_snapshot.project_id = projects_by_collection_v1.project_id where - {# TODO: update with actual collection for RF4 #} projects_by_collection_v1.collection_name = 'op-onchain' + and repo_snapshot.license_type != 'Unspecified' diff --git a/warehouse/dbt/models/marts/superchain/rf4_trusted_users.sql b/warehouse/dbt/models/marts/superchain/rf4_trusted_users.sql index a1890934c..5c4473d57 100644 --- 
a/warehouse/dbt/models/marts/superchain/rf4_trusted_users.sql +++ b/warehouse/dbt/models/marts/superchain/rf4_trusted_users.sql @@ -1,41 +1,121 @@ -with eigentrust_top_users as ( +with farcaster_users as ( + select + fid as farcaster_id, + address, + {# compare fids numerically; a string comparison here would be lexicographic #} + CAST( + CAST(fid as int64) < 20939 + as int64 + ) as farcaster_prepermissionless + from {{ ref('stg_farcaster__addresses') }} +), + +eigentrust_top_users as ( {# draft model for testing #} - select farcaster_id + select + 1 as eigentrust_verification, + CAST(farcaster_id as string) as farcaster_id from {{ ref('stg_karma3__globaltrust') }} where - snapshot_time = '2024-05-01' + snapshot_time = '2024-05-21' and strategy_id = 1 order by eigentrust_rank desc limit 50000 ), -user_model as ( +web_of_trust as ( + {# draft model for testing #} select - artifacts_by_user.user_id, - artifacts_by_user.user_source, - artifacts_by_user.user_source_id, - artifacts_by_user.artifact_name, - CAST( - eigentrust_top_users.farcaster_id - is not null as bool - ) as eigentrust_verification, + 1 as vitalik_verification, + CAST(fof_id as string) as farcaster_id + from ( + select + l2.peer_farcaster_id as fof_id, + COUNT(distinct l1.peer_farcaster_id) as edge_count + from {{ ref('stg_karma3__localtrust') }} as l1 + left join {{ ref('stg_karma3__localtrust') }} as l2 + on l1.peer_farcaster_id = l2.farcaster_id + where + l1.farcaster_id = 5650 + and l1.strategy_id = 1 + and l2.strategy_id = 1 + group by l2.peer_farcaster_id + ) + where edge_count > 1 +), + +optimist_nft_holders as ( + select + optimist_address as address, + 1 as optimist_nft_verification + from {{ source("static_data_sources", "optimist_nft_holders") }} +), + +passport_scores as ( + select + passport_address as address, + 1 as passport_user, CAST( - passport_scores.evidence_rawscore - >= passport_scores.evidence_threshold as bool + COALESCE(evidence_rawscore >= evidence_threshold, false) as int64 ) as passport_verification - from {{ ref('int_artifacts_by_user') }} as artifacts_by_user - left join {{ ref('stg_passport__scores') }} as passport_scores - on artifacts_by_user.artifact_name = passport_scores.passport_address + from {{ ref('stg_passport__scores') }} +), + +all_addresses as ( + select distinct address + from ( + select address from farcaster_users + union all + select address from passport_scores + union all + select address from optimist_nft_holders + ) +), + +trusted_user_model as ( + select + all_addresses.address, + CAST(farcaster_users.farcaster_id is not null as int64) + as farcaster_user, + COALESCE(farcaster_users.farcaster_prepermissionless, 0) + as farcaster_prepermissionless, + COALESCE(eigentrust_top_users.eigentrust_verification, 0) + as eigentrust_verification, + COALESCE(web_of_trust.vitalik_verification, 0) + as vitalik_verification, + COALESCE(passport_scores.passport_user, 0) + as passport_user, + COALESCE(passport_scores.passport_verification, 0) + as passport_verification, + COALESCE(optimist_nft_holders.optimist_nft_verification, 0) + as optimist_nft_verification + from all_addresses + left join farcaster_users + on all_addresses.address = farcaster_users.address left join eigentrust_top_users - on artifacts_by_user.user_source_id = eigentrust_top_users.farcaster_id + on farcaster_users.farcaster_id = eigentrust_top_users.farcaster_id + left join web_of_trust + on farcaster_users.farcaster_id = web_of_trust.farcaster_id + left join passport_scores + on all_addresses.address = passport_scores.address + left join optimist_nft_holders + on all_addresses.address =
optimist_nft_holders.address ) select - user_id, - user_source, - user_source_id, - artifact_name -from user_model -where - passport_verification is true - or eigentrust_verification is true + address, + farcaster_user, + farcaster_prepermissionless, + eigentrust_verification, + vitalik_verification, + passport_user, + passport_verification, + optimist_nft_verification, + ( + farcaster_user + + farcaster_prepermissionless + + eigentrust_verification + + vitalik_verification + + passport_verification + + optimist_nft_verification + ) > 1 as is_trusted_user +from trusted_user_model diff --git a/warehouse/dbt/models/playground_sources.yml b/warehouse/dbt/models/playground_sources.yml index 5bbfab14e..d0a6f38cf 100644 --- a/warehouse/dbt/models/playground_sources.yml +++ b/warehouse/dbt/models/playground_sources.yml @@ -1,5 +1,9 @@ sources: - name: playground + # +enabled: | + # {%- if target.name in ['playground', 'dev'] -%} true + # {%- else -%} false + # {%- endif -%} database: opensource-observer diff --git a/warehouse/dbt/models/staging/karma3/stg_karma3__globaltrust.sql b/warehouse/dbt/models/staging/karma3/stg_karma3__globaltrust.sql index 822538aa1..442f50bb0 100644 --- a/warehouse/dbt/models/staging/karma3/stg_karma3__globaltrust.sql +++ b/warehouse/dbt/models/staging/karma3/stg_karma3__globaltrust.sql @@ -3,9 +3,20 @@ for Farcaster IDs #} +{{ + config( + materialized='table', + partition_by={ + "field": "snapshot_time", + "data_type": "timestamp", + "granularity": "day" + } + ) +}} + select strategy_id, - CAST(i as string) as farcaster_id, + i as farcaster_id, CAST(v as numeric) as eigentrust_rank, CAST(date as timestamp) as snapshot_time from {{ source("karma3", "globaltrust") }} diff --git a/warehouse/dbt/models/staging/karma3/stg_karma3__localtrust.sql b/warehouse/dbt/models/staging/karma3/stg_karma3__localtrust.sql index 5f3c5aa6f..33900d1ee 100644 --- a/warehouse/dbt/models/staging/karma3/stg_karma3__localtrust.sql +++ b/warehouse/dbt/models/staging/karma3/stg_karma3__localtrust.sql @@ -1,11 +1,27 @@ {# Get all Karma3 EigenTrust scores - for Farcaster IDs + for Farcaster IDs - Local Trust #} +{{ + config( + materialized='table', + partition_by={ + "field": "farcaster_id", + "data_type": "int64", + "range": { + "start": 0, + "end": 1000000, + "interval": 25000 + } + } + ) +}} + select strategy_id, - CAST(i as string) as farcaster_id, + i as farcaster_id, + j as peer_farcaster_id, CAST(v as numeric) as eigentrust_rank, CAST(date as timestamp) as snapshot_time from {{ source("karma3", "localtrust") }} diff --git a/warehouse/dbt/models/static_data_sources.yml b/warehouse/dbt/models/static_data_sources.yml new file mode 100644 index 000000000..b82b29562 --- /dev/null +++ b/warehouse/dbt/models/static_data_sources.yml @@ -0,0 +1,7 @@ +sources: + - name: static_data_sources + database: opensource-observer + schema: static_data_sources + tables: + - name: optimist_nft_holders + identifier: optimist_nft_holders \ No newline at end of file diff --git a/warehouse/dbt/models/superchain_sources.yml b/warehouse/dbt/models/superchain_sources.yml index fb9d9f4e2..ae78b3cb4 100644 --- a/warehouse/dbt/models/superchain_sources.yml +++ b/warehouse/dbt/models/superchain_sources.yml @@ -5,3 +5,6 @@ sources: tables: - name: optimism_traces identifier: optimism_traces + meta: + dagster: + asset_key: ["optimism", "traces"] diff --git a/warehouse/oso_dagster/assets.py b/warehouse/oso_dagster/assets.py index 22150f67f..bb9da8367 100644 --- a/warehouse/oso_dagster/assets.py +++ 
b/warehouse/oso_dagster/assets.py @@ -1,10 +1,10 @@ import os -from typing import Any, Mapping -from dagster import AssetExecutionContext, AssetKey, asset +from typing import Any, Mapping, Dict, List, Optional +from dagster import AssetExecutionContext, AssetKey, asset, AssetsDefinition from dagster_dbt import DbtCliResource, dbt_assets, DagsterDbtTranslator from google.cloud.bigquery.schema import SchemaField -from .constants import main_dbt_manifest_path +from .constants import main_dbt_manifests, main_dbt_project_dir from .goldsky import ( GoldskyConfig, goldsky_asset, @@ -13,19 +13,77 @@ class CustomDagsterDbtTranslator(DagsterDbtTranslator): - def __init__(self, prefix: str): + def __init__( + self, + prefix: str, + internal_schema_map: Dict[str, str], + ): self._prefix = prefix + self._internal_schema_map = internal_schema_map def get_asset_key(self, dbt_resource_props: Mapping[str, Any]) -> AssetKey: - return super().get_asset_key(dbt_resource_props).with_prefix(self._prefix) + asset_key = super().get_asset_key(dbt_resource_props) + final_key = asset_key.with_prefix(self._prefix) + # This is a temporary hack to get ossd as a top level item in production + if ( + dbt_resource_props.get("source_name", "") == "ossd" + and dbt_resource_props["schema"] == "oso" + and dbt_resource_props.get("identifier", "").endswith("_ossd") + ): + return asset_key + if dbt_resource_props["resource_type"] == "source": + schema = dbt_resource_props["schema"] + if schema in self._internal_schema_map: + new_key = self._internal_schema_map[schema][:] + new_key.append(dbt_resource_props["identifier"]) + final_key = AssetKey(new_key) + else: + final_key = asset_key + return final_key -@dbt_assets( - manifest=main_dbt_manifest_path, - dagster_dbt_translator=CustomDagsterDbtTranslator("main"), +def dbt_assets_from_manifests_map( + project_dir: str, + manifests: Dict[str, str], + internal_map: Optional[Dict[str, List[str]]] = None, +) -> List[AssetsDefinition]: + if not internal_map: + internal_map = {} + assets: List[AssetsDefinition] = [] + + def _make_dbt_assets(target: str, manifest_path: str) -> AssetsDefinition: + # Bind target and manifest_path per call; closing over the loop + # variables directly would late-bind, so every generated asset + # would build against the last target in the map. + translator = CustomDagsterDbtTranslator(["dbt", target], internal_map) + + @dbt_assets( + name=f"{target}_dbt", + manifest=manifest_path, + dagster_dbt_translator=translator, + ) + def _generated_dbt_assets(context: AssetExecutionContext, **kwargs): + dbt = DbtCliResource(project_dir=os.fspath(project_dir), target=target) + yield from dbt.cli(["build"], context=context).stream() + + return _generated_dbt_assets + + for target, manifest_path in manifests.items(): + assets.append(_make_dbt_assets(target, manifest_path)) + + return assets + + +# @dbt_assets( +# manifest=production_dbt_manifest_path, +# dagster_dbt_translator=CustomDagsterDbtTranslator("oso"), +# ) +# def production_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): +# yield from main_dbt.cli(["build"], context=context).stream() + +all_dbt_assets = dbt_assets_from_manifests_map( + main_dbt_project_dir, + main_dbt_manifests, + { + "oso": ["dbt", "production"], + "oso_base_playground": ["dbt", "base_playground"], + "oso_playground": ["dbt", "playground"], + }, ) -def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): - yield from main_dbt.cli(["build"], context=context).stream() # @dbt_assets( @@ -37,8 +95,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): base_blocks = goldsky_asset( - "base_blocks", GoldskyConfig( + key_prefix="base", + name="blocks", source_name="base-blocks", project_id="opensource-observer", destination_table_name="base_blocks", @@ -51,8 +110,9 @@ def main_dbt_assets(context:
AssetExecutionContext, main_dbt: DbtCliResource): ) base_transactions = goldsky_asset( - "base_transactions", GoldskyConfig( + key_prefix="base", + name="transactions", source_name="base-enriched_transactions", project_id="opensource-observer", destination_table_name="base_transactions", @@ -66,8 +126,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) base_traces = goldsky_asset( - "base_traces", GoldskyConfig( + key_prefix="base", + name="traces", source_name="base-traces", project_id="opensource-observer", destination_table_name="base_traces", @@ -81,8 +142,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) frax_blocks = goldsky_asset( - "frax_blocks", GoldskyConfig( + key_prefix="frax", + name="blocks", source_name="frax-blocks", project_id="opensource-observer", destination_table_name="frax_blocks", @@ -96,8 +158,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) frax_transactions = goldsky_asset( - "frax_transactions", GoldskyConfig( + key_prefix="frax", + name="transactions", source_name="frax-receipt_transactions", project_id="opensource-observer", destination_table_name="frax_transactions", @@ -112,8 +175,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) frax_traces = goldsky_asset( - "frax_traces", GoldskyConfig( + key_prefix="frax", + name="traces", source_name="frax-traces", project_id="opensource-observer", destination_table_name="frax_traces", @@ -127,8 +191,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) mode_blocks = goldsky_asset( - "mode_blocks", GoldskyConfig( + key_prefix="mode", + name="blocks", source_name="mode-blocks", project_id="opensource-observer", destination_table_name="mode_blocks", @@ -142,8 +207,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) mode_transactions = goldsky_asset( - "mode_transactions", GoldskyConfig( + key_prefix="mode", + name="transactions", source_name="mode-receipt_transactions", project_id="opensource-observer", destination_table_name="mode_transactions", @@ -158,8 +224,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) mode_traces = goldsky_asset( - "mode_traces", GoldskyConfig( + key_prefix="mode", + name="traces", source_name="mode-traces", project_id="opensource-observer", destination_table_name="mode_traces", @@ -173,8 +240,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) optimism_traces = goldsky_asset( - "optimism_traces", GoldskyConfig( + key_prefix="optimism", + name="traces", source_name="optimism-traces", project_id="opensource-observer", destination_table_name="optimism_traces", @@ -188,8 +256,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) pgn_blocks = goldsky_asset( - "pgn_blocks", GoldskyConfig( + key_prefix="pgn", + name="blocks", source_name="pgn-blocks", project_id="opensource-observer", destination_table_name="pgn_blocks", @@ -203,8 +272,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) pgn_transactions = goldsky_asset( - "pgn_transactions", GoldskyConfig( + key_prefix="pgn", + name="transactions", source_name="pgn-enriched_transactions", project_id="opensource-observer", destination_table_name="pgn_transactions", @@ -219,8 +289,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) pgn_traces = goldsky_asset( - "pgn_traces", 
GoldskyConfig( + key_prefix="pgn", + name="traces", source_name="pgn-traces", project_id="opensource-observer", destination_table_name="pgn_traces", @@ -234,8 +305,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) zora_blocks = goldsky_asset( - "zora_blocks", GoldskyConfig( + key_prefix="zora", + name="blocks", source_name="zora-blocks", project_id="opensource-observer", destination_table_name="zora_blocks", @@ -249,8 +321,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) zora_transactions = goldsky_asset( - "zora_transactions", GoldskyConfig( + key_prefix="zora", + name="transactions", source_name="zora-enriched_transactions", project_id="opensource-observer", destination_table_name="zora_transactions", @@ -265,8 +338,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) zora_traces = goldsky_asset( - "zora_traces", GoldskyConfig( + key_prefix="zora", + name="traces", source_name="zora-traces", project_id="opensource-observer", destination_table_name="zora_traces", @@ -281,8 +355,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): karma3_globaltrust = interval_gcs_import_asset( - "karma3_globaltrust", IntervalGCSAsset( + key_prefix="karma3", + name="globaltrust", project_id="opensource-observer", bucket_name="oso-dataset-transfer-bucket", path_base="openrank", @@ -297,8 +372,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) karma3_globaltrust_config = interval_gcs_import_asset( - "karma3_globaltrust_config", IntervalGCSAsset( + key_prefix="karma3", + name="globaltrust_config", project_id="opensource-observer", bucket_name="oso-dataset-transfer-bucket", path_base="openrank", @@ -313,8 +389,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) karma3_localtrust = interval_gcs_import_asset( - "karma3_localtrust", IntervalGCSAsset( + key_prefix="karma3", + name="localtrust", project_id="opensource-observer", bucket_name="oso-dataset-transfer-bucket", path_base="openrank", @@ -329,8 +406,9 @@ def main_dbt_assets(context: AssetExecutionContext, main_dbt: DbtCliResource): ) gitcoin_passport_scores = interval_gcs_import_asset( - "gitcoin_passport_scores", IntervalGCSAsset( + key_prefix="gitcoin", + name="passport_scores", project_id="opensource-observer", bucket_name="oso-dataset-transfer-bucket", path_base="passport", diff --git a/warehouse/oso_dagster/constants.py b/warehouse/oso_dagster/constants.py index 0f30a8f57..fe9eb45fb 100644 --- a/warehouse/oso_dagster/constants.py +++ b/warehouse/oso_dagster/constants.py @@ -1,11 +1,12 @@ import os from pathlib import Path +from typing import Dict, List +import pathlib import requests from dagster_dbt import DbtCliResource main_dbt_project_dir = Path(__file__).joinpath("..", "..", "..").resolve() -main_dbt = DbtCliResource(project_dir=os.fspath(main_dbt_project_dir)) # Leaving this for now as it allows a separate source related dbt model # source_dbt_project_dir = Path(__file__).joinpath("..", "..", "source_dbt").resolve() @@ -75,32 +76,38 @@ def generate_profile_and_auth(): ) -# If DAGSTER_DBT_PARSE_PROJECT_ON_LOAD is set, a manifest will be created at run time. -# Otherwise, we expect a manifest to be present in the project's target directory. 
-if os.getenv("DAGSTER_DBT_PARSE_PROJECT_ON_LOAD") or os.getenv( - "DAGSTER_DBT_GENERATE_AND_AUTH_GCP" -): - if os.getenv("DAGSTER_DBT_GENERATE_AND_AUTH_GCP"): - generate_profile_and_auth() - main_dbt_manifest_path = ( - main_dbt.cli( - ["--quiet", "parse"], - target_path=Path("target"), - ) - .wait() - .target_path.joinpath("manifest.json") - ) - # source_dbt_manifest_path = ( - # source_dbt.cli( - # ["--quiet", "parse"], - # target_path=Path("target"), - # ) - # .wait() - # .target_path.joinpath("manifest.json") - # ) - # print(f"THE PATH {source_dbt_manifest_path}") -else: - main_dbt_manifest_path = main_dbt_project_dir.joinpath("target", "manifest.json") - # source_dbt_manifest_path = source_dbt_project_dir.joinpath( - # "target", "manifest.json" - # ) +def load_dbt_manifests(targets: List[str]) -> Dict[str, str]: + manifests: Dict[str, str] = dict() + dbt_target_base_dir = os.getenv("DAGSTER_DBT_TARGET_BASE_DIR") + + # If DAGSTER_DBT_PARSE_PROJECT_ON_LOAD is set, a manifest will be created at + # run time. Otherwise, we error for now. Eventually the manifests should be + # generated during container creation so this doesn't need to run every time + # a docker container loads. + if os.getenv("DAGSTER_DBT_PARSE_PROJECT_ON_LOAD") or os.getenv( + "DAGSTER_DBT_GENERATE_AND_AUTH_GCP" + ): + if os.getenv("DAGSTER_DBT_GENERATE_AND_AUTH_GCP"): + generate_profile_and_auth() + for target in targets: + target_path = Path(dbt_target_base_dir, target) + # Ensure the dbt_target_base_dir exists + pathlib.Path(dbt_target_base_dir).mkdir(parents=True, exist_ok=True) + + dbt = DbtCliResource( + project_dir=os.fspath(main_dbt_project_dir), target=target + ) + manifests[target] = ( + dbt.cli( + ["--quiet", "parse"], + target_path=target_path, + ) + .wait() + .target_path.joinpath("manifest.json") + ) + else: + raise NotImplementedError("Currently we must generate dbt manifests") + return manifests + + +main_dbt_manifests = load_dbt_manifests(["production", "base_playground", "playground"]) diff --git a/warehouse/oso_dagster/definitions.py b/warehouse/oso_dagster/definitions.py index d52dbc91a..500145597 100644 --- a/warehouse/oso_dagster/definitions.py +++ b/warehouse/oso_dagster/definitions.py @@ -5,11 +5,12 @@ from dagster_dbt import DbtCliResource from dagster_gcp import BigQueryResource, GCSResource -from .constants import main_dbt_project_dir +from .constants import main_dbt_project_dir, main_dbt_manifests from .schedules import schedules from .cbt import CBTResource from .factories import load_assets_factories_from_modules from . 
import assets +from .assets import all_dbt_assets from dotenv import load_dotenv @@ -27,17 +28,22 @@ def load_definitions(): asset_factories = load_assets_factories_from_modules([assets]) asset_defs = load_assets_from_modules([assets]) + resources = { + "gcs": gcs, + "cbt": cbt, + "bigquery": bigquery, + } + for target in main_dbt_manifests: + resources[f"{target}_dbt"] = DbtCliResource( + project_dir=os.fspath(main_dbt_project_dir), target=target + ) + return Definitions( assets=asset_defs + asset_factories.assets, schedules=schedules, jobs=asset_factories.jobs, sensors=asset_factories.sensors, - resources={ - "main_dbt": DbtCliResource(project_dir=os.fspath(main_dbt_project_dir)), - "bigquery": bigquery, - "gcs": gcs, - "cbt": cbt, - }, + resources=resources, ) diff --git a/warehouse/oso_dagster/factories/gcs.py b/warehouse/oso_dagster/factories/gcs.py index 41d60cc72..c75d25ef8 100644 --- a/warehouse/oso_dagster/factories/gcs.py +++ b/warehouse/oso_dagster/factories/gcs.py @@ -1,11 +1,12 @@ import re from enum import Enum +from typing import Optional, Sequence +from dataclasses import dataclass, field import arrow from google.api_core.exceptions import NotFound from google.cloud.bigquery.job import CopyJobConfig import pandas as pd -from dataclasses import dataclass from dagster import ( asset, asset_sensor, @@ -40,6 +41,8 @@ class SourceMode(Enum): @dataclass(kw_only=True) class BaseGCSAsset: + name: str + key_prefix: Optional[str | Sequence[str]] = "" project_id: str bucket_name: str path_base: str @@ -48,6 +51,7 @@ class BaseGCSAsset: raw_dataset_name: str clean_dataset_name: str format: str = "CSV" + asset_kwargs: dict = field(default_factory=lambda: {}) @dataclass(kw_only=True) @@ -61,11 +65,11 @@ def parse_interval_prefix(interval: Interval, prefix: str) -> arrow.Arrow: return arrow.get(prefix, "YYYYMMDD") -def interval_gcs_import_asset(key: str, config: IntervalGCSAsset, **kwargs): +def interval_gcs_import_asset(config: IntervalGCSAsset): # Find all of the "intervals" in the bucket and load them into the `raw_sources` dataset # Run these sources through a secondary dbt model into `clean_sources` - @asset(key=key, **kwargs) + @asset(name=config.name, key_prefix=config.key_prefix, **config.asset_kwargs) def gcs_asset( context: AssetExecutionContext, bigquery: BigQueryResource, gcs: GCSResource ) -> MaterializeResult: @@ -192,18 +196,18 @@ def gcs_asset( } ) - @op(name=f"{key}_clean_up_op") + @op(name=f"{config.name}_clean_up_op") def gcs_clean_up_op(context: OpExecutionContext, config: dict): context.log.info(f"Running clean up for {key}") print(config) - @job(name=f"{key}_clean_up_job") + @job(name=f"{config.name}_clean_up_job") def gcs_clean_up_job(): gcs_clean_up_op() @asset_sensor( asset_key=gcs_asset.key, - name=f"{key}_clean_up_sensor", + name=f"{config.name}_clean_up_sensor", job=gcs_clean_up_job, default_status=DefaultSensorStatus.RUNNING, ) @@ -214,7 +218,11 @@ def gcs_clean_up_sensor( yield RunRequest( run_key=context.cursor, run_config=RunConfig( - ops={f"{key}_clean_up_op": {"config": {"asset_event": asset_event}}} + ops={ + f"{config.name}_clean_up_op": { + "config": {"asset_event": asset_event} + } + } ), ) diff --git a/warehouse/oso_dagster/goldsky.py b/warehouse/oso_dagster/goldsky.py index 78c7a1cd4..b977f4ec0 100644 --- a/warehouse/oso_dagster/goldsky.py +++ b/warehouse/oso_dagster/goldsky.py @@ -12,7 +12,7 @@ from dask.distributed import get_worker from dask_kubernetes.operator import make_cluster_spec from dataclasses import dataclass, field -from typing import 
List, Mapping, Tuple, Callable +from typing import List, Mapping, Tuple, Callable, Optional, Sequence import heapq from dagster import asset, AssetExecutionContext from dagster_gcp import BigQueryResource, GCSResource @@ -38,6 +38,8 @@ @dataclass(kw_only=True) class GoldskyConfig: # This is the name of the asset within the goldsky directory path in gcs + name: str + key_prefix: Optional[str | Sequence[str]] = "" project_id: str source_name: str destination_table_name: str @@ -615,8 +617,8 @@ def blocking_update_pointer_table( context.log.info(rows) -def goldsky_asset(name: str, config: GoldskyConfig) -> AssetFactoryResponse: - @asset(name=name) +def goldsky_asset(config: GoldskyConfig) -> AssetFactoryResponse: + @asset(name=config.name, key_prefix=config.key_prefix) def generated_asset( context: AssetExecutionContext, bigquery: BigQueryResource, diff --git a/warehouse/oso_dagster/schedules.py b/warehouse/oso_dagster/schedules.py index 5b1a12209..dbbfd0e30 100644 --- a/warehouse/oso_dagster/schedules.py +++ b/warehouse/oso_dagster/schedules.py @@ -4,8 +4,6 @@ from dagster_dbt import build_schedule_from_dbt_selection -from .assets import main_dbt_assets - schedules = [ # build_schedule_from_dbt_selection( # [opensource_observer_dbt_assets],
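A closing illustration of the trusted-user scoring introduced in rf4_trusted_users above: each signal contributes 0 or 1, the sum must exceed 1 (one signal alone is never enough), and note the sum omits passport_user. A standalone sketch of the arithmetic, runnable as-is:

-- Two signals (e.g. farcaster_user and passport_verification) => trusted.
select (1 + 0 + 0 + 0 + 1 + 0) > 1 as two_signals_trusted;  -- true
-- One signal (e.g. eigentrust_verification only) => not trusted.
select (0 + 0 + 1 + 0 + 0 + 0) > 1 as one_signal_trusted;  -- false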