diff --git a/.github/scripts/publish-docker-containers.sh b/.github/scripts/publish-docker-containers.sh
new file mode 100644
index 000000000..740b127ef
--- /dev/null
+++ b/.github/scripts/publish-docker-containers.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+set -euo pipefail
+
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+cd "${SCRIPT_DIR}/../../"
+REPO_DIR=$(pwd)
+
+# For now if a plugin has both a pyproject.toml and a package.json. The python
+# will be used by nature of the order of docker image publishing
+python_plugins="$(find ./warehouse/cloudquery-* -type f -name 'pyproject.toml' -exec sh -c 'dirname $0' {} \;)"
+ts_plugins="$(find ./warehouse/cloudquery-* -type f -name 'package.json' -exec sh -c 'dirname $0' {} \;)"
+tag="$(git rev-parse HEAD)"
+
+build_base_image() {
+  language="$1"
+  tag="$2"
+  base_image="ghcr.io/opensource-observer/${language}-base:${tag}"
+  dockerfile_path="./docker/cloudquery/${language}-base.Dockerfile"
+  # Send build progress to stderr: callers capture this function's stdout via
+  # command substitution, and the legacy (non-BuildKit) builder writes build
+  # output to stdout, which would corrupt the captured image name.
+  docker build -t "${base_image}" -f "${dockerfile_path}" . 1>&2
+  echo "${base_image}"
+}
+
+# Build the base images
+py_base_image=$(build_base_image py "$tag")
+ts_base_image=$(build_base_image ts "$tag")
+prefix="cloudquery-"
+
+for path in $ts_plugins; do
+  plugin_name=$(basename $path)
+  # Remove the cloudquery prefix
+  plugin_name=${plugin_name#"$prefix"}
+
+  plugin_image="ghcr.io/opensource-observer/cloudquery-${plugin_name}:${tag}"
+
+  echo "Building ${plugin_name} plugin"
+  docker build -t ${plugin_image} \
+    --build-arg PLUGIN_NAME=${plugin_name} \
+    --build-arg BASE_IMAGE=${ts_base_image} \
+    -f docker/cloudquery/ts.Dockerfile \
+    . 
+  echo "Publishing the plugin to ${plugin_image}"
+  docker push ${plugin_image}
+done
+
+for path in $python_plugins; do
+  plugin_name=$(basename $path)
+  # Remove the cloudquery prefix
+  plugin_name=${plugin_name#"$prefix"}
+
+  plugin_cmd=$(echo $plugin_name | sed "s/-/_/g")
+  plugin_image="ghcr.io/opensource-observer/cloudquery-${plugin_name}:${tag}"
+
+  # Skip the example. NOTE: after stripping the "cloudquery-" prefix the
+  # directory name keeps its hyphen ("example-plugin"); the underscored form
+  # only exists in plugin_cmd, so we must compare against the hyphenated name.
+  if [[ $plugin_name = "example-plugin" ]]; then
+    continue
+  fi
+  echo "Building ${plugin_name} plugin"
+
+  # Python plugins must build on the python base image, not the TS one.
+  docker build -t ${plugin_image} \
+    --build-arg PLUGIN_NAME=${plugin_name} \
+    --build-arg PLUGIN_CMD=${plugin_cmd} \
+    --build-arg BASE_IMAGE=${py_base_image} \
+    -f docker/cloudquery/py.Dockerfile \
+    .
+
+  echo "Publishing the plugin to ${plugin_image}"
+  docker push ${plugin_image}
+done
\ No newline at end of file
diff --git a/.github/workflows/warehouse-publish-docker-containers.yml b/.github/workflows/warehouse-publish-docker-containers.yml
new file mode 100644
index 000000000..a8b9b7720
--- /dev/null
+++ b/.github/workflows/warehouse-publish-docker-containers.yml
@@ -0,0 +1,40 @@
+name: warehouse-publish-cloudquery-plugins
+env:
+  X_GITHUB_GRAPHQL_API: ${{ vars.X_GITHUB_GRAPHQL_API }}
+  X_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+# This workflow only runs when a commit is completed on main. 
+on:
+  # Allows you to run this workflow manually from the Actions tab
+  push:
+    branches:
+      - main
+
+jobs:
+  warehouse-publish-docker-containers:
+    name: warehouse-publish-docker-containers
+    environment: indexer
+    runs-on: ubuntu-latest
+
+    permissions:
+      packages: write
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 1
+
+      - name: 'Login to GitHub Container Registry'
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Package and publish cloudquery plugins
+        run: bash .github/scripts/publish-cloudquery-plugins.sh
+
+      # NOTE(review): the script added in this change is named
+      # publish-docker-containers.sh, not publish-docker-containers-plugins.sh;
+      # the previous path would fail with "No such file or directory".
+      - name: Package and publish other docker containers
+        run: bash .github/scripts/publish-docker-containers.sh
+        
\ No newline at end of file
diff --git a/a.out b/a.out
new file mode 100644
index 000000000..e69de29bb
diff --git a/boop.txt b/boop.txt
new file mode 100644
index 000000000..b1b3d95ff
--- /dev/null
+++ b/boop.txt
@@ -0,0 +1,40 @@
+01:07:33 Running with dbt=1.7.9
+01:07:33 Registered adapter: bigquery=1.7.6
+01:07:33 Found 112 models, 33 sources, 0 exposures, 0 metrics, 463 macros, 0 groups, 0 semantic models
+01:07:33
+01:07:34 Concurrency: 32 threads (target='production')
+01:07:34
+01:07:34 Compiled node 'playground__ossd_collections' is:
+
+
+with __dbt__cte__playground__project_filter as (
+
+
+SELECT * FROM UNNEST([
+  "gitcoin",
+  "opensource-observer",
+  "uniswap",
+  "velodrome",
+  "ethereum-attestation-service",
+  "zora",
+  "libp2p",
+  "rabbit-hole",
+  "safe-global",
+  "aave"
+]) as project_slug
+), filtered_collections as (
+  select distinct
+    collections.collection_name as `name`,
+    collections.sync_time as `sync_time`
+  from `opensource-observer`.`oso`.`stg_ossd__current_collections` as collections
+  cross join UNNEST(collections.projects) as project_name
+  inner join `opensource-observer`.`oso`.`stg_ossd__current_projects` as projects
+    on projects.project_name = project_name
+  where 
project_name IN (select * from __dbt__cte__playground__project_filter)
+)
+
+select collections.*
+from `opensource-observer`.`oso`.`collections_ossd` as collections
+inner join filtered_collections as filtered
+  on filtered.name = collections.name
+    and collections._cq_sync_time = filtered.sync_time
diff --git a/docker/cloudquery/py-base.Dockerfile b/docker/cloudquery/py-base.Dockerfile
new file mode 100644
index 000000000..ed89157aa
--- /dev/null
+++ b/docker/cloudquery/py-base.Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.12-bookworm
+
+RUN pip install poetry
+
+COPY . /usr/src/app
+
+WORKDIR /usr/src/app
+
+RUN poetry install
diff --git a/docker/cloudquery/py.Dockerfile b/docker/cloudquery/py.Dockerfile
new file mode 100644
index 000000000..d08df2762
--- /dev/null
+++ b/docker/cloudquery/py.Dockerfile
@@ -0,0 +1,14 @@
+ARG BASE_IMAGE=ghcr.io/opensource-observer/cloudquery-py-base:latest
+
+FROM ${BASE_IMAGE}
+
+ARG PLUGIN_NAME
+ARG PLUGIN_CMD
+
+# Exec-form ENTRYPOINT performs no variable expansion and build ARGs are not
+# available at runtime, so `ENTRYPOINT [ "${PLUGIN_NAME}" ]` would exec the
+# literal string '${PLUGIN_NAME}'. Persist the command via ENV and exec it
+# through a shell. `poetry run` is used because py-base installs dependencies
+# into a poetry virtualenv — confirm PLUGIN_CMD matches the console script.
+ENV PLUGIN_CMD=${PLUGIN_CMD}
+ENTRYPOINT [ "sh", "-c", "exec poetry run ${PLUGIN_CMD}" ]
\ No newline at end of file
diff --git a/docker/cloudquery/ts-base.Dockerfile b/docker/cloudquery/ts-base.Dockerfile
new file mode 100644
index 000000000..e5e477c08
--- /dev/null
+++ b/docker/cloudquery/ts-base.Dockerfile
@@ -0,0 +1,9 @@
+FROM node:20 as build
+
+RUN npm install -g pnpm@^9.0.0
+
+COPY . 
/usr/src/app
+
+WORKDIR /usr/src/app
+
+RUN pnpm install && pnpm build:cloudquery
\ No newline at end of file
diff --git a/docker/cloudquery/ts.Dockerfile b/docker/cloudquery/ts.Dockerfile
new file mode 100644
index 000000000..dcb4c4b46
--- /dev/null
+++ b/docker/cloudquery/ts.Dockerfile
@@ -0,0 +1,9 @@
+ARG BASE_IMAGE=ghcr.io/opensource-observer/cloudquery-ts-base:latest
+
+FROM ${BASE_IMAGE}
+
+ARG PLUGIN_NAME
+
+WORKDIR /usr/src/app/warehouse/cloudquery-${PLUGIN_NAME}
+
+ENTRYPOINT [ "pnpm", "node", "--loader", "ts-node/esm", "src/main.ts" ]
\ No newline at end of file
diff --git a/docker/images/cloudflare-tunnel/Dockerfile b/docker/images/cloudflare-tunnel/Dockerfile
new file mode 100644
index 000000000..7ed0f4896
--- /dev/null
+++ b/docker/images/cloudflare-tunnel/Dockerfile
@@ -0,0 +1,6 @@
+FROM ghcr.io/strrl/cloudflare-tunnel-ingress-controller:latest as binary
+
+FROM alpine:3.19
+COPY --from=binary /usr/bin/cloudflare-tunnel-ingress-controller /usr/bin/cloudflare-tunnel-ingress-controller
+COPY ./cf-tunnel-wrapper.sh /usr/bin/cf-tunnel-wrapper.sh
+
diff --git a/docker/images/cloudflare-tunnel/cf-tunnel-wrapper.sh b/docker/images/cloudflare-tunnel/cf-tunnel-wrapper.sh
new file mode 100644
index 000000000..cdcdd7eee
--- /dev/null
+++ b/docker/images/cloudflare-tunnel/cf-tunnel-wrapper.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+# This is to enable the use of kube-secrets-init (shebang must be line 1)
+exec cloudflare-tunnel-ingress-controller \
+  --ingress-class="${INGRESS_CLASS}" \
+  --controller-class="${CONTROLLER_CLASS}" \
+  --cloudflare-api-token="${CLOUDFLARE_API_TOKEN}" \
+  --cloudflare-account-id="${CLOUDFLARE_ACCOUNT_ID}" \
+  --cloudflare-tunnel-name="${CLOUDFLARE_TUNNEL_NAME}" \
+  --namespace="${NAMESPACE}"
diff --git a/docker/images/dagster-dask.Dockerfile b/docker/images/dagster-dask.Dockerfile
new file mode 100644
index 000000000..ee363fa25
--- /dev/null
+++ b/docker/images/dagster-dask.Dockerfile
@@ -0,0 +1,37 @@
+FROM ubuntu:jammy
+
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && \
+    apt-get 
install -y software-properties-common && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install -y python3.12 +RUN apt-get install -y curl && \ + curl -o get-pip.py https://bootstrap.pypa.io/get-pip.py && \ + python3.12 get-pip.py +RUN pip3.12 install poetry + + +RUN mkdir -p /usr/bin/app && \ + bash -c "mkdir -p /usr/bin/app/warehouse/{bq2cloudsql,oso_dagster,oso_lets_go,common}" && \ + touch /usr/bin/app/warehouse/bq2cloudsql/__init__.py && \ + touch /usr/bin/app/warehouse/bq2cloudsql/script.py && \ + touch /usr/bin/app/warehouse/oso_dagster/__init__.py && \ + touch /usr/bin/app/warehouse/oso_lets_go/__init__.py && \ + touch /usr/bin/app/warehouse/oso_lets_go/wizard.py && \ + touch /usr/bin/app/warehouse/common/__init__.py + +WORKDIR /usr/bin/app +COPY pyproject.toml poetry.lock /usr/bin/app/ +COPY warehouse/cloudquery-example-plugin /usr/bin/app/warehouse/cloudquery-example-plugin + +# Install everything onto the system path +RUN poetry config virtualenvs.create false && \ + poetry install + +RUN rm -r /usr/bin/app/warehouse + +COPY . 
/usr/bin/app + +RUN poetry config virtualenvs.create false && \ + poetry install \ No newline at end of file diff --git a/ops/helm-charts/oso-dagster/Chart.yaml b/ops/helm-charts/oso-dagster/Chart.yaml index 1a6b73156..c43899a1c 100644 --- a/ops/helm-charts/oso-dagster/Chart.yaml +++ b/ops/helm-charts/oso-dagster/Chart.yaml @@ -3,7 +3,7 @@ name: oso-dagster description: Extension of the dagster template type: application -version: 0.1.1 +version: 0.1.2 appVersion: "1.16.0" dependencies: - name: dagster diff --git a/ops/k8s-apps/production/custom-helm-values.yaml b/ops/k8s-apps/production/custom-helm-values.yaml index 372f46804..85c6c2f47 100644 --- a/ops/k8s-apps/production/custom-helm-values.yaml +++ b/ops/k8s-apps/production/custom-helm-values.yaml @@ -8,6 +8,4 @@ spec: secretPrefix: "gcp:secretmanager:production-dagster" dagster: global: - serviceAccountName: production-dagster - global: - serviceAccountName: production-dagster \ No newline at end of file + serviceAccountName: production-dagster \ No newline at end of file diff --git a/oso/dbtdag/__init__.py b/oso/dbtdag/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/oso/dbtdag/export.py b/oso/dbtdag/export.py new file mode 100644 index 000000000..351069909 --- /dev/null +++ b/oso/dbtdag/export.py @@ -0,0 +1,78 @@ +# Setup imports +import click +from dbt import flags +from dbt.cli.main import cli, global_flags +from dbt.cli import requires, params as p +from dbt.config.profile import read_user_config +from dbt.tracking import User, active_user +from dbt.task.list import ListTask +from dbt.graph.queue import GraphQueue + + +# We need to initialize a click command in order to parse the arguments. This +# seems to be the easiest method to get all the necessary arguments used to call +# the things we need to call +# +# WARNING: This is not a very stable set of code it seems. 
We are getting most
+# of these from here (from the list() function in this code)
+# https://github.com/dbt-labs/dbt-core/blob/e4fe839e4574187b574473596a471092267a9f2e/core/dbt/cli/main.py
+#
+@cli.command("export_list_task")
+@click.pass_context
+@global_flags
+@p.exclude
+@p.indirect_selection
+@p.models
+@p.output
+@p.output_keys
+@p.profile
+@p.profiles_dir
+@p.project_dir
+@p.resource_type
+@p.raw_select
+@p.selector
+@p.state
+@p.defer_state
+@p.deprecated_state
+@p.target
+@p.target_path
+@p.vars
+@requires.postflight
+@requires.preflight
+@requires.profile
+@requires.project
+@requires.runtime_config
+@requires.manifest
+def export_list_task(ctx, **kwargs):
+    print(ctx.obj["flags"])
+    task = ListTask(ctx.obj["flags"], ctx.obj["runtime_config"], ctx.obj["manifest"])
+    return task, True
+
+
+def call_export_list_task(target, project_dir=None):
+    args = ["export_list_task", "--target", target]
+
+    if project_dir:
+        args.append("--project-dir")
+        args.append(project_dir)
+
+    # Pass the assembled argument list (previously a hard-coded list was
+    # passed here, silently dropping the --project-dir option built above).
+    ctx = cli.make_context(
+        cli.name,
+        args,
+    )
+
+    ctx.obj = {"manifest": None, "callbacks": []}
+    results, success = cli.invoke(ctx)
+    if not success:
+        raise Exception("invocation was not successful")
+    return results
+
+
+def get_graph_queue_scores(target, project_dir=None):
+    task = call_export_list_task(target, project_dir)
+
+    task.compile_manifest()
+    graph = task.graph.graph
+
+    queue = GraphQueue(graph, task.manifest, ())
+    return queue._get_scores(graph)
diff --git a/test_all.sh b/test_all.sh
new file mode 100644
index 000000000..dc880e198
--- /dev/null
+++ b/test_all.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+set -euxo pipefail
+
+dagster asset materialize -m oso_dagster.definitions --select base_blocks
+dagster asset materialize -m oso_dagster.definitions --select base_transactions
+dagster asset materialize -m oso_dagster.definitions --select base_traces
+dagster asset materialize -m oso_dagster.definitions --select frax_transactions
+dagster asset 
materialize -m oso_dagster.definitions --select mode_transactions +dagster asset materialize -m oso_dagster.definitions --select pgn_transactions +dagster asset materialize -m oso_dagster.definitions --select frax_blocks +dagster asset materialize -m oso_dagster.definitions --select frax_traces +dagster asset materialize -m oso_dagster.definitions --select mode_blocks +dagster asset materialize -m oso_dagster.definitions --select mode_traces +dagster asset materialize -m oso_dagster.definitions --select pgn_blocks +dagster asset materialize -m oso_dagster.definitions --select pgn_traces +dagster asset materialize -m oso_dagster.definitions --select zora_blocks +dagster asset materialize -m oso_dagster.definitions --select zora_transactions +dagster asset materialize -m oso_dagster.definitions --select zora_traces diff --git a/warehouse/ansible-collection/integration/README.md b/warehouse/ansible-collection/integration/README.md new file mode 100644 index 000000000..90295e8bf --- /dev/null +++ b/warehouse/ansible-collection/integration/README.md @@ -0,0 +1,3 @@ +# Ansible Collection - opensource-observer.integration + +Tools for managing a direct datasore integration into opensource observer. diff --git a/warehouse/ansible-collection/integration/galaxy.yml b/warehouse/ansible-collection/integration/galaxy.yml new file mode 100644 index 000000000..537058b9c --- /dev/null +++ b/warehouse/ansible-collection/integration/galaxy.yml @@ -0,0 +1,75 @@ +### REQUIRED +# The namespace of the collection. This can be a company/brand/organization or product namespace under which all +# content lives. May only contain alphanumeric lowercase characters and underscores. Namespaces cannot start with +# underscores or numbers and cannot contain consecutive underscores +namespace: opensource-observer + +# The name of the collection. Has the same character restrictions as 'namespace' +name: integration + +# The version of the collection. 
Must be compatible with semantic versioning +version: 0.1.0 + +# The path to the Markdown (.md) readme file. This path is relative to the root of the collection +readme: README.md + +# A list of the collection's content authors. Can be just the name or in the format 'Full Name (url) +# @nicks:irc/im.site#channel' +authors: +- Kariba Labs + + +### OPTIONAL but strongly recommended +# A short summary description of the collection +description: A set of tools to do data integrations with your database + +# Either a single license or a list of licenses for content inside of a collection. Ansible Galaxy currently only +# accepts L(SPDX,https://spdx.org/licenses/) licenses. This key is mutually exclusive with 'license_file' +license: +- Apache-2.0 + +# The path to the license file for the collection. This path is relative to the root of the collection. This key is +# mutually exclusive with 'license' +license_file: 'LICENSE' + +# A list of tags you want to associate with the collection for indexing/searching. A tag name has the same character +# requirements as 'namespace' and 'name' +tags: [ + 'oso', + 'opensource-observer', + 'datascience' +] + +# Collections that this collection requires to be installed for it to be usable. The key of the dict is the +# collection label 'namespace.name'. The value is a version range +# L(specifiers,https://python-semanticversion.readthedocs.io/en/latest/#requirement-specification). 
Multiple version +# range specifiers can be set and are separated by ',' +dependencies: { + "community.postgresql": ">=2.9.0" +} + +# The URL of the originating SCM repository +repository: http://github.com/opensource-observer/oso + +# The URL to any online docs +documentation: http://docs.example.com + +# The URL to the homepage of the collection/project +homepage: http://example.com + +# The URL to the collection issue tracker +issues: http://example.com/issue/tracker + +# A list of file glob-like patterns used to filter any files or directories that should not be included in the build +# artifact. A pattern is matched from the relative path of the file or directory of the collection directory. This +# uses 'fnmatch' to match the files or directories. Some directories and files like 'galaxy.yml', '*.pyc', '*.retry', +# and '.git' are always filtered. Mutually exclusive with 'manifest' +build_ignore: [] + +# A dict controlling use of manifest directives used in building the collection artifact. The key 'directives' is a +# list of MANIFEST.in style +# L(directives,https://packaging.python.org/en/latest/guides/using-manifest-in/#manifest-in-commands). The key +# 'omit_default_directives' is a boolean that controls whether the default directives are used. 
Mutually exclusive +# with 'build_ignore' +# manifest: null + diff --git a/warehouse/ansible-collection/integration/meta/runtime.yml b/warehouse/ansible-collection/integration/meta/runtime.yml new file mode 100644 index 000000000..20f709edf --- /dev/null +++ b/warehouse/ansible-collection/integration/meta/runtime.yml @@ -0,0 +1,52 @@ +--- +# Collections must specify a minimum required ansible version to upload +# to galaxy +# requires_ansible: '>=2.9.10' + +# Content that Ansible needs to load from another location or that has +# been deprecated/removed +# plugin_routing: +# action: +# redirected_plugin_name: +# redirect: ns.col.new_location +# deprecated_plugin_name: +# deprecation: +# removal_version: "4.0.0" +# warning_text: | +# See the porting guide on how to update your playbook to +# use ns.col.another_plugin instead. +# removed_plugin_name: +# tombstone: +# removal_version: "2.0.0" +# warning_text: | +# See the porting guide on how to update your playbook to +# use ns.col.another_plugin instead. 
+# become: +# cache: +# callback: +# cliconf: +# connection: +# doc_fragments: +# filter: +# httpapi: +# inventory: +# lookup: +# module_utils: +# modules: +# netconf: +# shell: +# strategy: +# terminal: +# test: +# vars: + +# Python import statements that Ansible needs to load from another location +# import_redirection: +# ansible_collections.ns.col.plugins.module_utils.old_location: +# redirect: ansible_collections.ns.col.plugins.module_utils.new_location + +# Groups of actions/modules that take a common set of options +# action_groups: +# group_name: +# - module1 +# - module2 diff --git a/warehouse/ansible-collection/integration/playbooks/setup_postgres_replication.yml b/warehouse/ansible-collection/integration/playbooks/setup_postgres_replication.yml new file mode 100644 index 000000000..8b90cd1fa --- /dev/null +++ b/warehouse/ansible-collection/integration/playbooks/setup_postgres_replication.yml @@ -0,0 +1,10 @@ +# Sets up a postgres replication +- hosts: localhost + vars: + postgres_db: postgres + postgres_user: postgres + postgres_password: password + postgres_host: 127.0.0.1 + postgres_oso_user_password: password + roles: + - postgres \ No newline at end of file diff --git a/warehouse/ansible-collection/integration/plugins/README.md b/warehouse/ansible-collection/integration/plugins/README.md new file mode 100644 index 000000000..67a66d4cd --- /dev/null +++ b/warehouse/ansible-collection/integration/plugins/README.md @@ -0,0 +1,31 @@ +# Collections Plugins Directory + +This directory can be used to ship various plugins inside an Ansible collection. Each plugin is placed in a folder that +is named after the type of plugin it is in. It can also include the `module_utils` and `modules` directory that +would contain module utils and modules respectively. 
+ +Here is an example directory of the majority of plugins currently supported by Ansible: + +``` +└── plugins + ├── action + ├── become + ├── cache + ├── callback + ├── cliconf + ├── connection + ├── filter + ├── httpapi + ├── inventory + ├── lookup + ├── module_utils + ├── modules + ├── netconf + ├── shell + ├── strategy + ├── terminal + ├── test + └── vars +``` + +A full list of plugin types can be found at [Working With Plugins](https://docs.ansible.com/ansible-core/2.16/plugins/plugins.html). diff --git a/warehouse/ansible-collection/integration/roles/postgres/README.md b/warehouse/ansible-collection/integration/roles/postgres/README.md new file mode 100644 index 000000000..b282b6504 --- /dev/null +++ b/warehouse/ansible-collection/integration/roles/postgres/README.md @@ -0,0 +1,31 @@ +# Role Name + +A brief description of the role goes here. + +## Requirements + +Any pre-requisites that may not be covered by Ansible itself or the role should be mentioned here. For instance, if the role uses the EC2 module, it may be a good idea to mention in this section that the boto package is required. + +## Role Variables + +A description of the settable variables for this role should go here, including any variables that are in defaults/main.yml, vars/main.yml, and any variables that can/should be set via parameters to the role. Any variables that are read from other roles and/or the global scope (ie. hostvars, group vars, etc.) should be mentioned here as well. + +## Dependencies + +A list of other roles hosted on Galaxy should go here, plus any details in regards to parameters that may need to be set for other roles, or variables that are used from other roles. 
+ +## Example Playbook + +Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too: + + - hosts: servers + roles: + - { role: username.rolename, x: 42 } + +## License + +BSD + +## Author Information + +An optional section for the role authors to include contact information, or a website (HTML is not allowed). diff --git a/warehouse/ansible-collection/integration/roles/postgres/defaults/main.yml b/warehouse/ansible-collection/integration/roles/postgres/defaults/main.yml new file mode 100644 index 000000000..10302bc91 --- /dev/null +++ b/warehouse/ansible-collection/integration/roles/postgres/defaults/main.yml @@ -0,0 +1,8 @@ +--- +# defaults file for postgres +postgres_db: postgres +postgres_port: 5432 +postgres_oso_user_name: oso_replication +postgres_oso_user_expiration: infinity +postgres_oso_replication_slot_name: oso_replication_slot +postgres_oso_publication_name: oso_publication diff --git a/warehouse/ansible-collection/integration/roles/postgres/handlers/main.yml b/warehouse/ansible-collection/integration/roles/postgres/handlers/main.yml new file mode 100644 index 000000000..acc64a600 --- /dev/null +++ b/warehouse/ansible-collection/integration/roles/postgres/handlers/main.yml @@ -0,0 +1,2 @@ +--- +# handlers file for postgres diff --git a/warehouse/ansible-collection/integration/roles/postgres/meta/main.yml b/warehouse/ansible-collection/integration/roles/postgres/meta/main.yml new file mode 100644 index 000000000..c572acc9f --- /dev/null +++ b/warehouse/ansible-collection/integration/roles/postgres/meta/main.yml @@ -0,0 +1,52 @@ +galaxy_info: + author: your name + description: your role description + company: your company (optional) + + # If the issue tracker for your role is not on github, uncomment the + # next line and provide a value + # issue_tracker_url: http://example.com/issue/tracker + + # Choose a valid license ID from https://spdx.org - some suggested licenses: + # - 
BSD-3-Clause (default) + # - MIT + # - GPL-2.0-or-later + # - GPL-3.0-only + # - Apache-2.0 + # - CC-BY-4.0 + license: license (GPL-2.0-or-later, MIT, etc) + + min_ansible_version: 2.1 + + # If this a Container Enabled role, provide the minimum Ansible Container version. + # min_ansible_container_version: + + # + # Provide a list of supported platforms, and for each platform a list of versions. + # If you don't wish to enumerate all versions for a particular platform, use 'all'. + # To view available platforms and versions (or releases), visit: + # https://galaxy.ansible.com/api/v1/platforms/ + # + # platforms: + # - name: Fedora + # versions: + # - all + # - 25 + # - name: SomePlatform + # versions: + # - all + # - 1.0 + # - 7 + # - 99.99 + + galaxy_tags: [] + # List tags for your role here, one per line. A tag is a keyword that describes + # and categorizes the role. Users find roles by searching for tags. Be sure to + # remove the '[]' above, if you add tags to this list. + # + # NOTE: A tag is limited to a single word comprised of alphanumeric characters. + # Maximum 20 tags per role. + +dependencies: [] + # List your role dependencies here, one per line. Be sure to remove the '[]' above, + # if you add dependencies to this list. 
diff --git a/warehouse/ansible-collection/integration/roles/postgres/tasks/main.yml b/warehouse/ansible-collection/integration/roles/postgres/tasks/main.yml new file mode 100644 index 000000000..765d0bf3b --- /dev/null +++ b/warehouse/ansible-collection/integration/roles/postgres/tasks/main.yml @@ -0,0 +1,46 @@ +--- +- name: Set the wal_level to logical + community.postgresql.postgresql_set: + db: '{{ postgres_db }}' + login_host: '{{ postgres_host }}' + port: '{{ postgres_port }}' + login_user: '{{ postgres_user }}' + login_password: '{{ postgres_password }}' + name: wal_level + value: "logical" + +- name: Create a user for OSO + community.postgresql.postgresql_user: + db: '{{ postgres_db }}' + login_host: '{{ postgres_host }}' + port: '{{ postgres_port }}' + login_user: '{{ postgres_user }}' + login_password: '{{ postgres_password }}' + name: '{{ postgres_oso_user_name }}' + password: '{{ postgres_oso_user_password }}' + expires: '{{ postgres_oso_user_expiration }}' + role_attr_flags: 'REPLICATION' + state: present + +- name: Create a logical replication slot to the database + community.postgresql.postgresql_slot: + db: '{{ postgres_db }}' + login_host: '{{ postgres_host }}' + port: '{{ postgres_port }}' + login_user: '{{ postgres_user }}' + login_password: '{{ postgres_password }}' + name: '{{ postgres_oso_replication_slot_name }}' + slot_type: logical + state: present + output_plugin: pgoutput + +- name: Create a new publication for all the specified tables + community.postgresql.postgresql_publication: + db: '{{ postgres_db }}' + login_host: '{{ postgres_host }}' + port: '{{ postgres_port }}' + login_user: '{{ postgres_user }}' + login_password: '{{ postgres_password }}' + name: '{{ postgres_oso_publication_name }}' + tables: '{{ postgres_oso_publication_tables }}' + state: present diff --git a/warehouse/ansible-collection/integration/roles/postgres/tests/inventory b/warehouse/ansible-collection/integration/roles/postgres/tests/inventory new file mode 100644 
index 000000000..878877b07 --- /dev/null +++ b/warehouse/ansible-collection/integration/roles/postgres/tests/inventory @@ -0,0 +1,2 @@ +localhost + diff --git a/warehouse/ansible-collection/integration/roles/postgres/tests/test.yml b/warehouse/ansible-collection/integration/roles/postgres/tests/test.yml new file mode 100644 index 000000000..728654a51 --- /dev/null +++ b/warehouse/ansible-collection/integration/roles/postgres/tests/test.yml @@ -0,0 +1,5 @@ +--- +- hosts: localhost + remote_user: root + roles: + - postgres diff --git a/warehouse/ansible-collection/integration/roles/postgres/vars/main.yml b/warehouse/ansible-collection/integration/roles/postgres/vars/main.yml new file mode 100644 index 000000000..e6e9d7c6d --- /dev/null +++ b/warehouse/ansible-collection/integration/roles/postgres/vars/main.yml @@ -0,0 +1,2 @@ +--- +# vars file for postgres diff --git a/warehouse/ansible-collection/integration/setup_postgres_replication.yml b/warehouse/ansible-collection/integration/setup_postgres_replication.yml new file mode 100644 index 000000000..6d5cde09d --- /dev/null +++ b/warehouse/ansible-collection/integration/setup_postgres_replication.yml @@ -0,0 +1,13 @@ +# Sets up a postgres replication +- hosts: localhost + vars: + postgres_db: postgres + postgres_user: postgres + postgres_password: postgres + postgres_host: 127.0.0.1 + postgres_oso_user_password: password + postgres_oso_publication_tables: + - foo + - bar + roles: + - postgres diff --git a/warehouse/playground_source/.cache/model_definition/0_95_23_15_49___db___sqlmesh_example___full_model__4157663659 b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49___db___sqlmesh_example___full_model__4157663659 new file mode 100644 index 000000000..1e4273c42 Binary files /dev/null and b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49___db___sqlmesh_example___full_model__4157663659 differ diff --git 
a/warehouse/playground_source/.cache/model_definition/0_95_23_15_49___db___sqlmesh_example___incremental_model__3228797744 b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49___db___sqlmesh_example___incremental_model__3228797744 new file mode 100644 index 000000000..5d9590a78 Binary files /dev/null and b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49___db___sqlmesh_example___incremental_model__3228797744 differ diff --git a/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__full_model__1715376661_6757677__3237680129__db b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__full_model__1715376661_6757677__3237680129__db new file mode 100644 index 000000000..018f28a71 Binary files /dev/null and b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__full_model__1715376661_6757677__3237680129__db differ diff --git a/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__full_model__1715376661_6757677__387428011__db b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__full_model__1715376661_6757677__387428011__db new file mode 100644 index 000000000..c5e5a0de3 Binary files /dev/null and b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__full_model__1715376661_6757677__387428011__db differ diff --git a/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__full_model__1715385339_6193233__3914696475__memory b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__full_model__1715385339_6193233__3914696475__memory new file mode 100644 index 000000000..e4e36b03f Binary files /dev/null and b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__full_model__1715385339_6193233__3914696475__memory differ diff --git a/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__incremental_model__1715376661_6757677__3237680129__db 
b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__incremental_model__1715376661_6757677__3237680129__db new file mode 100644 index 000000000..ea81276d5 Binary files /dev/null and b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__incremental_model__1715376661_6757677__3237680129__db differ diff --git a/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__incremental_model__1715376661_6757677__387428011__db b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__incremental_model__1715376661_6757677__387428011__db new file mode 100644 index 000000000..c6b7c7158 Binary files /dev/null and b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__incremental_model__1715376661_6757677__387428011__db differ diff --git a/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__incremental_model__1715385339_6193233__3914696475__memory b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__incremental_model__1715385339_6193233__3914696475__memory new file mode 100644 index 000000000..71e539842 Binary files /dev/null and b/warehouse/playground_source/.cache/model_definition/0_95_23_15_49__models__incremental_model__1715385339_6193233__3914696475__memory differ diff --git a/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_full_model_2210254270 b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_full_model_2210254270 new file mode 100644 index 000000000..ed3f5670f Binary files /dev/null and b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_full_model_2210254270 differ diff --git a/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_full_model_3783885124 b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_full_model_3783885124 new file mode 100644 index 
000000000..9044c9ff6 Binary files /dev/null and b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_full_model_3783885124 differ diff --git a/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_incremental_model_1662842126 b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_incremental_model_1662842126 new file mode 100644 index 000000000..57e6ef6d9 Binary files /dev/null and b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_incremental_model_1662842126 differ diff --git a/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_incremental_model_3679699008 b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_incremental_model_3679699008 new file mode 100644 index 000000000..58d479b46 Binary files /dev/null and b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_incremental_model_3679699008 differ diff --git a/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test1_2806289995 b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test1_2806289995 new file mode 100644 index 000000000..8c384e7fa Binary files /dev/null and b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test1_2806289995 differ diff --git a/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test1_2848966810 b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test1_2848966810 new file mode 100644 index 000000000..6b0854905 Binary files /dev/null and b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test1_2848966810 differ diff --git a/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test2_2806289995 
b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test2_2806289995 new file mode 100644 index 000000000..512b03028 Binary files /dev/null and b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test2_2806289995 differ diff --git a/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test2_2848966810 b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test2_2848966810 new file mode 100644 index 000000000..549ddf4ea Binary files /dev/null and b/warehouse/playground_source/.cache/optimized_query/0_95_23_15_49__sqlmesh_example_test2_2848966810 differ diff --git a/warehouse/playground_source/audits/.gitkeep b/warehouse/playground_source/audits/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/warehouse/playground_source/audits/assert_positive_order_ids.sql b/warehouse/playground_source/audits/assert_positive_order_ids.sql new file mode 100644 index 000000000..6e5fecad5 --- /dev/null +++ b/warehouse/playground_source/audits/assert_positive_order_ids.sql @@ -0,0 +1,8 @@ +AUDIT ( + name assert_positive_order_ids, +); + +SELECT * +FROM @this_model +WHERE + item_id < 0 diff --git a/warehouse/playground_source/config.py b/warehouse/playground_source/config.py new file mode 100644 index 000000000..1030c6897 --- /dev/null +++ b/warehouse/playground_source/config.py @@ -0,0 +1,23 @@ +import os +from sqlmesh.core.config import ( + Config, + ModelDefaultsConfig, + GatewayConfig, + BigQueryConnectionConfig, + DuckDBConnectionConfig, +) + +config = Config( + model_defaults=ModelDefaultsConfig(dialect="bigquery"), + gateways={ + "local": GatewayConfig( + state_connection=DuckDBConnectionConfig(database="db.db") + ), + "bq": GatewayConfig( + state_connection=BigQueryConnectionConfig( + keyfile=os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") + ) + ), + }, + default_gateway="local", +) diff --git a/warehouse/playground_source/db.db 
b/warehouse/playground_source/db.db new file mode 100644 index 000000000..0d5a08018 Binary files /dev/null and b/warehouse/playground_source/db.db differ diff --git a/warehouse/playground_source/macros/.gitkeep b/warehouse/playground_source/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/warehouse/playground_source/macros/__init__.py b/warehouse/playground_source/macros/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/warehouse/playground_source/models/.gitkeep b/warehouse/playground_source/models/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/warehouse/playground_source/models/full_model.sql b/warehouse/playground_source/models/full_model.sql new file mode 100644 index 000000000..37500b6ed --- /dev/null +++ b/warehouse/playground_source/models/full_model.sql @@ -0,0 +1,14 @@ +MODEL ( + name sqlmesh_example.full_model, + kind FULL, + cron '@daily', + grain item_id, + audits (assert_positive_order_ids), +); + +SELECT + item_id, + COUNT(DISTINCT id) AS num_orders, +FROM + sqlmesh_example.incremental_model +GROUP BY item_id diff --git a/warehouse/playground_source/models/incremental_model.sql b/warehouse/playground_source/models/incremental_model.sql new file mode 100644 index 000000000..7f9f7e5f4 --- /dev/null +++ b/warehouse/playground_source/models/incremental_model.sql @@ -0,0 +1,18 @@ +MODEL ( + name sqlmesh_example.incremental_model, + kind INCREMENTAL_BY_TIME_RANGE ( + time_column event_date + ), + start '2020-01-01', + cron '@daily', + grain (id, event_date) +); + +SELECT + id, + item_id, + event_date, +FROM + sqlmesh_example.seed_model +WHERE + event_date BETWEEN @start_date AND @end_date diff --git a/warehouse/playground_source/models/playground_models.py b/warehouse/playground_source/models/playground_models.py new file mode 100644 index 000000000..705139ff5 --- /dev/null +++ b/warehouse/playground_source/models/playground_models.py @@ -0,0 +1,26 @@ +from sqlglot import exp + +from 
sqlmesh.core.model import model +from sqlmesh.core.macros import MacroEvaluator + + +def model_factory(name: str): + @model( + f"sqlmesh_example.{name}", + is_sql=True, + kind="FULL", + ) + def _model(evaluator: MacroEvaluator): + # id, + # item_id, + # event_date, + print(evaluator.columns_to_types("sqlmesh_example.seed_model")) + return exp.select("id", "item_id", "event_date").from_( + "sqlmesh_example.seed_model" + ) + + return _model + + +test1 = model_factory("test1") +test1 = model_factory("test2") diff --git a/warehouse/playground_source/models/seed_model.sql b/warehouse/playground_source/models/seed_model.sql new file mode 100644 index 000000000..c7328f51f --- /dev/null +++ b/warehouse/playground_source/models/seed_model.sql @@ -0,0 +1,12 @@ +MODEL ( + name sqlmesh_example.seed_model, + kind SEED ( + path '../seeds/seed_data.csv' + ), + columns ( + id INTEGER, + item_id INTEGER, + event_date DATE + ), + grain (id, event_date) +); diff --git a/warehouse/playground_source/old_config.yaml b/warehouse/playground_source/old_config.yaml new file mode 100644 index 000000000..a58f8a216 --- /dev/null +++ b/warehouse/playground_source/old_config.yaml @@ -0,0 +1,10 @@ +gateways: + local: + connection: + type: duckdb + database: db.db + +default_gateway: local + +model_defaults: + dialect: bigquery diff --git a/warehouse/playground_source/seeds/.gitkeep b/warehouse/playground_source/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/warehouse/playground_source/seeds/seed_data.csv b/warehouse/playground_source/seeds/seed_data.csv new file mode 100644 index 000000000..2e3902ed3 --- /dev/null +++ b/warehouse/playground_source/seeds/seed_data.csv @@ -0,0 +1,8 @@ +id,item_id,event_date +1,2,2020-01-01 +2,1,2020-01-01 +3,3,2020-01-03 +4,1,2020-01-04 +5,1,2020-01-05 +6,1,2020-01-06 +7,1,2020-01-07 diff --git a/warehouse/playground_source/tests/.gitkeep b/warehouse/playground_source/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb diff 
--git a/warehouse/playground_source/tests/test_full_model.yaml b/warehouse/playground_source/tests/test_full_model.yaml new file mode 100644 index 000000000..514baa1f6 --- /dev/null +++ b/warehouse/playground_source/tests/test_full_model.yaml @@ -0,0 +1,21 @@ +test_example_full_model: + model: sqlmesh_example.full_model + inputs: + sqlmesh_example.incremental_model: + rows: + - id: 1 + item_id: 1 + event_date: '2020-01-01' + - id: 2 + item_id: 1 + event_date: '2020-01-02' + - id: 3 + item_id: 2 + event_date: '2020-01-03' + outputs: + query: + rows: + - item_id: 1 + num_orders: 2 + - item_id: 2 + num_orders: 1