diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
new file mode 100644
index 0000000..702d36f
--- /dev/null
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,48 @@
+# Disabled for now: every line of this workflow is commented out.
+# When enabled it runs `clojure -T:build build` and `clojure -T:build deploy`
+# (the latter with Clojars credentials) on every push to master.
+#
+# name: Deploy
+#
+# on:
+#   push:
+#     branches:
+#       - master
+#
+# jobs:
+#   deploy:
+#     runs-on: ubuntu-20.04
+#     environment: Deployment
+#     steps:
+#       - uses: actions/checkout@v4.1.0
+#         with:
+#           fetch-depth: 0
+#       - name: Prepare JDK 17
+#         uses: actions/setup-java@v3
+#         with:
+#           java-version: '17'
+#           distribution: 'temurin'
+#       - name: Setup Clojure
+#         uses: DeLaGuardo/setup-clojure@12.1
+#         with:
+#           cli: '1.11.1.1413'
+#       - name: Restore cache
+#         uses: actions/cache@v3
+#         with:
+#           path: |
+#             ~/.m2/repository
+#             ~/.gitlibs
+#             ~/.deps.clj
+#           key: v1-${{ hashFiles('./deps.edn') }}-deploy
+#           restore-keys: |
+#             v1-${{ hashFiles('./deps.edn') }}-
+#             v1-
+#       - name: Build Macaw
+#         run: clojure -T:build build
+#         env:
+#           GITHUB_SHA: ${{ github.sha }}
+#       - name: Deploy Macaw
+#         run: clojure -T:build deploy
+#         env:
+#           CLOJARS_USERNAME: ${{ secrets.CLOJARS_USERNAME }}
+#           CLOJARS_PASSWORD: ${{ secrets.CLOJARS_PASSWORD }}
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..d34a965
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,118 @@
+# CI checks run on pull requests and on pushes to master.
+name: Tests
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+jobs:
+  kondo:
+    runs-on: ubuntu-20.04
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4.1.0
+      # NOTE(review): `@master` is a moving ref; consider pinning to a tag or commit SHA.
+      - uses: DeLaGuardo/clojure-lint-action@master
+        with:
+          check-name: Run clj-kondo
+          clj-kondo-args: >-
+            --lint
+            src
+            test
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+
+  tests:
+    runs-on: ubuntu-20.04
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4.1.0
+      - name: Prepare JDK 17
+        uses: actions/setup-java@v3
+        with:
+          java-version: '17'
+          distribution: 'temurin'
+      - name: Setup Clojure
+        uses: DeLaGuardo/setup-clojure@12.1
+        with:
+          cli: '1.11.1.1413'
+      - name: Restore cache
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.m2/repository
+            ~/.gitlibs
+            ~/.deps.clj
+          key: v1-${{ hashFiles('./deps.edn') }}-tests
+          restore-keys: |
+            v1-${{ hashFiles('./deps.edn') }}-
+            v1-
+      - run: clojure -X:dev:test
+        name: Run tests
+        env:
+          CI: 'true'
+
+  whitespace-linter:
+    runs-on: ubuntu-20.04
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4.1.0
+      - name: Prepare JDK 17
+        uses: actions/setup-java@v3
+        with:
+          java-version: '17'
+          distribution: 'temurin'
+      - name: Setup Clojure
+        uses: DeLaGuardo/setup-clojure@12.1
+        with:
+          cli: '1.11.1.1413'
+      - name: Restore cache
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.m2/repository
+            ~/.gitlibs
+            ~/.deps.clj
+          key: v1-${{ hashFiles('./deps.edn') }}-whitespace-linter
+          restore-keys: |
+            v1-${{ hashFiles('./deps.edn') }}-
+            v1-
+      - run: clojure -T:whitespace-linter
+        name: Run whitespace linter
+
+  check:
+    runs-on: ubuntu-20.04
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4.1.0
+      - name: Prepare JDK 17
+        uses: actions/setup-java@v3
+        with:
+          java-version: '17'
+          distribution: 'temurin'
+      - name: Setup Clojure
+        uses: DeLaGuardo/setup-clojure@12.1
+        with:
+          cli: '1.11.1.1413'
+      - name: Restore cache
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.m2/repository
+            ~/.gitlibs
+            ~/.deps.clj
+          key: v1-${{ hashFiles('./deps.edn') }}-check
+          restore-keys: |
+            v1-${{ hashFiles('./deps.edn') }}-
+            v1-
+      - run: clojure -M:check
+        name: Check namespaces
+
+#   codespell:
+#     runs-on: ubuntu-20.04
+#     steps:
+#       - uses: actions/checkout@v4.1.0
+#       - uses: codespell-project/actions-codespell@v2
+#         with:
+#           ignore_words_file: .codespellignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4beb7c2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,27 @@
+*.class
+*.jar
+.\#*
+/*.iml
+/.clj-kondo/.cache
+/.cpcache
+/.eastwood
+/.env
+/.envrc
+/.idea
+/.lein-deps-sum
+/.lein-env
+/.lein-failures
+/.lein-plugins
+/.lein-repl-history
+/.nrepl-port
+/build.xml
+/checkouts
+/classes
+/config.edn
+/lib
+/pom.xml
+/pom.xml.asc
+/profiles.clj
+/tags
+/target
+\#*\#
diff --git a/README.md b/README.md
index 985ed84..c4e880a 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,13 @@
-[![License](https://img.shields.io/badge/license-Eclipse%20Public%20License-blue.svg?style=for-the-badge)](https://raw.githubusercontent.com/metabase/guacamaya/master/LICENSE)
-[![GitHub last commit](https://img.shields.io/github/last-commit/metabase/second-date?style=for-the-badge)](https://github.com/metabase/guacamaya/commits/)
+[![License](https://img.shields.io/badge/license-Eclipse%20Public%20License-blue.svg?style=for-the-badge)](https://raw.githubusercontent.com/metabase/macaw/master/LICENSE)
+[![GitHub last commit](https://img.shields.io/github/last-commit/metabase/macaw?style=for-the-badge)](https://github.com/metabase/macaw/commits/)
 
-[![Clojars Project](https://clojars.org/metabase/guacamaya/latest-version.svg)](https://clojars.org/metabase/guacamaya)
+[![Clojars Project](https://clojars.org/metabase/macaw/latest-version.svg)](https://clojars.org/metabase/macaw)
 
-![Guacamaya logo](./assets/logo.png)
+![Macaw logo](./assets/logo.png)
 
-# Guacamaya
+# Macaw
 
-Guacamaya is a limited Clojure wrapper for
+Macaw is a limited Clojure wrapper for
 [JSqlParser](https://github.com/JSQLParser/JSqlParser). Similar to its parrot
 namesake, it's intelligent, can be taught to speak SQL, and has many colors
 (supports many dialects).
diff --git a/deps.edn b/deps.edn
new file mode 100644
index 0000000..01bbb86
--- /dev/null
+++ b/deps.edn
@@ -0,0 +1,57 @@
+{:paths
+ ["src" "resources"]
+
+ :deps
+ {com.github.jsqlparser/jsqlparser {:mvn/version "4.8"}} ; The actual SQL Parser to wrap!
+
+
+ :aliases
+ {:dev
+  {:extra-deps
+   {io.github.metabase/hawk {:sha "539eefaa31a43d52d7c9b5731f471bb6742e7131"}}
+
+   :extra-paths
+   ["test"]
+
+   :jvm-opts
+   ["-Duser.timezone=UTC"
+    "-Duser.language=en"
+    "-Duser.country=US"
+    ;; if compilation on launch fails or whatever print to console instead of a temp file.
+    "-Dclojure.main.report=stderr"
+    ;; [LEVEL logger-name] message stacktrace
+    "-Djava.util.logging.SimpleFormatter.format=%n[%4$s %3$s] %5$s%n%6$s%n"
+    ;; Exceptions that get thrown repeatedly are created without stacktraces as a performance optimization in newer Java
+    ;; versions. This makes debugging pretty hard when working on stuff locally -- prefer debuggability over performance
+    ;; for local dev work.
+    "-XX:-OmitStackTraceInFastThrow"
+    ;; ignore options that aren't present in older versions of Java, like the one below:
+    "-XX:+IgnoreUnrecognizedVMOptions"
+    ;; include more details for debugging NPEs (Java 14+)
+    "-XX:+ShowCodeDetailsInExceptionMessages"]}
+
+  ;; clojure -M:check
+  :check
+  {:extra-deps {athos/clj-check {:git/url "https://github.com/athos/clj-check.git"
+                                 :sha "518d5a1cbfcd7c952f548e6dbfcb9a4a5faf9062"}}
+   :main-opts ["-m" "clj-check.check"]}
+
+  ;; clj -T:whitespace-linter
+  :whitespace-linter
+  {:deps {com.github.camsaul/whitespace-linter {:sha "e35bc252ccf5cc74f7d543ef95ad8a3e5131f25b"}}
+   :ns-default whitespace-linter
+   :exec-fn whitespace-linter/lint
+   :exec-args {:paths ["deps.edn" "src" "test" ".github"]
+               :include-patterns ["\\.clj[cs]?$" "\\.edn$" "\\.ya?ml$" "\\.md$"]}}
+
+  ;; Run tests
+  ;;
+  ;; clojure -X:dev:test
+  :test
+  {:exec-fn mb.hawk.core/find-and-run-tests-cli}
+
+  ;; clojure -T:build
+  :build
+  {:deps {io.github.clojure/tools.build {:mvn/version "0.9.6"}
+          slipset/deps-deploy {:mvn/version "0.2.1"}}
+   :ns-default build}}}
diff --git a/src/macaw/core.clj b/src/macaw/core.clj
new file mode 100644
index 0000000..369fe70
--- /dev/null
+++ b/src/macaw/core.clj
@@ -0,0 +1,55 @@
+(ns macaw.core
+  "A limited Clojure wrapper around JSqlParser for parsing SQL strings and finding the tables they reference."
+  (:import
+   (net.sf.jsqlparser.parser
+    CCJSqlParserUtil)
+   (net.sf.jsqlparser.statement
+    Statement)
+   (net.sf.jsqlparser.util
+    TablesNamesFinder)))
+
+(set! *warn-on-reflection* true)
+
+(defn query->tables
+  "Given a parsed query (i.e., a subclass of `Statement`) return a list of fully-qualified table names found within it.
+
+  Note: 'fully-qualified' means 'as found in the query'; it doesn't extrapolate schema names from other data sources."
+  [^Statement parsed-query]
+  (let [table-finder (TablesNamesFinder.)]
+    (.getTableList table-finder parsed-query)))
+
+(defn query->columns
+  "Given a parsed query, return the column names it references.
+
+  TODO: implement! For now this ignores its argument and returns a fixed placeholder vector."
+  [^Statement _parsed-query]
+  ["oh no" "TODO"])
+
+(defn parsed-query
+  "Main entry point: takes a string query and returns a `Statement` object that can be handled by the other functions."
+  [^String query]
+  (CCJSqlParserUtil/parse query))
+
+(defn resolve-columns
+  "Pair every table in `tables` with every column in `columns`, returning a map of table -> vector of columns.
+
+  TODO: Make this use metadata we know about.
+  TODO: If nil is a column (from a select *) then no need for the rest of the entries
+  TODO: might want to live in another ns"
+  [tables columns]
+  (let [cartesian-product (for [table tables
+                                column columns]
+                            {:table table
+                             :column column})]
+    (update-vals (group-by :table cartesian-product)
+                 #(mapv :column %))))
+
+(defn lineage
+  "Given a SQL string, return a map of each table found in the query to the columns attributed to it.
+
+  Column resolution is still a placeholder; see [[query->columns]] and [[resolve-columns]]."
+  [query]
+  (let [parsed (parsed-query query)
+        tables (query->tables parsed)
+        columns (query->columns parsed)]
+    (resolve-columns tables columns)))
diff --git a/test/macaw/core_test.clj b/test/macaw/core_test.clj
new file mode 100644
index 0000000..de0d14c
--- /dev/null
+++ b/test/macaw/core_test.clj
@@ -0,0 +1,25 @@
+(ns macaw.core-test
+  (:require
+   [clojure.test :refer :all]
+   [macaw.core :as m]))
+
+(def tables (comp m/query->tables m/parsed-query))
+
+(deftest ^:parallel query->tables-test
+  (testing "Simple queries"
+    (is (= ["core_user"]
+           (tables "select * from core_user;")))
+    (is (= ["core_user"]
+           (tables "select id, email from core_user;"))))
+  (testing "With a schema (Postgres)" ;; TODO: only run this against supported DBs
+    (is (= ["the_schema_name.core_user"]
+           (tables "select * from the_schema_name.core_user;"))))
+  (testing "Sub-selects"
+    (is (= ["core_user"]
+           (tables "select * from (select distinct email from core_user) q;")))))
+
+(deftest ^:parallel resolve-columns-test
+  (let [cols ["name" "id" "email"]]
+    (is (= {"core_user" cols
+            "report_card" cols}
+           (m/resolve-columns ["core_user" "report_card"] cols)))))