diff --git a/.clj-kondo/config.edn b/.clj-kondo/config.edn index 9816581..264cdbc 100644 --- a/.clj-kondo/config.edn +++ b/.clj-kondo/config.edn @@ -35,7 +35,7 @@ :used-underscored-binding {:level :warning} :warn-on-reflection {:level :warning} - :second-date/validate-deftest {:level :warning}} + :macaw/validate-deftest {:level :warning}} :lint-as {} @@ -58,4 +58,9 @@ :config-in-ns {tests {:linters - {:missing-docstring {:level :off}}}}} + {:missing-docstring {:level :off}}} + + ;; Disable deftest parallel/synchronized linting since we marked the whole ns as ^:parallel + macaw.core-test + {:linters + {:macaw/validate-deftest {:level :off}}}}} diff --git a/.clj-kondo/hooks/clojure/test.clj b/.clj-kondo/hooks/clojure/test.clj index 71e8209..5677085 100644 --- a/.clj-kondo/hooks/clojure/test.clj +++ b/.clj-kondo/hooks/clojure/test.clj @@ -15,7 +15,7 @@ #_(= (last (str sexpr)) \!)) (hooks/reg-finding! (assoc (meta form) :message (format "%s is not allowed inside a ^:parallel test" sexpr) - :type :toucan2/validate-deftest))))))) + :type :macaw/validate-deftest))))))) (walk [form] (f form) (doseq [child (:children form)] @@ -38,11 +38,11 @@ (when-not (or parallel? synchronized?) (hooks/reg-finding! (assoc (meta test-name) :message "Test should be marked either ^:parallel or ^:synchronized" - :type :second-date/validate-deftest))) + :type :macaw/validate-deftest))) (when (and parallel? synchronized?) (hooks/reg-finding! (assoc (meta test-name) :message "Test should not be marked both ^:parallel and ^:synchronized" - :type :second-date/validate-deftest))) + :type :macaw/validate-deftest))) (when parallel? (doseq [form body] (warn-about-disallowed-parallel-forms form)))) diff --git a/deps.edn b/deps.edn index 9e61415..d8a74a5 100644 --- a/deps.edn +++ b/deps.edn @@ -7,7 +7,7 @@ :ensure "target/classes"} :deps - {com.github.jsqlparser/jsqlparser {:mvn/version "4.8"}} ; The actual SQL Parser to wrap! + {com.github.jsqlparser/jsqlparser {:mvn/version "4.9"}} ; The actual SQL Parser to wrap! :aliases diff --git a/java/com/metabase/macaw/AstWalker.java b/java/com/metabase/macaw/AstWalker.java index c879495..8ae207c 100644 --- a/java/com/metabase/macaw/AstWalker.java +++ b/java/com/metabase/macaw/AstWalker.java @@ -168,6 +168,8 @@ import net.sf.jsqlparser.statement.update.Update; import net.sf.jsqlparser.statement.upsert.Upsert; +import static com.metabase.macaw.AstWalker.CallbackKey.ALL_COLUMNS; +import static com.metabase.macaw.AstWalker.CallbackKey.ALL_TABLE_COLUMNS; import static com.metabase.macaw.AstWalker.CallbackKey.COLUMN; import static com.metabase.macaw.AstWalker.CallbackKey.TABLE; @@ -210,6 +212,8 @@ public class AstWalker implements SelectVisitor, FromItemVisitor, Expressio SelectItemVisitor, StatementVisitor { public enum CallbackKey { + ALL_COLUMNS, + ALL_TABLE_COLUMNS, COLUMN, TABLE; @@ -224,17 +228,10 @@ public String toString() { private final EnumMap callbacks; /** - * Construct a new walker with the given `callbacks`. The `callbacks` should be a (Clojure) map like so: + * Construct a new walker with the given `callbacks`. The `callbacks` should be a (Clojure) map of CallbackKeys to + * reducing functions. * - *

-     *   (ASTWalker. {AstWalker$CallbackKey/COLUMN (fn [acc col] (do-something-with-the-found-column acc col))
-     *                AstWalker$CallbackKey/TABLE  (fn [acc table] (...))})
-     * 
- * - * The appropriate callback fn will be invoked for every matching element found. Valid keys are of course defined by - * the nested [[CallbackKey]] enum. - *

- * Silently rejects invalid keys. + * c.f. the Clojure wrapper in macaw.walk */ public AstWalker(Map rawCallbacks, Acc val) { this.acc = val; @@ -379,6 +376,19 @@ public void visit(OverlapsCondition overlapsCondition) { @Override public void visit(Column tableColumn) { invokeCallback(COLUMN, tableColumn); + + // The below is frustrating: it's counterproductive for table-finding, since at best it's not needed and at + // worst it visits aliased tables, causing bugs we've chosen to fix on the Clojure side. e.g. + // + // select o.id from orders o; + // + // will incorrectly list `o` as a table (from the `o.id` term). + // + // However, it's necessary for table name rewriting: if you try to rename `orders` to `purchases` in this: + // + // select orders.id from orders; + // + // you need to visit the `orders` in `orders.id`. That table is distinct from the one in `from orders`. if (tableColumn.getTable() != null && tableColumn.getTable().getName() != null) { visit(tableColumn.getTable()); @@ -758,12 +768,12 @@ public void visit(JsonOperator jsonExpr) { @Override public void visit(AllColumns allColumns) { - + invokeCallback(ALL_COLUMNS, allColumns); } @Override public void visit(AllTableColumns allTableColumns) { - + invokeCallback(ALL_TABLE_COLUMNS, allTableColumns); } @Override diff --git a/src/macaw/core.clj b/src/macaw/core.clj index d37cf01..e7c0baf 100644 --- a/src/macaw/core.clj +++ b/src/macaw/core.clj @@ -3,23 +3,64 @@ [macaw.rewrite :as rewrite] [macaw.walk :as mw]) (:import + (net.sf.jsqlparser.expression Alias) (net.sf.jsqlparser.parser CCJSqlParserUtil) (net.sf.jsqlparser.schema Column Table) - (net.sf.jsqlparser.statement Statement))) + (net.sf.jsqlparser.statement Statement) + (net.sf.jsqlparser.statement.select AllTableColumns))) (set! *warn-on-reflection* true) +(defn- conj-to + [key-name] + (fn item-conjer [results item] + (update results key-name conj item))) + +(defn- query->raw-components + [^Statement parsed-query] + (mw/fold-query parsed-query + {:column (conj-to :columns) + :wildcard (fn [results _all-columns] + (assoc results :has-wildcard? true)) + :table (conj-to :tables) + :table-wildcard (conj-to :table-wildcards)} + {:columns #{} + :has-wildcard? false + :tables #{} + :table-wildcards #{}})) + +(defn- alias-mapping + [^Table table] + (when-let [^Alias table-alias (.getAlias table)] + [(.getName table-alias) (.getName table)])) + +(defn- resolve-table-name + "JSQLParser can't tell whether the `f` in `select f.*` refers to a real table or an alias. Therefore, we have to + disambiguate them based on our own map of aliases->table names. So this function will return the real name of the table + referenced in a table-wildcard (as far as can be determined from the query)." + [alias->name ^AllTableColumns atc] + (let [table-name (-> atc .getTable .getName)] + (or (alias->name table-name) + table-name))) + +(defn- remove-aliases + [aliases table-names] + (let [alias? (into #{} (keys aliases))] + (filter (complement alias?) table-names))) + (defn query->components "Given a parsed query (i.e., a [subclass of] `Statement`) return a map with the `:tables` and `:columns` found within it. (Specifically, it returns their fully-qualified names as strings, where 'fully-qualified' means 'as referred to in the query'; this function doesn't do additional inference work to find out a table's schema.)" [^Statement parsed-query] - (mw/fold-query parsed-query - {:column #(update %1 :columns conj (.getColumnName ^Column %2)) - :table #(update %1 :tables conj (.getName ^Table %2))} - {:columns #{} - :tables #{}})) + (let [{:keys [columns has-wildcard? + tables table-wildcards]} (query->raw-components parsed-query) + aliases (into {} (map alias-mapping tables))] + {:columns (into #{} (map #(.getColumnName ^Column %) columns)) + :has-wildcard? has-wildcard? + :tables (into #{} (remove-aliases aliases (map #(.getName ^Table %) tables))) + :table-wildcards (into #{} (map (partial resolve-table-name aliases) table-wildcards))})) (defn parsed-query "Main entry point: takes a string query and returns a `Statement` object that can be handled by the other functions." @@ -28,10 +69,9 @@ (defn resolve-columns "TODO: Make this use metadata we know about. - TODO: If nil is a column (from a select *) then no need for the rest of the entries TODO: might want to live in another ns" [tables columns] - (let [cartesian-product (for [table tables + (let [cartesian-product (for [table tables column columns] {:table table :column column})] diff --git a/src/macaw/walk.clj b/src/macaw/walk.clj index bf915a6..dc25e44 100644 --- a/src/macaw/walk.clj +++ b/src/macaw/walk.clj @@ -7,8 +7,10 @@ (def ->callback-key "keyword->key map for the AST-folding callbacks." ;; TODO: Move this to a Malli schema to simplify the indirection - {:column AstWalker$CallbackKey/COLUMN - :table AstWalker$CallbackKey/TABLE}) + {:column AstWalker$CallbackKey/COLUMN + :table AstWalker$CallbackKey/TABLE + :table-wildcard AstWalker$CallbackKey/ALL_TABLE_COLUMNS + :wildcard AstWalker$CallbackKey/ALL_COLUMNS}) (defn- preserve "Lift a side effecting callback so that it preserves the accumulator." diff --git a/test/macaw/core_test.clj b/test/macaw/core_test.clj index 4222a60..249731f 100644 --- a/test/macaw/core_test.clj +++ b/test/macaw/core_test.clj @@ -1,11 +1,15 @@ -(ns macaw.core-test +(ns ^:parallel macaw.core-test (:require [clojure.test :refer [deftest testing is]] [macaw.core :as m])) -(def tables (comp :tables m/query->components m/parsed-query)) +(def components (comp m/query->components m/parsed-query)) +(def columns (comp :columns components)) +(def has-wildcard? (comp :has-wildcard? components)) +(def tables (comp :tables components)) +(def table-wcs (comp :table-wildcards components)) -(deftest ^:parallel query->tables-test +(deftest query->tables-test (testing "Simple queries" (is (= #{"core_user"} (tables "select * from core_user;"))) @@ -19,23 +23,52 @@ (is (= #{"core_user"} (tables "select * from (select distinct email from core_user) q;"))))) -(def columns (comp :columns m/query->components m/parsed-query)) +(deftest issue-14-tables-with-complex-aliases-test + (testing "With an alias that is also a table name" + #_(is (= #{"user" "user2_final"} + (tables + "SELECT legacy_user.id AS old_id, + user.id AS new_id + FROM user AS legacy_user + OUTER JOIN user2_final AS user + ON legacy_user.email = user2_final.email;"))))) -(deftest ^:parallel query->columns-test +(deftest query->columns-test (testing "Simple queries" (is (= #{"foo" "bar" "id" "quux_id"} (columns "select foo, bar from baz inner join quux on quux.id = baz.quux_id"))))) -(deftest ^:parallel resolve-columns-test +(deftest alias-inclusion-test + (testing "Aliases are not included" + (is (= #{"orders" "foo"} + (tables "select id, o.id from orders o join foo on orders.id = foo.order_id"))))) + +(deftest resolve-columns-test (let [cols ["name" "id" "email"]] (is (= {"core_user" cols "report_card" cols} (m/resolve-columns ["core_user" "report_card"] cols))))) +(deftest select-*-test + (is (true? (has-wildcard? "select * from orders"))) + (is (true? (has-wildcard? "select id, * from orders join foo on orders.id = foo.order_id")))) + +(deftest table-wildcard-test-without-aliases + (is (= #{"orders"} + (table-wcs "select orders.* from orders join foo on orders.id = foo.order_id"))) + (is (= #{"foo"} + (table-wcs "select foo.* from orders join foo on orders.id = foo.order_id")))) + +(deftest table-star-test-with-aliases + (is (= #{"orders"} + (table-wcs "select o.* from orders o join foo on orders.id = foo.order_id"))) + (is (= #{"foo"} + (table-wcs "select f.* from orders o join foo f on orders.id = foo.order_id")))) + (defn test-replacement [before replacements after] (is (= after (m/replace-names before replacements)))) -(deftest ^:parallel replace-names-test +(deftest replace-names-test (test-replacement "select a.x, b.y from a, b;" {:tables {"a" "aa"} :columns {"x" "xx"}}