Skip to content

Commit

Permalink
Add support for select * and select table.* (#13)
Browse files Browse the repository at this point in the history
* Add support for select * and select table.*

* Move ^:parallel to ns, not each deftest

* Whitespace etc

* Upgrade to 4.9

* More semantic renaming
  • Loading branch information
tsmacdonald authored Mar 21, 2024
1 parent 8341ff1 commit c6f3edc
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 35 deletions.
9 changes: 7 additions & 2 deletions .clj-kondo/config.edn
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
:used-underscored-binding {:level :warning}
:warn-on-reflection {:level :warning}

:second-date/validate-deftest {:level :warning}}
:macaw/validate-deftest {:level :warning}}

:lint-as
{}
Expand All @@ -58,4 +58,9 @@
:config-in-ns
{tests
{:linters
{:missing-docstring {:level :off}}}}}
{:missing-docstring {:level :off}}}

;; Disable deftest parallel/synchronized linting since we marked the whole ns as ^:parallel
macaw.core-test
{:linters
{:macaw/validate-deftest {:level :off}}}}}
6 changes: 3 additions & 3 deletions .clj-kondo/hooks/clojure/test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#_(= (last (str sexpr)) \!))
(hooks/reg-finding! (assoc (meta form)
:message (format "%s is not allowed inside a ^:parallel test" sexpr)
:type :toucan2/validate-deftest)))))))
:type :macaw/validate-deftest)))))))
(walk [form]
(f form)
(doseq [child (:children form)]
Expand All @@ -38,11 +38,11 @@
(when-not (or parallel? synchronized?)
(hooks/reg-finding! (assoc (meta test-name)
:message "Test should be marked either ^:parallel or ^:synchronized"
:type :second-date/validate-deftest)))
:type :macaw/validate-deftest)))
(when (and parallel? synchronized?)
(hooks/reg-finding! (assoc (meta test-name)
:message "Test should not be marked both ^:parallel and ^:synchronized"
:type :second-date/validate-deftest)))
:type :macaw/validate-deftest)))
(when parallel?
(doseq [form body]
(warn-about-disallowed-parallel-forms form))))
Expand Down
2 changes: 1 addition & 1 deletion deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
:ensure "target/classes"}

:deps
{com.github.jsqlparser/jsqlparser {:mvn/version "4.8"}} ; The actual SQL Parser to wrap!
{com.github.jsqlparser/jsqlparser {:mvn/version "4.9"}} ; The actual SQL Parser to wrap!


:aliases
Expand Down
34 changes: 22 additions & 12 deletions java/com/metabase/macaw/AstWalker.java
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@
import net.sf.jsqlparser.statement.update.Update;
import net.sf.jsqlparser.statement.upsert.Upsert;

import static com.metabase.macaw.AstWalker.CallbackKey.ALL_COLUMNS;
import static com.metabase.macaw.AstWalker.CallbackKey.ALL_TABLE_COLUMNS;
import static com.metabase.macaw.AstWalker.CallbackKey.COLUMN;
import static com.metabase.macaw.AstWalker.CallbackKey.TABLE;

Expand Down Expand Up @@ -210,6 +212,8 @@ public class AstWalker<Acc> implements SelectVisitor, FromItemVisitor, Expressio
SelectItemVisitor, StatementVisitor {

public enum CallbackKey {
ALL_COLUMNS,
ALL_TABLE_COLUMNS,
COLUMN,
TABLE;

Expand All @@ -224,17 +228,10 @@ public String toString() {
private final EnumMap<CallbackKey, IFn> callbacks;

/**
* Construct a new walker with the given `callbacks`. The `callbacks` should be a (Clojure) map like so:
* Construct a new walker with the given `callbacks`. The `callbacks` should be a (Clojure) map of CallbackKeys to
* reducing functions.
*
* <pre><code>
* (ASTWalker. {AstWalker$CallbackKey/COLUMN (fn [acc col] (do-something-with-the-found-column acc col))
* AstWalker$CallbackKey/TABLE (fn [acc table] (...))})
* </code></pre>
*
* The appropriate callback fn will be invoked for every matching element found. Valid keys are of course defined by
* the nested [[CallbackKey]] enum.
* <p>
* Silently rejects invalid keys.
* c.f. the Clojure wrapper in <code>macaw.walk</code>
*/
public AstWalker(Map<CallbackKey, IFn> rawCallbacks, Acc val) {
this.acc = val;
Expand Down Expand Up @@ -379,6 +376,19 @@ public void visit(OverlapsCondition overlapsCondition) {
@Override
public void visit(Column tableColumn) {
invokeCallback(COLUMN, tableColumn);

// The below is frustrating: it's counterproductive for table-finding, since at best it's not needed and at
// worst it visits aliased tables, causing bugs we've chosen to fix on the Clojure side. e.g.
//
// select o.id from orders o;
//
// will incorrectly list `o` as a table (from the `o.id` term).
//
// However, it's necessary for table name rewriting: if you try to rename `orders` to `purchases` in this:
//
// select orders.id from orders;
//
// you need to visit the `orders` in `orders.id`. That table is distinct from the one in `from orders`.
if (tableColumn.getTable() != null
&& tableColumn.getTable().getName() != null) {
visit(tableColumn.getTable());
Expand Down Expand Up @@ -758,12 +768,12 @@ public void visit(JsonOperator jsonExpr) {

@Override
public void visit(AllColumns allColumns) {

invokeCallback(ALL_COLUMNS, allColumns);
}

@Override
public void visit(AllTableColumns allTableColumns) {

invokeCallback(ALL_TABLE_COLUMNS, allTableColumns);
}

@Override
Expand Down
56 changes: 48 additions & 8 deletions src/macaw/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,64 @@
[macaw.rewrite :as rewrite]
[macaw.walk :as mw])
(:import
(net.sf.jsqlparser.expression Alias)
(net.sf.jsqlparser.parser CCJSqlParserUtil)
(net.sf.jsqlparser.schema Column Table)
(net.sf.jsqlparser.statement Statement)))
(net.sf.jsqlparser.statement Statement)
(net.sf.jsqlparser.statement.select AllTableColumns)))

(set! *warn-on-reflection* true)

(defn- conj-to
[key-name]
(fn item-conjer [results item]
(update results key-name conj item)))

(defn- query->raw-components
[^Statement parsed-query]
(mw/fold-query parsed-query
{:column (conj-to :columns)
:wildcard (fn [results _all-columns]
(assoc results :has-wildcard? true))
:table (conj-to :tables)
:table-wildcard (conj-to :table-wildcards)}
{:columns #{}
:has-wildcard? false
:tables #{}
:table-wildcards #{}}))

(defn- alias-mapping
[^Table table]
(when-let [^Alias table-alias (.getAlias table)]
[(.getName table-alias) (.getName table)]))

(defn- resolve-table-name
"JSQLParser can't tell whether the `f` in `select f.*` refers to a real table or an alias. Therefore, we have to
disambiguate them based on our own map of aliases->table names. So this function will return the real name of the table
referenced in a table-wildcard (as far as can be determined from the query)."
[alias->name ^AllTableColumns atc]
(let [table-name (-> atc .getTable .getName)]
(or (alias->name table-name)
table-name)))

(defn- remove-aliases
[aliases table-names]
(let [alias? (into #{} (keys aliases))]
(filter (complement alias?) table-names)))

(defn query->components
"Given a parsed query (i.e., a [subclass of] `Statement`) return a map with the `:tables` and `:columns` found within it.
(Specifically, it returns their fully-qualified names as strings, where 'fully-qualified' means 'as referred to in
the query'; this function doesn't do additional inference work to find out a table's schema.)"
[^Statement parsed-query]
(mw/fold-query parsed-query
{:column #(update %1 :columns conj (.getColumnName ^Column %2))
:table #(update %1 :tables conj (.getName ^Table %2))}
{:columns #{}
:tables #{}}))
(let [{:keys [columns has-wildcard?
tables table-wildcards]} (query->raw-components parsed-query)
aliases (into {} (map alias-mapping tables))]
{:columns (into #{} (map #(.getColumnName ^Column %) columns))
:has-wildcard? has-wildcard?
:tables (into #{} (remove-aliases aliases (map #(.getName ^Table %) tables)))
:table-wildcards (into #{} (map (partial resolve-table-name aliases) table-wildcards))}))

(defn parsed-query
"Main entry point: takes a string query and returns a `Statement` object that can be handled by the other functions."
Expand All @@ -28,10 +69,9 @@

(defn resolve-columns
"TODO: Make this use metadata we know about.
TODO: If nil is a column (from a select *) then no need for the rest of the entries
TODO: might want to live in another ns"
[tables columns]
(let [cartesian-product (for [table tables
(let [cartesian-product (for [table tables
column columns]
{:table table
:column column})]
Expand Down
6 changes: 4 additions & 2 deletions src/macaw/walk.clj
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
(def ->callback-key
"keyword->key map for the AST-folding callbacks."
;; TODO: Move this to a Malli schema to simplify the indirection
{:column AstWalker$CallbackKey/COLUMN
:table AstWalker$CallbackKey/TABLE})
{:column AstWalker$CallbackKey/COLUMN
:table AstWalker$CallbackKey/TABLE
:table-wildcard AstWalker$CallbackKey/ALL_TABLE_COLUMNS
:wildcard AstWalker$CallbackKey/ALL_COLUMNS})

(defn- preserve
"Lift a side effecting callback so that it preserves the accumulator."
Expand Down
47 changes: 40 additions & 7 deletions test/macaw/core_test.clj
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
(ns macaw.core-test
(ns ^:parallel macaw.core-test
(:require
[clojure.test :refer [deftest testing is]]
[macaw.core :as m]))

(def tables (comp :tables m/query->components m/parsed-query))
(def components (comp m/query->components m/parsed-query))
(def columns (comp :columns components))
(def has-wildcard? (comp :has-wildcard? components))
(def tables (comp :tables components))
(def table-wcs (comp :table-wildcards components))

(deftest ^:parallel query->tables-test
(deftest query->tables-test
(testing "Simple queries"
(is (= #{"core_user"}
(tables "select * from core_user;")))
Expand All @@ -19,23 +23,52 @@
(is (= #{"core_user"}
(tables "select * from (select distinct email from core_user) q;")))))

(def columns (comp :columns m/query->components m/parsed-query))
(deftest issue-14-tables-with-complex-aliases-test
(testing "With an alias that is also a table name"
#_(is (= #{"user" "user2_final"}
(tables
"SELECT legacy_user.id AS old_id,
user.id AS new_id
FROM user AS legacy_user
OUTER JOIN user2_final AS user
ON legacy_user.email = user2_final.email;")))))

(deftest ^:parallel query->columns-test
(deftest query->columns-test
(testing "Simple queries"
(is (= #{"foo" "bar" "id" "quux_id"}
(columns "select foo, bar from baz inner join quux on quux.id = baz.quux_id")))))

(deftest ^:parallel resolve-columns-test
(deftest alias-inclusion-test
(testing "Aliases are not included"
(is (= #{"orders" "foo"}
(tables "select id, o.id from orders o join foo on orders.id = foo.order_id")))))

(deftest resolve-columns-test
(let [cols ["name" "id" "email"]]
(is (= {"core_user" cols
"report_card" cols}
(m/resolve-columns ["core_user" "report_card"] cols)))))

(deftest select-*-test
(is (true? (has-wildcard? "select * from orders")))
(is (true? (has-wildcard? "select id, * from orders join foo on orders.id = foo.order_id"))))

(deftest table-wildcard-test-without-aliases
(is (= #{"orders"}
(table-wcs "select orders.* from orders join foo on orders.id = foo.order_id")))
(is (= #{"foo"}
(table-wcs "select foo.* from orders join foo on orders.id = foo.order_id"))))

(deftest table-star-test-with-aliases
(is (= #{"orders"}
(table-wcs "select o.* from orders o join foo on orders.id = foo.order_id")))
(is (= #{"foo"}
(table-wcs "select f.* from orders o join foo f on orders.id = foo.order_id"))))

(defn test-replacement [before replacements after]
(is (= after (m/replace-names before replacements))))

(deftest ^:parallel replace-names-test
(deftest replace-names-test
(test-replacement "select a.x, b.y from a, b;"
{:tables {"a" "aa"}
:columns {"x" "xx"}}
Expand Down

0 comments on commit c6f3edc

Please sign in to comment.