Skip to content

Commit

Permalink
determine column alias (#31)
Browse files Browse the repository at this point in the history
  • Loading branch information
piranha authored May 15, 2024
1 parent 14014cb commit f2d3ace
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 31 deletions.
3 changes: 3 additions & 0 deletions .dir-locals.el
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
((nil . ((indent-tabs-mode . nil) ; always use spaces for tabs
(require-final-newline . t))) ; add final newline on save
(java-mode . ((whitespace-line-column . 118)))
(clojure-mode . ((cider-preferred-build-tool . clojure-cli)
(cider-clojure-cli-aliases . "dev:user")
(cljr-favor-prefix-notation . nil)
(cljr-insert-newline-after-require . t)
;; prefer keeping source width about ~118, GitHub seems to cut
;; off stuff at either 119 or 120 and it's nicer to look at
;; code in GH when you don't have to scroll back and forth
(fill-column . 118)
(whitespace-line-column . 118)
(column-enforce-column . 118)
(clojure-docstring-fill-column . 118)
(clojure-indent-style . always-align)
(eval . (put-clojure-indent 'with-meta '(:form)))
(eval . (put-clojure-indent 'with-bindings* '(:form)))))
(markdown-mode . ((fill-column . 80)
Expand Down
50 changes: 45 additions & 5 deletions java/com/metabase/macaw/AstWalker.java
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,12 @@ public String toString() {
}
}

public enum QueryContext {
/**
 * One entry on the walker's context stack, exposed as a pair of strings.
 *
 * <p>The key names the kind of context the entry describes and the value carries the
 * context itself.
 */
public interface QueueItem {
    /** @return the name of the kind of context this entry represents */
    String getKey();

    /** @return the context value associated with {@link #getKey()} */
    String getValue();
}

public enum QueryContext implements QueueItem {
DELETE,
ELSE,
FROM,
Expand All @@ -242,16 +247,38 @@ public enum QueryContext {
UPDATE,
WHERE;

public String toString() {
public String getKey() {
return "query";
}

public String getValue() {
return name().toUpperCase();
}
}

/**
 * A general-purpose {@link QueueItem} holding an arbitrary key/value pair.
 *
 * <p>Instances are immutable: both fields are assigned once in the constructor.
 *
 * <p>NOTE(review): nothing in this class reads state from an enclosing instance, so it
 * looks like it could be declared {@code static} if it is nested - confirm against the
 * enclosing class before changing.
 */
public class SomeContext implements QueueItem {
    private final String key;
    private final String value;

    /**
     * @param key   the kind of context (returned by {@link #getKey()})
     * @param value the context value (returned by {@link #getValue()})
     */
    SomeContext(String key, String value) {
        this.key = key;
        this.value = value;
    }

    /** @return the key given at construction time */
    public String getKey() {
        return this.key;
    }

    /** @return the value given at construction time */
    public String getValue() {
        return this.value;
    }
}

private static final String NOT_SUPPORTED_YET = "Not supported yet.";

private Acc acc;
private final EnumMap<CallbackKey, IFn> callbacks;
private final Deque<String> contextStack;
private final Deque<QueueItem> contextStack;

/**
* Construct a new walker with the given `callbacks`. The `callbacks` should be a (Clojure) map of CallbackKeys to
Expand All @@ -262,7 +289,7 @@ public String toString() {
public AstWalker(Map<CallbackKey, IFn> rawCallbacks, Acc val) {
this.acc = val;
this.callbacks = new EnumMap<>(rawCallbacks);
this.contextStack = new ArrayDeque<String>();
this.contextStack = new ArrayDeque<QueueItem>();
}

/**
Expand All @@ -278,7 +305,11 @@ public void invokeCallback(CallbackKey key, Object visitedItem) {
}

private void pushContext(QueryContext c) {
this.contextStack.push(c.toString());
this.contextStack.push(c);
}

/**
 * Places {@code item} on top of the context stack.
 *
 * @param item the context entry to put at the head of the stack
 */
private void pushContext(QueueItem item) {
    // Deque.push is specified as equivalent to addFirst.
    contextStack.addFirst(item);
}

// This is pure sugar, but it's nice to be symmetrical with pushContext
Expand Down Expand Up @@ -838,7 +869,16 @@ public void visit(IsDistinctExpression isDistinctExpression) {

/**
 * Visits a select item. When the item has an alias, an {@code "alias"} context carrying
 * the alias name is on the context stack while the item's expression is walked, and is
 * removed again afterwards.
 *
 * @param item the select item whose expression is walked
 */
@Override
public void visit(SelectItem item) {
    // TODO: what are .getAliasColumns()? Should we look at them?
    var alias = item.getAlias();
    if (alias == null) {
        item.getExpression().accept(this);
        return;
    }

    // FIXME: this is absolutely a hack, what's the best way to get around it?
    pushContext(new SomeContext("alias", alias.getName()));
    try {
        item.getExpression().accept(this);
    } finally {
        // Pop even when the walk throws, so the context stack stays balanced.
        popContext();
    }
}

@Override
Expand Down
82 changes: 56 additions & 26 deletions src/macaw/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
[macaw.util :as u]
[macaw.walk :as mw])
(:import
(com.metabase.macaw AstWalker$QueueItem)
(net.sf.jsqlparser.expression Alias)
(net.sf.jsqlparser.parser CCJSqlParserUtil)
(net.sf.jsqlparser.schema Column Table)
Expand All @@ -18,7 +19,10 @@
([key-name xf]
(fn item-conjer [results component context]
(update results key-name conj {:component (xf component)
:context (vec context)}))))
:context (mapv
(fn [^AstWalker$QueueItem x]
[(keyword (.getKey x)) (.getValue x)])
context)}))))

(defn- query->raw-components
[^Statement parsed-query]
Expand All @@ -34,59 +38,85 @@
:tables #{}
:table-wildcards #{}}))

(defn- make-table [^Table t]
;;; tables

(defn- make-table
  "Returns a map describing table `t`: always `:table` (its name), plus `:schema` when
  the table has a schema name. The second argument (the context) is ignored."
  [^Table t _ctx]
  (let [schema (.getSchemaName t)]
    (cond-> {:table (.getName t)}
      schema (assoc :schema schema))))

(defn- make-column [alias-map table-map ^Column c]
(merge
{:column (.getColumnName c)}
(if-let [t (.getTable c)]
(or
(get alias-map (.getName t))
(:component (get table-map (.getName t))))
;; if we see only a single table, we can safely say it's the table of that column
(when (= (count table-map) 1)
(:component (val (first table-map)))))))

(defn- alias-mapping
[^Table table]
[^Table table ctx]
(when-let [^Alias table-alias (.getAlias table)]
[(.getName table-alias) (make-table table)]))
[(.getName table-alias) (make-table table ctx)]))

(defn- resolve-table-name
"JSQLParser can't tell whether the `f` in `select f.*` refers to a real table or an alias. Therefore, we have to
disambiguate them based on our own map of aliases->table names. So this function will return the real name of the table
referenced in a table-wildcard (as far as can be determined from the query)."
[alias->table name->table ^AllTableColumns atc]
[{:keys [alias->table name->table]} ^AllTableColumns atc _ctx]
(let [table-name (-> atc .getTable .getName)]
(or (alias->table table-name)
(name->table table-name))))

;;; columns

(defn- maybe-column-alias
  "Looks only at the first entry of the context. When that entry is an `[:alias name]`
  pair, returns `{:alias name}`; otherwise returns nil."
  [[[kind alias-name] :as _ctx]]
  (when (= :alias kind)
    {:alias alias-name}))

(defn- maybe-column-table
  "Finds the table map for column `c`, or nil when none can be determined.

  When the column carries a table reference, that reference's name is looked up first in
  `alias->table`, then in `name->table` (taking the entry's `:component`). When it carries
  none, the single entry of `name->table` is used, provided there is exactly one."
  [{:keys [alias->table name->table]} ^Column c]
  (if-let [t (.getTable c)]
    (let [table-name (.getName t)]
      (or (get alias->table table-name)
          (-> name->table (get table-name) :component)))
    ;; if we see only a single table, we can safely say it's the table of that column
    (when (= 1 (count name->table))
      (-> name->table first val :component))))

(defn- make-column
  "Builds the map describing column `c`: its `:column` name, merged with whatever
  `maybe-column-alias` derives from `ctx` and whatever `maybe-column-table` derives from
  `data` and `c` (either may contribute nothing)."
  [data ^Column c ctx]
  (-> {:column (.getColumnName c)}
      (merge (maybe-column-alias ctx))
      (merge (maybe-column-table data c))))

;;; get them together

(defn- only-query-context
  "Returns a vector of the values of the `[:query value]` entries of `ctx`, in their
  original order. Entries with any other key are dropped."
  [ctx]
  (->> ctx
       (filter (fn [entry] (= :query (first entry))))
       (mapv second)))

(defn- update-components
[f components]
(map #(update % :component f) components))
(map #(-> %
(update :component f (:context %))
(update :context only-query-context))
components))

(defn query->components
"Given a parsed query (i.e., a [subclass of] `Statement`) return a map with the elements found within it.
(Specifically, it returns their fully-qualified names as strings, where 'fully-qualified' means 'as referred to in
the query'; this function doesn't do additional inference work to find out a table's schema.)"
[^Statement parsed-query]
(let [{:keys [columns has-wildcard?
(let [{:keys [columns
has-wildcard?
mutation-commands
tables table-wildcards]} (query->raw-components parsed-query)
alias-map (into {} (map #(-> % :component alias-mapping) tables))
table-map (->> (update-components make-table tables)
(u/group-with #(-> % :component :table)
(fn [a b] (if (:schema a) a b))))]
{:columns (into #{} (update-components (partial make-column alias-map table-map) columns))
:has-wildcard? (into #{} has-wildcard?)
tables
table-wildcards]} (query->raw-components parsed-query)
alias-map (into {} (map #(-> % :component (alias-mapping (:context %))) tables))
table-map (->> (update-components make-table tables)
(u/group-with #(-> % :component :table)
(fn [a b] (if (:schema a) a b))))
data {:alias->table alias-map
:name->table table-map}]
{:columns (into #{} (update-components (partial make-column data) columns))
:has-wildcard? (into #{} (update-components (fn [x & _args] x) has-wildcard?))
:mutation-commands (into #{} mutation-commands)
:tables (into #{} (vals table-map))
:table-wildcards (into #{} (update-components (partial resolve-table-name alias-map table-map) table-wildcards))}))
:table-wildcards (into #{} (update-components (partial resolve-table-name data) table-wildcards))}))

(defn parsed-query
"Main entry point: takes a string query and returns a `Statement` object that can be handled by the other functions."
Expand Down
9 changes: 9 additions & 0 deletions test/macaw/core_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,15 @@
(is (= #{{:column "id" :table "orders" :schema "public"}}
(columns "SELECT o.id FROM public.orders o")))))

(deftest infer-test
  (testing "We can infer a column through a few hoops"
    ;; a column selected from a subquery is reported against the subquery's table
    (is (= #{{:column "amount" :table "orders"}}
           (columns "SELECT amount FROM (SELECT amount FROM orders)")))
    ;; the aliased column in the subquery is reported together with its alias
    (is (= #{{:column "amount" :alias "cost" :table "orders"}
             ;; FIXME: we need to figure out that `cost` is an alias from subquery
             {:column "cost" :table "orders"}}
           (columns "SELECT cost FROM (SELECT amount AS cost FROM orders)")))))

(deftest mutation-test
(is (= #{"alter-sequence"}
(mutations "ALTER SEQUENCE serial RESTART WITH 42")))
Expand Down

0 comments on commit f2d3ace

Please sign in to comment.