From 3549faef86cde6229f2854a444d501a4b9f42565 Mon Sep 17 00:00:00 2001 From: Robert Hou Date: Mon, 6 Feb 2017 16:43:37 -0800 Subject: [PATCH] Test Framework changes to enable some order-by clause verification to work for JSON strings. (#250) --- README.md | 122 +++++++++ .../drill/test/framework/TestVerifier.java | 247 ++++++++++++++++-- .../apache/drill/test/framework/Utils.java | 22 ++ 3 files changed, 375 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 677bf3377..b27852c2e 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,127 @@ Example: Use this option to provide the usage of the command, which includes additional options +### Special handling for order-by tests + +The Test Framework is able to verify the order in which rows are returned for +some SQL statements that have an order-by clause. If the columns in the order-by +clause are also projected (present) in the output, then the Test Framework may +be able to check the order of the rows that are returned. Here are some +requirements for a query to be validated by the Test Framework: + +1) All columns/fields in the order-by clause must appear in the projection list +2) Expressions cannot be used in the order-by clause. Things like + "order by column.field[2]". The [2] indicates an expression which is the + third element in the field array. +3) Referencing a field within a json string in a column is more complicated. + Most cases are supported. Some cases may not work. +4) If a query references more than one table, then use aliases for each column + in the projection list, and reference these aliases in the order-by clause. + Using aliases is a good practice in general when verifying an order-by clause. +5) The order-by clause cannot be followed by another SQL operation except for + limit. If the order-by clause is followed by an offset or collate, for + example, it might not work. + +Here are some queries that can be validated: + +1) select id, gbyi from `complex.json` order by id limit 10; + ++-----+-------+ +| id | gbyi | ++-----+-------+ +| 1 | 1 | +| 2 | 2 | +| 3 | 3 | +| 4 | 4 | +| 5 | 5 | +| 6 | 6 | +| 7 | 7 | +| 8 | 8 | +| 9 | 9 | +| 10 | 10 | ++-----+-------+ + +This query can be validated because the results are ordered by the "id" column, +and the "id" column is projected in the output. The Test Framework can +examine the output and verify that the rows are in order. + +2) select * from + (select d.uid uid, flatten(d.events) evnts from `data.json` d order by d.uid) s + order by s.evnts.event_time, s.evnts.campaign_id; + ++-------+--------------------------------------------------------------------------------------------------+ +| uid | evnts | ++-------+--------------------------------------------------------------------------------------------------+ +| 1 | {"event_name":"e1_name","event_time":1000000,"type":"cmpgn1"} | +| 1 | {"event_name":"e2_name","event_time":2000000,"type":"cmpgn4","evnt_id":"e2","campaign_id":"c1"} | +| null | {"event_name":"e2_name","event_time":2000000,"type":"cmpgn4","evnt_id":"e2","campaign_id":"c1"} | +| 1 | {"event_name":"e3_name","event_time":3000000,"type":"cmpgn1","evnt_id":"e3","campaign_id":"c1"} | +| null | {"event_name":"e3_name","event_time":3000000,"type":"cmpgn1","evnt_id":"e3","campaign_id":"c1"} | +| null | {"event_name":"e4_name","event_time":4000000,"type":"cmpgn1","evnt_id":"e4","campaign_id":"c1"} | +| 1 | {"event_name":"e5_name","event_time":5000000,"type":"cmpgn3","evnt_id":"e5","campaign_id":"c2"} | +| null | {"event_time":6000000,"type":"cmpgn9","evnt_id":"e6","campaign_id":"c1"} | +| 1 | {"event_name":"e6_name","event_time":6000000,"type":"cmpgn9","evnt_id":"e6"} | +| null | {"event_name":"e7_name","event_time":7000000,"type":"cmpgn3","evnt_id":"e7","campaign_id":"c1"} | +| null | {"event_name":"e8_name","event_time":8000000,"type":"null","evnt_id":"e8","campaign_id":"c2"} | +| 1 | {"event_name":"e8_name","event_time":8000000,"type":"cmpgn2","evnt_id":"e8","campaign_id":"c2"} | +| null | {"event_time":9000000,"type":"cmpgn4","evnt_id":"e9","campaign_id":"c2"} | +| 1 | {"event_name":"e9_name","event_time":9000000,"type":"cmpgn4","evnt_id":"e9"} | +| 1 | {"event_name":"e7_name","type":"cmpgn3","evnt_id":"e7","campaign_id":"c1"} | +| 1 | {"event_name":"e4_name","type":"cmpgn1","evnt_id":"e4","campaign_id":"c1"} | +| null | {"event_name":"e1_name","type":"cmpgn9","campaign_id":"c1"} | +| null | {"event_name":"e5_name","type":"cmpgn2","evnt_id":"e5","campaign_id":"c2"} | ++-------+--------------------------------------------------------------------------------------------------+ + +This query can be validated because the results are ordered by the data in the +"evnts" column, and the "evnts" column is projected in the output. The Test +Framework can parse the JSON string in the "evnts" column and examine the +event_time and campaign_id values. + + +These queries cannot be validated: + +1) select t.gbyt, t.id, t.ooa[0].`in` zeroin, t.ooa[1].fl.f1 flf1, t.ooa[1].fl.f2 flf2, t.ooa[1].`in` onein, t.ooa[2].a.aa.aaa, t.ooa[2].b.bb.bbb, t.ooa[2].c.cc.ccc from `complex.json` t where t.ooa[2].b.bb.bbb is not null order by t.ooa[2].c.cc.ccc limit 10; + ++-------+--------+---------+-------------+-----------+--------+------------+------------+------------+ +| gbyt | id | zeroin | flf1 | flf2 | onein | EXPR$6 | EXPR$7 | EXPR$8 | ++-------+--------+---------+-------------+-----------+--------+------------+------------+------------+ +| aaa | 10 | null | null | null | 10 | aaa 10 | bbb 10 | ccc 10 | +| ooos | 1000 | null | null | null | 1000 | aaa 1000 | bbb 1000 | ccc 1000 | +| nul | 10002 | null | null | null | 10002 | aaa 10002 | bbb 10002 | ccc 10002 | +| sba | 10003 | 10003 | 10003.6789 | 154351.0 | 10003 | aaa 10003 | bbb 10003 | ccc 10003 | +| str | 10008 | 10008 | null | null | 10008 | aaa 10008 | bbb 10008 | ccc 10008 | +| fl | 10009 | 10009 | null | null | 10009 | aaa 10009 | bbb 10009 | ccc 10009 | +| saa | 1001 | null | 1001.6789 | 64331.0 | 1001 | aaa 1001 | bbb 1001 | ccc 1001 | +| soa | 10023 | null | null | null | 10023 | aaa 10023 | bbb 10023 | ccc 10023 | +| nul | 10028 | null | 10028.6789 | 154601.0 | 10028 | aaa 10028 | bbb 10028 | ccc 10028 | +| ooos | 10029 | null | 10029.6789 | 154611.0 | 10029 | aaa 10029 | bbb 10029 | ccc 10029 | ++-------+--------+---------+-------------+-----------+--------+------------+------------+------------+ + +This query cannot be validated because the order-by has an expression, +"t.ooa[2].c.cc.ccc". The Test Framework cannot evaluate the array reference +"ooa[2]". + +2) select id from `complex.json` order by gbyi limit 10; + ++------+ +| id | ++------+ +| 106 | +| 121 | +| 91 | +| 46 | +| 61 | +| 31 | +| 1 | +| 76 | +| 16 | +| 136 | ++------+ + +This query cannot be validated because the "gbyi" column is not projected so +the Test Framework cannot determine what the order should be. + + ## Authors [Zhiyong](https://github.com/zhiyongliu) @@ -146,3 +267,4 @@ Example: [Jacques](https://github.com/jacques-n) [Jason](https://github.com/jaltekruse) [Sudheesh](https://github.com/sudheeshkatkam) +[Robert](https://github.com/rhou1) diff --git a/framework/src/main/java/org/apache/drill/test/framework/TestVerifier.java b/framework/src/main/java/org/apache/drill/test/framework/TestVerifier.java index 49c4b635e..9e58068ca 100755 --- a/framework/src/main/java/org/apache/drill/test/framework/TestVerifier.java +++ b/framework/src/main/java/org/apache/drill/test/framework/TestVerifier.java @@ -30,6 +30,7 @@ import java.nio.file.Paths; import java.sql.Types; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; @@ -38,6 +39,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.Lists; import org.apache.log4j.Logger; @@ -348,16 +350,46 @@ private static int getMissingCount(Map map) { } private static int compareTo(ColumnList list1, ColumnList list2, - Map columnIndexAndOrder, int start) { + Map columnIndexAndOrder, int start) + throws VerificationException { + // convert map into a list + List columnIndexAndOrderList = new ArrayList(); + for (Map.Entry entry : columnIndexAndOrder.entrySet()) { + IndexAndOrder indexAndOrder = new IndexAndOrder(); + indexAndOrder.index = entry.getKey(); + indexAndOrder.order = entry.getValue(); + columnIndexAndOrderList.add (indexAndOrder); + } + return compareTo(list1, list2, columnIndexAndOrderList, start, null); + } + + /** + * Compare two rows, represented by list1 and list2. + * Compare the two rows in the order specified in the order by clause. If the + * two rows have have different values in the first order-by column, then it + * can be determined if they are in the right order without looking at the + * second order-by column. If the two rows have the same value in the first + * order-by column, then the second order-by column is checked. The second + * order-by column (and subsequent order-by columns) are handled in a recursive + * call to this procedure. + * Numerical values are handled separately from other values. + * JSON strings have special handling because they have to be parsed. A field + * within a JSON string can be used in an order-by clause, so the field needs + * to be extracted from the JSON string. + * This code currently handles integer and string values in JSON strings. + */ + private static int compareTo(ColumnList list1, ColumnList list2, + List columnIndexAndOrder, int start, + Map orderByColumns) throws VerificationException { if (start == columnIndexAndOrder.size()) { return 0; } - int idx = (Integer) columnIndexAndOrder.keySet().toArray()[start]; + int idx = columnIndexAndOrder.get(start).index; int result = -1; Object o1 = list1.getValues().get(idx); Object o2 = list2.getValues().get(idx); if (ColumnList.bothNull(o1, o2)) { - return compareTo(list1, list2, columnIndexAndOrder, start + 1); + return compareTo(list1, list2, columnIndexAndOrder, start + 1, orderByColumns); } else if (ColumnList.oneNull(o1, o2)) { return 0; // TODO handle NULLS FIRST and NULLS LAST cases } @@ -366,19 +398,85 @@ private static int compareTo(ColumnList list1, ColumnList list2, Number number2 = (Number) o2; double diff = number1.doubleValue() - number2.doubleValue(); if (diff == 0) { - return compareTo(list1, list2, columnIndexAndOrder, start + 1); + return compareTo(list1, list2, columnIndexAndOrder, start + 1, orderByColumns); } else { if (diff < 0) { result = -1; } else { result = 1; } - if (columnIndexAndOrder.get(idx).equalsIgnoreCase("desc")) { + if (columnIndexAndOrder.get(start).order.equalsIgnoreCase("desc")) { result *= -1; } return result; } } + if (o1 instanceof String) { + // check if JSON string + try { + String string1 = (String) o1; + String string2 = (String) o2; + if (orderByColumns != null) { + String columnName; + String fieldName; + String nextFieldName = ""; + columnName = (String) orderByColumns.keySet().toArray()[start]; + if (columnName.indexOf('.') >= 0) { + // there is a field in the JSON string + fieldName = columnName.substring(columnName.indexOf('.') + 1); + JsonNode idNode1 = Utils.getJsonValue (string1, fieldName); + JsonNode idNode2 = Utils.getJsonValue (string2, fieldName); + // if a field is missing, treat it as a null + if ( (idNode1.isMissingNode()) && (idNode2.isMissingNode()) ) { + return 0; + } else if (idNode2.isMissingNode()) { + return -1; + } else if (idNode1.isMissingNode()) { + return 1; + } + if (idNode1.isNumber()) { + Number number1 = (Number) idNode1.asInt(); + Number number2 = (Number) idNode2.asInt(); + double diff = number1.doubleValue() - number2.doubleValue(); + if (diff == 0) { + return compareTo(list1, list2, columnIndexAndOrder, start + 1, orderByColumns); + } else { + if (diff < 0) { + result = -1; + } else { + result = 1; + } + if (columnIndexAndOrder.get(start).order.equalsIgnoreCase("desc")) { + result *= -1; + } + return result; + } + } else if (idNode1.isTextual()) { + String string3 = (String) idNode1.asText(); + String string4 = (String) idNode2.asText(); + int stringCompare = string3.compareTo(string4); + if (stringCompare == 0) { + return compareTo(list1, list2, columnIndexAndOrder, start + 1, orderByColumns); + } else { + if (stringCompare < 0) { + result = -1; + } else { + result = 1; + } + if (columnIndexAndOrder.get(start).order.equalsIgnoreCase("desc")) { + result *= -1; + } + return result; + } + } else { + throw new VerificationException ("JSON string field " + fieldName + " is not a number: " + string1); + } + } + } + } catch (IOException e) { + // do nothing. continue with code below + } + } @SuppressWarnings("unchecked") Comparable comparable1 = (Comparable) list1.getValues().get( idx); @@ -386,19 +484,49 @@ private static int compareTo(ColumnList list1, ColumnList list2, Comparable comparable2 = (Comparable) list2.getValues().get( idx); result = comparable1.compareTo(comparable2); - if (columnIndexAndOrder.get(idx).equalsIgnoreCase("desc")) { + if (columnIndexAndOrder.get(start).order.equalsIgnoreCase("desc")) { result *= -1; } if (result == 0) { - return compareTo(list1, list2, columnIndexAndOrder, start + 1); + return compareTo(list1, list2, columnIndexAndOrder, start + 1, orderByColumns); } else { return result; } } + private static class IndexAndOrder { + private int index; + private String order; + } + /** * Verifies orders in the result set if a query involves order by in the final * output. + * There are some requirements for a query to be properly validated. + * 1) All columns/fields in the order-by clause must appear in the projection + * list + * 2) Expressions cannot be used in the order-by clause. Things like + * "order by column.field[2]". The [2] indicates an expression which is + * the third element in the field array. + * 3) Referencing a field within a json string in a column is more complicated. + * Most cases are supported. Some cases may not work. + * 4) If a query references more than one table, then use aliases for each + * column in the projection list, and reference these aliases in the + * order-by clause. Using aliases is a good practice in general when + * verifying an order-by clause. + * 5) The order-by clause cannot be followed by another SQL operation except + * for limit. See the code in getOrderByBlock to understand this comment + * better. If the order-by clause is followed by an offset or collate, + * for example, it might not work. For now, there are bugs related to + * offset, and collate is not supported, so this is probably not a major + * issue. + * + * This code assumes that the output of the query (result set) contains the + * columns in the order-by clause. If so, then these columns can be used to + * verify that the rows are presented in the order specified. If one or more + * order-by columns are not present in the result set, then it cannot be + * verified that the rows are in the correct order, and this part of the + * verification is skipped. * * @param filename * name of file of actual output @@ -414,13 +542,15 @@ public TestStatus verifyResultSetOrders(String filename, List columnLabels, Map orderByColumns) throws IOException, VerificationException, IllegalAccessException { loadFromFileToMap(filename, true); - Map columnIndexAndOrder = getColumnIndexAndOrder( - columnLabels, orderByColumns); + List columnIndexAndOrder = getColumnIndexAndOrderList( + columnLabels, orderByColumns, true); + // if one or more order-by columns is not present in the result set, + // then skip this part of the verification. if (columnIndexAndOrder == null) { LOG.debug("skipping order verification"); return TestStatus.PASS; } - if (!isOrdered(columnIndexAndOrder)) { + if (!isOrdered(columnIndexAndOrder, orderByColumns)) { LOG.info("\nOrder mismatch in actual result set."); return TestStatus.ORDER_MISMATCH; } @@ -429,24 +559,80 @@ public TestStatus verifyResultSetOrders(String filename, private Map getColumnIndexAndOrder( List columnLabels, Map orderByColumns) { - Map columnIndexAndOrder = new LinkedHashMap(); + List result = getColumnIndexAndOrderList(columnLabels, + orderByColumns, false); + if (result == null) { + return null; + } + // convert list to map + Map columnIndexAndOrderMap = new LinkedHashMap(); + for (IndexAndOrder index : result) { + columnIndexAndOrderMap.put(index.index, index.order); + } + return columnIndexAndOrderMap; + } + + /** + * Create a list that represents the order by columns. Each element in the + * list (IndexAndOrder structure) indicates the ordinal position of the + * column, and whether it is ordered in ascending or descending order. A + * column can appear more than once, especially if it contains a JSON string, + * and individual fields in the JSON string are being used in the order-by + * clause. + * checkForFields is set to true to enable the code to work with JSON strings. + * checkForFields is set to false to retain the original functionality to + * maintain backwards-compatibility. checkForFields is passed through to + * getIndicesOfOrderByColumns. + */ + private List getColumnIndexAndOrderList( + List columnLabels, Map orderByColumns, + boolean checkForFields) { + List columnIndexAndOrder = new ArrayList(); List indicesOfOrderByColumns = getIndicesOfOrderByColumns( - columnLabels, orderByColumns); + columnLabels, orderByColumns, checkForFields); if (indicesOfOrderByColumns == null) { return null; } for (int i = 0; i < indicesOfOrderByColumns.size(); i++) { - columnIndexAndOrder.put(indicesOfOrderByColumns.get(i), - orderByColumns.get(orderByColumns.keySet().toArray()[i])); + IndexAndOrder indexAndOrder = new IndexAndOrder(); + indexAndOrder.index = indicesOfOrderByColumns.get(i); + indexAndOrder.order = orderByColumns.get(orderByColumns.keySet().toArray()[i]); + columnIndexAndOrder.add(indexAndOrder); } return columnIndexAndOrder; } private List getIndicesOfOrderByColumns( List columnLabels, Map orderByColumns) { + return getIndicesOfOrderByColumns(columnLabels, orderByColumns, false); + } + + /** + * Create a list of integers that represents the order by columns. Each column + * is represented by its ordinal position. A column can appear more than once, + * especially if it contains a JSON string, and individual fields in the JSON + * string are being used in the order-by clause. + * Verify that each order by column is present in the result set (as indicated + * by columnLabels). + * checkForFields is set to true to enable the code to work with JSON strings. + * checkForFields is set to false to retain the original functionality to + * maintain backwards-compatibility + */ + private List getIndicesOfOrderByColumns( + List columnLabels, + Map orderByColumns, + boolean checkForFields) { List indices = new ArrayList(); for (Map.Entry entry : orderByColumns.entrySet()) { - int index = columnLabels.indexOf(entry.getKey()); + String entryKey = entry.getKey(); + if (checkForFields && (entryKey.indexOf('.') >= 0)) { + // column name has sub-field names. remove them. + entryKey = entryKey.substring(0, entryKey.indexOf('.')); + } + // verify that each order by column is present in the result set by + // checking columnLabels, which represents the columns in the result set. + // if an order by column is not in the result set, return null. + int index = columnLabels.indexOf(entryKey); if (index < 0) { return null; } @@ -456,13 +642,28 @@ private List getIndicesOfOrderByColumns( } private boolean isOrdered(Map columnIndexAndOrder) throws VerificationException { + // convert map to a list + List columnIndexAndOrderList = new ArrayList(); + for (Map.Entry entry : columnIndexAndOrder.entrySet()) { + IndexAndOrder indexAndOrder = new IndexAndOrder(); + indexAndOrder.index = entry.getKey(); + indexAndOrder.order = entry.getValue(); + columnIndexAndOrderList.add(indexAndOrder); + } + return isOrdered(columnIndexAndOrderList, null); + } + + private boolean isOrdered(List columnIndexAndOrder, + Map orderByColumns) + throws VerificationException { if (resultSet.size() <= 1) { return true; } ColumnList first = resultSet.get(0); for (int i = 1; i < resultSet.size(); i++) { ColumnList next = resultSet.get(i); - int compared = compareTo(first, next, columnIndexAndOrder, 0); + int compared = compareTo(first, next, columnIndexAndOrder, 0, + orderByColumns); if (compared <= 0) { first = next; continue; @@ -631,6 +832,13 @@ private static boolean matchAndCompareAll(String actual, String expected) { return true; } + /** + * Create a map containing the names of the columns in the order-by clause and + * whether they are being ordered in ascending or descending order. + * Remove the table name from the column name because the column name in the + * order-by clause will be compared with the column name in the resultSet + * returned by the Driver, and the Driver does not return table names. + */ public static Map getOrderByColumns(String statement, List columnLabels) { if (!isOrderByQuery(statement)) { @@ -650,10 +858,14 @@ public static Map getOrderByColumns(String statement, column = column.trim(); String[] columnOrder = column.split("\\s+"); String columnName = columnOrder[0].trim(); + String fieldName; if (columnName.indexOf('.') >= 0) { + // table name precedes column name. remove it columnName = columnName.substring(columnName.indexOf('.') + 1); } int ordinal = -1; + // if columnName is an ordinal position, then get the column name + // that it corresponds to. try { ordinal = Integer.parseInt(columnName); } catch (Exception e) { @@ -662,8 +874,11 @@ public static Map getOrderByColumns(String statement, columnName = columnLabels.get(ordinal - 1); } if (columnOrder.length == 2) { + // if ascending or descending has been specified, then + // store this in orderByColumns orderByColumns.put(columnName, columnOrder[1].trim()); } else { + // assume ascending orderByColumns.put(columnName, "asc"); } } diff --git a/framework/src/main/java/org/apache/drill/test/framework/Utils.java b/framework/src/main/java/org/apache/drill/test/framework/Utils.java index ade238c10..e8f3893cc 100755 --- a/framework/src/main/java/org/apache/drill/test/framework/Utils.java +++ b/framework/src/main/java/org/apache/drill/test/framework/Utils.java @@ -46,6 +46,7 @@ import java.util.regex.Pattern; import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; @@ -582,4 +583,25 @@ public static ResultSet execSQL(String sql, Connection connection) throws SQLExc throw e; } } + + /** + * Parses json string to get the node for a specified key + * + * @param jsonString + * json string to be parsed + * @param key + * key for the value to be determined + * @return JsonNode + * @throws IOException if jsonString is not valid + */ + public static JsonNode getJsonValue(String jsonString, String key) + throws IOException { + ObjectMapper oM = new ObjectMapper(); + JsonNode rootNode = oM.readTree(jsonString); + // key has format key1.key2.key3 + // change to format /key1/key2/key3 + String ptrExpr = "/" + key.replace(".","/"); + return rootNode.at(ptrExpr); + } + }