Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support standard UNION/INTERSECT syntax #248

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ $(SRCPARSER)/bison_parser.o: $(SRCPARSER)/bison_parser.cpp
$(CXX) $(LIB_CFLAGS) -c -o $@ $< -Wno-unused-but-set-variable

%.o: %.cpp $(PARSER_CPP) $(LIB_H)
$(CXX) $(LIB_CFLAGS) -c -o $@ $<
$(CXX) $(LIB_CFLAGS) $(CXXFLAGS) -c -o $@ $<
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't use additional flags in our base repo builds. Unless there's a reason to have them here, I'd ask you to remove them.

Copy link
Author

@dinodork dinodork Sep 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TL;DR: CFLAGS/CXXFLAGS is a Makefile convention, for example for a program

https://en.wikipedia.org/wiki/CFLAGS
https://www.gnu.org/software/make/manual/make.html#index-CFLAGS

#include <iostream>

// Demo program: prints "Foo" only when the FOO macro is defined at compile
// time (e.g. by passing -DFOO to the compiler); otherwise it prints nothing.
int main() {
#ifdef FOO
  std::cout << "Foo\n";
#endif
}

and, using make's default rule:

$ CXXFLAGS=-DFOO make -B cflagtest && ./cflagtest 
g++ -DFOO    cflagtest.cpp   -o cflagtest
Foo

It's not that common to have hand-written Makefiles that call other hand-written Makefiles - you normally let a build system such as CMake or meson do that for you - but when you do, you need to make sure you follow this convention and pass along CFLAGS/CXXFLAGS. You can of course choose your own names for them if you want, but that would be confusing IMHO.


$(SRCPARSER)/bison_parser.cpp: $(SRCPARSER)/bison_parser.y
$(GMAKE) -C $(SRCPARSER)/ bison_parser.cpp
Expand Down Expand Up @@ -140,7 +140,7 @@ test: $(TEST_BUILD)

$(TEST_BUILD): $(TEST_ALL) $(LIB_BUILD)
@mkdir -p $(BIN)/
$(CXX) $(TEST_CFLAGS) $(TEST_CPP) -o $(TEST_BUILD) -lsqlparser -lstdc++
$(CXX) $(CXXFLAGS) $(TEST_CFLAGS) $(TEST_CPP) -o $(TEST_BUILD) -lsqlparser -lstdc++
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(see above)


test_example:
$(GMAKE) -C example/
Expand Down
3,418 changes: 1,721 additions & 1,697 deletions src/parser/bison_parser.cpp

Large diffs are not rendered by default.

401 changes: 200 additions & 201 deletions src/parser/bison_parser.h

Large diffs are not rendered by default.

172 changes: 109 additions & 63 deletions src/parser/bison_parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@
// %output "bison_parser.cpp"
// %defines "bison_parser.h"

%expect 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this do?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://www.gnu.org/software/bison/manual/html_node/Expect-Decl.html

In other words, Bison bails out with an error if there's a shift/reduce conflict.


// Tell bison to create a reentrant parser
%define api.pure full

Expand Down Expand Up @@ -232,7 +234,8 @@
%type <exec_stmt> execute_statement
%type <transaction_stmt> transaction_statement
%type <prep_stmt> prepare_statement
%type <select_stmt> select_statement select_with_paren select_no_paren select_clause select_within_set_operation select_within_set_operation_no_parentheses
%type <select_stmt> query_expression query_expression_body query_expression_parens query_term
query_primary select_statement subquery
%type <import_stmt> import_statement
%type <export_stmt> export_statement
%type <create_stmt> create_statement
Expand Down Expand Up @@ -290,7 +293,7 @@
%type <expr_vec> expr_list select_list opt_extended_literal_list extended_literal_list hint_list opt_hints opt_partition
%type <table_vec> table_ref_commalist
%type <order_vec> opt_order order_list
%type <with_description_vec> opt_with_clause with_clause with_description_list
%type <with_description_vec> with_clause with_description_list
%type <update_vec> update_clause_commalist
%type <table_element_vec> table_elem_commalist
%type <locking_clause_vec> opt_locking_clause_list opt_locking_clause
Expand All @@ -316,6 +319,7 @@
/* Unary Operators */
%right UMINUS
%left '[' ']'
%left SUBQUERY_AS_EXPR
%left '(' ')'
%left '.'
%left JOIN
Expand Down Expand Up @@ -478,9 +482,7 @@ file_type : IDENTIFIER {
free($1);
};

file_path : STRING {
$$ = $1;
};
file_path : STRING { $$ = $1; };

opt_import_export_options : WITH '(' import_export_options ')' { $$ = $3; }
| '(' import_export_options ')' { $$ = $2; }
Expand Down Expand Up @@ -530,7 +532,7 @@ export_statement : COPY table_name TO file_path opt_import_export_options {
}
delete $5;
}
| COPY select_with_paren TO file_path opt_import_export_options {
| COPY subquery TO file_path opt_import_export_options {
$$ = new ExportStatement($5->format);
$$->filePath = $4;
$$->select = $2;
Expand Down Expand Up @@ -763,7 +765,7 @@ insert_statement : INSERT INTO table_name opt_column_list VALUES '(' extended_li
$$->columns = $4;
$$->values = $7;
}
| INSERT INTO table_name opt_column_list select_no_paren {
| INSERT INTO table_name opt_column_list query_term {
$$ = new InsertStatement(kInsertSelect);
$$->schema = $3.schema;
$$->tableName = $3.name;
Expand Down Expand Up @@ -804,69 +806,109 @@ update_clause : IDENTIFIER '=' expr {
/******************************
* Select Statement
******************************/
select_statement : query_expression | query_expression_parens;

select_statement : opt_with_clause select_with_paren {
$$ = $2;
$$->withDescriptions = $1;
}
| opt_with_clause select_no_paren {
$$ = $2;
$$->withDescriptions = $1;
}
| opt_with_clause select_with_paren set_operator select_within_set_operation opt_order opt_limit {
$$ = $2;
if ($$->setOperations == nullptr) {
$$->setOperations = new std::vector<SetOperation*>();
}
$$->setOperations->push_back($3);
$$->setOperations->back()->nestedSelectStatement = $4;
$$->setOperations->back()->resultOrder = $5;
$$->setOperations->back()->resultLimit = $6;
$$->withDescriptions = $1;
};
query_expression : query_expression_body opt_order opt_limit opt_locking_clause {
if ($1->setOperations == nullptr) {
$1->order = $2;

select_within_set_operation : select_with_paren | select_within_set_operation_no_parentheses;
// Limit could have been set by TOP.
if ($3 != nullptr) {
delete $1->limit;
$1->limit = $3;
}

if ($4 != nullptr) {
$1->lockings = $4;
}
} else {
$1->setOperations->back()->resultOrder = $2;
$1->setOperations->back()->resultLimit = $3;
}

select_within_set_operation_no_parentheses : select_clause { $$ = $1; }
| select_clause set_operator select_within_set_operation {
$$ = $1;
if ($$->setOperations == nullptr) {
$$->setOperations = new std::vector<SetOperation*>();
}
| with_clause query_expression_body opt_order opt_limit opt_locking_clause {
$2->withDescriptions = $1;
if ($2->setOperations == nullptr) {
$2->order = $3;

// Limit could have been set by TOP.
if ($4 != nullptr) {
delete $2->limit;
$2->limit = $4;
}

if ($5 != nullptr) {
$2->lockings = $5;
}
} else {
$2->setOperations->back()->resultOrder = $3;
$2->setOperations->back()->resultLimit = $4;
}
$$->setOperations->push_back($2);
$$->setOperations->back()->nestedSelectStatement = $3;
};

select_with_paren : '(' select_no_paren ')' { $$ = $2; }
| '(' select_with_paren ')' { $$ = $2; };
$$ = $2;
}

select_no_paren : select_clause opt_order opt_limit opt_locking_clause {
/*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

knit-picky: We either use C-style comments with an asterisk * on each line or C++-style single line comments.

Copy link
Author

@dinodork dinodork Sep 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

*nit-pick ;)

Done.

* The SQL standard defines this rule as left-recursive; however, the parse
* tree structure is right-recursive. To overcome this impedance mismatch, we
* have to build the parse tree top-down, by running down the tree to the
* deepest node and adding the set operation there.
*/
query_expression_body : query_term | query_expression_body set_operator query_term {
$$ = $1;
$$->order = $2;
auto* setOperations = &$1->setOperations;
while (*setOperations != nullptr) {
setOperations = &(*setOperations)->back()->nestedSelectStatement->setOperations;
}
$2->nestedSelectStatement = $3;
*setOperations = new std::vector<SetOperation*>({$2});

// Limit could have been set by TOP.
if ($3) {
delete $$->limit;
$$->limit = $3;
$$ = $1;
}
| query_expression_parens set_operator query_term {
$$ = $1;
auto* setOperations = &$1->setOperations;
while (*setOperations != nullptr) {
setOperations = &(*setOperations)->back()->nestedSelectStatement->setOperations;
}
$2->nestedSelectStatement = $3;
*setOperations = new std::vector<SetOperation*>({$2});

if ($4) {
$$->lockings = $4;
$$ = $1;
}
| query_expression_body set_operator query_expression_parens {
$$ = $1;
auto* setOperations = &$1->setOperations;
while (*setOperations != nullptr) {
setOperations = &(*setOperations)->back()->nestedSelectStatement->setOperations;
}
$2->nestedSelectStatement = $3;
*setOperations = new std::vector<SetOperation*>({$2});

$$ = $1;
}
| select_clause set_operator select_within_set_operation opt_order opt_limit opt_locking_clause {
| query_expression_parens set_operator query_expression_parens {
$$ = $1;
if ($$->setOperations == nullptr) {
$$->setOperations = new std::vector<SetOperation*>();
auto* setOperations = &$1->setOperations;
while (*setOperations != nullptr) {
setOperations = &(*setOperations)->back()->nestedSelectStatement->setOperations;
}
$$->setOperations->push_back($2);
$$->setOperations->back()->nestedSelectStatement = $3;
$$->setOperations->back()->resultOrder = $4;
$$->setOperations->back()->resultLimit = $5;
$$->lockings = $6;
$2->nestedSelectStatement = $3;
*setOperations = new std::vector<SetOperation*>({$2});

$$ = $1;
};

set_operator : set_type opt_all {
query_expression_parens : '(' query_expression_parens ')' { $$ = $2; }
| '(' query_expression ')' { $$ = $2; };

query_term : query_primary;

subquery : query_expression_parens %prec SUBQUERY_AS_EXPR

set_operator : set_type opt_all {
$$ = $1;
$$->isAll = $2;
};
Expand All @@ -887,7 +929,7 @@ set_type : UNION {
opt_all : ALL { $$ = true; }
| /* empty */ { $$ = false; };

select_clause : SELECT opt_top opt_distinct select_list opt_from_clause opt_where opt_group {
query_primary : SELECT opt_top opt_distinct select_list opt_from_clause opt_where opt_group {
$$ = new SelectStatement();
$$->limit = $2;
$$->selectDistinct = $3;
Expand Down Expand Up @@ -1003,8 +1045,8 @@ expr : operand | between_expr | logic_expr | exists_expr | in_expr;

operand : '(' expr ')' { $$ = $2; }
| array_index | scalar_expr | unary_expr | binary_expr | case_expr | function_expr | extract_expr | cast_expr |
array_expr | '(' select_no_paren ')' {
$$ = Expr::makeSelect($2);
array_expr | subquery {
$$ = Expr::makeSelect($1);
};

scalar_expr : column_name | literal;
Expand All @@ -1031,8 +1073,8 @@ logic_expr : expr AND expr { $$ = Expr::makeOpBinary($1, kOpAnd, $3); }

in_expr : operand IN '(' expr_list ')' { $$ = Expr::makeInOperator($1, $4); }
| operand NOT IN '(' expr_list ')' { $$ = Expr::makeOpUnary(kOpNot, Expr::makeInOperator($1, $5)); }
| operand IN '(' select_no_paren ')' { $$ = Expr::makeInOperator($1, $4); }
| operand NOT IN '(' select_no_paren ')' { $$ = Expr::makeOpUnary(kOpNot, Expr::makeInOperator($1, $5)); };
| operand IN '(' query_primary ')' { $$ = Expr::makeInOperator($1, $4); }
| operand NOT IN '(' query_primary ')' { $$ = Expr::makeOpUnary(kOpNot, Expr::makeInOperator($1, $5)); };

// CASE grammar based on: flex & bison by John Levine
// https://www.safaribooksonline.com/library/view/flex-bison/9780596805418/ch04.html#id352665
Expand All @@ -1044,8 +1086,8 @@ case_expr : CASE expr case_list END { $$ = Expr::makeCase($2, $3, nullptr); }
case_list : WHEN expr THEN expr { $$ = Expr::makeCaseList(Expr::makeCaseListElement($2, $4)); }
| case_list WHEN expr THEN expr { $$ = Expr::caseListAppend($1, Expr::makeCaseListElement($3, $5)); };

exists_expr : EXISTS '(' select_no_paren ')' { $$ = Expr::makeExists($3); }
| NOT EXISTS '(' select_no_paren ')' { $$ = Expr::makeOpUnary(kOpNot, Expr::makeExists($4)); };
exists_expr : EXISTS '(' query_primary ')' { $$ = Expr::makeExists($3); }
| NOT EXISTS '(' query_primary ')' { $$ = Expr::makeOpUnary(kOpNot, Expr::makeExists($4)); };

comp_expr : operand '=' operand { $$ = Expr::makeOpBinary($1, kOpEquals, $3); }
| operand EQUALS operand { $$ = Expr::makeOpBinary($1, kOpEquals, $3); }
Expand Down Expand Up @@ -1298,8 +1340,6 @@ opt_row_lock_policy : SKIP LOCKED { $$ = RowLockWaitPolicy::SkipLocked; }
* With Descriptions
******************************/

opt_with_clause : with_clause | /* empty */ { $$ = nullptr; };

with_clause : WITH with_description_list { $$ = $2; };

with_description_list : with_description {
Expand All @@ -1311,7 +1351,13 @@ with_description_list : with_description {
$$ = $1;
};

with_description : IDENTIFIER AS select_with_paren {
with_description : IDENTIFIER AS subquery {
if ($3->withDescriptions != nullptr) {
free($1);
delete $3;
yyerror(&yyloc, result, scanner, "Nested CTE is not allowed.");
YYERROR;
}
$$ = new WithDescription();
$$->alias = $1;
$$->select = $3;
Expand Down
Loading