Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Snowflake column aliases that use SQL keywords #1632

Merged
merged 3 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,20 @@ pub trait Dialect: Debug + Any {
fn supports_set_stmt_without_operator(&self) -> bool {
false
}

/// Decides whether the keyword `kw` may be used as the alias of a select list item.
///
/// `explicit` is true when the keyword was preceded by an `AS` word; in that case
/// the keyword is always accepted. Otherwise it is accepted only when it is not
/// listed in `keywords::RESERVED_FOR_COLUMN_ALIAS`.
///
/// The parser is supplied so that dialect overrides can look ahead if needed; this
/// default implementation does not use it.
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
    if explicit {
        return true;
    }
    !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
}

/// Decides whether the keyword `kw` may be used as the alias of a table factor.
///
/// `explicit` is true when the keyword was preceded by an `AS` word; in that case
/// the keyword is always accepted. Otherwise it is accepted only when it is not
/// listed in `keywords::RESERVED_FOR_TABLE_ALIAS`.
///
/// The parser is supplied so that dialect overrides can look ahead if needed; this
/// default implementation does not use it.
fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
    if explicit {
        return true;
    }
    !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw)
}
}

/// This represents the operators for which precedence must be defined
Expand Down
45 changes: 45 additions & 0 deletions src/dialect/snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,51 @@ impl Dialect for SnowflakeDialect {
fn supports_partiql(&self) -> bool {
true
}

/// Snowflake-specific decision on whether `kw` is a select item alias.
///
/// A keyword that follows an explicit `AS` is always an alias. Without `AS`, the
/// answer depends on the keyword and, for some keywords, on the token that the
/// parser would read next.
fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
    if explicit {
        return true;
    }

    match kw {
        // These keywords are an alias only when nothing follows that could give
        // them another meaning, i.e. when the next token is a comma or the end
        // of the input. Counter-examples (not aliases):
        //   `SELECT * EXCEPT (col1) FROM tbl`
        //   `SELECT 1 LIMIT 5`
        //   `SELECT 1 OFFSET 5 ROWS`
        //   `INSERT INTO t SELECT 1 RETURNING *`
        Keyword::EXCEPT | Keyword::LIMIT | Keyword::OFFSET | Keyword::RETURNING => {
            matches!(parser.peek_token_ref().token, Token::Comma | Token::EOF)
        }

        // `FETCH` is an alias unless it is followed by `FIRST` or `NEXT`, which
        // give it a different meaning, e.g. `SELECT 1 FETCH FIRST 10 ROWS`.
        Keyword::FETCH => {
            !(parser.peek_keyword(Keyword::FIRST) || parser.peek_keyword(Keyword::NEXT))
        }

        // Keywords reserved by the Snowflake dialect, which appears to be less
        // strict than `keywords::RESERVED_FOR_COLUMN_ALIAS`. Each of them was
        // tested with the statement `SELECT 1 <KW>`.
        Keyword::FROM
        | Keyword::GROUP
        | Keyword::HAVING
        | Keyword::INTERSECT
        | Keyword::INTO
        | Keyword::MINUS
        | Keyword::ORDER
        | Keyword::SELECT
        | Keyword::UNION
        | Keyword::WHERE
        | Keyword::WITH => false,

        // Every other word is accepted as an alias.
        _ => true,
    }
}
}

fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result<Statement, ParserError> {
Expand Down
131 changes: 75 additions & 56 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8837,38 +8837,76 @@ impl<'a> Parser<'a> {
Ok(IdentWithAlias { ident, alias })
}

/// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword)
/// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`,
/// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar`
/// Parses the alias of a select list item, if one is present.
///
/// Whether a word is accepted as the alias is decided by the dialect's
/// `is_select_item_alias`; no fixed list of reserved keywords is passed.
fn maybe_parse_select_item_alias(&mut self) -> Result<Option<Ident>, ParserError> {
    // Forwards the decision to the dialect of the parser doing the parsing.
    fn dialect_accepts(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
        parser.dialect.is_select_item_alias(explicit, kw, parser)
    }

    self.parse_optional_alias_inner(None, dialect_accepts)
}

/// Parses the alias of a table, if one is present, as in
/// `... FROM generate_series(1, 10) AS t (col)`.
///
/// Besides naming the table itself, such an alias may optionally name the table's
/// columns. Whether a word is accepted as the alias is decided by the dialect's
/// `is_table_factor_alias`.
pub fn maybe_parse_table_alias(&mut self) -> Result<Option<TableAlias>, ParserError> {
    // Forwards the decision to the dialect of the parser doing the parsing.
    fn dialect_accepts(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
        parser.dialect.is_table_factor_alias(explicit, kw, parser)
    }

    let Some(name) = self.parse_optional_alias_inner(None, dialect_accepts)? else {
        return Ok(None);
    };

    // The column definitions are only looked for once an alias name was found.
    let columns = self.parse_table_alias_column_defs()?;
    Ok(Some(TableAlias { name, columns }))
}

/// Backwards-compatible wrapper around `parse_optional_alias_inner` that is driven
/// by a caller-supplied list of reserved keywords.
///
/// New flows should prefer the context-specific methods such as
/// `maybe_parse_select_item_alias` and `maybe_parse_table_alias`.
pub fn parse_optional_alias(
    &mut self,
    reserved_kwds: &[Keyword],
) -> Result<Option<Ident>, ParserError> {
    // The validator never accepts a word, which leaves the decision to the
    // reserved keyword list handed to the inner parser.
    fn reject_all(_explicit: bool, _kw: &Keyword, _parser: &mut Parser) -> bool {
        false
    }

    let reserved = Some(reserved_kwds);
    self.parse_optional_alias_inner(reserved, reject_all)
}

/// Parses an optional alias after a SQL element such as a select list item
/// or a table name.
///
/// This method accepts an optional list of reserved keywords or a function
/// to call to validate if a keyword should be parsed as an alias, to allow
/// callers to customize the parsing logic based on their context.
fn parse_optional_alias_inner<F>(
&mut self,
reserved_kwds: Option<&[Keyword]>,
validator: F,
) -> Result<Option<Ident>, ParserError>
where
F: Fn(bool, &Keyword, &mut Parser) -> bool,
{
let after_as = self.parse_keyword(Keyword::AS);

let next_token = self.next_token();
match next_token.token {
// Accept any identifier after `AS` (though many dialects have restrictions on
// keywords that may appear here). If there's no `AS`: don't parse keywords,
// which may start a construct allowed in this position, to be parsed as aliases.
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
// not an alias.)
Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
// By default, if a word is located after the `AS` keyword we consider it an alias
// as long as it's not reserved.
Token::Word(w)
if after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword)) =>
{
Ok(Some(w.into_ident(next_token.span)))
}
// MSSQL supports single-quoted strings as aliases for columns
// We accept them as table aliases too, although MSSQL does not.
//
// Note, that this conflicts with an obscure rule from the SQL
// standard, which we don't implement:
// https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
// "[Obscure Rule] SQL allows you to break a long <character
// string literal> up into two or more smaller <character string
// literal>s, split by a <separator> that includes a newline
// character. When it sees such a <literal>, your DBMS will
// ignore the <separator> and treat the multiple strings as
// a single <literal>."
// This pattern allows for customizing the acceptance of words as aliases based on the caller's
// context, such as to what SQL element this word is a potential alias of (select item alias, table name
// alias, etc.) or dialect-specific logic that goes beyond a simple list of reserved keywords.
Token::Word(w) if validator(after_as, &w.keyword, self) => {
Ok(Some(w.into_ident(next_token.span)))
}
// For backwards-compatibility, we accept quoted strings as aliases regardless of the context.
Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))),
// Support for MySql dialect double-quoted string, `AS "HOUR"` for example
Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))),
_ => {
if after_as {
Expand All @@ -8880,23 +8918,6 @@ impl<'a> Parser<'a> {
}
}

/// Parses an optional alias for a table-valued object, as in
/// `... FROM generate_series(1, 10) AS t (col)`.
///
/// In addition to naming the table itself, the alias may optionally name the
/// columns of the table. `reserved_kwds` is forwarded to `parse_optional_alias`.
pub fn parse_optional_table_alias(
    &mut self,
    reserved_kwds: &[Keyword],
) -> Result<Option<TableAlias>, ParserError> {
    self.parse_optional_alias(reserved_kwds)?
        .map(|name| {
            // Column definitions are only parsed once an alias name was found.
            self.parse_table_alias_column_defs()
                .map(|columns| TableAlias { name, columns })
        })
        .transpose()
}

pub fn parse_optional_group_by(&mut self) -> Result<Option<GroupByExpr>, ParserError> {
if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) {
let expressions = if self.parse_keyword(Keyword::ALL) {
Expand Down Expand Up @@ -10898,7 +10919,7 @@ impl<'a> Parser<'a> {
let name = self.parse_object_name(false)?;
self.expect_token(&Token::LParen)?;
let args = self.parse_optional_args()?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::Function {
lateral: true,
name,
Expand All @@ -10911,7 +10932,7 @@ impl<'a> Parser<'a> {
self.expect_token(&Token::LParen)?;
let expr = self.parse_expr()?;
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::TableFunction { expr, alias })
} else if self.consume_token(&Token::LParen) {
// A left paren introduces either a derived table (i.e., a subquery)
Expand Down Expand Up @@ -10960,7 +10981,7 @@ impl<'a> Parser<'a> {
#[allow(clippy::if_same_then_else)]
if !table_and_joins.joins.is_empty() {
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::NestedJoin {
table_with_joins: Box::new(table_and_joins),
alias,
Expand All @@ -10973,7 +10994,7 @@ impl<'a> Parser<'a> {
// (B): `table_and_joins` (what we found inside the parentheses)
// is a nested join `(foo JOIN bar)`, not followed by other joins.
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::NestedJoin {
table_with_joins: Box::new(table_and_joins),
alias,
Expand All @@ -10987,9 +11008,7 @@ impl<'a> Parser<'a> {
// [AS alias])`) as well.
self.expect_token(&Token::RParen)?;

if let Some(outer_alias) =
self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?
{
if let Some(outer_alias) = self.maybe_parse_table_alias()? {
// Snowflake also allows specifying an alias *after* parens
// e.g. `FROM (mytable) AS alias`
match &mut table_and_joins.relation {
Expand Down Expand Up @@ -11042,7 +11061,7 @@ impl<'a> Parser<'a> {
// SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2)
// where there are no parentheses around the VALUES clause.
let values = SetExpr::Values(self.parse_values(false)?);
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::Derived {
lateral: false,
subquery: Box::new(Query {
Expand All @@ -11068,7 +11087,7 @@ impl<'a> Parser<'a> {
self.expect_token(&Token::RParen)?;

let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]);
let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) {
let alias = match self.maybe_parse_table_alias() {
Ok(Some(alias)) => Some(alias),
Ok(None) => None,
Err(e) => return Err(e),
Expand Down Expand Up @@ -11105,7 +11124,7 @@ impl<'a> Parser<'a> {
let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?;
self.expect_token(&Token::RParen)?;
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::JsonTable {
json_expr,
json_path,
Expand Down Expand Up @@ -11150,7 +11169,7 @@ impl<'a> Parser<'a> {
}
}

let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;

// MSSQL-specific table hints:
let mut with_hints = vec![];
Expand Down Expand Up @@ -11328,7 +11347,7 @@ impl<'a> Parser<'a> {
} else {
Vec::new()
};
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::OpenJsonTable {
json_expr,
json_path,
Expand Down Expand Up @@ -11427,7 +11446,7 @@ impl<'a> Parser<'a> {

self.expect_token(&Token::RParen)?;

let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;

Ok(TableFactor::MatchRecognize {
table: Box::new(table),
Expand Down Expand Up @@ -11671,7 +11690,7 @@ impl<'a> Parser<'a> {
) -> Result<TableFactor, ParserError> {
let subquery = self.parse_query()?;
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::Derived {
lateral: match lateral {
Lateral => true,
Expand Down Expand Up @@ -11765,7 +11784,7 @@ impl<'a> Parser<'a> {
};

self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::Pivot {
table: Box::new(table),
aggregate_functions,
Expand All @@ -11787,7 +11806,7 @@ impl<'a> Parser<'a> {
self.expect_keyword_is(Keyword::IN)?;
let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
self.expect_token(&Token::RParen)?;
let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
let alias = self.maybe_parse_table_alias()?;
Ok(TableFactor::Unpivot {
table: Box::new(table),
value,
Expand Down Expand Up @@ -12613,7 +12632,7 @@ impl<'a> Parser<'a> {
})
}
expr => self
.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)
.maybe_parse_select_item_alias()
.map(|alias| match alias {
Some(alias) => SelectItem::ExprWithAlias { expr, alias },
None => SelectItem::UnnamedExpr(expr),
Expand Down
29 changes: 29 additions & 0 deletions tests/sqlparser_snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3022,3 +3022,32 @@ fn parse_ls_and_rm() {

snowflake().verified_stmt(r#"LIST @"STAGE_WITH_QUOTES""#);
}

#[test]
fn test_sql_keywords_as_select_item_aliases() {
    // Keywords that must be accepted as an implicit alias: `SELECT 1 <KW>` has to
    // parse and serialize with an explicit `AS`.
    for kw in ["CLUSTER", "FETCH", "RETURNING", "LIMIT", "EXCEPT"] {
        let sql = format!("SELECT 1 {kw}");
        let canonical = format!("SELECT 1 AS {kw}");
        snowflake().one_statement_parses_to(&sql, &canonical);
    }

    // Keywords that must not be accepted as an alias: `SELECT 1 <KW>` has to be
    // rejected by the parser.
    for kw in [
        "FROM",
        "GROUP",
        "HAVING",
        "INTERSECT",
        "INTO",
        "ORDER",
        "SELECT",
        "UNION",
        "WHERE",
        "WITH",
    ] {
        let sql = format!("SELECT 1 {kw}");
        assert!(snowflake().parse_sql_statements(&sql).is_err());
    }
}
Loading