From 73650de2638404bfae09f699b2046636780dafd0 Mon Sep 17 00:00:00 2001 From: Andy Salnikov Date: Tue, 24 Sep 2024 18:04:55 -0700 Subject: [PATCH] Add type annotations to `parserLex` and `parserYacc` modules. --- .../queries/expressions/parser/parser.py | 2 +- .../queries/expressions/parser/parserLex.py | 50 +++++------ .../queries/expressions/parser/parserYacc.py | 87 ++++++++++--------- 3 files changed, 69 insertions(+), 70 deletions(-) diff --git a/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py b/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py index aa69e0f30a..a58cb5ff77 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py +++ b/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py @@ -28,7 +28,7 @@ from __future__ import annotations from .exprTree import Node -from .parserYacc import ParserYacc # type: ignore +from .parserYacc import ParserYacc def parse_expression(expression: str) -> Node | None: diff --git a/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py b/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py index b09303da7f..77fe302b8d 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py +++ b/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py @@ -25,35 +25,30 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -# type: ignore - -"""Module which defines PLY lexer for user expressions parsed by pre-flight. 
-""" +"""Module which defines PLY lexer for user expressions parsed by pre-flight.""" __all__ = ["ParserLex", "ParserLexError"] -# ------------------------------- -# Imports of standard modules -- -# ------------------------------- import re +from typing import Any, Protocol -# ----------------------------- -# Imports for other modules -- -# ----------------------------- from .ply import lex -# ---------------------------------- -# Local non-exported definitions -- -# ---------------------------------- - _RE_RANGE = r"(?P<start>-?\d+)\s*\.\.\s*(?P<stop>-?\d+)(\s*:\s*(?P<stride>[1-9]\d*))?" """Regular expression to match range literal in the form NUM..NUM[:NUM], this must match t_RANGE_LITERAL docstring. """ -# ------------------------ -# Exported definitions -- -# ------------------------ + +class LexToken(Protocol): + """Protocol for LexToken defined in ``ply.lex``.""" + + value: Any + type: str + lexer: Any + lexdata: str + lexpos: int + lineno: int class ParserLexError(Exception): @@ -72,7 +67,7 @@ class ParserLexError(Exception): Current line number in the expression. """ - def __init__(self, expression, remain, pos, lineno): + def __init__(self, expression: str, remain: str, pos: int, lineno: int): Exception.__init__(self, f"Unexpected character at position {pos}") self.expression = expression self.remain = remain @@ -84,7 +79,7 @@ class ParserLex: """Class which defines PLY lexer.""" @classmethod - def make_lexer(cls, reflags=0, **kwargs): + def make_lexer(cls, reflags: int = 0, **kwargs: Any) -> Any: """Return lexer. 
Parameters @@ -169,19 +164,19 @@ def make_lexer(cls, reflags=0, **kwargs): t_ignore = " \t" # Define a rule so we can track line numbers - def t_newline(self, t): + def t_newline(self, t: LexToken) -> None: r"""\n+""" t.lexer.lineno += len(t.value) # quoted string prefixed with 'T' - def t_TIME_LITERAL(self, t): + def t_TIME_LITERAL(self, t: LexToken) -> LexToken: """T'.*?'""" # strip quotes t.value = t.value[2:-1] return t # quoted string - def t_STRING_LITERAL(self, t): + def t_STRING_LITERAL(self, t: LexToken) -> LexToken: """'.*?'""" # strip quotes t.value = t.value[1:-1] @@ -189,8 +184,9 @@ def t_STRING_LITERAL(self, t): # range literal in format N..M[:S], spaces allowed, see _RE_RANGE @lex.TOKEN(_RE_RANGE) - def t_RANGE_LITERAL(self, t): + def t_RANGE_LITERAL(self, t: LexToken) -> LexToken: match = re.match(_RE_RANGE, t.value) + assert match is not None, "Guaranteed by tokenization" start = int(match.group("start")) stop = int(match.group("stop")) stride = match.group("stride") @@ -200,7 +196,7 @@ def t_RANGE_LITERAL(self, t): return t # numbers are used as strings by parser, do not convert - def t_NUMERIC_LITERAL(self, t): + def t_NUMERIC_LITERAL(self, t: LexToken) -> LexToken: r"""\d+(\.\d*)?(e[-+]?\d+)? # 1, 1., 1.1, 1e10, 1.1e-10, etc. | \.\d+(e[-+]?\d+)? 
# .1, .1e10, .1e+10 @@ -208,13 +204,13 @@ def t_NUMERIC_LITERAL(self, t): return t # qualified identifiers have one or two dots - def t_QUALIFIED_IDENTIFIER(self, t): + def t_QUALIFIED_IDENTIFIER(self, t: LexToken) -> LexToken: r"""[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*){1,2}""" t.type = "QUALIFIED_IDENTIFIER" return t # we only support ASCII in identifier names - def t_SIMPLE_IDENTIFIER(self, t): + def t_SIMPLE_IDENTIFIER(self, t: LexToken) -> LexToken: """[a-zA-Z_][a-zA-Z0-9_]*""" # Check for reserved words and make sure they are upper case reserved = self.reserved.get(t.value.upper()) @@ -225,7 +221,7 @@ def t_SIMPLE_IDENTIFIER(self, t): t.type = "SIMPLE_IDENTIFIER" return t - def t_error(self, t): + def t_error(self, t: LexToken) -> None: """Error handling rule""" lexer = t.lexer raise ParserLexError(lexer.lexdata, t.value, lexer.lexpos, lexer.lineno) diff --git a/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py b/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py index 9e0dacf9c9..3328497ad6 100644 --- a/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py +++ b/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py @@ -25,22 +25,15 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -# type: ignore - -"""Syntax definition for user expression parser. 
-""" +"""Syntax definition for user expression parser.""" __all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"] -# ------------------------------- -# Imports of standard modules -- -# ------------------------------- import re import warnings +from collections.abc import Mapping +from typing import Any, Protocol -# ----------------------------- -# Imports for other modules -- -# ----------------------------- import astropy.time # As of astropy 4.2, the erfa interface is shipped independently and @@ -54,6 +47,7 @@ BinaryOp, Identifier, IsIn, + Node, NumericLiteral, Parens, RangeLiteral, @@ -63,12 +57,21 @@ UnaryOp, function_call, ) -from .parserLex import ParserLex +from .parserLex import LexToken, ParserLex from .ply import yacc -# ---------------------------------- -# Local non-exported definitions -- -# ---------------------------------- + +class YaccProduction(Protocol): + """Protocol for YaccProduction defined in ``ply.yacc``.""" + + lexer: Any + + def __getitem__(self, n: int) -> Any: ... + def __setitem__(self, n: int, v: Any) -> None: ... + def __len__(self) -> int: ... + def lineno(self, n: int) -> int: ... + def lexpos(self, n: int) -> int: ... + # The purpose of this regex is to guess time format if it is not explicitly # provided in the string itself @@ -91,7 +94,7 @@ ) -def _parseTimeString(time_str): +def _parseTimeString(time_str: str) -> astropy.time.Time: """Try to convert time string into astropy.Time. Parameters @@ -212,7 +215,7 @@ class ParseError(ParserYaccError): Parsing position in current line, 0-based. 
""" - def __init__(self, expression, token, pos, lineno): + def __init__(self, expression: str, token: str, pos: int, lineno: int): self.expression = expression self.token = token self.pos = pos @@ -222,7 +225,7 @@ def __init__(self, expression, token, pos, lineno): msg = msg.format(token, lineno, self.posInLine + 1) ParserYaccError.__init__(self, msg) - def _posInLine(self): + def _posInLine(self) -> int: """Return position in current line""" lines = self.expression.split("\n") pos = self.pos @@ -235,7 +238,7 @@ def _posInLine(self): class ParserEOFError(ParserYaccError): """Exception raised for EOF-during-parser.""" - def __init__(self): + def __init__(self) -> None: Exception.__init__(self, "End of input reached while expecting further input") @@ -257,14 +260,14 @@ class ParserYacc: Optional keyword arguments that are passed to `yacc.yacc` constructor. """ - def __init__(self, idMap=None, **kwargs): + def __init__(self, idMap: Mapping[str, Node] | None = None, **kwargs: Any): kw = dict(write_tables=0, debug=False) kw.update(kwargs) self.parser = yacc.yacc(module=self, **kw) self._idMap = idMap or {} - def parse(self, input, lexer=None, debug=False, tracking=False): + def parse(self, input: str, lexer: Any = None, debug: bool = False, tracking: bool = False) -> Node: """Parse input expression ad return parsed tree object. 
This is a trivial wrapper for yacc.LRParser.parse method which @@ -302,17 +305,17 @@ def parse(self, input, lexer=None, debug=False, tracking=False): ) # this is the starting rule - def p_input(self, p): + def p_input(self, p: YaccProduction) -> None: """input : expr | empty """ p[0] = p[1] - def p_empty(self, p): + def p_empty(self, p: YaccProduction) -> None: """empty :""" p[0] = None - def p_expr(self, p): + def p_expr(self, p: YaccProduction) -> None: """expr : expr OR expr | expr AND expr | NOT expr @@ -325,7 +328,7 @@ def p_expr(self, p): else: p[0] = p[1] - def p_bool_primary(self, p): + def p_bool_primary(self, p: YaccProduction) -> None: """bool_primary : bool_primary EQ predicate | bool_primary NE predicate | bool_primary LT predicate @@ -340,7 +343,7 @@ def p_bool_primary(self, p): else: p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3]) - def p_predicate(self, p): + def p_predicate(self, p: YaccProduction) -> None: """predicate : bit_expr IN LPAREN literal_or_id_list RPAREN | bit_expr NOT IN LPAREN literal_or_id_list RPAREN | bit_expr @@ -352,7 +355,7 @@ def p_predicate(self, p): else: p[0] = p[1] - def p_identifier(self, p): + def p_identifier(self, p: YaccProduction) -> None: """identifier : SIMPLE_IDENTIFIER | QUALIFIED_IDENTIFIER """ @@ -361,7 +364,7 @@ def p_identifier(self, p): node = Identifier(p[1]) p[0] = node - def p_literal_or_id_list(self, p): + def p_literal_or_id_list(self, p: YaccProduction) -> None: """literal_or_id_list : literal_or_id_list COMMA literal | literal_or_id_list COMMA identifier | literal @@ -372,7 +375,7 @@ def p_literal_or_id_list(self, p): else: p[0] = p[1] + [p[3]] - def p_bit_expr(self, p): + def p_bit_expr(self, p: YaccProduction) -> None: """bit_expr : bit_expr ADD bit_expr | bit_expr SUB bit_expr | bit_expr MUL bit_expr @@ -385,49 +388,49 @@ def p_bit_expr(self, p): else: p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3]) - def p_simple_expr_lit(self, p): + def p_simple_expr_lit(self, p: YaccProduction) -> None: """simple_expr 
: literal""" p[0] = p[1] - def p_simple_expr_id(self, p): + def p_simple_expr_id(self, p: YaccProduction) -> None: """simple_expr : identifier""" p[0] = p[1] - def p_simple_expr_function_call(self, p): + def p_simple_expr_function_call(self, p: YaccProduction) -> None: """simple_expr : function_call""" p[0] = p[1] - def p_simple_expr_unary(self, p): + def p_simple_expr_unary(self, p: YaccProduction) -> None: """simple_expr : ADD simple_expr %prec UPLUS | SUB simple_expr %prec UMINUS """ p[0] = UnaryOp(op=p[1], operand=p[2]) - def p_simple_expr_paren(self, p): + def p_simple_expr_paren(self, p: YaccProduction) -> None: """simple_expr : LPAREN expr RPAREN""" p[0] = Parens(p[2]) - def p_simple_expr_tuple(self, p): + def p_simple_expr_tuple(self, p: YaccProduction) -> None: """simple_expr : LPAREN expr COMMA expr RPAREN""" # For now we only support tuples with two items, # these are used for time ranges. p[0] = TupleNode((p[2], p[4])) - def p_literal_num(self, p): + def p_literal_num(self, p: YaccProduction) -> None: """literal : NUMERIC_LITERAL""" p[0] = NumericLiteral(p[1]) - def p_literal_num_signed(self, p): + def p_literal_num_signed(self, p: YaccProduction) -> None: """literal : ADD NUMERIC_LITERAL %prec UPLUS | SUB NUMERIC_LITERAL %prec UMINUS """ p[0] = NumericLiteral(p[1] + p[2]) - def p_literal_str(self, p): + def p_literal_str(self, p: YaccProduction) -> None: """literal : STRING_LITERAL""" p[0] = StringLiteral(p[1]) - def p_literal_time(self, p): + def p_literal_time(self, p: YaccProduction) -> None: """literal : TIME_LITERAL""" try: value = _parseTimeString(p[1]) @@ -435,17 +438,17 @@ def p_literal_time(self, p): raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1)) from e p[0] = TimeLiteral(value) - def p_literal_range(self, p): + def p_literal_range(self, p: YaccProduction) -> None: """literal : RANGE_LITERAL""" # RANGE_LITERAL value is tuple of three numbers start, stop, stride = p[1] p[0] = RangeLiteral(start, stop, stride) - def 
p_function_call(self, p): + def p_function_call(self, p: YaccProduction) -> None: """function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN""" p[0] = function_call(p[1], p[3]) - def p_expr_list(self, p): + def p_expr_list(self, p: YaccProduction) -> None: """expr_list : expr_list COMMA expr | expr | empty @@ -461,7 +464,7 @@ def p_expr_list(self, p): # ---------- end of all grammar rules ---------- # Error rule for syntax errors - def p_error(self, p): + def p_error(self, p: LexToken | None) -> None: if p is None: raise ParserEOFError() else: