diff --git a/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py b/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py
index aa69e0f30a..a58cb5ff77 100644
--- a/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py
+++ b/python/lsst/daf/butler/registry/queries/expressions/parser/parser.py
@@ -28,7 +28,7 @@
from __future__ import annotations
from .exprTree import Node
-from .parserYacc import ParserYacc # type: ignore
+from .parserYacc import ParserYacc
def parse_expression(expression: str) -> Node | None:
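
With the blanket ``# type: ignore`` gone, this entry point is now checked end
to end. A minimal usage sketch (the expression string is invented for
illustration; the import path follows the file path above):

    from lsst.daf.butler.registry.queries.expressions.parser.parser import parse_expression

    tree = parse_expression("visit > 100 AND band = 'r'")  # parsed Node tree
    none = parse_expression("")  # None: the grammar's `empty` rule applies
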
diff --git a/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py b/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py
index b09303da7f..77fe302b8d 100644
--- a/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py
+++ b/python/lsst/daf/butler/registry/queries/expressions/parser/parserLex.py
@@ -25,35 +25,30 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-# type: ignore
-
-"""Module which defines PLY lexer for user expressions parsed by pre-flight.
-"""
+"""Module which defines PLY lexer for user expressions parsed by pre-flight."""
__all__ = ["ParserLex", "ParserLexError"]
-# -------------------------------
-# Imports of standard modules --
-# -------------------------------
import re
+from typing import Any, Protocol
-# -----------------------------
-# Imports for other modules --
-# -----------------------------
from .ply import lex
-# ----------------------------------
-# Local non-exported definitions --
-# ----------------------------------
-
_RE_RANGE = r"(?P-?\d+)\s*\.\.\s*(?P-?\d+)(\s*:\s*(?P[1-9]\d*))?"
"""Regular expression to match range literal in the form NUM..NUM[:NUM],
this must match t_RANGE_LITERAL docstring.
"""
-# ------------------------
-# Exported definitions --
-# ------------------------
+
+class LexToken(Protocol):
+ """Protocol for LexToken defined in ``ply.lex``."""
+
+ value: Any
+ type: str
+ lexer: Any
+ lexdata: str
+ lexpos: int
+ lineno: int
class ParserLexError(Exception):
@@ -72,7 +67,7 @@ class ParserLexError(Exception):
Current line number in the expression.
"""
- def __init__(self, expression, remain, pos, lineno):
+ def __init__(self, expression: str, remain: str, pos: int, lineno: int):
Exception.__init__(self, f"Unexpected character at position {pos}")
self.expression = expression
self.remain = remain
@@ -84,7 +79,7 @@ class ParserLex:
"""Class which defines PLY lexer."""
@classmethod
- def make_lexer(cls, reflags=0, **kwargs):
+ def make_lexer(cls, reflags: int = 0, **kwargs: Any) -> Any:
"""Return lexer.
Parameters
@@ -169,19 +164,19 @@ def make_lexer(cls, reflags=0, **kwargs):
t_ignore = " \t"
# Define a rule so we can track line numbers
- def t_newline(self, t):
+ def t_newline(self, t: LexToken) -> None:
r"""\n+"""
t.lexer.lineno += len(t.value)
# quoted string prefixed with 'T'
- def t_TIME_LITERAL(self, t):
+ def t_TIME_LITERAL(self, t: LexToken) -> LexToken:
"""T'.*?'"""
# strip quotes
t.value = t.value[2:-1]
return t
# quoted string
- def t_STRING_LITERAL(self, t):
+ def t_STRING_LITERAL(self, t: LexToken) -> LexToken:
"""'.*?'"""
# strip quotes
t.value = t.value[1:-1]
@@ -189,8 +184,9 @@ def t_STRING_LITERAL(self, t):
# range literal in format N..M[:S], spaces allowed, see _RE_RANGE
@lex.TOKEN(_RE_RANGE)
- def t_RANGE_LITERAL(self, t):
+ def t_RANGE_LITERAL(self, t: LexToken) -> LexToken:
match = re.match(_RE_RANGE, t.value)
+ assert match is not None, "Guaranteed by tokenization"
start = int(match.group("start"))
stop = int(match.group("stop"))
stride = match.group("stride")
@@ -200,7 +196,7 @@ def t_RANGE_LITERAL(self, t):
return t
# numbers are used as strings by parser, do not convert
- def t_NUMERIC_LITERAL(self, t):
+ def t_NUMERIC_LITERAL(self, t: LexToken) -> LexToken:
r"""\d+(\.\d*)?(e[-+]?\d+)? # 1, 1., 1.1, 1e10, 1.1e-10, etc.
|
\.\d+(e[-+]?\d+)? # .1, .1e10, .1e+10
@@ -208,13 +204,13 @@ def t_NUMERIC_LITERAL(self, t):
return t
# qualified identifiers have one or two dots
- def t_QUALIFIED_IDENTIFIER(self, t):
+ def t_QUALIFIED_IDENTIFIER(self, t: LexToken) -> LexToken:
r"""[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*){1,2}"""
t.type = "QUALIFIED_IDENTIFIER"
return t
# we only support ASCII in identifier names
- def t_SIMPLE_IDENTIFIER(self, t):
+ def t_SIMPLE_IDENTIFIER(self, t: LexToken) -> LexToken:
"""[a-zA-Z_][a-zA-Z0-9_]*"""
# Check for reserved words and make sure they are upper case
reserved = self.reserved.get(t.value.upper())
@@ -225,7 +221,7 @@ def t_SIMPLE_IDENTIFIER(self, t):
t.type = "SIMPLE_IDENTIFIER"
return t
- def t_error(self, t):
+ def t_error(self, t: LexToken) -> None:
"""Error handling rule"""
lexer = t.lexer
raise ParserLexError(lexer.lexdata, t.value, lexer.lexpos, lexer.lineno)
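
The ``LexToken`` protocol above only describes the token objects PLY already
produces, so runtime behaviour is unchanged. A quick sketch of the lexer in
action (input string invented; import path follows the diff header; per
``t_RANGE_LITERAL`` a range token's value is a ``(start, stop, stride)``
tuple):

    from lsst.daf.butler.registry.queries.expressions.parser.parserLex import ParserLex

    lexer = ParserLex.make_lexer()
    lexer.input("visit IN (1..10:2)")
    for tok in lexer:  # PLY lexers are iterable
        print(tok.type, tok.value)
    # roughly: SIMPLE_IDENTIFIER, IN, LPAREN, RANGE_LITERAL (1, 10, 2), RPAREN
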
diff --git a/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py b/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py
index 9e0dacf9c9..3328497ad6 100644
--- a/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py
+++ b/python/lsst/daf/butler/registry/queries/expressions/parser/parserYacc.py
@@ -25,22 +25,15 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-# type: ignore
-
-"""Syntax definition for user expression parser.
-"""
+"""Syntax definition for user expression parser."""
__all__ = ["ParserYacc", "ParserYaccError", "ParseError", "ParserEOFError"]
-# -------------------------------
-# Imports of standard modules --
-# -------------------------------
import re
import warnings
+from collections.abc import Mapping
+from typing import Any, Protocol
-# -----------------------------
-# Imports for other modules --
-# -----------------------------
import astropy.time
# As of astropy 4.2, the erfa interface is shipped independently and
@@ -54,6 +47,7 @@
BinaryOp,
Identifier,
IsIn,
+ Node,
NumericLiteral,
Parens,
RangeLiteral,
@@ -63,12 +57,21 @@
UnaryOp,
function_call,
)
-from .parserLex import ParserLex
+from .parserLex import LexToken, ParserLex
from .ply import yacc
-# ----------------------------------
-# Local non-exported definitions --
-# ----------------------------------
+
+class YaccProduction(Protocol):
+ """Protocol for YaccProduction defined in ``ply.yacc``."""
+
+ lexer: Any
+
+ def __getitem__(self, n: int) -> Any: ...
+ def __setitem__(self, n: int, v: Any) -> None: ...
+ def __len__(self) -> int: ...
+ def lineno(self, n: int) -> int: ...
+ def lexpos(self, n: int) -> int: ...
+
# The purpose of this regex is to guess time format if it is not explicitly
# provided in the string itself
@@ -91,7 +94,7 @@
)
-def _parseTimeString(time_str):
+def _parseTimeString(time_str: str) -> astropy.time.Time:
"""Try to convert time string into astropy.Time.
Parameters
@@ -212,7 +215,7 @@ class ParseError(ParserYaccError):
Parsing position in current line, 0-based.
"""
- def __init__(self, expression, token, pos, lineno):
+ def __init__(self, expression: str, token: str, pos: int, lineno: int):
self.expression = expression
self.token = token
self.pos = pos
@@ -222,7 +225,7 @@ def __init__(self, expression, token, pos, lineno):
msg = msg.format(token, lineno, self.posInLine + 1)
ParserYaccError.__init__(self, msg)
- def _posInLine(self):
+ def _posInLine(self) -> int:
"""Return position in current line"""
lines = self.expression.split("\n")
pos = self.pos
@@ -235,7 +238,7 @@ def _posInLine(self):
class ParserEOFError(ParserYaccError):
"""Exception raised for EOF-during-parser."""
- def __init__(self):
+ def __init__(self) -> None:
Exception.__init__(self, "End of input reached while expecting further input")
@@ -257,14 +260,14 @@ class ParserYacc:
Optional keyword arguments that are passed to `yacc.yacc` constructor.
"""
- def __init__(self, idMap=None, **kwargs):
+ def __init__(self, idMap: Mapping[str, Node] | None = None, **kwargs: Any):
kw = dict(write_tables=0, debug=False)
kw.update(kwargs)
self.parser = yacc.yacc(module=self, **kw)
self._idMap = idMap or {}
- def parse(self, input, lexer=None, debug=False, tracking=False):
+ def parse(self, input: str, lexer: Any = None, debug: bool = False, tracking: bool = False) -> Node | None:
"""Parse input expression ad return parsed tree object.
This is a trivial wrapper for the yacc.LRParser.parse method which
@@ -302,17 +305,17 @@ def parse(self, input, lexer=None, debug=False, tracking=False):
)
# this is the starting rule
- def p_input(self, p):
+ def p_input(self, p: YaccProduction) -> None:
"""input : expr
| empty
"""
p[0] = p[1]
- def p_empty(self, p):
+ def p_empty(self, p: YaccProduction) -> None:
"""empty :"""
p[0] = None
- def p_expr(self, p):
+ def p_expr(self, p: YaccProduction) -> None:
"""expr : expr OR expr
| expr AND expr
| NOT expr
@@ -325,7 +328,7 @@ def p_expr(self, p):
else:
p[0] = p[1]
- def p_bool_primary(self, p):
+ def p_bool_primary(self, p: YaccProduction) -> None:
"""bool_primary : bool_primary EQ predicate
| bool_primary NE predicate
| bool_primary LT predicate
@@ -340,7 +343,7 @@ def p_bool_primary(self, p):
else:
p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])
- def p_predicate(self, p):
+ def p_predicate(self, p: YaccProduction) -> None:
"""predicate : bit_expr IN LPAREN literal_or_id_list RPAREN
| bit_expr NOT IN LPAREN literal_or_id_list RPAREN
| bit_expr
@@ -352,7 +355,7 @@ def p_predicate(self, p):
else:
p[0] = p[1]
- def p_identifier(self, p):
+ def p_identifier(self, p: YaccProduction) -> None:
"""identifier : SIMPLE_IDENTIFIER
| QUALIFIED_IDENTIFIER
"""
@@ -361,7 +364,7 @@ def p_identifier(self, p):
node = Identifier(p[1])
p[0] = node
- def p_literal_or_id_list(self, p):
+ def p_literal_or_id_list(self, p: YaccProduction) -> None:
"""literal_or_id_list : literal_or_id_list COMMA literal
| literal_or_id_list COMMA identifier
| literal
@@ -372,7 +375,7 @@ def p_literal_or_id_list(self, p):
else:
p[0] = p[1] + [p[3]]
- def p_bit_expr(self, p):
+ def p_bit_expr(self, p: YaccProduction) -> None:
"""bit_expr : bit_expr ADD bit_expr
| bit_expr SUB bit_expr
| bit_expr MUL bit_expr
@@ -385,49 +388,49 @@ def p_bit_expr(self, p):
else:
p[0] = BinaryOp(lhs=p[1], op=p[2], rhs=p[3])
- def p_simple_expr_lit(self, p):
+ def p_simple_expr_lit(self, p: YaccProduction) -> None:
"""simple_expr : literal"""
p[0] = p[1]
- def p_simple_expr_id(self, p):
+ def p_simple_expr_id(self, p: YaccProduction) -> None:
"""simple_expr : identifier"""
p[0] = p[1]
- def p_simple_expr_function_call(self, p):
+ def p_simple_expr_function_call(self, p: YaccProduction) -> None:
"""simple_expr : function_call"""
p[0] = p[1]
- def p_simple_expr_unary(self, p):
+ def p_simple_expr_unary(self, p: YaccProduction) -> None:
"""simple_expr : ADD simple_expr %prec UPLUS
| SUB simple_expr %prec UMINUS
"""
p[0] = UnaryOp(op=p[1], operand=p[2])
- def p_simple_expr_paren(self, p):
+ def p_simple_expr_paren(self, p: YaccProduction) -> None:
"""simple_expr : LPAREN expr RPAREN"""
p[0] = Parens(p[2])
- def p_simple_expr_tuple(self, p):
+ def p_simple_expr_tuple(self, p: YaccProduction) -> None:
"""simple_expr : LPAREN expr COMMA expr RPAREN"""
# For now we only support tuples with two items,
# these are used for time ranges.
p[0] = TupleNode((p[2], p[4]))
- def p_literal_num(self, p):
+ def p_literal_num(self, p: YaccProduction) -> None:
"""literal : NUMERIC_LITERAL"""
p[0] = NumericLiteral(p[1])
- def p_literal_num_signed(self, p):
+ def p_literal_num_signed(self, p: YaccProduction) -> None:
"""literal : ADD NUMERIC_LITERAL %prec UPLUS
| SUB NUMERIC_LITERAL %prec UMINUS
"""
p[0] = NumericLiteral(p[1] + p[2])
- def p_literal_str(self, p):
+ def p_literal_str(self, p: YaccProduction) -> None:
"""literal : STRING_LITERAL"""
p[0] = StringLiteral(p[1])
- def p_literal_time(self, p):
+ def p_literal_time(self, p: YaccProduction) -> None:
"""literal : TIME_LITERAL"""
try:
value = _parseTimeString(p[1])
@@ -435,17 +438,17 @@ def p_literal_time(self, p):
raise ParseError(p.lexer.lexdata, p[1], p.lexpos(1), p.lineno(1)) from e
p[0] = TimeLiteral(value)
- def p_literal_range(self, p):
+ def p_literal_range(self, p: YaccProduction) -> None:
"""literal : RANGE_LITERAL"""
# RANGE_LITERAL value is tuple of three numbers
start, stop, stride = p[1]
p[0] = RangeLiteral(start, stop, stride)
- def p_function_call(self, p):
+ def p_function_call(self, p: YaccProduction) -> None:
"""function_call : SIMPLE_IDENTIFIER LPAREN expr_list RPAREN"""
p[0] = function_call(p[1], p[3])
- def p_expr_list(self, p):
+ def p_expr_list(self, p: YaccProduction) -> None:
"""expr_list : expr_list COMMA expr
| expr
| empty
@@ -461,7 +464,7 @@ def p_expr_list(self, p):
# ---------- end of all grammar rules ----------
# Error rule for syntax errors
- def p_error(self, p):
+ def p_error(self, p: LexToken | None) -> None:
if p is None:
raise ParserEOFError()
else:
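
Taken together, the typed lexer and parser leave runtime behaviour untouched.
A closing sketch (expression invented; this assumes, as the ``lexer=None``
default suggests, that ``parse`` builds its own lexer when none is passed,
and that the ISO date inside ``T'...'`` is accepted by ``_parseTimeString``):

    from lsst.daf.butler.registry.queries.expressions.parser.parserYacc import (
        ParserEOFError,
        ParserYacc,
    )

    parser = ParserYacc()
    tree = parser.parse("instrument = 'HSC' AND time > T'2020-01-01'")

    try:
        parser.parse("visit > ")  # input ends where a predicate is expected
    except ParserEOFError:
        pass  # p_error(None) maps EOF-with-pending-input to ParserEOFError
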