From 3f4f84daa4195aa4a8988fbfd34b42533b6f3dd3 Mon Sep 17 00:00:00 2001 From: Samuel Marks <807580+SamuelMarks@users.noreply.github.com> Date: Wed, 22 Nov 2023 21:42:12 -0500 Subject: [PATCH] [cdd/docstring/utils/parse_utils.py] Add support for more of the adhoc syntax (and now fail earlier on parse) --- cdd/class_/parse.py | 2 +- cdd/docstring/utils/parse_utils.py | 73 ++++++++++++++----- cdd/shared/defaults_utils.py | 2 - cdd/shared/docstring_parsers.py | 5 +- .../test_parse_docstring_utils.py | 31 ++++---- cdd/tests/test_utils_for_tests.py | 2 +- 6 files changed, 75 insertions(+), 40 deletions(-) diff --git a/cdd/class_/parse.py b/cdd/class_/parse.py index d542d42f..6fd7cfa8 100644 --- a/cdd/class_/parse.py +++ b/cdd/class_/parse.py @@ -220,7 +220,7 @@ def class_( intermediate_repr=intermediate_repr, merge_inner_function=merge_inner_function, ) - + # pp(intermediate_repr) return intermediate_repr diff --git a/cdd/docstring/utils/parse_utils.py b/cdd/docstring/utils/parse_utils.py index 4e52dc28..6a9df148 100644 --- a/cdd/docstring/utils/parse_utils.py +++ b/cdd/docstring/utils/parse_utils.py @@ -3,11 +3,13 @@ """ import string +from collections import Counter from functools import partial from itertools import filterfalse, takewhile +from keyword import iskeyword from operator import contains, itemgetter -from cdd.shared.pure_utils import count_iter_items, pp, sliding_window +from cdd.shared.pure_utils import count_iter_items, sliding_window adhoc_type_to_type = { "bool": "bool", @@ -68,7 +70,15 @@ def _union_literal_from_sentence(sentence): union[-1].append(ch) elif is_space: if union[-1]: - union[-1] = "".join(union[-1]) + union[-1] = "".join( + union[-1][:-1] + if union[-1][-1] in frozenset((",", ";")) + and ( + union[-1][0] in frozenset(string.digits + "'\"`") + or union[-1][0].isidentifier() + ) + else union[-1] + ) if union[-1] in frozenset( ("or", "or,", "or;", "or:", "of", "of,", "of;", "of:") ): @@ -98,7 +108,6 @@ def 
_union_literal_from_sentence(sentence): union[-1] = "".join( union[-1][:-1] if union[-1][-1] in frozenset((".", ",")) else union[-1] ) - # pp({"union": union}) if len(union) > 1: candidate_type = next( map( @@ -114,12 +123,31 @@ def _union_literal_from_sentence(sentence): return candidate_type union = sorted( - map( - lambda k: adhoc_type_to_type.get(k.lower(), k), - filterfalse(str.isspace, union), + frozenset( + map( + lambda k: adhoc_type_to_type.get(k.lower(), k), + filterfalse(str.isspace, union), + ) ) ) - pp({"union": union}) + # Sanity check, if the vars are not legit then exit now + # checks if each var is keyword or digit or quoted + if any( + filter( + lambda e: iskeyword(e) + or e.isdigit() + or ( + # could take care and use a custom scanner to handle escaped quotes; but this hack for now + lambda counter: counter["'"] & 1 == 1 + and counter["'"] > 0 + or counter['"'] & 1 == 1 + and counter['"'] > 0 + )(Counter(e)), + union, + ) + ): + return None + literals = count_iter_items( takewhile( frozenset(string.digits + "'\"").__contains__, @@ -139,7 +167,7 @@ def _union_literal_from_sentence(sentence): return None -def parse_adhoc_doc_for_typ(doc): +def parse_adhoc_doc_for_typ(doc, name): """ Google's Keras and other frameworks have an adhoc syntax. @@ -148,6 +176,9 @@ def parse_adhoc_doc_for_typ(doc): :param doc: Possibly ambiguous docstring for argument, that *might* hint as to the type :type doc: ```str``` + :param name: Name of argument; useful for debugging and if the name hints as to the type + :type name: ```str``` + :return: The type (if determined) else `None` :rtype: ```Optional[str]``` """ @@ -164,6 +195,8 @@ def parse_adhoc_doc_for_typ(doc): or ch == "." 
and len(doc) > (i + 1) and doc[i + 1] in word_chars + # Make "bar" start the next sentence: `foo`.bar + and (i - 1 == 0 or doc[i - 1] != "`") ): words[-1].append(ch) elif ch in frozenset((".", ";", ",")) or ch.isspace(): @@ -173,12 +206,19 @@ def parse_adhoc_doc_for_typ(doc): sentence_ends = len(words) words.append([]) words[-1] = "".join(words[-1]) + + candidate_type = next( + map( + adhoc_type_to_type.__getitem__, + filter(partial(contains, adhoc_type_to_type), words), + ), + None, + ) + fst_sentence = "".join(words[:sentence_ends]) sentence = None - if words[0] == "Whether": - return "bool" - + type_in_fst_sentence = adhoc_type_to_type.get(next(filterfalse(str.isspace, words))) if " or " in fst_sentence or " of " in fst_sentence: sentence = fst_sentence else: @@ -209,17 +249,12 @@ def parse_adhoc_doc_for_typ(doc): wrap_type_with = candidate_collection + "[{}]" sentence = sentence[fst_tick : sentence.rfind("`")] - candidate_type = _union_literal_from_sentence(sentence) + new_candidate_type = _union_literal_from_sentence(sentence) + if new_candidate_type is not None: + candidate_type = new_candidate_type if candidate_type is not None: return wrap_type_with.format(candidate_type) - candidate_type = next( - map( - adhoc_type_to_type.__getitem__, - filter(partial(contains, adhoc_type_to_type), words), - ), - None, - ) if candidate_type is not None: return candidate_type elif "/" in words[2]: diff --git a/cdd/shared/defaults_utils.py b/cdd/shared/defaults_utils.py index cbce3da3..8ec579fa 100644 --- a/cdd/shared/defaults_utils.py +++ b/cdd/shared/defaults_utils.py @@ -11,8 +11,6 @@ from itertools import takewhile from operator import contains, eq -from typing import * - from cdd.shared.pure_utils import ( PY_GTE_3_9, count_iter_items, diff --git a/cdd/shared/docstring_parsers.py b/cdd/shared/docstring_parsers.py index cb332523..c9dcaed6 100644 --- a/cdd/shared/docstring_parsers.py +++ b/cdd/shared/docstring_parsers.py @@ -7,6 +7,7 @@ - [numpydoc docstring 
format](https://numpydoc.readthedocs.io/en/latest/format.html) - [Google's docstring format](https://google.github.io/styleguide/pyguide.html) """ + import ast import sys from ast import AST @@ -16,6 +17,7 @@ from itertools import chain, takewhile from operator import attrgetter, eq, le from typing import Dict, List, Tuple +from typing import * from cdd.docstring.utils.emit_utils import interpolate_defaults from cdd.docstring.utils.parse_utils import parse_adhoc_doc_for_typ @@ -542,7 +544,7 @@ def _set_name_and_type(param, infer_type, word_wrap, none_default_for_kwargs=Fal else _param["doc"] ).rstrip() - typ = parse_adhoc_doc_for_typ(_param["doc"]) + typ = parse_adhoc_doc_for_typ(_param["doc"], name) if typ is not None: try: eval(typ, globals(), locals()) @@ -556,7 +558,6 @@ def _set_name_and_type(param, infer_type, word_wrap, none_default_for_kwargs=Fal and not _param["typ"].startswith("Optional[") ): _param["typ"] = "Optional[{typ}]".format(typ=_param["typ"]) - return name, _param diff --git a/cdd/tests/test_docstring/test_parse_docstring_utils.py b/cdd/tests/test_docstring/test_parse_docstring_utils.py index 4205b80b..31e3a812 100644 --- a/cdd/tests/test_docstring/test_parse_docstring_utils.py +++ b/cdd/tests/test_docstring/test_parse_docstring_utils.py @@ -32,7 +32,7 @@ def test_parse_adhoc_doc_for_typ(self) -> None: deque( map( lambda output_input: self.assertEqual( - output_input[0], parse_adhoc_doc_for_typ(output_input[1]) + output_input[0], parse_adhoc_doc_for_typ(output_input[1], name="") ), ( ( @@ -56,24 +56,25 @@ def test_parse_adhoc_doc_for_typ(self) -> None: "int", "Explicit `int64`-castable monotonic step value for this summary.", ), + ( + "bool", + cdd.shared.docstring_parsers.parse_docstring( + docstring_google_keras_tensorboard_return_str + )["returns"]["return_type"]["typ"], + ), + ( + "Literal['auto', 'max', 'min']", + "String. One of `{'auto', 'min', 'max'}`. 
In `'min'` mode,", + ), + ( + 'Union[Literal["epoch"], bool, int]', + '`"epoch"`, integer, or `False`.' + 'When set to `"epoch" the callback saves the checkpoint at the end of each epoch.', + ), ), ), maxlen=0, ) - self.assertEqual( - cdd.shared.docstring_parsers.parse_docstring( - docstring_google_keras_tensorboard_return_str - )["returns"]["return_type"]["typ"], - "bool", - ) - - self.assertEqual( - parse_adhoc_doc_for_typ( - "String. One of `{'auto', 'min', 'max'}`. In `'min'` mode," - ), - "Literal['auto', 'max', 'min']", - ) - unittest_main() diff --git a/cdd/tests/test_utils_for_tests.py b/cdd/tests/test_utils_for_tests.py index c9eef4f8..8169c0a1 100644 --- a/cdd/tests/test_utils_for_tests.py +++ b/cdd/tests/test_utils_for_tests.py @@ -34,7 +34,7 @@ def test_unittest_main(self) -> None: cdd.tests.utils_for_tests.unittest_main() # Python >=3.12 has: - # if self.result.testsRun == 0: sys.exit(_NO_TESTS_EXITCODE) where `_NO_TESTS_EXITCODE` is `5` + # if self.result.testsRun == 0: where `_NO_TESTS_EXITCODE` is `5` self.assertEqual(e.exception.code, 5) if PY_GTE_3_12 else self.assertIsInstance( e.exception.code, bool )