From 3f4f84daa4195aa4a8988fbfd34b42533b6f3dd3 Mon Sep 17 00:00:00 2001
From: Samuel Marks <807580+SamuelMarks@users.noreply.github.com>
Date: Wed, 22 Nov 2023 21:42:12 -0500
Subject: [PATCH] [cdd/docstring/utils/parse_utils.py] Add support for more of
 the adhoc syntax (and now fail earlier on parse)

---
 cdd/class_/parse.py                           |  2 +-
 cdd/docstring/utils/parse_utils.py            | 73 ++++++++++++++-----
 cdd/shared/defaults_utils.py                  |  2 -
 cdd/shared/docstring_parsers.py               |  5 +-
 .../test_parse_docstring_utils.py             | 31 ++++----
 cdd/tests/test_utils_for_tests.py             |  2 +-
 6 files changed, 75 insertions(+), 40 deletions(-)

diff --git a/cdd/class_/parse.py b/cdd/class_/parse.py
index d542d42f..6fd7cfa8 100644
--- a/cdd/class_/parse.py
+++ b/cdd/class_/parse.py
@@ -220,7 +220,7 @@ def class_(
             intermediate_repr=intermediate_repr,
             merge_inner_function=merge_inner_function,
         )
-
+    # pp(intermediate_repr)
     return intermediate_repr
 
 
diff --git a/cdd/docstring/utils/parse_utils.py b/cdd/docstring/utils/parse_utils.py
index 4e52dc28..6a9df148 100644
--- a/cdd/docstring/utils/parse_utils.py
+++ b/cdd/docstring/utils/parse_utils.py
@@ -3,11 +3,13 @@
 """
 
 import string
+from collections import Counter
 from functools import partial
 from itertools import filterfalse, takewhile
+from keyword import iskeyword
 from operator import contains, itemgetter
 
-from cdd.shared.pure_utils import count_iter_items, pp, sliding_window
+from cdd.shared.pure_utils import count_iter_items, sliding_window
 
 adhoc_type_to_type = {
     "bool": "bool",
@@ -68,7 +70,15 @@ def _union_literal_from_sentence(sentence):
             union[-1].append(ch)
         elif is_space:
             if union[-1]:
-                union[-1] = "".join(union[-1])
+                union[-1] = "".join(
+                    union[-1][:-1]
+                    if union[-1][-1] in frozenset((",", ";"))
+                    and (
+                        union[-1][0] in frozenset(string.digits + "'\"`")
+                        or union[-1][0].isidentifier()
+                    )
+                    else union[-1]
+                )
                 if union[-1] in frozenset(
                     ("or", "or,", "or;", "or:", "of", "of,", "of;", "of:")
                 ):
@@ -98,7 +108,6 @@ def _union_literal_from_sentence(sentence):
         union[-1] = "".join(
             union[-1][:-1] if union[-1][-1] in frozenset((".", ",")) else union[-1]
         )
-    # pp({"union": union})
     if len(union) > 1:
         candidate_type = next(
             map(
@@ -114,12 +123,31 @@ def _union_literal_from_sentence(sentence):
             return candidate_type
 
     union = sorted(
-        map(
-            lambda k: adhoc_type_to_type.get(k.lower(), k),
-            filterfalse(str.isspace, union),
+        frozenset(
+            map(
+                lambda k: adhoc_type_to_type.get(k.lower(), k),
+                filterfalse(str.isspace, union),
+            )
         )
     )
-    pp({"union": union})
+    # Sanity check: if any member of the union is not legit, bail out now by returning `None`.
+    # A member is rejected if it is a Python keyword, all digits, or has an odd count of `'` or `"`
+    if any(
+        filter(
+            lambda e: iskeyword(e)
+            or e.isdigit()
+            or (
+                # a custom scanner could take care to handle escaped quotes; but this hack will do for now
+                lambda counter: counter["'"] & 1 == 1
+                and counter["'"] > 0
+                or counter['"'] & 1 == 1
+                and counter['"'] > 0
+            )(Counter(e)),
+            union,
+        )
+    ):
+        return None
+
     literals = count_iter_items(
         takewhile(
             frozenset(string.digits + "'\"").__contains__,
@@ -139,7 +167,7 @@ def _union_literal_from_sentence(sentence):
         return None
 
 
-def parse_adhoc_doc_for_typ(doc):
+def parse_adhoc_doc_for_typ(doc, name):
     """
     Google's Keras and other frameworks have an adhoc syntax.
 
@@ -148,6 +176,9 @@ def parse_adhoc_doc_for_typ(doc):
     :param doc: Possibly ambiguous docstring for argument, that *might* hint as to the type
     :type doc: ```str```
 
+    :param name: Name of argument; useful for debugging and if the name hints as to the type
+    :type name: ```str```
+
     :return: The type (if determined) else `None`
     :rtype: ```Optional[str]```
     """
@@ -164,6 +195,8 @@ def parse_adhoc_doc_for_typ(doc):
             or ch == "."
             and len(doc) > (i + 1)
             and doc[i + 1] in word_chars
+            # Make "bar" start the next sentence:    `foo`.bar
+            and (i - 1 == 0 or doc[i - 1] != "`")
         ):
             words[-1].append(ch)
         elif ch in frozenset((".", ";", ",")) or ch.isspace():
@@ -173,12 +206,19 @@ def parse_adhoc_doc_for_typ(doc):
                 sentence_ends = len(words)
             words.append([])
     words[-1] = "".join(words[-1])
+
+    candidate_type = next(
+        map(
+            adhoc_type_to_type.__getitem__,
+            filter(partial(contains, adhoc_type_to_type), words),
+        ),
+        None,
+    )
+
     fst_sentence = "".join(words[:sentence_ends])
     sentence = None
 
-    if words[0] == "Whether":
-        return "bool"
-
+    type_in_fst_sentence = adhoc_type_to_type.get(next(filterfalse(str.isspace, words)))
     if " or " in fst_sentence or " of " in fst_sentence:
         sentence = fst_sentence
     else:
@@ -209,17 +249,12 @@ def parse_adhoc_doc_for_typ(doc):
                 wrap_type_with = candidate_collection + "[{}]"
             sentence = sentence[fst_tick : sentence.rfind("`")]
 
-        candidate_type = _union_literal_from_sentence(sentence)
+        new_candidate_type = _union_literal_from_sentence(sentence)
+        if new_candidate_type is not None:
+            candidate_type = new_candidate_type
         if candidate_type is not None:
             return wrap_type_with.format(candidate_type)
 
-    candidate_type = next(
-        map(
-            adhoc_type_to_type.__getitem__,
-            filter(partial(contains, adhoc_type_to_type), words),
-        ),
-        None,
-    )
     if candidate_type is not None:
         return candidate_type
     elif "/" in words[2]:
diff --git a/cdd/shared/defaults_utils.py b/cdd/shared/defaults_utils.py
index cbce3da3..8ec579fa 100644
--- a/cdd/shared/defaults_utils.py
+++ b/cdd/shared/defaults_utils.py
@@ -11,8 +11,6 @@
 from itertools import takewhile
 from operator import contains, eq
 
-from typing import *
-
 from cdd.shared.pure_utils import (
     PY_GTE_3_9,
     count_iter_items,
diff --git a/cdd/shared/docstring_parsers.py b/cdd/shared/docstring_parsers.py
index cb332523..c9dcaed6 100644
--- a/cdd/shared/docstring_parsers.py
+++ b/cdd/shared/docstring_parsers.py
@@ -7,6 +7,7 @@
  - [numpydoc docstring format](https://numpydoc.readthedocs.io/en/latest/format.html)
  - [Google's docstring format](https://google.github.io/styleguide/pyguide.html)
 """
+
 import ast
 import sys
 from ast import AST
@@ -16,6 +17,7 @@
 from itertools import chain, takewhile
 from operator import attrgetter, eq, le
 from typing import Dict, List, Tuple
+from typing import *
 
 from cdd.docstring.utils.emit_utils import interpolate_defaults
 from cdd.docstring.utils.parse_utils import parse_adhoc_doc_for_typ
@@ -542,7 +544,7 @@ def _set_name_and_type(param, infer_type, word_wrap, none_default_for_kwargs=Fal
                 else _param["doc"]
             ).rstrip()
 
-        typ = parse_adhoc_doc_for_typ(_param["doc"])
+        typ = parse_adhoc_doc_for_typ(_param["doc"], name)
         if typ is not None:
             try:
                 eval(typ, globals(), locals())
@@ -556,7 +558,6 @@ def _set_name_and_type(param, infer_type, word_wrap, none_default_for_kwargs=Fal
             and not _param["typ"].startswith("Optional[")
         ):
             _param["typ"] = "Optional[{typ}]".format(typ=_param["typ"])
-
     return name, _param
 
 
diff --git a/cdd/tests/test_docstring/test_parse_docstring_utils.py b/cdd/tests/test_docstring/test_parse_docstring_utils.py
index 4205b80b..31e3a812 100644
--- a/cdd/tests/test_docstring/test_parse_docstring_utils.py
+++ b/cdd/tests/test_docstring/test_parse_docstring_utils.py
@@ -32,7 +32,7 @@ def test_parse_adhoc_doc_for_typ(self) -> None:
         deque(
             map(
                 lambda output_input: self.assertEqual(
-                    output_input[0], parse_adhoc_doc_for_typ(output_input[1])
+                    output_input[0], parse_adhoc_doc_for_typ(output_input[1], name="")
                 ),
                 (
                     (
@@ -56,24 +56,25 @@ def test_parse_adhoc_doc_for_typ(self) -> None:
                         "int",
                         "Explicit `int64`-castable monotonic step value for this summary.",
                     ),
+                    (
+                        "bool",
+                        cdd.shared.docstring_parsers.parse_docstring(
+                            docstring_google_keras_tensorboard_return_str
+                        )["returns"]["return_type"]["typ"],
+                    ),
+                    (
+                        "Literal['auto', 'max', 'min']",
+                        "String. One of `{'auto', 'min', 'max'}`. In `'min'` mode,",
+                    ),
+                    (
+                        'Union[Literal["epoch"], bool, int]',
+                        '`"epoch"`, integer, or `False`.'
+                        'When set to `"epoch" the callback saves the checkpoint at the end of each epoch.',
+                    ),
                 ),
             ),
             maxlen=0,
         )
 
-        self.assertEqual(
-            cdd.shared.docstring_parsers.parse_docstring(
-                docstring_google_keras_tensorboard_return_str
-            )["returns"]["return_type"]["typ"],
-            "bool",
-        )
-
-        self.assertEqual(
-            parse_adhoc_doc_for_typ(
-                "String. One of `{'auto', 'min', 'max'}`. In `'min'` mode,"
-            ),
-            "Literal['auto', 'max', 'min']",
-        )
-
 
 unittest_main()
diff --git a/cdd/tests/test_utils_for_tests.py b/cdd/tests/test_utils_for_tests.py
index c9eef4f8..8169c0a1 100644
--- a/cdd/tests/test_utils_for_tests.py
+++ b/cdd/tests/test_utils_for_tests.py
@@ -34,7 +34,7 @@ def test_unittest_main(self) -> None:
             cdd.tests.utils_for_tests.unittest_main()
 
         # Python >=3.12 has:
-        # if self.result.testsRun == 0: sys.exit(_NO_TESTS_EXITCODE) where `_NO_TESTS_EXITCODE` is `5`
+        # `if self.result.testsRun == 0: sys.exit(_NO_TESTS_EXITCODE)`, where `_NO_TESTS_EXITCODE` is `5`
         self.assertEqual(e.exception.code, 5) if PY_GTE_3_12 else self.assertIsInstance(
             e.exception.code, bool
         )