Add better error handling when unable to find crumb

dpguthrie · jho · May 1, 2023 · Oct 30, 2023 · Jul 12, 2023 · Jul 15, 2023
commit 331c1341bc9037f62ba68ceb34115d183684149b
diff --git a/yahooquery/utils/__init__.py b/yahooquery/utils/__init__.py
@@ -1,14 +1,21 @@
+# stdlib
 import datetime
 import json
 import random
 import re
+from urllib3.exceptions import MaxRetryError
 
+# third party
 import pandas as pd
 from requests import Response, Session
 from requests.adapters import HTTPAdapter
+from requests.exceptions import ConnectionError, RetryError
 from requests.packages.urllib3.util.retry import Retry
 from requests_futures.sessions import FuturesSession
 
+# first party
+from yahooquery.login import YahooSelenium, _has_selenium
+
 
 DEFAULT_TIMEOUT = 5
 
@@ -168,21 +175,37 @@ def _init_session(session=None, **kwargs):
 def setup_session_with_cookies_and_crumb(session: Session):
     headers = {**random.choice(HEADERS), **addl_headers}
     session.headers = headers
-    response = session.get('https://finance.yahoo.com', hooks={'response': get_crumb})
-    if isinstance(session, FuturesSession):
-        response = response.result()
-    return session, response.crumb
+    try:
+        response = session.get('https://finance.yahoo.com')
+    except Exception:
+        return session, None
+    else:
+        if isinstance(session, FuturesSession):
+            response = response.result()
+        crumb = _get_crumb(response.text, session)
+        return session, crumb
 
 
-def get_crumb(r: Response, *args, **kwargs):
-    r.crumb = None
+def _get_crumb(page_text, session):
+    crumb = None
     path = re.compile(r'window\.YAHOO\.context = ({.*?});', re.DOTALL)
-    match = re.search(path, r.text)
+    match = re.search(path, page_text)
     if match:
-        js_dict = json.loads(match.group(1))
-        r.crumb = js_dict.get('crumb', None)
-
-    return r
+        dct = json.loads(match.group(1))
+        crumb = dct.get('crumb', None)
+        if crumb is not None:
+            return crumb
+
+    try:
+        response = session.get('https://query2.finance.yahoo.com/v1/test/getcrumb')
+        if isinstance(session, FuturesSession):
+            response = response.result()
+        crumb = response.text
+    except (ConnectionError, RetryError) as e:
+        # Cookies most likely not set in previous request
+        pass
+
+    return crumb
 
 def _flatten_list(ls):
     return [item for sublist in ls for item in sublist]