diff --git a/yfinance/data.py b/yfinance/data.py
index 5c9f2e847..4f7f061b5 100644
--- a/yfinance/data.py
+++ b/yfinance/data.py
@@ -4,6 +4,7 @@
 import logging

 import requests as requests
+from bs4 import BeautifulSoup
 import re
 import random
 import time
@@ -50,13 +51,12 @@
     def __init__(self, ticker: str, session=None):
         self.ticker = ticker
         self._session = session or requests
-        self._cookie, self._crumb = None, None
-
     def _get_cookie(self, proxy=None, timeout=30):
-        if self._cookie is not None:
-            return self._cookie
+        if utils.cookie is not None:
+            return utils.cookie
+
+        utils.get_yf_logger().debug(f"Fetching cookie ...")

-        # response = self.get('https://fc.yahoo.com')
         # To avoid infinite recursion, do NOT use self.get()
         response = self._session.get(
             url='https://fc.yahoo.com',
@@ -66,39 +66,87 @@
         if not response.cookies:
             raise Exception("Failed to obtain Yahoo auth cookie.")
-        self._cookie = list(response.cookies)[0]
-        return self._cookie
+
+        utils.cookie = list(response.cookies)[0]
+        utils.get_yf_logger().debug(f"cookie = '{utils.cookie}'")
+        return utils.cookie

     def _get_crumb(self, proxy=None, timeout=30):
-        if self._crumb is not None:
-            return self._crumb
-        cookie = self._get_cookie()
+        if utils.crumb is not None:
+            return utils.crumb
+        utils.get_yf_logger().debug(f"Fetching crumb ...")
+
+        cookie = self._get_cookie()
         crumb_response = self._session.get(
             url="https://query1.finance.yahoo.com/v1/test/getcrumb",
             headers=self.user_agent_headers,
             cookies={cookie.name: cookie.value},
             proxies=proxy,
             timeout=timeout)
-
-        self._crumb = crumb_response.text
-        return self._crumb
-
+        utils.crumb = crumb_response.text
+        if utils.crumb is None or '<html>' in utils.crumb:
+            raise Exception("Failed to fetch crumb")
+
+        utils.get_yf_logger().debug(f"crumb = '{utils.crumb}'")
+        utils.crumb = utils.crumb
+        return utils.crumb
+
+    def _get_crumb_botunit(self, proxy=None, timeout=30):
+        # Credit goes to @bot-unit #1729
+
+        if utils.crumb is not None:
+            return utils.crumb
+
+        utils.get_yf_logger().debug(f"Fetching crumb ...")
+
+        # ToDo: might have to force fetch crumb direct from `requests`,
+        # to avoid using cached crumb from `requests_cache`
+
+        response = self._session.get('https://guce.yahoo.com/consent', headers=self.user_agent_headers)
+        soup = BeautifulSoup(response.content, 'html.parser')
+        csrfTokenInput = soup.find('input', attrs={'name': 'csrfToken'})
+        csrfToken = csrfTokenInput['value']
+        sessionIdInput = soup.find('input', attrs={'name': 'sessionId'})
+        sessionId = sessionIdInput['value']
+        originalDoneUrl = 'https://finance.yahoo.com/'
+        namespace = 'yahoo'
+        data = {
+            'agree': ['agree', 'agree'],
+            'consentUUID': 'default',
+            'sessionId': sessionId,
+            'csrfToken': csrfToken,
+            'originalDoneUrl': originalDoneUrl,
+            'namespace': namespace,
+        }
+        self._session.post(f'https://consent.yahoo.com/v2/collectConsent?sessionId={sessionId}', data=data, headers=self.user_agent_headers)
+        self._session.get(f'https://guce.yahoo.com/copyConsent?sessionId={sessionId}', headers=self.user_agent_headers)
+        r = self._session.get('https://query2.finance.yahoo.com/v1/test/getcrumb', headers=self.user_agent_headers)
+        utils.crumb = r.text
+
+        if utils.crumb is None or '<html>' in utils.crumb:
+            raise Exception("Failed to fetch crumb")
+
+        utils.get_yf_logger().debug(f"crumb = '{utils.crumb}'")
+        utils.crumb = utils.crumb
+        return utils.crumb

     def get(self, url, user_agent_headers=None, params=None, cookies=None, proxy=None, timeout=30):
+        utils.get_yf_logger().debug(f'get(): {url}')
         proxy = self._get_proxy(proxy)

         # Add cookie & crumb
-        if cookies is None:
-            cookie = self._get_cookie()
-            cookies = {cookie.name: cookie.value}
+        # if cookies is None:
+        #     cookie = self._get_cookie()
+        #     cookies = {cookie.name: cookie.value}
+        # Update: don't need cookie
         if params is None:
             params = {}
         if 'crumb' not in params:
-            params['crumb'] = self._get_crumb()
+            # params['crumb'] = self._get_crumb()
+            params['crumb'] = self._get_crumb_botunit()

         response = self._session.get(
-        # response = requests.get(
             url=url,
             params=params,
             cookies=cookies,
diff --git a/yfinance/scrapers/quote.py b/yfinance/scrapers/quote.py
index c3bc0725a..6e5326fd6 100644
--- a/yfinance/scrapers/quote.py
+++ b/yfinance/scrapers/quote.py
@@ -568,7 +568,7 @@ def __init__(self, data: TickerData, proxy=None):
     def info(self) -> dict:
         if self._info is None:
             self._fetch(self.proxy)
-            self._fetch_complementary(self.proxy)
+            # self._fetch_complementary(self.proxy)  # Failing, don't know why. Help!

         return self._info

diff --git a/yfinance/utils.py b/yfinance/utils.py
index 7bb8bc4f9..acc86cbf8 100644
--- a/yfinance/utils.py
+++ b/yfinance/utils.py
@@ -54,6 +54,11 @@
     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}


+# Quick hack to globally-cache cookie & crumb
+cookie = None
+crumb = None
+
+
 # From https://stackoverflow.com/a/59128615
 def attributes(obj):
     disallowed_names = {
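For reviewers who want to exercise the new consent-flow crumb fetch outside of yfinance, below is a minimal standalone sketch of the same request sequence `_get_crumb_botunit` performs (credit to @bot-unit, #1729). The plain `requests.Session` and the trimmed User-Agent string are stand-ins for `TickerData._session` and yfinance's `user_agent_headers`; they are not part of this diff.

```python
# Standalone sketch of the consent-flow crumb fetch shown in _get_crumb_botunit above.
# Assumes requests and beautifulsoup4 are installed; no caching session is involved.
import requests
from bs4 import BeautifulSoup

headers = {'User-Agent': 'Mozilla/5.0'}  # placeholder UA, not yfinance's exact string
session = requests.Session()

# 1. Load the consent page and read the hidden form fields.
response = session.get('https://guce.yahoo.com/consent', headers=headers)
soup = BeautifulSoup(response.content, 'html.parser')
csrf_input = soup.find('input', attrs={'name': 'csrfToken'})
session_id_input = soup.find('input', attrs={'name': 'sessionId'})
if csrf_input is None or session_id_input is None:
    raise Exception("Consent form fields not found")
session_id = session_id_input['value']

# 2. Post agreement, then copy the consent cookies onto this session.
data = {
    'agree': ['agree', 'agree'],
    'consentUUID': 'default',
    'sessionId': session_id,
    'csrfToken': csrf_input['value'],
    'originalDoneUrl': 'https://finance.yahoo.com/',
    'namespace': 'yahoo',
}
session.post(f'https://consent.yahoo.com/v2/collectConsent?sessionId={session_id}',
             data=data, headers=headers)
session.get(f'https://guce.yahoo.com/copyConsent?sessionId={session_id}', headers=headers)

# 3. With consent granted, the crumb endpoint returns a short plain-text token.
crumb = session.get('https://query2.finance.yahoo.com/v1/test/getcrumb', headers=headers).text
if not crumb or '<html>' in crumb:
    raise Exception("Failed to fetch crumb")
print(crumb)
```

If this succeeds, the printed value is the token that the patched `get()` appends as the `crumb` query parameter on Yahoo API requests.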