From 48545a1972c86df9d4153570c9b12a9ad4f51529 Mon Sep 17 00:00:00 2001
From: Value Raider
Date: Mon, 7 Aug 2023 20:52:45 +0100
Subject: [PATCH] Experimental code to add cookie & crumb to requests

---
 yfinance/data.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 1 deletion(-)

diff --git a/yfinance/data.py b/yfinance/data.py
index fdcf2cf41..5c9f2e847 100644
--- a/yfinance/data.py
+++ b/yfinance/data.py
@@ -39,6 +39,9 @@ def wrapped(*args, **kwargs):
 class TickerData:
     """
     Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations
+
+    Credit for code for cookie & crumb goes to StackOverflow:
+    https://stackoverflow.com/questions/76065035/yahoo-finance-v7-api-now-requiring-cookies-python
     """
     user_agent_headers = {
         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
@@ -47,11 +50,80 @@ def __init__(self, ticker: str, session=None):
         self.ticker = ticker
         self._session = session or requests
 
-    def get(self, url, user_agent_headers=None, params=None, proxy=None, timeout=30):
+        # Yahoo auth cookie and crumb, fetched lazily and cached per instance.
+        self._cookie, self._crumb = None, None
+
+    def _get_cookie(self, proxy=None, timeout=30):
+        """
+        Return the Yahoo auth cookie, requesting it on first use.
+
+        The cookie is cached on the instance, so later calls make no request.
+
+        :param proxy: passed to the session as ``proxies``
+        :param timeout: passed to the session as ``timeout``
+        :raises Exception: if the response carries no cookies
+        """
+        if self._cookie is not None:
+            return self._cookie
+
+        # To avoid infinite recursion, do NOT use self.get() here:
+        # self.get() itself calls _get_cookie().
+        response = self._session.get(
+            url='https://fc.yahoo.com',
+            headers=self.user_agent_headers,
+            proxies=proxy,
+            timeout=timeout)
+
+        if not response.cookies:
+            raise Exception("Failed to obtain Yahoo auth cookie.")
+        self._cookie = next(iter(response.cookies))
+        return self._cookie
+
+    def _get_crumb(self, proxy=None, timeout=30):
+        """
+        Return the Yahoo crumb, requesting it on first use.
+
+        The crumb is cached on the instance, so later calls make no request.
+
+        :param proxy: passed to the session as ``proxies``
+        :param timeout: passed to the session as ``timeout``
+        :raises Exception: if no cookie or no usable crumb can be obtained
+        """
+        if self._crumb is not None:
+            return self._crumb
+        # Forward proxy & timeout so the cookie request uses the same settings.
+        cookie = self._get_cookie(proxy=proxy, timeout=timeout)
+
+        crumb_response = self._session.get(
+            url="https://query1.finance.yahoo.com/v1/test/getcrumb",
+            headers=self.user_agent_headers,
+            cookies={cookie.name: cookie.value},
+            proxies=proxy,
+            timeout=timeout)
+
+        # Never cache an error page or an empty body as the crumb.
+        crumb = crumb_response.text
+        if not crumb_response.ok or not crumb:
+            raise Exception("Failed to obtain Yahoo crumb.")
+        self._crumb = crumb
+        return self._crumb
+
+    def get(self, url, user_agent_headers=None, params=None, cookies=None, proxy=None, timeout=30):
         proxy = self._get_proxy(proxy)
+
+        # Add cookie & crumb, fetching them through the same proxy & timeout.
+        if cookies is None:
+            cookie = self._get_cookie(proxy=proxy, timeout=timeout)
+            cookies = {cookie.name: cookie.value}
+        # Copy params so the caller's dict is not mutated by adding the crumb.
+        params = {} if params is None else dict(params)
+        if 'crumb' not in params:
+            params['crumb'] = self._get_crumb(proxy=proxy, timeout=timeout)
+
         response = self._session.get(
             url=url,
             params=params,
+            cookies=cookies,
             proxies=proxy,
             timeout=timeout,
             headers=user_agent_headers or self.user_agent_headers)