From 0b5a17948b95e585a2c87b43b2232af22c38b86d Mon Sep 17 00:00:00 2001
From: Marcus Read <marcusaread@gmail.com>
Date: Tue, 22 Nov 2022 23:53:58 +0000
Subject: [PATCH 1/3] Fix `Ticker.history` bugs

Fixes:
- #127 BUG: inaccurate evaluation of 'day' corresponding with close
price.
- #128 BUG: inaccurate intraday price indexing when exchanges
observe DST.

Also, improves performance by reducing DataFrame manipulations.
---
 yahooquery/ticker.py         |   4 +-
 yahooquery/utils/__init__.py | 126 ++++++++++++++++++++++-------------
 2 files changed, 80 insertions(+), 50 deletions(-)

diff --git a/yahooquery/ticker.py b/yahooquery/ticker.py
index 44c959c..8427b0b 100644
--- a/yahooquery/ticker.py
+++ b/yahooquery/ticker.py
@@ -1279,7 +1279,6 @@ def history(
             df = self._historical_data_to_dataframe(data, params, adj_timezone)
         if adj_ohlc and "adjclose" in df:
             df = self._adjust_ohlc(df)
-        df = df[~df.index.duplicated(keep='first')]
         return df
 
     def _history_1m(self, adj_timezone=True, adj_ohlc=False):
@@ -1305,7 +1304,8 @@ def _historical_data_to_dataframe(self, data, params, adj_timezone):
         d = {}
         for symbol in self._symbols:
             if "timestamp" in data[symbol]:
-                d[symbol] = _history_dataframe(data, symbol, params, adj_timezone)
+                daily = params["interval"][-1] == "d"
+                d[symbol] = _history_dataframe(data[symbol], daily, adj_timezone)
             else:
                 d[symbol] = data[symbol]
         d = {k: v for k, v in d.items() if isinstance(v, pd.DataFrame)}
diff --git a/yahooquery/utils/__init__.py b/yahooquery/utils/__init__.py
index 33354cb..31eff44 100644
--- a/yahooquery/utils/__init__.py
+++ b/yahooquery/utils/__init__.py
@@ -1,7 +1,7 @@
+import datetime
 import random
 import re
 import time
-from datetime import datetime
 
 import pandas as pd
 from requests import Session
@@ -112,57 +112,87 @@ def _convert_to_list(symbols, comma_split=False):
 def _convert_to_timestamp(date=None, start=True):
     if date is None:
         date = int((-858880800 * start) + (time.time() * (not start)))
-    elif isinstance(date, datetime):
+    elif isinstance(date, datetime.datetime):
         date = int(time.mktime(date.timetuple()))
     else:
         date = int(time.mktime(time.strptime(str(date), "%Y-%m-%d")))
     return date
 
 
-def _history_dataframe(data, symbol, params, adj_timezone=True):
-    df = pd.DataFrame(data[symbol]["indicators"]["quote"][0])
-    if data[symbol]["indicators"].get("adjclose"):
-        df["adjclose"] = data[symbol]["indicators"]["adjclose"][0]["adjclose"]
-    df.index = pd.to_datetime(data[symbol]["timestamp"], unit="s") + pd.Timedelta(
-        (data[symbol]["meta"]["gmtoffset"] * adj_timezone), "s"
-    )
-    if params["interval"][-1] not in ["m", "h"]:
-        df.index = df.index.date
-    df.dropna(inplace=True)
-    if data[symbol].get("events"):
-        df = pd.merge(
-            df,
-            _events_to_dataframe(data, symbol, params, adj_timezone),
-            how="outer",
-            left_index=True,
-            right_index=True,
-        )
-    return df
-
+def _get_daily_index(data, index_utc, adj_timezone):
+    # evalute if last indice represents a live interval
+    last_trade_secs = data["meta"]["regularMarketTime"] * 10**9
+    last_trade = pd.Timestamp(last_trade_secs, tz="UTC")
+    has_live_indice = index_utc[-1] >= last_trade - pd.Timedelta(2, "S")
+    if has_live_indice:
+        # remove it
+        live_indice = index_utc[-1]
+        index_utc = index_utc[:-1]
+        # evaluate if it should be put back later. If the close price for
+        # the day is already included in the data, i.e. if the live indice
+        # is simply duplicating data represented in the prior row, then the 
+        # following will evaluate to False.
+        keep_live_indice = live_indice > index_utc[-1] + datetime.timedelta(1)
+
+    tz = data["meta"]["exchangeTimezoneName"]
+    index_local = index_utc.tz_convert(tz)
+    times = index_local.time
+
+    bv = times <= datetime.time(14)
+    if (bv).all():
+        index = index_local.floor("d")
+    elif (~bv).all():
+        index = index_local.ceil("d")
+    else:
+        # mix of open times pre and post 14:00.
+        index1 = index_local[bv].floor("d")
+        index2 = index_local[~bv].ceil("d")
+        index = index1.union(index2)
+
+    index = pd.Index(index.date)
+    if has_live_indice and keep_live_indice:
+        live_indice = live_indice.astimezone(tz) if adj_timezone else live_indice
+        # do not keep tz info
+        live_indice = live_indice.tz_localize(None).to_pydatetime()
+        index = index.insert(len(index), live_indice)
+    return index
+
+
+def _event_as_srs(event_data, event):
+    index = pd.Index([int(v) for v in event_data.keys()], dtype="int64")
+    if event == "dividends":
+        values = [d["amount"] for d in event_data.values()]
+    else:
+        values = [d["numerator"] / d["denominator"] for d in event_data.values()]
+    return pd.Series(values, index=index)
+
+
+def _history_dataframe(data, daily, adj_timezone=True):
+    data_dict = data["indicators"]["quote"][0].copy()
+    if "adjclose" in data["indicators"]:
+        data_dict["adjclose"] = data["indicators"]["adjclose"][0]["adjclose"]
+
+    if 'events' in data:
+        for event, event_data in data["events"].items():
+            if event not in ("dividends", "splits"):
+                continue
+            data_dict[event] = _event_as_srs(event_data, event)
+
+    df = pd.DataFrame(data_dict, index=data["timestamp"])  # align all on timestamps
+    df.dropna(how="all", inplace=True)
+
+    index_utc = pd.to_datetime(df.index, unit="s", utc=True)
+    if daily:
+        index = _get_daily_index(data, index_utc, adj_timezone)
+        if len(index) == len(df) - 1:
+            # a live_indice was removed
+            df = df.iloc[:-1]
+    elif adj_timezone:
+        tz = data["meta"]["exchangeTimezoneName"]
+        # localize and remove tz info
+        index = index_utc.tz_convert(tz).tz_localize(None)
+    else:
+        index = index_utc.tz_localize(None)  # remove UTC tz info
 
-def _events_to_dataframe(data, symbol, params, adj_timezone):
-    dataframes = []
-    for event in ["dividends", "splits"]:
-        try:
-            df = pd.DataFrame(data[symbol]["events"][event].values())
-            df.set_index("date", inplace=True)
-            df.index = pd.to_datetime(df.index, unit="s") + pd.Timedelta(
-                (data[symbol]["meta"]["gmtoffset"] * adj_timezone), "s"
-            )
-            if params["interval"][-1] not in ["m", "h"]:
-                df.index = df.index.date
-            if event == "dividends":
-                df.rename(columns={"amount": "dividends"}, inplace=True)
-            else:
-                df["splits"] = df["numerator"] / df["denominator"]
-                df = df[["splits"]]
-            dataframes.append(df)
-        except KeyError:
-            pass
-    return (
-        pd.merge(
-            dataframes[0], dataframes[1], how="outer", left_index=True, right_index=True
-        )
-        if len(dataframes) > 1
-        else dataframes[0]
-    )
+    df.index = index
+    return df

From 74530c0302fb1a019fd1a49e20d12f9dde0b8c9b Mon Sep 17 00:00:00 2001
From: Marcus Read <marcusaread@gmail.com>
Date: Thu, 24 Nov 2022 22:28:33 +0000
Subject: [PATCH 2/3] Add timezone info to `.history` return

Adds timezone info to:
- index of intraday data.
- any live indice of daily data.

Also, sets column order of return as ohlcv then any 'adjclose',
'dividends', 'splits'.
---
 yahooquery/utils/__init__.py | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/yahooquery/utils/__init__.py b/yahooquery/utils/__init__.py
index 31eff44..836ab7d 100644
--- a/yahooquery/utils/__init__.py
+++ b/yahooquery/utils/__init__.py
@@ -121,8 +121,8 @@ def _convert_to_timestamp(date=None, start=True):
 
 def _get_daily_index(data, index_utc, adj_timezone):
     # evalute if last indice represents a live interval
-    last_trade_secs = data["meta"]["regularMarketTime"] * 10**9
-    last_trade = pd.Timestamp(last_trade_secs, tz="UTC")
+    timestamp = data["meta"]["regularMarketTime"]
+    last_trade = pd.Timestamp.fromtimestamp(timestamp, tz="UTC")
     has_live_indice = index_utc[-1] >= last_trade - pd.Timedelta(2, "S")
     if has_live_indice:
         # remove it
@@ -130,8 +130,9 @@ def _get_daily_index(data, index_utc, adj_timezone):
         index_utc = index_utc[:-1]
         # evaluate if it should be put back later. If the close price for
         # the day is already included in the data, i.e. if the live indice
-        # is simply duplicating data represented in the prior row, then the 
-        # following will evaluate to False.
+        # is simply duplicating data represented in the prior row, then the
+        # following will evaluate to False (as live_indice will now be
+        # within one day of the prior indice)
         keep_live_indice = live_indice > index_utc[-1] + datetime.timedelta(1)
 
     tz = data["meta"]["exchangeTimezoneName"]
@@ -152,9 +153,7 @@ def _get_daily_index(data, index_utc, adj_timezone):
     index = pd.Index(index.date)
     if has_live_indice and keep_live_indice:
         live_indice = live_indice.astimezone(tz) if adj_timezone else live_indice
-        # do not keep tz info
-        live_indice = live_indice.tz_localize(None).to_pydatetime()
-        index = index.insert(len(index), live_indice)
+        index = index.insert(len(index), live_indice.to_pydatetime())
     return index
 
 
@@ -169,30 +168,33 @@ def _event_as_srs(event_data, event):
 
 def _history_dataframe(data, daily, adj_timezone=True):
     data_dict = data["indicators"]["quote"][0].copy()
+    cols = [
+        col for col in ("open", "high", "low", "close", "volume") if col in data_dict
+    ]
     if "adjclose" in data["indicators"]:
         data_dict["adjclose"] = data["indicators"]["adjclose"][0]["adjclose"]
+        cols.append("adjclose")
 
     if 'events' in data:
         for event, event_data in data["events"].items():
             if event not in ("dividends", "splits"):
                 continue
             data_dict[event] = _event_as_srs(event_data, event)
+            cols.append(event)
 
     df = pd.DataFrame(data_dict, index=data["timestamp"])  # align all on timestamps
     df.dropna(how="all", inplace=True)
+    df = df[cols]  # determine column order
 
-    index_utc = pd.to_datetime(df.index, unit="s", utc=True)
+    index = pd.to_datetime(df.index, unit="s", utc=True)
     if daily:
-        index = _get_daily_index(data, index_utc, adj_timezone)
+        index = _get_daily_index(data, index, adj_timezone)
         if len(index) == len(df) - 1:
             # a live_indice was removed
             df = df.iloc[:-1]
     elif adj_timezone:
         tz = data["meta"]["exchangeTimezoneName"]
-        # localize and remove tz info
-        index = index_utc.tz_convert(tz).tz_localize(None)
-    else:
-        index = index_utc.tz_localize(None)  # remove UTC tz info
+        index = index.tz_convert(tz)
 
     df.index = index
     return df

From 9d8fd98c6e9cc0da2c4f800f3d3260440744b86f Mon Sep 17 00:00:00 2001
From: Marcus Read <marcusaread@gmail.com>
Date: Thu, 24 Nov 2022 22:30:32 +0000
Subject: [PATCH 3/3] Add tests for `utils.__init__._history_dataframe`

Adds `TestHistoryDataframe` test class.
---
 tests/test_ticker.py         | 665 ++++++++++++++++++++++++++++++++++-
 yahooquery/utils/__init__.py |   8 +-
 2 files changed, 664 insertions(+), 9 deletions(-)

diff --git a/tests/test_ticker.py b/tests/test_ticker.py
index cbaf9aa..ac2fe35 100644
--- a/tests/test_ticker.py
+++ b/tests/test_ticker.py
@@ -1,10 +1,13 @@
-import pytest
-import itertools
+import datetime
 import os
+
+import itertools
+import pytest
 import pandas as pd
-from datetime import datetime
+from pandas.testing import assert_index_equal, assert_frame_equal, assert_series_equal
 
 from yahooquery import Ticker
+from yahooquery.utils.__init__ import _history_dataframe
 
 
 TICKERS = [
@@ -148,8 +151,8 @@ def test_p_get_financial_data(ticker):
 )
 def test_history(ticker, period, interval):
     assert isinstance(ticker.history(period, interval), pd.DataFrame)
-    
-    
+
+
 def test_dividend_history(ticker):
     df = ticker.dividend_history(start='1970-01-01')
     assert isinstance(df, pd.DataFrame)
@@ -160,7 +163,8 @@ def test_dividend_history(ticker):
     [
         (start, end)
         for start, end in zip(
-            [datetime(2019, 1, 1), "2019-01-01"], ["2019-12-30", datetime(2019, 12, 30)]
+            [datetime.datetime(2019, 1, 1), "2019-01-01"],
+            ["2019-12-30", datetime.datetime(2019, 12, 30)],
         )
     ],
 )
@@ -178,3 +182,652 @@ def test_history_bad_args(ticker, period, interval):
 
 def test_adj_ohlc(ticker):
     assert ticker.history(period="max", adj_ohlc=True) is not None
+
+
+class TestHistoryDataframe():
+    """Tests for `utils.__init__._history_dataframe` and dependencies."""
+
+    @pytest.fixture
+    def tz_us(self):
+        yield 'America/New_York'
+
+    @pytest.fixture
+    def tz_oz(self):
+        yield 'Australia/Sydney'
+
+    @pytest.fixture
+    def tz_hk(self):
+        yield 'Asia/Hong_Kong'
+
+    @pytest.fixture
+    def utc(self):
+        yield 'UTC'
+
+    @pytest.fixture
+    def timestamps_daily(self, utc, tz_oz, tz_us, tz_hk):
+        """Timestamps representing fictional open datetimes and expected mapped days.
+
+        Expected conversions to specific timezones explicitly declared and asserted.
+
+        Yields
+        -------
+        tuple[list[int], pd.Index, pd.Index, pd.Index, pd.Index]
+            [0] list[int]
+                Unix timestamps, i.e. format as used by Yahoo API. Timestamps represent
+                datetimes of session opens in terms of UTC.
+
+            [1] pd.Index dtype 'object', values as type datetime.date
+                Expected days that timestamps would map to if local timezone were
+                'America/New_York'. In this case all timestamps are expected to map to
+                the day of the date of the timestamp.
+
+            [2], [3] pd.Index dtype 'object', values as type datetime.date
+                Expected days that timestamps would map to if local timezone were
+                'Australia/Sydney' ([2]) or 'Asia/Hong_Kong' ([3]). In both cases all
+                timestamps are expected to map to the day after the timestamp's date.
+
+            [4] pd.Index dtype 'object', values as type datetime.date
+                Expected days that timestamps would map to if local timezone were 'UTC'.
+                The first timestamp is expected to map to day of the date of the
+                timestamp. All other timestamps are expected to map to the day after.
+        """
+        tss = [
+            1667568600,
+            1667831400,
+            1667917800,
+            1668004200,
+            1668090600,
+            1668177000,
+            1668436200,
+            1668522600,
+            1668609000,
+            1668695400,
+            1668781800,
+            1669041000,
+            1669127400,
+            1669213800,
+        ]
+
+        expected_utc = pd.DatetimeIndex(
+            [
+                '2022-11-04 13:30:00', '2022-11-07 14:30:00',
+                '2022-11-08 14:30:00', '2022-11-09 14:30:00',
+                '2022-11-10 14:30:00', '2022-11-11 14:30:00',
+                '2022-11-14 14:30:00', '2022-11-15 14:30:00',
+                '2022-11-16 14:30:00', '2022-11-17 14:30:00',
+                '2022-11-18 14:30:00', '2022-11-21 14:30:00',
+                '2022-11-22 14:30:00', '2022-11-23 14:30:00',
+            ],
+            tz=utc,
+        )
+        dti = pd.to_datetime(tss, unit="s")
+        dti_utc = dti.tz_localize(utc)
+        assert_index_equal(dti_utc, expected_utc)
+        expected_utc_days = pd.Index(
+            [
+                datetime.date(2022, 11, 4),
+                datetime.date(2022, 11, 8),
+                datetime.date(2022, 11, 9),
+                datetime.date(2022, 11, 10),
+                datetime.date(2022, 11, 11),
+                datetime.date(2022, 11, 12),
+                datetime.date(2022, 11, 15),
+                datetime.date(2022, 11, 16),
+                datetime.date(2022, 11, 17),
+                datetime.date(2022, 11, 18),
+                datetime.date(2022, 11, 19),
+                datetime.date(2022, 11, 22),
+                datetime.date(2022, 11, 23),
+                datetime.date(2022, 11, 24),
+            ]
+        )
+
+        expected_oz = pd.DatetimeIndex(
+            [
+                '2022-11-05 00:30:00', '2022-11-08 01:30:00',
+                '2022-11-09 01:30:00', '2022-11-10 01:30:00',
+                '2022-11-11 01:30:00', '2022-11-12 01:30:00',
+                '2022-11-15 01:30:00', '2022-11-16 01:30:00',
+                '2022-11-17 01:30:00', '2022-11-18 01:30:00',
+                '2022-11-19 01:30:00', '2022-11-22 01:30:00',
+                '2022-11-23 01:30:00', '2022-11-24 01:30:00',
+            ],
+            tz=tz_oz,
+        )
+        dti_oz = dti_utc.tz_convert(tz_oz)
+        assert_index_equal(dti_oz, expected_oz)
+        expected_oz_days = pd.Index(
+            [
+                datetime.date(2022, 11, 5),
+                datetime.date(2022, 11, 8),
+                datetime.date(2022, 11, 9),
+                datetime.date(2022, 11, 10),
+                datetime.date(2022, 11, 11),
+                datetime.date(2022, 11, 12),
+                datetime.date(2022, 11, 15),
+                datetime.date(2022, 11, 16),
+                datetime.date(2022, 11, 17),
+                datetime.date(2022, 11, 18),
+                datetime.date(2022, 11, 19),
+                datetime.date(2022, 11, 22),
+                datetime.date(2022, 11, 23),
+                datetime.date(2022, 11, 24),
+            ]
+        )
+        assert_index_equal(pd.Index(dti_oz.date), expected_oz_days)
+
+        expected_us = pd.DatetimeIndex(
+            [
+                '2022-11-04 09:30:00', '2022-11-07 09:30:00',
+                '2022-11-08 09:30:00', '2022-11-09 09:30:00',
+                '2022-11-10 09:30:00', '2022-11-11 09:30:00',
+                '2022-11-14 09:30:00', '2022-11-15 09:30:00',
+                '2022-11-16 09:30:00', '2022-11-17 09:30:00',
+                '2022-11-18 09:30:00', '2022-11-21 09:30:00',
+                '2022-11-22 09:30:00', '2022-11-23 09:30:00',
+            ],
+            tz=tz_us,
+        )
+        dti_us = dti_utc.tz_convert(tz_us)
+        assert_index_equal(dti_us, expected_us)
+        expected_us_days = pd.Index(
+            [
+                datetime.date(2022, 11, 4),
+                datetime.date(2022, 11, 7),
+                datetime.date(2022, 11, 8),
+                datetime.date(2022, 11, 9),
+                datetime.date(2022, 11, 10),
+                datetime.date(2022, 11, 11),
+                datetime.date(2022, 11, 14),
+                datetime.date(2022, 11, 15),
+                datetime.date(2022, 11, 16),
+                datetime.date(2022, 11, 17),
+                datetime.date(2022, 11, 18),
+                datetime.date(2022, 11, 21),
+                datetime.date(2022, 11, 22),
+                datetime.date(2022, 11, 23),
+            ]
+        )
+        assert_index_equal(pd.Index(dti_us.date), expected_us_days)
+
+        expected_hk = pd.DatetimeIndex(
+            [
+                '2022-11-04 21:30', '2022-11-07 22:30',
+                '2022-11-08 22:30', '2022-11-09 22:30',
+                '2022-11-10 22:30', '2022-11-11 22:30',
+                '2022-11-14 22:30', '2022-11-15 22:30',
+                '2022-11-16 22:30', '2022-11-17 22:30',
+                '2022-11-18 22:30', '2022-11-21 22:30',
+                '2022-11-22 22:30', '2022-11-23 22:30',
+            ],
+            tz=tz_hk,
+        )
+        dti_hk = dti_utc.tz_convert(tz_hk)
+        assert_index_equal(dti_hk, expected_hk)
+        expected_hk_days = expected_oz_days  # same, both should map to next day
+        assert_index_equal(
+            pd.Index(dti_hk.date + datetime.timedelta(1)), expected_hk_days
+        )
+
+        yield (
+            tss, expected_us_days, expected_oz_days, expected_hk_days, expected_utc_days
+        )
+
+    @pytest.fixture
+    def quote(self):
+        """Fictional mock OHLCV data for 14 datapoints.
+
+        Yields both unordered data and dictionary representing expected
+        order of return.
+        """
+        opens = list(range(2, 16))
+        lows = list(range(1, 15))
+        highs = list(range(4, 18))
+        closes = list(range(3, 17))
+        volumes = list(range(50, 64))
+        data = {
+            "volume": volumes,
+            "close": closes,
+            "open": opens,
+            "high": highs,
+            "low": lows,
+        }
+        expected = {
+            "open": opens,
+            "high": highs,
+            "low": lows,
+            "close": closes,
+            "volume": volumes,
+        }
+        yield data, expected
+
+    @pytest.fixture
+    def adjclose(self):
+        """Fictional mock adjclose data for 14 datapoints."""
+        yield [i + 0.25 for i in range(3, 17)]
+
+    @staticmethod
+    def get_dividends(tss):
+        """Get fictional mock dividends data for 2 timestamps of `tss`.
+
+        Returns
+        -------
+        tuple[dict[str, dict[str, float | int]], list[float]]
+            [0] dict[str, dict[str, float | int]]
+                Mock data for symbol_data["events"]["dividends"]. Data
+                includes dividends for two timestamps of `tss`.
+            [1] list[float]
+                Expected contents of dividends column of DataFrame created
+                for `tss` and with data that includes [0].
+        """
+        indices = (2, 8)
+        amount = 0.12
+        d = {str(tss[i]): {'amount': amount, 'date': tss[i]} for i in indices}
+        expected = [amount if i in indices else float('nan') for i in range(14)]
+        return d, expected
+
+    @pytest.fixture
+    def dividends_daily(self, timestamps_daily):
+        """Mock data and expected col values for daily dividends.
+
+        See `get_dividends.__doc__`
+        """
+        yield self.get_dividends(timestamps_daily[0])
+
+    @staticmethod
+    def get_splits(tss):
+        """Get fictional mock splits data for 1 timestamp of `tss`.
+
+        Returns
+        -------
+        tuple[dict[str, dict[str, int | str]], list[float]]
+            [0] dict[str, dict[str, int | str]]
+                Mock data for symbol_data["events"]["splits"]. Data
+                includes splits for one timestamp of `tss`.
+            [1] list[float]
+                Expected contents of splits column of DataFrame created
+                for `tss` and with data that includes [0].
+        """
+        indice = 11
+        ts = tss[indice]
+        d = {
+            str(ts): {
+                'data': ts,
+                'numerator': 3,
+                'denominator': 1,
+                'splitRatio': '3:1'
+            }
+        }
+        expected = [3 if i == indice else float('nan') for i in range(14)]
+        return d, expected
+
+    @pytest.fixture
+    def splits_daily(self, timestamps_daily):
+        """Mock data and expected col values for daily splits.
+
+        See `get_splits.__doc__`
+        """
+        yield self.get_splits(timestamps_daily[0])
+
+    @staticmethod
+    def build_mock_data(
+        tss, tz, quote, adjclose=None, splits=None, dividends=None, last_trade=None
+    ):
+        """Get mock data for a symbol from which to create dataframe.
+
+        Return can be passed as `data` parameter of `_history_dataframe`.
+        """
+        if last_trade is None:
+            last_trade = 1669237204
+            expected_ts = pd.Timestamp('2022-11-23 21:00:04')
+            assert pd.Timestamp.fromtimestamp(last_trade) == expected_ts
+        meta = {
+            'regularMarketTime': last_trade,
+            'exchangeTimezoneName': tz,
+        }
+
+        indicators = {"quote": [quote.copy()]}
+        if adjclose is not None:
+            indicators["adjclose"] = [{"adjclose": adjclose}]
+
+        events = {"fake_event": {'1667568600': {"fake_event_key": 66.6}}}
+        for key, event_data in zip(("dividends", "splits"), (dividends, splits)):
+            if event_data is None:
+                continue
+            events[key] = event_data
+
+        return dict(meta=meta, indicators=indicators, timestamp=tss, events=events)
+
+    @staticmethod
+    def create_expected(expected_index, quote, dividends, splits, adjclose=None):
+        """Create expected return from column parts."""
+        df = pd.DataFrame(quote, index=expected_index)
+        if adjclose is not None:
+            df["adjclose"] = adjclose
+        df["dividends"] = dividends
+        df["splits"] = splits
+        return df
+
+    @staticmethod
+    def verify_expected_daily_row_11(df, indice):
+        """Hard coded sanity check on specific row of expected dataframe."""
+        i = 11
+        expected = pd.Series(
+            dict(open=13, high=15, low=12, close=14, volume=61, adjclose=14.25),
+            name=indice,
+        )
+        assert_series_equal(df.iloc[i][:-2], expected)
+        assert pd.isna(df.iloc[i][-2])  # no dividends
+        assert df.iloc[i][-1] == 3  # splits
+        return df
+
+    @pytest.fixture
+    def expected_daily_utc(
+        self, timestamps_daily, quote, dividends_daily, splits_daily, adjclose
+    ):
+        """Expected return if timestamps interpreted with local tz as utc."""
+        df = self.create_expected(
+            timestamps_daily[4], quote[1], dividends_daily[1], splits_daily[1], adjclose
+        )
+        self.verify_expected_daily_row_11(df, datetime.date(2022, 11, 22))
+        yield df
+
+    @pytest.fixture
+    def expected_daily_us(
+        self, timestamps_daily, quote, dividends_daily, splits_daily, adjclose
+    ):
+        """Expected return if timestamps interpreted with local tz as us."""
+        df = self.create_expected(
+            timestamps_daily[1], quote[1], dividends_daily[1], splits_daily[1], adjclose
+        )
+        self.verify_expected_daily_row_11(df, datetime.date(2022, 11, 21))
+        yield df
+
+    @pytest.fixture
+    def expected_daily_us_bare(self, timestamps_daily, quote):
+        """As `expected_daily_us` with only ohlcv columns."""
+        df = pd.DataFrame(quote[1], index=timestamps_daily[1])
+        # Hard coded sanity check for specific row
+        i = 11
+        expected = pd.Series(
+            dict(open=13, high=15, low=12, close=14, volume=61),
+            name=datetime.date(2022, 11, 21),
+        )
+        assert_series_equal(df.iloc[i], expected)
+        yield df
+
+    @pytest.fixture
+    def expected_daily_oz(
+        self, timestamps_daily, quote, dividends_daily, splits_daily, adjclose
+    ):
+        """Expected return if timestamps interpreted with local tz as oz."""
+        df = self.create_expected(
+            timestamps_daily[2], quote[1], dividends_daily[1], splits_daily[1], adjclose
+        )
+        self.verify_expected_daily_row_11(df, datetime.date(2022, 11, 22))
+        yield df
+
+    @pytest.fixture
+    def expected_daily_hk(
+        self, timestamps_daily, quote, dividends_daily, splits_daily, adjclose
+    ):
+        """Expected return if timestamps interpreted with local tz as hk."""
+        df = self.create_expected(
+            timestamps_daily[3], quote[1], dividends_daily[1], splits_daily[1], adjclose
+        )
+        self.verify_expected_daily_row_11(df, datetime.date(2022, 11, 22))
+        yield df
+
+    def test_daily(
+        self,
+        timestamps_daily,
+        quote,
+        adjclose,
+        dividends_daily,
+        splits_daily,
+        expected_daily_utc,
+        expected_daily_us,
+        expected_daily_oz,
+        expected_daily_hk,
+        utc,
+        tz_us,
+        tz_oz,
+        tz_hk,
+    ):
+        """Test for expected returns for mock data reflecting a daily period."""
+
+        def f(data, adj_timezone):
+            return _history_dataframe(data, daily=True, adj_timezone=adj_timezone)
+
+        tss = timestamps_daily[0]
+        quote_, _ = quote
+        adjclose_ = adjclose
+        splits, _ = splits_daily
+        dividends, _ = dividends_daily
+
+        expecteds = (
+            expected_daily_utc, expected_daily_us, expected_daily_oz, expected_daily_hk
+        )
+        tzs = (utc, tz_us, tz_oz, tz_hk)
+        for expected, tz in zip(expecteds, tzs):
+            data = self.build_mock_data(tss, tz, quote_, adjclose_, splits, dividends)
+            for adj_timezone in (True, False):
+                # adj_timezone makes no difference as daily and there is no live indice
+                rtrn = f(data, adj_timezone=adj_timezone)
+                assert_frame_equal(rtrn, expected)
+
+        # check effect if there are no dividends and/or splits
+        expected = expected_daily_us
+        tz = tz_us
+        adj_timezone = False
+        # no dividends
+        dividends_srs = expected.pop("dividends")
+        data = self.build_mock_data(tss, tz, quote_, adjclose_, splits=splits)
+        rtrn = f(data, adj_timezone)
+        assert_frame_equal(rtrn, expected)
+        # no splits
+        expected.pop("splits")
+        expected["dividends"] = dividends_srs
+        data = self.build_mock_data(tss, tz, quote_, adjclose_, dividends=dividends)
+        rtrn = f(data, adj_timezone)
+        assert_frame_equal(rtrn, expected)
+        # neither dividends nor splits
+        expected.pop("dividends")
+        data = self.build_mock_data(tss, tz, quote_, adjclose_)
+        rtrn = f(data, adj_timezone)
+        assert_frame_equal(rtrn, expected)
+
+    def test_live_indice(
+        self, timestamps_daily, expected_daily_us_bare, tz_us, utc, quote
+    ):
+        """Test daily data with live indice."""
+        live_indice = 1669231860
+        expected_li_ts = pd.Timestamp("2022-11-23 19:31")
+        assert pd.Timestamp.fromtimestamp(live_indice) == expected_li_ts
+
+        tss, expected_days, *_ = timestamps_daily
+        tss = tss[:-1]
+        tss.append(live_indice)
+
+        expected_df = expected_daily_us_bare
+        data = self.build_mock_data(tss, tz_us, quote[0], last_trade=live_indice)
+
+        # verify live indice has utc timezone when adj_timezone False
+        rtrn = _history_dataframe(data, daily=True, adj_timezone=False)
+        expected_li = pd.Timestamp("2022-11-23 19:31", tz=utc).to_pydatetime()
+        expected_index = expected_days[:-1]
+        expected_index = expected_index.insert(len(expected_index), expected_li)
+        expected_df.index = expected_index
+        assert_frame_equal(rtrn, expected_df)
+
+        # verify live indice has local timezone when adj_timezone True
+        rtrn = _history_dataframe(data, daily=True, adj_timezone=True)
+        expected_li = pd.Timestamp("2022-11-23 14:31", tz=tz_us).to_pydatetime()
+        expected_index = expected_index[:-1].insert(len(expected_index)-1, expected_li)
+        expected_df.index = expected_index
+        assert_frame_equal(rtrn, expected_df)
+
+    def test_duplicate_live_indice(
+        self, timestamps_daily, expected_daily_us_bare, tz_us, quote
+    ):
+        """Test live indice removed if its day is already represented by prior row."""
+        live_indice = 1669237204
+        expected_li_ts = pd.Timestamp("2022-11-23 21:00:04")
+        assert pd.Timestamp(live_indice, unit="s") == expected_li_ts  # UTC, not local
+
+        tss = timestamps_daily[0]
+        # to get it all to fit to 14 indices, lose the first ts
+        tss = tss[1:]
+        tss.append(live_indice)
+
+        data = self.build_mock_data(tss, tz_us, quote[0], last_trade=live_indice)
+        rtrn = _history_dataframe(data, daily=True, adj_timezone=False)
+
+        # create expected
+        expected_template = expected_daily_us_bare
+        expected_index = expected_template.index[1:]
+        assert expected_index[-1] == datetime.date(2022, 11, 23)
+        # last row, live indice, expected to be removed as day already represented
+        expected_df = expected_template[:-1]
+        expected_df.index = expected_index
+        assert_frame_equal(rtrn, expected_df)
+
+    @pytest.fixture
+    def timestamps_intraday(self, utc):
+        """Timestamps representing fictional datetimes and expected mapped indices.
+
+        Timestamps cover two days with change in DST observance.
+
+        Yields
+        ------
+        tuple[list[int], pd.DatetimeIndex]
+            [0] list[int]
+                Unix timestamps, i.e. format as used by Yahoo API. Timestamps represent
+                datetimes of hourly indices in terms of UTC.
+
+            [1] pd.DatetimeIndex dtype 'datetime64[ns, UTC]'
+                Expected indices that timestamps would map to if local timezone were
+                'UTC'.
+        """
+        tss = [
+            1667568600,
+            1667572200,
+            1667575800,
+            1667579400,
+            1667583000,
+            1667586600,
+            1667590200,
+            1667831400,
+            1667835000,
+            1667838600,
+            1667842200,
+            1667845800,
+            1667849400,
+            1667853000,
+        ]
+
+        expected_index_utc = pd.DatetimeIndex(
+            [
+                '2022-11-04 13:30:00', '2022-11-04 14:30:00',
+                '2022-11-04 15:30:00', '2022-11-04 16:30:00',
+                '2022-11-04 17:30:00', '2022-11-04 18:30:00',
+                '2022-11-04 19:30:00', '2022-11-07 14:30:00',
+                '2022-11-07 15:30:00', '2022-11-07 16:30:00',
+                '2022-11-07 17:30:00', '2022-11-07 18:30:00',
+                '2022-11-07 19:30:00', '2022-11-07 20:30:00'
+            ],
+            tz=utc,
+        )
+        dti = pd.to_datetime(tss, unit="s")
+        dti_utc = dti.tz_localize(utc)
+        assert_index_equal(dti_utc, expected_index_utc)
+
+        yield tss, expected_index_utc
+
+    @pytest.fixture
+    def dividends_intraday(self, timestamps_intraday):
+        """Mock dividends data [0] and expected col values [1] for intraday timestamps.
+
+        The Yahoo API attaches any dividends to the first intraday indice
+        of each session. This mock does not respect this alignment, which
+        is inconsequential for the test purposes.
+
+        See `get_dividends.__doc__`.
+        """
+        yield self.get_dividends(timestamps_intraday[0])
+
+    @pytest.fixture
+    def splits_intraday(self, timestamps_intraday):
+        """Mock splits data [0] and expected col values [1] for intraday timestamps.
+
+        The Yahoo API presumably attaches any splits to the first intraday
+        indice of each session (TODO confirm). This mock does not respect this
+        alignment, which is inconsequential for the test purposes.
+
+        See `get_splits.__doc__`.
+        """
+        yield self.get_splits(timestamps_intraday[0])
+
+    @pytest.fixture
+    def expected_intraday(
+        self, timestamps_intraday, quote, dividends_intraday, splits_intraday
+    ):
+        """Expected `_history_dataframe` return for intraday timestamps, UTC index."""
+        _, expected_utc = timestamps_intraday
+        df = self.create_expected(
+            expected_utc, quote[1], dividends_intraday[1], splits_intraday[1]
+        )
+        # sanity check a specific row (explicit .iloc: positional `[]` is deprecated)
+        row = df.iloc[8]
+        expected = pd.Series(
+            dict(open=10, high=12, low=9, close=11, volume=58, dividends=0.12),
+            name=pd.Timestamp("2022-11-7 15:30", tz="UTC"),
+        )
+        assert_series_equal(row.iloc[:-1], expected)
+        assert pd.isna(row.iloc[-1])
+        yield df
+
+    def test_intraday(
+        self,
+        timestamps_intraday,
+        tz_us,
+        quote,
+        splits_intraday,
+        dividends_intraday,
+        expected_intraday,
+    ):
+        """Test for expected returns for mock data reflecting an intraday period."""
+
+        def f(data, adj_timezone):
+            return _history_dataframe(data, daily=False, adj_timezone=adj_timezone)
+
+        tz = tz_us
+        tss, _ = timestamps_intraday
+        quote_, _ = quote
+        splits, _ = splits_intraday
+        dividends, _ = dividends_intraday
+
+        data = self.build_mock_data(tss, tz, quote_, splits=splits, dividends=dividends)
+        rtrn = f(data, adj_timezone=False)
+        expected = expected_intraday
+        assert_frame_equal(rtrn, expected)
+        rtrn = f(data, adj_timezone=True)
+        expected.index = expected.index.tz_convert(tz)
+        assert_frame_equal(rtrn, expected)
+
+        # no dividends: 'dividends' column expected to be absent from return
+        dividends_srs = expected.pop("dividends")
+        data = self.build_mock_data(tss, tz, quote_, splits=splits)
+        rtrn = f(data, adj_timezone=True)
+        assert_frame_equal(rtrn, expected)
+        # no splits: 'splits' column expected to be absent, 'dividends' restored
+        expected.pop("splits")
+        expected["dividends"] = dividends_srs
+        data = self.build_mock_data(tss, tz, quote_, dividends=dividends)
+        rtrn = f(data, adj_timezone=True)
+        assert_frame_equal(rtrn, expected)
+        # neither dividends nor splits
+        expected.pop("dividends")
+        data = self.build_mock_data(tss, tz, quote_)
+        rtrn = f(data, adj_timezone=True)
+        assert_frame_equal(rtrn, expected)
diff --git a/yahooquery/utils/__init__.py b/yahooquery/utils/__init__.py
index 836ab7d..27af52c 100644
--- a/yahooquery/utils/__init__.py
+++ b/yahooquery/utils/__init__.py
@@ -122,18 +122,20 @@ def _convert_to_timestamp(date=None, start=True):
 def _get_daily_index(data, index_utc, adj_timezone):
     # evalute if last indice represents a live interval
     timestamp = data["meta"]["regularMarketTime"]
-    last_trade = pd.Timestamp.fromtimestamp(timestamp, tz="UTC")
+    last_trade = pd.Timestamp.fromtimestamp(timestamp)
+    last_trade = last_trade.tz_localize("UTC")
     has_live_indice = index_utc[-1] >= last_trade - pd.Timedelta(2, "S")
     if has_live_indice:
         # remove it
         live_indice = index_utc[-1]
         index_utc = index_utc[:-1]
+        ONE_DAY = datetime.timedelta(1)
         # evaluate if it should be put back later. If the close price for
         # the day is already included in the data, i.e. if the live indice
         # is simply duplicating data represented in the prior row, then the
         # following will evaluate to False (as live_indice will now be
         # within one day of the prior indice)
-        keep_live_indice = live_indice > index_utc[-1] + datetime.timedelta(1)
+        keep_live_indice = index_utc.empty or live_indice > index_utc[-1] + ONE_DAY
 
     tz = data["meta"]["exchangeTimezoneName"]
     index_local = index_utc.tz_convert(tz)
@@ -187,7 +189,7 @@ def _history_dataframe(data, daily, adj_timezone=True):
     df = df[cols]  # determine column order
 
     index = pd.to_datetime(df.index, unit="s", utc=True)
-    if daily:
+    if daily and not df.empty:
         index = _get_daily_index(data, index, adj_timezone)
         if len(index) == len(df) - 1:
             # a live_indice was removed