data.py.patch

*** ~/.virtualenvs/default/lib/python2.7/site-packages/pandas/io/data.py	2013-05-21 00:00:55.000000000 +0300
--- ~/.virtualenvs/prometheus/lib/python2.7/site-packages/pandas/io/data.py	2013-05-21 23:54:20.000000000 +0300
***************
*** 18,24 ****


  def DataReader(name, data_source=None, start=None, end=None,
!                retry_count=3, pause=0):
      """
      Imports data from a number of online sources.

--- 18,24 ----


  def DataReader(name, data_source=None, start=None, end=None,
!                retry_count=3, pause=0, dividends=False, splits=False):
      """
      Imports data from a number of online sources.

***************
*** 55,61 ****

      if(data_source == "yahoo"):
          return get_data_yahoo(name=name, start=start, end=end,
!                               retry_count=retry_count, pause=pause)
      elif(data_source == "fred"):
          return get_data_fred(name=name, start=start, end=end)
      elif(data_source == "famafrench"):
--- 55,62 ----

      if(data_source == "yahoo"):
          return get_data_yahoo(name=name, start=start, end=end,
!                               retry_count=retry_count, pause=pause,
!                               dividends=dividends, splits=splits)
      elif(data_source == "fred"):
          return get_data_fred(name=name, start=start, end=end)
      elif(data_source == "famafrench"):
***************
*** 117,123 ****
      return DataFrame(data, index=idx)


! def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0):
      """
      Get historical data for the given name from yahoo.
      Date format is datetime
--- 118,125 ----
      return DataFrame(data, index=idx)


! def get_data_yahoo(name=None, start=None, end=None, retry_count=3, pause=0,
!                    dividends=False, splits=False):
      """
      Get historical data for the given name from yahoo.
      Date format is datetime
***************
*** 130,136 ****
          print "Need to provide a name"
          return None

!     yahoo_URL = 'http://ichart.yahoo.com/table.csv?'

      url = yahoo_URL + 's=%s' % name + \
          '&a=%s' % (start.month - 1) + \
--- 132,144 ----
          print "Need to provide a name"
          return None

!     # Yahoo! Finance doesn't show splits with 'table.csv' setting
!     if splits:
!         url_type = 'x'
!     else:
!         url_type = 'table.csv'
!
!     yahoo_URL = 'http://ichart.yahoo.com/%s?' % url_type

      url = yahoo_URL + 's=%s' % name + \
          '&a=%s' % (start.month - 1) + \
***************
*** 139,145 ****
          '&d=%s' % (end.month - 1) + \
          '&e=%s' % end.day + \
          '&f=%s' % end.year + \
!         '&g=d' + \
          '&ignore=.csv'

      for _ in range(retry_count):
--- 147,153 ----
          '&d=%s' % (end.month - 1) + \
          '&e=%s' % end.day + \
          '&f=%s' % end.year + \
!         '&g=%s' % ('v' if dividends or splits else 'd') + \
          '&ignore=.csv'

      for _ in range(retry_count):
***************
*** 154,159 ****
--- 162,240 ----
              if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                  rs = rs[:-1]

+             rs.rename(columns={'Dividends': 'Values'}, inplace=True)
+             rs_splits, rs_dividends = DataFrame(), DataFrame()
+
+             # check to see if there is split data
+             try:
+                 has_splits = rs.xs('SPLIT')['Values'].any()
+             except AttributeError:
+                 has_splits = rs.xs('SPLIT')['Values']
+             except KeyError:
+                 # There is no split data
+                 has_splits = False
+
+             split_format = splits or has_splits
+
+             if (splits and has_splits and hasattr(rs.xs('SPLIT'), 'pivot')):
+                 # Yahoo! Finance returns additional info like 'STARTDATE' and
+                 # 'ENDDATE'. This selects only the data we want
+                 rs_splits = rs.xs('SPLIT').reset_index()
+
+                 # If Yahoo! Finance returns one value, the result of '.xs' will
+                 # be a Series instead of a DataFrame
+             elif (splits and has_splits):
+                 d = {'index': ['SPLIT'],
+                      'Date': [rs.xs('SPLIT')['Date']],
+                      'Values': [rs.xs('SPLIT')['Values']]}
+
+                 rs_splits = DataFrame(d)
+
+             if dividends and split_format:
+                 # check to see if there is dividend data
+                 try:
+                     has_dividends = rs.xs('DIVIDEND')['Values'].any()
+                 except AttributeError:
+                     has_dividends = rs.xs('DIVIDEND')['Values']
+                 except KeyError:
+                     # There is no dividend data
+                     has_dividends = False
+
+                 if (has_dividends and hasattr(rs.xs('DIVIDEND'), 'pivot')):
+                     rs_dividends = rs.xs('DIVIDEND').reset_index()
+                 elif has_dividends:
+                     d = {'index': ['DIVIDEND'],
+                          'Date': [rs.xs('DIVIDEND')['Date']],
+                          'Values': [rs.xs('DIVIDEND')['Values']]}
+
+                     rs_dividends = DataFrame(d)
+
+             elif dividends:
+                 # if there are no splits there won't be a 'DIVIDEND' section
+                 has_dividends = len(rs) > 0
+
+                 if has_dividends:
+                     rs_dividends = rs
+
+             rs = concat([rs_splits, rs_dividends])
+
+             if (split_format and not rs.empty):
+                 # Dates in split format are yyyymmdd so convert to yyyy-mm-dd
+                 rs.Date = rs.Date.apply(lambda x: str(x))
+                 rs.Date = rs.Date.apply(
+                     lambda x: '%s-%s-%s' % (x[:4], x[4:6], x[6:]))
+
+                 # pivot DataFrame to match format of a normal query
+                 rs = rs.pivot(index='Date', columns='index', values='Values')
+
+             if (splits and has_splits):
+                 rs.rename(columns={'SPLIT': 'Splits'}, inplace=True)
+
+             if (dividends and has_dividends and split_format):
+                 rs.rename(columns={'DIVIDEND': 'Dividends'}, inplace=True)
+             elif (dividends and has_dividends):
+             	rs.rename(columns={'Values': 'Dividends'}, inplace=True)
+
              return rs

          time.sleep(pause)