-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathdaily_quote_inserter.py
214 lines (168 loc) · 7.96 KB
/
daily_quote_inserter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
""" Inserts Daily Quote Data into Mongodb
Author : Manohar Kuse <[email protected]>
"""
import json
from collections import OrderedDict
import sys
import time
import pymongo
# import datetime
import uuid
import socket
import os
from datetime import datetime
import pprint
import TerminalColors
tcol = TerminalColors.bcolors()
import argparse
from stockspotter.db.SourceYahoo import SourceYahoo
from stockspotter.lister.TickerLister import TickerLister
def __write( msg ):
# print msg
fp_logfile.write( msg +'\n')
def log_write( msg ):
fp_logfile.write( msg+'\n' )
def log_server( msg ):
if fp_logserver is not None:
fp_logserver.sendall( '[%s:%6d:%s] ' %(__file__, os.getpid(), str(datetime.now())) + msg +'\n' )
# fp_logfile.write( msg+'\n' )
#----- Commandline Parsing ---#
parser = argparse.ArgumentParser()
parser.add_argument( '-ld', '--lists_db_dir', required=True, help='Specify lists DB directory (eg. equities_db/lists/)' )
parser.add_argument( '-v', '--verbosity', type=int, default=0, help='Verbosity 0 is quite. 5 is most verbose' )
parser.add_argument( '-db', '--quotes_data_dir', required=True, help='Specify quotes directory (eg. equities_db/data_quotes_20170716/)' )
parser.add_argument( '--logfile', default=None, help='Logging file name' )
parser.add_argument( '--logserver', default=None, help='Logging server. eg. localhost:9276' )
parser.add_argument( '--mongodb', default=None, help='Specify mongodb instance. If not specified will use localhost:27017. eg mongodb://localhost:27017.' )
# Bourse
parser.add_argument( '--xhkex', default=False, action='store_true', help='List all HKEX Stocks' )
parser.add_argument( '--xbse', default=False, action='store_true', help='List all Bombay Stock Ex (BSE) Stocks' )
parser.add_argument( '--xnse', default=False, action='store_true', help='List all National Stock Ex India (NSE) Stocks' )
parser.add_argument( '--xnyse', default=False, action='store_true', help='List all New York Stock Exchange (NYSE) Stocks' )
parser.add_argument( '--xnasdaq', default=False, action='store_true', help='List all NASDAQ Stocks' )
parser.add_argument( '--xamex', default=False, action='store_true', help='List all Amex Stocks' )
parser.add_argument( '--xtyo', default=False, action='store_true', help='List all Japan Exchange (Tokyo) Stocks' )
args = parser.parse_args()
if args.logfile is None:
fp_logfile = sys.stdout
else:
fp_logfile = open( args.logfile, 'w' )
print 'LOGFILE NAME : ', args.logfile
__write( '```\n' + ' '.join( sys.argv ) + '\n```' )
if args.logserver is None:
fp_logserver = None
print 'LOGSERVER : ', args.logserver
else:
print 'LOGSERVER : ', args.logserver
try:
_host = args.logserver.split(':')[0]
_port = int(args.logserver.split(':')[1])
fp_logserver = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
fp_logserver.connect( (_host,_port) )
# fp_logserver.sendall( 'Connected!')
log_server( 'Connected')
except:
print tcol.FAIL, 'Cannot connect to logserver', tcol.ENDC
print 'Start a forked logserver like:'
print '$ socat TCP4-LISTEN:9595,fork STDOUT'
if args.lists_db_dir:
__write( tcol.HEADER + 'lists_db_dir : '+ args.lists_db_dir+ tcol.ENDC )
if args.quotes_data_dir:
__write( tcol.HEADER + 'quotes_data_dir : '+ args.quotes_data_dir+ tcol.ENDC )
# if args.verbosity:
__write( tcol.HEADER + 'verbosity : '+ str(args.verbosity) + tcol.ENDC )
# ----------- MAIN -------------#
# Setup DB access and file accesses
if args.mongodb is None:
__write( tcol.HEADER + 'Mongo-Server: ' + 'mongodb://localhost:27017/' + tcol.ENDC )
client = pymongo.MongoClient()
else:
__write( tcol.HEADER + 'Mongo-Server: ' + args.mongodb + tcol.ENDC )
client = pymongo.MongoClient(args.mongodb)
pprint.pprint( client.server_info() ) # The process will fail if cannot connect to mongodb
db = client.sun_dance.stock_quotes #mongodb collection
# # Lister
# # lister = TickerLister( 'equities_db/lists/' )
# lister = TickerLister( args.lists_db_dir )
# full_list = []
# full_list += lister.list_full_hkex( use_cached=True)#[520:]
# # full_list += lister.list_full_bse( use_cached=True )#[1500:]
# full_list += lister.list_full_nse( use_cached=True )
# Get List
lister = TickerLister( args.lists_db_dir )
full_list = []
n=3
__write( tcol.HEADER+ ' : Exchanges :'+ tcol.ENDC )
if args.xhkex:
__write( tcol.HEADER+ '\t(HKEX) Hong Kong Stock Exchange'+ tcol.ENDC )
full_list += lister.list_full_hkex( use_cached=True)#[0:n]
if args.xbse:
__write( tcol.HEADER+ '\t(BSE) Bombay Stock Exchange'+ tcol.ENDC )
full_list += lister.list_full_bse( use_cached=True )#[0:n]
if args.xnse:
__write( tcol.HEADER+ '\t(NSE) National Stock Exchange of India'+ tcol.ENDC )
full_list += lister.list_full_nse( use_cached=True )#[0:n]
if args.xnyse:
__write( tcol.HEADER+ '\t(NYSE) New York Stock Exchange'+ tcol.ENDC )
full_list += lister.list_full_nyse( use_cached=True )#[0:n]
if args.xnasdaq:
__write( tcol.HEADER+ '\t(NASDAQ) NASDAQ, USA'+ tcol.ENDC )
full_list += lister.list_full_nasdaq( use_cached=True )#[0:n]
if args.xamex:
__write( tcol.HEADER+ '\t(AMEX) American Stock Exchange'+ tcol.ENDC )
full_list += lister.list_full_amex( use_cached=True )#[0:n]
if args.xtyo:
__write( tcol.HEADER+ '\t(TYO) Japan Exchange Group, Tokyo'+ tcol.ENDC )
full_list += lister.list_full_tyo( use_cached=True )#[0:n]
# Loop on List
# db_prefix = 'equities_db/data_quotes_20170716/'
db_prefix = args.quotes_data_dir
startTimeTotal = time.time()
for i,l in enumerate(full_list):
startTime = time.time()
folder = db_prefix+'/'+l.ticker+'/'
__write( tcol.OKGREEN+ str(i)+' of %d ' %(len(full_list))+ str(l)+ tcol.ENDC )
log_server( tcol.OKGREEN+ str(i)+' of %d ' %(len(full_list))+ str(l)+ tcol.ENDC )
s_yahoo = SourceYahoo( l.ticker, folder, args.verbosity, fp_logfile )
q_json_obj = s_yahoo.load_quote()
if q_json_obj is None:
__write( tcol.FAIL+ 'No Quote available for '+ l.ticker+ tcol.ENDC )
continue
#Insert
daily_list = q_json_obj['quotes_daily']
failed_inserts = 0
for date_inst in daily_list.keys():
adj_close = daily_list[date_inst]['close_adj']
digest = uuid.uuid3( uuid.NAMESPACE_DNS, str(l.ticker)+str(date_inst)+str(adj_close) )
#digest is made of ticker, date and close_adj. so if you try and insert the same data
# again it will fail, but if there is a split there will be a duplicate entry
insert_query = {}
insert_query['id'] = str(digest)
insert_query['ticker'] = l.ticker
insert_query['inserted_on'] = datetime.now()
try:
insert_query['datetime'] = datetime.strptime( date_inst, '%Y-%m-%d')
except ValueError:
__write( 'ValueError, date contains unparsed things. OK to ignore.'+ str(insert_query) )
for attr in daily_list[date_inst].keys(): #attr will usually be close, close_adj, open, high, low, volume
insert_query[attr] = daily_list[date_inst][attr]
try:
db.insert( insert_query )
except pymongo.errors.DuplicateKeyError:
#'Duplicate Keys error'
failed_inserts += 1
# pass
break;
except:
failed_inserts += 1
#TODO: Consider break here. as soon as you start getting DuplicateKeyErrors means that previous data already exists. Probably no point looking ahead
try:
__write( 'Dates : '+ daily_list.keys()[-1]+ '-'+ daily_list.keys()[0]+ ',', )
__write( 'nPoints : %4d, Failed Inserts : %4d' %( len(daily_list.keys()) , failed_inserts ) )
except:
__write( '[Error]daily_list was possibly empty')
__write( 'Time taken for %s : %4.2fs' %(l.ticker, time.time() - startTime ) )
__write( 'Total Time taken : %4.2fs' %(time.time() - startTimeTotal) )
__write( 'Finished on '+ str(time.ctime()) )
log_server( 'Total Time taken : %4.2fs' %(time.time() - startTimeTotal) )
log_server( 'Finished on '+ str(time.ctime()) )