-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrapeLines.py
67 lines (55 loc) · 2.07 KB
/
scrapeLines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
__author__ = 'tanyacashorali'
import urllib2
import time
import re
import random
import datetime
import os
import sqlite3
import pandas as pd
from urlparse import urlparse
from bs4 import BeautifulSoup as bs
from datetime import date
# Open the SQLite database that the insert loops further down write to.
db = sqlite3.connect('/home/ec2-user/sports/sports.db')

# Wait a random 1-20 seconds before requesting the page.
# NOTE(review): presumably jitter so scheduled runs do not hit the site at a
# fixed time - confirm.
delay_seconds = random.randint(1, 20)
time.sleep(delay_seconds)

# Download the odds page. The response object is closed explicitly as soon
# as its body has been read so the underlying socket is not left open for
# the rest of the run.
url = urllib2.urlopen('http://www.covers.com/odds/basketball/college-basketball-odds.aspx')
try:
    page_html = url.read()
finally:
    url.close()

soup = bs(page_html, ['fast', 'lxml'])
tables = soup.findAll('table')

# The third <table> on the page is the one searched for team and odds cells.
# This is a positional lookup: it raises IndexError if the page ever has
# fewer than three tables.
odds_table = tables[2]
away = odds_table.findAll('div', {'class':'team_away'})
home = odds_table.findAll('div', {'class':'team_home'})
covers = odds_table.findAll('td', {'class':'covers_top'})

# Today's date formatted as MM/DD/YYYY; stored in the game_date column.
today = date.today().strftime("%m/%d/%Y")
# Patterns are compiled once, outside the per-cell work, and written as raw
# strings so the backslash escapes reach the regex engine untouched.
# Line: a number such as "143" or "143.5", otherwise a run of word characters.
_LINE_RE = re.compile(r'\d+\.*\d*|\w+')
# Spread: a signed number such as "-3.5" or "+7", otherwise a run of word
# characters. The sign class is [-+]; the earlier [-|+] also accepted a
# literal "|" as a sign, which was not intended.
_SPREAD_RE = re.compile(r'[-+]\d+\.*\d*|\w+')


def _first_token(cell, div_class, pattern):
    """Return the leftmost `pattern` match inside one odds cell.

    Looks up the first <div class=div_class> under `cell` and searches its
    text. Returns the matched string, or None when the div is absent or its
    text contains nothing the pattern matches, so that one blank cell does
    not abort the whole run with an AttributeError.
    """
    div = cell.find('div', {'class': div_class})
    if div is None:
        return None
    match = pattern.search(div.text)
    if match is None:
        return None
    return match.group(0)


# One entry per `covers` cell, in page order. Missing values are kept as
# None (rather than skipped) so both lists stay index-aligned with the cells.
lines = [_first_token(cell, 'line_top', _LINE_RE) for cell in covers]
spreads = [_first_token(cell, 'covers_bottom', _SPREAD_RE) for cell in covers]
# Each team div is expected to carry the team name in a <strong> child;
# divs without one are dropped before the text is read.
away_strong_tags = [div.strong for div in away]
home_strong_tags = [div.strong for div in home]
away_teams = [tag.text for tag in away_strong_tags if tag]
# Home team names have every '@' character stripped out.
home_teams = [tag.text.replace('@', '') for tag in home_strong_tags if tag]

# Timestamp of this scrape as a string; stored in the game_time column.
scrape_moment = datetime.datetime.now()
date_time = str(scrape_moment)
# Store one NCAALines row per scraped game. The away-team list drives the
# loop; the home team, line and spread are taken from the same position in
# their own lists.
insert_line_sql = '''INSERT INTO NCAALines(away_team, home_team, line, spread, game_date, game_time) VALUES(?,?,?,?,?,?)'''
for game_idx, away_name in enumerate(away_teams):
    row = (away_name, home_teams[game_idx], lines[game_idx],
           spreads[game_idx], today, date_time)
    try:
        # Each row gets its own transaction, so a rejected row does not
        # affect the rows around it.
        with db:
            db.execute(insert_line_sql, row)
            db.commit()
    except sqlite3.IntegrityError:
        # The table's constraints rejected the row.
        print('Record Exists')
# Register every scraped team name in NCAAteamlookup with an empty
# espn_abbr, then close the database.
#
# Each name is inserted in its own transaction with its own error handling.
# Previously the away and home inserts shared one transaction and one try
# block, so an already-known away team caused the home team to be skipped,
# and an already-known home team rolled back a newly inserted away team.
#
# Only sqlite3.IntegrityError is treated as "already there", matching the
# NCAALines insert loop. Any other failure now propagates instead of being
# reported as 'Record Exists'.
insert_team_sql = '''INSERT INTO NCAAteamlookup(covers_team, espn_abbr) VALUES (?,?)'''
try:
    for away_name, home_name in zip(away_teams, home_teams):
        # Keep the original insertion order: away first, then home.
        for team_name in (away_name, home_name):
            try:
                with db:
                    db.execute(insert_team_sql, (team_name, None))
            except sqlite3.IntegrityError:
                print('Record Exists')
finally:
    # Close the connection even if an unexpected error stops the loop.
    db.close()