This repository has been archived by the owner on Jan 18, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
trips-62.py
153 lines (125 loc) · 5.06 KB
/
trips-62.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 29 20:12:40 2013
@author: elwillow
"""
from BeautifulSoup import BeautifulSoup
import urllib2
import json
import time
import logging
# When True, use the embedded route-62 sample below instead of the saved
# JSON file (working-data/route-week.json).
LOCAL = True
# Logging
# Log to both a file and the console so runs are recorded and visible live.
logger = logging.getLogger("stops")
logger.setLevel(logging.INFO)
file_log = logging.FileHandler("trip-test.log")
logger.addHandler(file_log)
std_log = logging.StreamHandler()
logger.addHandler(std_log)
# Prettify log
# Timestamped format applied to the file handler only; the console handler
# keeps the bare message.
formatter = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s:%(message)s')
file_log.setFormatter(formatter)
# require, in order: stop, route number + route direction and date.
URL_SHED="http://m.sto.ca/fr/horaires/resultats/?action=getCompletePassingTimes&stop=:%s&route=%s&date=%s"
# requires, in order: date, time and route string.
URL_STOPS="http://m.sto.ca/fr/horaires/?action=getStops&date=%s&time=%s&route=%s"
# Query date (YYYY-MM-DD) and already-URL-encoded time ("12:00 AM") used
# for every request.
DATE="2013-10-30"
TIME = "12%3A00+AM"
def stoUrl(s):
    """Percent-encode *s* for use as an STO request URL.

    The string is UTF-8 encoded first, and the characters ":/;?&=%" are
    left untouched so the URL structure survives quoting.  Apostrophes
    end up double-encoded ("%27" -> "%2527") because the site expects
    them that way -- don't ask about the replace.
    """
    quoted = urllib2.quote(s.encode("utf8"), ":/;?&=%")
    return quoted.replace("%27", "%2527")
# For the purpose of testing, the 62 only have 19 stops according to the date pulled from the site.
if LOCAL:
    logger.info("Loading route 62 for testing")
    # Embedded sample: one route-62 record mimicking the structure the
    # site returns (identifier, public number, direction, internal indexes).
    routes = json.loads("""[{
"Identifiers": { "string": ":262"},
"PublicIdentifier": "62",
"Description": "GR\u00c9BER nord",
"Direction": "South",
"DirectionName": "STATION LA GAPPE via GR\u00c9BER",
"ServiceMode": "Bus",
"ServiceType": "Regular",
"Site": { "Identifier": ":","Name": "STO"},
"InternalIndexes": { "int": [1891, 1143, 2662, 1658, 2048, 3007, 3467, 150, 918, 533 ,3017, 1187, 4647, 3757, 621, 1212, 4398, 1688, 2818, 2774, 2118, 3160, 4321, 11928] },
"InternalIndexes2": { "int": [1891, 3467, 4398, 2774, 11928] }
}]""")
else:
    # Full route list previously scraped to disk.
    routes = json.load(open("working-data/route-week.json"))
logger.info("Loading stops list")
for route in routes:
    # Process one route/direction: fetch its stop list, then the complete
    # passing times for every stop on that route.
    logger.info("Processing route %s %s %s",route["PublicIdentifier"], route["Description"], route["Direction"])
    # The site identifies a route as "<public number>_<direction name>".
    routeString = "%s_%s" % (route["PublicIdentifier"], route["DirectionName"])
    stopsUrl = URL_STOPS % (DATE, TIME, routeString)
    logger.debug("Stop URL: %s", stopsUrl)
    u = urllib2.urlopen(stoUrl(stopsUrl))
    # A malformed URL (or site downtime) redirects to the maintenance page.
    if u.geturl() == "http://m.sto.ca/fr/maintenance/":
        logger.warning("Site maintenance or something broke...")
        exit()
    rawStops = u.read()
    logger.info("Loading JSON for the stop list")
    routeStops = json.loads(rawStops)
    # We have the stops for that route, now build the trip schedule.
    # routeTimes is a 2-dimensional array: first level is the stop,
    # second is the passage time.  routeInfos holds one info dict per stop.
    routeTimes = []
    routeInfos = []
    for stop in routeStops:
        # Building trips data.  Stop identifiers come back as ":NNN";
        # strip the leading colon.
        id = stop['Identifier'].replace(':','')
        infoClean = {"id": id}
        timesClean = []
        logger.info("Looking up stop id %s", id)
        infobusUrl = URL_SHED % (id, routeString,DATE)
        logger.debug("Infobus URL: %s", infobusUrl)
        s = urllib2.urlopen(stoUrl(infobusUrl))
        # BUG FIX: the original tested u.geturl() -- the already-consumed
        # stop-list response -- so a maintenance redirect on this schedule
        # request was never detected.  Test the response just opened.
        if s.geturl() == "http://m.sto.ca/fr/maintenance/":
            # In case something broke, quit
            logger.warning("Site maintenance or something broke...")
            exit()
        logger.debug("HTML parsing")
        infoSoup = BeautifulSoup(s.read())
        logger.debug("Building timetable")
        times = infoSoup.findAll("div", attrs={"class" : "timeHoraire"})
        # Clean the result and add it to the main array
        for t in times:
            timesClean.append(t.contents[0])
        routeTimes.append(timesClean)
        logger.debug("Building info")
        infoP = infoSoup.find("div", attrs={"class":"horaires-results"}).findAll("p")
        infoClean["stop"] = infoP[1].contents[1].strip()
        # A fifth <p> is only present when the stop has an Infobus number.
        if len(infoP) == 5:
            infoClean["infobus"] = infoP[2].contents[1].strip()
        else:
            infoClean["infobus"] = "None"
        routeInfos.append(infoClean)
        logger.info("Stop complete (%s)", infoClean["stop"])
logger.info("Process complete")
print "#############################"
for r in routeInfos:
print "%(stop)s (#%(infobus)s)," % r,
print ""
for i in xrange(len(routeTimes)):
for t in routeTimes:
print "%s," % (t[i], ),
print ""
#logger.info("## Parsing page")
#
#stopSoup = BeautifulSoup(rawStop)
#times = stopSoup.findAll("div", attrs={"class" : "timeHoraire"})
#route = stopSoup.find("div", attrs={"class" : "horaires-results"}).findAll("p")[0].contents[1].strip()
#stop = stopSoup.find("div", attrs={"class" : "horaires-results"}).findAll("p")[1].contents[1].strip()
#infobus = stopSoup.find("div", attrs={"class" : "horaires-results"}).findAll("p")[2].contents[1].strip()
#
#print "RESULTS"
#print "Stop", stop
#print "Route", route
#print "Infobus", infobus
#
## Build array
#stopTimes = []
#
#for time in times:
# stopTimes.append(time.contents[0])
#print stopTimes