-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgeolocator.py
91 lines (72 loc) · 2.62 KB
/
geolocator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import hashlib
import re
import numpy as np
# The geolocators in geopy that do not expect api_key
from geopy.geocoders import GeocodeFarm, Yandex, ArcGIS
from db import Session
from db import Query
# Geocoder instances that geocode() iterates over, in this order.
# Yandex is imported above but is not instantiated here.
locators = [GeocodeFarm(), ArcGIS()]
def _query(session, hashcode, provider):
    """Look up the stored ``Query`` row for one address hash and provider.

    Args:
        session: Database session exposing ``query()``.
        hashcode: Hash of the normalised address text.
        provider: Name of the geocoding provider the row was obtained from.

    Returns:
        Whatever ``.first()`` yields for the filtered query (presumably
        ``None`` when no row matches -- verify against the ORM in use).
    """
    criteria = (Query.hashcode == hashcode, Query.provider == provider)
    matches = session.query(Query).filter(*criteria)
    return matches.first()
def cached_query(address, provider):
    """Geocode *address* with *provider*, reusing a stored result if present.

    The address is upper-cased and every run of whitespace is collapsed to a
    single space. The MD5 hex digest of that normalised text, together with
    the provider's class name, forms the cache lookup key.

    Args:
        address: Address string to resolve.
        provider: Object exposing ``geocode(address)``; a truthy result is
            read through its ``latitude`` and ``longitude`` attributes.

    Returns:
        The cached or freshly stored ``Query`` row, or ``None`` when nothing
        is cached and the provider raised or returned no result.
    """
    address = re.sub(r"\s+", " ", address.upper())
    provider_name = provider.__class__.__name__
    hashcode = hashlib.md5(bytes(address, encoding="utf-8")).hexdigest()

    # NOTE(review): expire_on_commit=False is presumably what keeps the
    # returned row readable after the session is closed -- confirm.
    session = Session(expire_on_commit=False)
    try:
        cached = _query(session, hashcode, provider_name)
        if not cached:
            try:
                response = provider.geocode(address)
            except Exception as e:
                # Best effort: a failing provider is reported and treated as
                # "no result" so the remaining providers can still be tried.
                print(e)
                response = None
            if response:
                cached = Query(
                    hashcode=hashcode,
                    address=address,
                    latitude=response.latitude,
                    longitude=response.longitude,
                    provider=provider_name,
                )
                session.add(cached)
                session.commit()
    finally:
        # Always release the session, even if the lookup or commit raised.
        session.close()
    return cached
class Coordinates:
    """A latitude/longitude pair.

    ``repr()`` renders the pair as ``"<latitude>, <longitude>"``.
    """

    def __init__(self, latitude, longitude):
        self.latitude, self.longitude = latitude, longitude

    def __repr__(self):
        # str() of each component, joined by a comma and a space.
        parts = (self.latitude, self.longitude)
        return ", ".join(str(part) for part in parts)
def reject_outliers(data, alpha=90):
    """Drop records above the *alpha*-th percentile of either coordinate.

    Only the upper tail is trimmed; unusually low values are kept.

    Args:
        data: Array exposing ``lat`` and ``long`` fields as attributes and
            supporting boolean-mask indexing (e.g. a NumPy record array).
        alpha: Percentile passed to ``np.percentile`` as the upper cut-off
            for each field.

    Returns:
        The subset of *data* whose ``lat`` and ``long`` are both at or below
        their respective percentile.
    """
    lat_limit = np.percentile(data.lat, alpha)
    long_limit = np.percentile(data.long, alpha)
    # Inclusive bounds: with a strict "<", a set of identical readings would
    # be rejected wholesale (every value equals the percentile), leaving
    # nothing to average.
    keep = (data.lat <= lat_limit) & (data.long <= long_limit)
    return data[keep]
def geocode(address):
    """Geocode *address* by averaging the answers of the configured locators.

    Every entry of the module-level ``locators`` list is queried through
    ``cached_query``. When more than two locators answer, the candidates are
    passed through ``reject_outliers`` before averaging.

    Args:
        address: Address string to resolve.

    Returns:
        A ``Coordinates`` holding the mean latitude and longitude, or
        ``None`` when no locator produced a result.
    """
    results = (cached_query(address, locator) for locator in locators)
    candidates = [(hit.latitude, hit.longitude) for hit in results if hit]
    if not candidates:
        return None

    # np.rec is the public home of fromrecords; np.core is a private
    # namespace whose access is deprecated as of NumPy 2.0.
    coords = np.rec.fromrecords(candidates, names="lat,long")
    if len(coords) > 2:
        filtered = reject_outliers(coords)
        # Keep the unfiltered set if rejection removed everything: averaging
        # an empty array would produce NaN coordinates.
        if len(filtered):
            coords = filtered
    return Coordinates(np.average(coords.lat), np.average(coords.long))
if __name__ == "__main__":
    # Manual smoke test: resolve a sample address and print the outcome.
    print(geocode("VIA DELLA CASETTA MATTEI 205"))