forked from knowitall/yelp-dataset-challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
polarity_computer.py
31 lines (25 loc) · 953 Bytes
/
polarity_computer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from util import *
from collections import Counter
# Location of the review file that compute_polarities() reads line by line.
REVIEW_DATA_FILE = (
    './data/yelp_phoenix_academic_dataset/'
    'yelp_academic_dataset_review.json'
)
def compute_polarities():
    """Write the average star rating of every extracted value to disk.

    Reads the star rating of each review from REVIEW_DATA_FILE, then walks
    './data/extractions_final.json'. For every value found there, the stars
    of each review listed under that value are summed and divided by the
    number of listings, so a review listed several times for the same value
    is counted once per listing. The value -> average mapping is written to
    './data/polarities.json'.

    A review id that appears in the extractions but not in the review file
    raises KeyError.
    """
    # Star rating of each review, keyed by review id.
    stars_by_review = {
        record['review_id']: float(record['stars'])
        for record in load_json_lines(REVIEW_DATA_FILE)
    }

    star_totals = Counter()
    listing_counts = Counter()
    extractions = load_json('./data/extractions_final.json')

    # The two outer levels of keys only group the values; values with the
    # same key are pooled across all of those groups.
    for second_level in extractions.values():
        for values_to_pairs in second_level.values():
            for value, pairs in values_to_pairs.items():
                # Each entry is a pair; only its second element (the review
                # id) is needed here.
                for _, review_id in pairs:
                    star_totals[value] += stars_by_review[review_id]
                    listing_counts[value] += 1

    # Both counters always gain keys together, so every total has a
    # matching non-zero count; turn the totals into averages in place.
    for value, count in listing_counts.items():
        star_totals[value] /= count

    write_json(star_totals, './data/polarities.json')
# Run the polarity computation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    compute_polarities()