-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsentiment_analysis.py
45 lines (32 loc) · 1.13 KB
/
sentiment_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Importing libraries
import pickle
import numpy as np
from nltk.tokenize import word_tokenize
from sklearn.ensemble import VotingClassifier
# Load the pickled classifier object used by sentiment_module().
# The ``with`` block closes the file even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# ship/load this pickle from a trusted location.
with open("pickled/Voting_classifier.pickle", "rb") as classifier_f:
    Voting_classifier = pickle.load(classifier_f)
print("Sentiment Analysis Module Loaded!")
# Load the pickled collection of feature words iterated by find_features().
# The ``with`` block closes the file even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# ship/load this pickle from a trusted location.
with open("pickled/features.pickle", "rb") as features_f:
    word_features = pickle.load(features_f)
print("Word Features Loaded!")
def find_features(document):
    """Build a word-presence feature dict for one piece of text.

    Args:
        document: Raw text of a review; it is tokenized here with
            ``word_tokenize``.

    Returns:
        A dict with one entry per item of the module-level ``word_features``
        (in its iteration order), mapping that word to ``True`` if it occurs
        among the tokens of ``document`` and to ``False`` otherwise.
    """
    # Keep only the unique tokens: a set makes every membership test below a
    # hash lookup instead of a scan over the whole token list.
    words = set(word_tokenize(document))
    # Flag each feature word (the top 3000 words, per the original note) as
    # present in or absent from this document.
    return {w: (w in words) for w in word_features}
def sentiment_module(text):
    """Classify ``text`` with the loaded voting classifier.

    Args:
        text: Raw text; passed unchanged to ``find_features``.

    Returns:
        A ``(prediction, confidence)`` tuple. ``prediction`` is whatever
        ``Voting_classifier.predict`` returns for the single-row input, and
        ``confidence`` is the largest value in the ``predict_proba`` output
        for that same row.
    """
    features = find_features(text)
    # Only the presence flags are needed; the dict's order is the order in
    # which find_features inserted them.
    # NOTE(review): presumably this must match the column order the model was
    # trained with -- confirm.
    row = np.array(list(features.values())).reshape(1, -1)
    prediction = Voting_classifier.predict(row)
    # NOTE(review): if this is scikit-learn's VotingClassifier, predict_proba
    # is only available with voting="soft" -- confirm the pickled model.
    confidence = Voting_classifier.predict_proba(row)
    return prediction, np.max(confidence)