tagger.py

# -*- coding: utf-8 -*-
__author__ = "biavarone"

from utils import *
from activities import activities_matcher
from emotions import emotions_matcher
from interactions import interactions_matcher
from places import places_matcher
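
# The wildcard import from utils is expected to provide at least nlp (the loaded
# spaCy pipeline), check_negation and substitute_dash, which are used below.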


def match_places(sentence):
    """Return place matches found in the sentence, skipping negated ones."""
    found = places_matcher(sentence)
    all_matches = []
    for match_id, start, end in found:
        # keep the match only if its first token is not negated
        if not check_negation(sentence[start]):
            all_matches.append((match_id, start, end))
    return all_matches


def match_interactions(sentence):
    """Return interaction matches found in the sentence, skipping negated ones."""
    found = interactions_matcher(sentence)
    all_matches = []  # TODO longest match
    for match_id, start, end in found:
        if not check_negation(sentence[start]):
            all_matches.append((match_id, start, end))
    return all_matches


def match_emotions(sentence):
    """Return emotion matches found in the sentence, skipping negated ones."""
    found = emotions_matcher(sentence)
    all_matches = []
    for match_id, start, end in found:
        if not check_negation(sentence[start]):
            all_matches.append((match_id, start, end))
    return all_matches


def match_activities(sentence):
    """Return activity matches found in the sentence, skipping negated ones."""
    found = activities_matcher(sentence)  # all matches, to be checked for negation
    all_matches = []  # TODO find longest match if needed
    for match_id, start, end in found:
        if not check_negation(sentence[start]):
            all_matches.append((match_id, start, end))
    return all_matches
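

# check_negation is defined in utils and is not shown here. A minimal, hypothetical
# sketch of the idea, assuming the English spaCy models' 'neg' dependency label is
# what marks negation, could look like:
#
#     def check_negation(token):
#         """Hypothetical: True if the token or its head governs a 'neg' child."""
#         if any(child.dep_ == "neg" for child in token.children):
#             return True
#         return any(child.dep_ == "neg" for child in token.head.children)
#
# Note that the match_* functions above only test the first token of each match
# (sentence[start]), so a negation attached elsewhere in the span is not checked.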


if __name__ == "__main__":
    sentence = "Sentence to be analyzed"

    # analyze sentence
    sentence = substitute_dash(sentence)  # substitute dashes with full stops for better parsing
    sentence = nlp(sentence)
    for sent in sentence.sents:
        sent = sent.as_doc()

        activities_tags = match_activities(sent)
        if activities_tags:
            # match_id is the hash associated with the matched category, e.g. 5133706519360878345 == 'leisure'
            for match_id, start, end in activities_tags:
                rule_id = nlp.vocab.strings[match_id]  # get the string ID, e.g. 'leisure'
                span = sent[start:end]  # get the matched slice of the sentence
                print(f"activities: {rule_id}: {span}")

        emotions_tags = match_emotions(sent)
        if emotions_tags:
            # match_id is the hash associated with the matched category, e.g. 9391526999249888540 == 'sad'
            for match_id, start, end in emotions_tags:
                rule_id = nlp.vocab.strings[match_id]  # get the string ID, e.g. 'sad'
                span = sent[start:end]  # get the matched slice of the sentence
                print(f"emotions: {rule_id}: {span}")

        interactions_tags = match_interactions(sent)
        if interactions_tags:
            # match_id is the hash associated with the matched category, e.g. 18292453351080475948 == 'family'
            for match_id, start, end in interactions_tags:
                rule_id = nlp.vocab.strings[match_id]  # get the string ID, e.g. 'family'
                span = sent[start:end]  # get the matched slice of the sentence
                print(f"interactions: {rule_id}: {span}")

        places_tags = match_places(sent)
        if places_tags:
            # match_id is the hash associated with the matched category, e.g. 12006852138382633966 == 'home'
            for match_id, start, end in places_tags:
                rule_id = nlp.vocab.strings[match_id]  # get the string ID, e.g. 'home'
                span = sent[start:end]  # get the matched slice of the sentence
                print(f"places: {rule_id}: {span}")