-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.py
120 lines (98 loc) · 3.08 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# -*- coding: utf-8 -*-
# !/usr/bin/python
__author__ = "biavarone"
import re
import spacy
# Shared spaCy pipeline, loaded once as a side effect of importing this module.
# The 'en_core_web_sm' model package must be installed for this call to succeed.
nlp = spacy.load('en_core_web_sm')
def is_adjective(token) -> bool:
    """Return True if *token* is tagged as an adjective.

    :param token: object exposing a ``pos_`` attribute (a spaCy ``Token``
        is what the rest of this module passes in).
    :return: ``True`` when ``token.pos_`` equals ``'ADJ'``, else ``False``.
    """
    return token.pos_ == 'ADJ'
def is_noun(token) -> bool:
    """Return True if *token* is tagged as a common noun.

    :param token: object exposing a ``pos_`` attribute (a spaCy ``Token``
        is what the rest of this module passes in).
    :return: ``True`` when ``token.pos_`` equals ``'NOUN'``, else ``False``.
        Other tags, including ``'PROPN'``, yield ``False``.
    """
    return token.pos_ == 'NOUN'
def is_verb(token) -> bool:
    """Return True if *token* is tagged as a main verb.

    :param token: object exposing a ``pos_`` attribute (a spaCy ``Token``
        is what the rest of this module passes in).
    :return: ``True`` when ``token.pos_`` equals ``'VERB'``, else ``False``.
        Auxiliaries (``'AUX'``) are not counted; see ``is_auxiliary_verb``.
    """
    return token.pos_ == 'VERB'
def is_auxiliary_verb(token) -> bool:
    """Return True if *token* is tagged as an auxiliary verb.

    :param token: object exposing a ``pos_`` attribute (a spaCy ``Token``
        is what the rest of this module passes in).
    :return: ``True`` when ``token.pos_`` equals ``'AUX'``, else ``False``.
    """
    return token.pos_ == 'AUX'
def is_pronoun(token) -> bool:
    """Return True if *token* is tagged as a pronoun.

    :param token: object exposing a ``pos_`` attribute (a spaCy ``Token``
        is what the rest of this module passes in).
    :return: ``True`` when ``token.pos_`` equals ``'PRON'``, else ``False``.
    """
    return token.pos_ == 'PRON'
def find_verbal_head(token):
    """Return the nearest ancestor of *token* tagged VERB or AUX.

    Walks up the ``head`` chain starting from ``token.head``. The walk stops
    at the first head whose ``pos_`` is ``'VERB'`` or ``'AUX'`` and returns
    that head token itself (not its index). If a head that is its own head
    is reached without finding a verbal token, ``None`` is returned.

    :param token: object exposing ``head`` (and whose heads expose ``pos_``
        and ``head``); a spaCy ``Token`` is what this module passes in.
    :return: the verbal head token, or ``None`` if there is none.
    """
    current = token
    while True:
        head = current.head
        if head.pos_ in ('VERB', 'AUX'):
            return head
        if head != head.head:
            # Not at the top of the tree yet: keep climbing.
            current = head
        else:
            # The head is its own head (top of the tree) and is not verbal.
            return None
# Lemmas that signal negation lexically (no 'neg' dependency arc involved).
_NEGATIVE_LEMMAS = ('nothing', 'nobody', 'no')

# Dependency labels treated as "modifier" relations by the negation rules.
_MODIFIER_DEPS = ('amod', 'advmod')


def _has_neg_child(token):
    """Return True if a direct child of *token* has the 'neg' dependency label."""
    return any(child.dep_ == 'neg' for child in token.children)


def _head_modifies_negation(token):
    """Return True if *token*'s head is an amod/advmod whose own head is a 'neg' token."""
    head = token.head
    return head.dep_ in _MODIFIER_DEPS and head.head.dep_ == 'neg'


def _adjective_is_negated(token):
    """Apply the adjective negation patterns to *token*."""
    head = token.head
    # "nothing to be/feel upset about": verbal head governed by a negative pronoun
    if is_verb(head) or is_auxiliary_verb(head):
        if is_pronoun(head.head) and head.head.lemma_ in _NEGATIVE_LEMMAS:
            return True
    # "I am not busy": the negation hangs off the adjective's head
    if _has_neg_child(head):
        return True
    # "Not too bad": the negation hangs off the adjective itself
    if _has_neg_child(token):
        return True
    # "Not that happy"
    return _head_modifies_negation(token)


def _verb_is_negated(token):
    """Apply the verb negation patterns to *token*."""
    head = token.head
    # "I have nothing stressing me": the verb's head is a negative pronoun
    if is_pronoun(head) and head.lemma_ in _NEGATIVE_LEMMAS:
        return True
    # "I have nobody stressing me": a negative pronoun is a child of the verb
    if any(is_pronoun(child) and child.lemma_ in _NEGATIVE_LEMMAS
           for child in token.children):
        return True
    # "I have no one stressing me": the verb's noun head has a negative child
    if is_noun(head) and any(child.lemma_ in _NEGATIVE_LEMMAS
                             for child in head.children):
        return True
    # "I'm not working": the negation hangs off the verb itself
    return _has_neg_child(token)


def _noun_is_negated(token):
    """Apply the noun negation patterns to *token*."""
    # A negated governing verb negates the noun.
    verb = find_verbal_head(token)
    if verb is not None and _has_neg_child(verb):
        return True
    # Expressions like "no stress"
    if any(child.lemma_ == 'no' for child in token.children):
        return True
    return _head_modifies_negation(token)


def check_negation(token):
    """Return True if *token* appears in a negated context.

    Only adjectives, verbs and nouns (as decided by ``is_adjective``,
    ``is_verb`` and ``is_noun``) are inspected; any other token yields
    ``False``. Each part of speech has its own list of patterns, based on
    'neg' dependency children and on the negative lemmas
    'nothing' / 'nobody' / 'no'.

    Changes from the previous implementation:

    * The "modifier of a negation" rule compared ``pos_`` against the
      dependency labels 'amod' / 'advmod' / 'neg', so it could never match.
      It now compares ``dep_``.
    * ``False`` is returned explicitly when nothing matches (previously the
      function fell off the end and returned ``None``).

    NOTE(review): the block nesting was reconstructed from a source whose
    indentation had been lost; each commented pattern is assumed to be an
    independent check inside its part-of-speech section. Confirm against
    the original file.

    :param token: a spaCy ``Token`` (needs ``pos_``, ``dep_``, ``lemma_``,
        ``head`` and ``children``).
    :return: ``True`` if a negation pattern matches, otherwise ``False``.
    """
    # The three part-of-speech tests are mutually exclusive (they compare the
    # same ``pos_`` value), so dispatching with early returns is safe.
    if is_adjective(token):
        return _adjective_is_negated(token)
    if is_verb(token):
        return _verb_is_negated(token)
    if is_noun(token):
        return _noun_is_negated(token)
    return False
def substitute_dash(sentence):
    """Replace every space-delimited dash with a sentence break.

    Each occurrence of ``' - '`` (space, hyphen, space) becomes ``'. '``.
    Hyphens without a space on both sides (e.g. ``'well-known'``) are left
    untouched.

    :param sentence: the text to process (``str``).
    :return: a new string with the replacements applied.
    """
    # The pattern is a fixed literal, so a plain string replace is enough.
    return sentence.replace(' - ', '. ')