-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathbroize.py
272 lines (222 loc) · 8.97 KB
/
broize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# The __future__ import has to stay first: Python requires it to precede all other statements.
from __future__ import print_function, unicode_literals
import random
import logging
import os
# NLTK_DATA is pointed at <current working directory>/nltk_data *before* textblob is imported
# below. Presumably this lets the NLTK data TextBlob relies on be loaded from a directory
# shipped next to the app -- TODO confirm against the deployment layout.
os.environ['NLTK_DATA'] = os.getcwd() + '/nltk_data'
from textblob import TextBlob
from config import FILTER_WORDS
# Apply logging's default configuration and run the root logger (getLogger() with no name)
# at DEBUG, so the logger.info() calls in this module are emitted.
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# start:example-hello.py
# Inputs that count as the user greeting us. Single-word entries are compared with the
# individual tokens of the input; entries containing a space are matched as whole phrases
# against the full text (see check_for_greeting).
GREETING_KEYWORDS = (
    "greetings",
    "jai siya ram",
    "sup",
    "what's up",
    "hello",
    "hi",
    "hey",
    "hey there",
    "hola",
)

# Replies to choose from when the user greeted us. "*nods*" is listed twice, which makes it
# twice as likely to be picked by random.choice().
GREETING_RESPONSES = [
    "'sup bro",
    "hey",
    "*nods*",
    "jai siya ram!",
    "hey you get my ping?",
    "hi",
    "hello there",
    "hiya",
    "heya",
    "*nods*",
    "hola",
]


def check_for_greeting(sentence):
    """Return a random greeting response if the user's input contains a greeting.

    Args:
        sentence: parsed input exposing ``words`` (an iterable of string tokens). If it also
            exposes ``raw`` (the original text -- TextBlob objects are expected to; confirm),
            that text is used for phrase matching; otherwise the tokens joined by single
            spaces are used instead.

    Returns:
        A random entry of GREETING_RESPONSES when a keyword matches, otherwise None.
    """
    import re  # local import: this module does not import re at file level

    tokens = set(word.lower() for word in sentence.words)

    text = getattr(sentence, "raw", None)
    if text is None:
        text = " ".join(sentence.words)
    # Lower-case and collapse runs of whitespace so phrases compare reliably
    text = " ".join(text.lower().split())

    for keyword in GREETING_KEYWORDS:
        if " " in keyword:
            # A multi-word keyword can never equal a single token, so look for it as a whole
            # phrase; the lookarounds keep it from matching inside a longer word.
            pattern = r"(?<!\w)" + re.escape(keyword) + r"(?!\w)"
            matched = re.search(pattern, text) is not None
        else:
            matched = keyword in tokens
        if matched:
            return random.choice(GREETING_RESPONSES)
    return None
# start:example-none.py
# Fallback replies, picked at random when no better response could be produced: respond()
# uses them when no pronoun candidate was found or when every other strategy came up empty.
# filter_response() also draws its substitute reply from this list.
NONE_RESPONSES = [
"Not listening...",
"Can't hear you man!",
"*smirks*",
"uh whatever",
"meet me at our lair, bro?",
"code hard bro",
"want to bro down and crush code?",
"I'd like to add you to my professional network on LinkedIn",
"Come let's go work on github!",
"Did someone pester you through? My sincerest condolences!",
"That is not a very nice thing to say!"
]
# end
# start:example-self.py
# Canned replies for when the user tries to tell us something about ourselves.
# NOTE: the "bot score" entry is formatted once, when this module is imported, so the random
# number in it stays the same for the lifetime of the process.
COMMENTS_ABOUT_SELF = [
"You're just jealous!",
"I worked really hard on that. Like reeaalllyyyy hard!",
"My being a bot score is {}".format(random.randint(100, 500)),
]
# end
class UnacceptableUtteranceException(Exception):
    """Signals that a candidate response would have tripped the word blacklist.

    The exception is meant to be raised and deliberately left uncaught.
    """
def starts_with_vowel(word):
    """Tell whether *word* begins with a vowel, to choose between the articles 'a' and 'an'.

    The check ignores case, so "Apple" and "apple" are treated alike. An empty (or otherwise
    falsy) word yields False instead of raising.

    Returns:
        True if the first character is one of a, e, i, o, u (in either case), else False.
    """
    return bool(word) and word[0].lower() in 'aeiou'
def broback(sentence):
    """Entry point: log the incoming sentence, then hand back whatever respond() produces."""
    logger.info("Broback: respond to %s", sentence)
    return respond(sentence)
# start:example-pronoun.py
def find_pronoun(sent):
    """Choose the pronoun the reply should use, based on the pronouns in the input.

    Only tokens tagged 'PRP' are considered. A 'you' (any case) in the input maps to 'I' and
    an 'I' (exact case) maps to 'You'. When several qualify, the last one in the sentence
    wins. Returns None if no candidate pronoun is present.
    """
    chosen = None
    for token, tag in sent.pos_tags:
        if tag != 'PRP':
            continue
        # Swap perspective: the user's "you" is the bot's "I", and vice versa
        if token.lower() == 'you':
            chosen = 'I'
        elif token == 'I':
            chosen = 'You'
    return chosen
# end
def find_verb(sent):
    """Return the first verb of the sentence together with its part-of-speech tag.

    A token counts as a verb when its tag starts with 'VB'. The result is a ``(word, tag)``
    pair, or ``(None, None)`` when the sentence has no verb.
    """
    verbs = ((token, tag) for token, tag in sent.pos_tags if tag.startswith('VB'))
    return next(verbs, (None, None))
def find_noun(sent):
    """Return the first token tagged exactly 'NN' in the sentence, or None if there is none.

    A noun that was found (and is non-empty) is also reported through the module logger.
    """
    candidate = next((token for token, tag in sent.pos_tags if tag == 'NN'), None)
    if candidate:
        logger.info("Found noun: %s", candidate)
    return candidate
def find_adjective(sent):
    """Return the first token tagged exactly 'JJ' in the sentence, or None if there is none."""
    return next((token for token, tag in sent.pos_tags if tag == 'JJ'), None)
# start:example-construct-response.py
def construct_response(pronoun, noun, verb):
    """Build a reply that reuses as much of the user's input as possible.

    Used when no special case matched.

    Args:
        pronoun: pronoun to open the reply with (e.g. 'I' or 'You'), or None.
        noun: candidate noun, or None. When present it is emitted with 'a' or 'an'.
        verb: ``(word, tag)`` pair as returned by find_verb, or None.

    Returns:
        The fragments joined by single spaces, followed by a randomly chosen filler word.
        The filler may be empty, in which case nothing is appended.
    """
    parts = []

    if pronoun:
        parts.append(pronoun)

    # We always respond in the present tense, and the pronoun will always either be a
    # passthrough from the user, or 'you' or 'I', in which case we might need to change the
    # tense for some irregular verbs.
    if verb:
        verb_word = verb[0]
        if verb_word in ('be', 'am', 'is', "'m"):  # This would be an excellent place to use lemmas!
            # Check the pronoun exists first: calling .lower() on None would raise.
            if pronoun and pronoun.lower() == 'you':
                # The bot will always tell the person they aren't whatever they said they were
                parts.append("aren't really")
            else:
                parts.append(verb_word)

    if noun:
        article = "an" if starts_with_vowel(noun) else "a"
        parts.append(article + " " + noun)

    parts.append(random.choice(("tho", "bro", "lol", "dude", "*smirks*", "")))

    # Skip empty fragments so the empty filler does not leave a trailing space
    return " ".join(part for part in parts if part)
# end
# start:example-check-for-self.py
def check_for_comment_about_bot(pronoun, noun, adjective):
    """Produce a boastful reply when the user's input was about the bot itself.

    The input counts as being about the bot when the reply pronoun is 'I' and a noun or an
    adjective was found. A noun takes precedence over an adjective; for a noun, a coin flip
    decides between the pluralized-and-capitalized templates and the as-is templates.

    Returns:
        The formatted reply, or None when the input was not about the bot.
    """
    if pronoun != 'I' or not (noun or adjective):
        return None

    if not noun:
        template = random.choice(SELF_VERBS_WITH_ADJECTIVE)
        return template.format(adjective=adjective)

    if random.choice((True, False)):
        template = random.choice(SELF_VERBS_WITH_NOUN_CAPS_PLURAL)
        return template.format(noun=noun.pluralize().capitalize())

    template = random.choice(SELF_VERBS_WITH_NOUN_LOWER)
    return template.format(noun=noun)
# Reply templates used by check_for_comment_about_bot() when the user said something about
# the bot. Each template is filled in with str.format().
# Templates in this first list receive the noun pluralized and capitalized.
SELF_VERBS_WITH_NOUN_CAPS_PLURAL = [
"My last startup totally crushed the {noun} vertical",
"Were you aware I was a serial entrepreneur in the {noun} sector?",
"My startup is Uber for {noun}",
"I really consider myself an expert on {noun}",
"Actually I am a very {noun} person"
]
# Templates in this list receive the noun exactly as it was found in the user's input.
SELF_VERBS_WITH_NOUN_LOWER = [
"Yeah but I know a lot about {noun}",
"My bros always ask me about {noun}",
]
# Templates in this list receive the adjective from the user's input; they are used when no
# noun was found.
SELF_VERBS_WITH_ADJECTIVE = [
"I'm personally building the {adjective} Economy",
"I consider myself to be a {adjective}preneur",
]
# end
def preprocess_text(sentence):
    """Normalize input quirks that would otherwise confuse parsing.

    A standalone lower-case 'i' or "i'm" is capitalized so it can be recognized as a
    pronoun. The text is split on single spaces and re-joined the same way, so the original
    spacing is kept intact.
    """
    capitalized = {'i': 'I', "i'm": "I'm"}
    return ' '.join(capitalized.get(token, token) for token in sentence.split(' '))
# start:example-respond.py
def respond(sentence):
    """Parse the user's inbound sentence and return the best-fit response.

    Strategies are tried in order: a reply to a comment about the bot, a return greeting,
    a canned reply (no pronoun found, or the input was about the bot but had no verb), and
    finally a sentence constructed from the candidate parts of speech. Whatever was chosen
    is then run through filter_response().

    Args:
        sentence: the raw text received from the user.

    Returns:
        The response text.
    """
    cleaned = preprocess_text(sentence)
    parsed = TextBlob(cleaned)

    # Candidates are gathered over all the sentences of the input, if more than one
    pronoun, noun, adjective, verb = find_candidate_parts_of_speech(parsed)

    # If we said something about the bot and used some kind of direct noun, construct the
    # sentence around that, discarding the other candidates
    resp = check_for_comment_about_bot(pronoun, noun, adjective)

    # If we just greeted the bot, we'll use a return greeting
    if not resp:
        resp = check_for_greeting(parsed)

    if not resp:
        # find_verb reports its result as a (word, tag) pair, which is truthy even when no
        # verb was found -- so test the word itself rather than the pair.
        verb_word = verb[0] if verb else None
        # If we didn't override the final sentence, try to construct a new one:
        if not pronoun:
            resp = random.choice(NONE_RESPONSES)
        elif pronoun == 'I' and not verb_word:
            resp = random.choice(COMMENTS_ABOUT_SELF)
        else:
            resp = construct_response(pronoun, noun, verb)

    # If we got through all that with nothing, use a random response
    if not resp:
        resp = random.choice(NONE_RESPONSES)

    # Check that we're not going to say anything obviously offensive. filter_response()
    # returns a substitute reply when the text is rejected and None when it is acceptable,
    # so its result has to be applied rather than discarded.
    replacement = filter_response(resp)
    if replacement is not None:
        resp = replacement

    logger.info("Returning phrase '%s'", resp)
    return resp
def find_candidate_parts_of_speech(parsed):
    """Find the best pronoun, direct noun, adjective and verb across the parsed input.

    Every sentence is examined. A candidate found in a later sentence replaces the one from
    an earlier sentence, but a sentence that has no candidate for a part of speech no longer
    erases what an earlier sentence provided.

    Args:
        parsed: parsed input exposing ``sentences``, each usable by the find_* helpers.

    Returns:
        A tuple ``(pronoun, noun, adjective, verb)``. pronoun, noun and adjective are None
        when nothing matched. verb is the ``(word, tag)`` pair from find_verb -- it is
        ``(None, None)`` when no sentence had a verb, and None when there were no sentences.
    """
    pronoun = None
    noun = None
    adjective = None
    verb = None
    for sent in parsed.sentences:
        pronoun = find_pronoun(sent) or pronoun
        noun = find_noun(sent) or noun
        adjective = find_adjective(sent) or adjective
        candidate_verb = find_verb(sent)
        # find_verb always returns a pair, so look at the word to know whether it found one
        if verb is None or candidate_verb[0] is not None:
            verb = candidate_verb
    logger.info("Pronoun=%s, noun=%s, adjective=%s, verb=%s", pronoun, noun, adjective, verb)
    return pronoun, noun, adjective, verb
# end
# start:example-filter.py
def filter_response(resp):
    """Vet a response against the filter list.

    The response is split on single spaces. It is rejected if any piece contains '@', '#'
    or '!', or if any piece (lower-cased) starts with an entry of FILTER_WORDS.

    Returns:
        A random entry of NONE_RESPONSES to use instead when the response is rejected;
        None when the response is acceptable.
    """
    forbidden_chars = ('@', '#', '!')
    for piece in resp.split(' '):
        has_forbidden_char = any(char in piece for char in forbidden_chars)
        if has_forbidden_char or any(piece.lower().startswith(prefix) for prefix in FILTER_WORDS):
            return random.choice(NONE_RESPONSES)
    return None
# end