chatbot copy 2.py
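# Flask-based college enquiry chatbot (as reconstructed from the code below):
# answers user questions by TF-IDF / cosine-similarity retrieval over All_Info.txt,
# logs queries, categories, and feedback to a MySQL database (wce_chatbot),
# renders usage charts on /faq, and optionally speaks replies via pyttsx3.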
from flask import Flask, render_template, request, redirect, url_for, flash, jsonify
import nltk
import numpy as np
import random
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from bcrypt import hashpw, checkpw, gensalt
import mysql.connector
from mysql.connector import Error
from uuid import uuid4
import pandas as pd
import speech_recognition as sr
import pyttsx3
import matplotlib.pyplot as plt
from io import BytesIO
import base64
app = Flask(__name__)
app.secret_key = '123'

# Load and normalize the knowledge-base corpus
FILE_PATH = 'All_Info.txt'
with open(FILE_PATH, 'r', errors='ignore') as f:
    raw = f.read().lower()

# Make sure the required NLTK resources are available
nltk.download('punkt')
nltk.download('wordnet')

# Sentence tokens are the candidate answers; word tokens are kept for reference
sentence_tokens = nltk.sent_tokenize(raw)
word_tokens = nltk.word_tokenize(raw)

# Lemmatizer and punctuation stripper used to normalize text before vectorizing
lemmer = nltk.stem.WordNetLemmatizer()
remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)
def lem_tokens(tokens):
    return [lemmer.lemmatize(token) for token in tokens]

def lem_normalize(text):
    # Lowercase, strip punctuation, tokenize, and lemmatize
    return lem_tokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict)))

GREETING_INPUTS = ('hello', 'hi', 'greetings', 'sup', "what's up", 'hey')
GREETING_RESPONSES = ['hi', 'hey', 'hi there', 'hello', 'I am glad! You are talking to me']

def greeting(sentence):
    # Return a canned greeting if any word in the sentence is a known greeting
    for word in sentence.split():
        if word.lower() in GREETING_INPUTS:
            return random.choice(GREETING_RESPONSES)
general_info = ['college', 'located', 'wce']
placement_info = ['placement', 'tpo', 'package']
library_info = ['library', 'book']
credit_system = ['credit', 'system']
course_content = ['course', 'content']
fees_info = ['fees', 'open', 'obc']
other = []

keyword_lists = {
    'placement_info': placement_info,
    'general_info': general_info,
    'library_info': library_info,
    'credit_system': credit_system,
    'course_content': course_content,
    'fees_info': fees_info,
    'other': other,
}
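# Each answer produced by response() is matched against these keyword lists to
# categorize the query; the per-category counts stored in the freqt_query table
# drive the pie chart on the /faq page.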
from sklearn.feature_extraction.text import CountVectorizer  # currently unused
from sklearn.metrics.pairwise import linear_kernel  # currently unused

# Initialize the text-to-speech engine
engine = pyttsx3.init()
engine_busy = False  # Tracks whether the engine is currently speaking
def response(user_response):
    robo_response = ''
    # Append the user response to the corpus so it can be compared with every sentence
    sentence_tokens.append(user_response)
    vectorizer = TfidfVectorizer(tokenizer=lem_normalize, stop_words='english')
    tfidf = vectorizer.fit_transform(sentence_tokens)
    # Cosine similarity between the user input (last row) and all sentences
    values = cosine_similarity(tfidf[-1], tfidf)
    idx = values.argsort()[0][-2]
    flat = values.flatten()
    flat.sort()
    req_tfidf = flat[-2]
    if req_tfidf == 0:
        robo_response = "Sorry, I don't understand you. How can I improve?"
    else:
        robo_response = sentence_tokens[idx] + ' Any other questions or feedback?'

    # Categorize the answer and update its count in the freqt_query table
    list_name = ''
    max_matches = 0
    for name, keyword_list in keyword_lists.items():
        matches = sum(1 for keyword in keyword_list if keyword in robo_response)
        if matches > max_matches:
            max_matches = matches
            list_name = name
    try:
        if db.is_connected():
            cursor = db.cursor()
            # Check if the category already exists in the freqt_query table
            select_query = "SELECT count FROM freqt_query WHERE query = %s"
            cursor.execute(select_query, (list_name,))
            result = cursor.fetchone()
            if result:
                # If it exists, increment the count
                update_query = "UPDATE freqt_query SET count = count + 1 WHERE query = %s"
                cursor.execute(update_query, (list_name,))
            else:
                # Otherwise insert a new row with count 1
                insert_query = "INSERT INTO freqt_query (query, count) VALUES (%s, 1)"
                cursor.execute(insert_query, (list_name,))
            db.commit()
            cursor.close()
    except Error as e:
        print(f"Error updating freqt_query table: {e}")

    # Convert the text response to speech if the engine is idle
    global engine_busy
    if not engine_busy:
        engine_busy = True
        try:
            engine.say(robo_response)
            engine.runAndWait()
        except Exception as e:
            print(f"Error converting text to speech: {e}")
        finally:
            engine_busy = False

    # Remove the user response from sentence_tokens
    sentence_tokens.pop()  # Remove the last item, which is the user's input
    return robo_response
db = mysql.connector.connect(
    host='localhost',
    user='root',
    password='Anuraggb',
    database='wce_chatbot',
    port=3306
)
if db.is_connected():
    print("Connected to MySQL database")
else:
    print("Failed to connect to MySQL database")
cursor = db.cursor()
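# Module-level cursor reused by the routes below; functions that need a commit
# (response, insert_into_database, handle_feedback) open their own cursors.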
@app.route('/faq')
def faq():
    try:
        query_time_series = "SELECT query, query_date FROM topquery WHERE query_date IS NOT NULL"
        cursor.execute(query_time_series)
        top_queries_time_series = cursor.fetchall()
        df_time_series = pd.DataFrame(top_queries_time_series, columns=['query', 'query_date'])
        df_time_series = df_time_series.dropna(subset=['query_date'])
        df_time_series['query_date'] = pd.to_datetime(df_time_series['query_date']).dt.date
        query_counts_time_series = df_time_series.groupby('query_date').size()

        # Plot the time-series graph of queries per day
        plt.figure(figsize=(10, 6))
        query_counts_time_series.plot(kind='line', marker='o', color='b')
        plt.title('Number of Queries Over Time')
        plt.xlabel('Date')
        plt.ylabel('Number of Queries')

        # Save the time-series graph to a bytes object
        img_time_series = BytesIO()
        plt.savefig(img_time_series, format='png')
        img_time_series.seek(0)
        chart_url_time_series = base64.b64encode(img_time_series.getvalue()).decode()
        img_time_series.close()

        # Fetch data for the pie chart
        query_pie_chart = "SELECT count, query FROM freqt_query"
        cursor.execute(query_pie_chart)
        top_queries_pie_chart = cursor.fetchall()

        # Extract labels and counts for the pie chart
        queries_pie_chart = [query[1] for query in top_queries_pie_chart]
        counts_pie_chart = [query[0] for query in top_queries_pie_chart]

        # Create the pie chart
        plt.figure(figsize=(8, 6))
        plt.pie(counts_pie_chart, labels=queries_pie_chart, autopct='%1.1f%%', startangle=140)
        plt.axis('equal')  # Equal aspect ratio ensures the pie is drawn as a circle
        plt.title('Top Queries Distribution')

        # Save the pie chart to a bytes object
        img_pie_chart = BytesIO()
        plt.savefig(img_pie_chart, format='png')
        img_pie_chart.seek(0)
        chart_url_pie_chart = base64.b64encode(img_pie_chart.getvalue()).decode()
        img_pie_chart.close()

        topquery_query = "SELECT count(*), query FROM topquery WHERE query IS NOT NULL GROUP BY query ORDER BY count(*) DESC LIMIT 14"
        cursor.execute(topquery_query)
        topquery_data = cursor.fetchall()

        # Render the FAQ page with both charts and the top query data
        return render_template('faq.html', chart_time_series=chart_url_time_series,
                               chart_pie_chart=chart_url_pie_chart, topquery_data=topquery_data)
    except Error as e:
        print(f"Error: {e}")
        return "An error occurred while building the FAQ page.", 500
@app.route('/', methods=['GET', 'POST'])
def login():
    if request.method == 'POST':
        # Check login credentials here
        username = request.form.get('username')
        password = request.form.get('password')
        cursor.execute('SELECT * FROM users WHERE username=%s', (username,))
        user = cursor.fetchone()
        if user and checkpw(password.encode('utf-8'), user[2].encode('utf-8')):
            return redirect(url_for('home'))
        else:
            return "Invalid credentials. Please try again."
    return render_template('index.html')
@app.route('/signup', methods=['GET', 'POST'])
def signup():
    if request.method == 'POST':
        username = request.form.get('username')
        password = request.form.get('password')
        # Check if the username already exists in the database
        cursor.execute('SELECT * FROM users WHERE username = %s', (username,))
        existing_user = cursor.fetchone()
        if existing_user:
            flash('Username already exists. Please log in instead.', 'error')
            return render_template('index.html')
        # Hash the password before storing it
        hashed_password = hashpw(password.encode('utf-8'), gensalt())
        # Insert the new user into the database
        cursor.execute('INSERT INTO users (username, password) VALUES (%s, %s)', (username, hashed_password))
        db.commit()
        return redirect(url_for('home'))
    return render_template('signup.html')
def save_feedback(feedback):
    with open('feedback.txt', 'a') as file:
        file.write(feedback + '\n')

def generate_short_user_id():
    return str(uuid4())[:10]  # Generate a shorter user ID, e.g., the first 10 characters of the UUID

def insert_into_database(user_query):
    try:
        if db.is_connected():
            print("Connected to the database")
            cursor = db.cursor()
            user_id = generate_short_user_id()  # Generate a shorter user ID
            insert_query = "INSERT INTO TopQuery (userId, query, query_date) VALUES (%s, %s, CURDATE())"
            cursor.execute(insert_query, (user_id, user_query))
            db.commit()
            cursor.close()
            print("Data inserted into the database")
    except Error as e:
        print(f"Error: {e}")
@app.route("/home")
def home():
return render_template("home.html")
@app.route('/get_response', methods=['POST'])
def get_response():
    try:
        user_response = None
        # Check if the request contains an audio file
        if 'audio_input' in request.files:
            recognizer = sr.Recognizer()
            audio_file = request.files['audio_input']
            # Handle audio processing (speech-to-text)
            try:
                with sr.AudioFile(audio_file) as source:
                    audio_data = recognizer.record(source)
                    audio_text = recognizer.recognize_google(audio_data)
                    user_response = audio_text.lower()
            except sr.UnknownValueError:
                user_response = "Sorry, I could not understand the audio."
            except sr.RequestError as e:
                user_response = f"Error accessing the Google Speech Recognition API: {e}"
        # If there was no audio input, fall back to the text input
        if user_response is None:
            user_response = request.form.get('user_input', '').lower()
        if user_response != 'bye':
            if user_response in ('thanks', 'thank you'):
                return "You're welcome!"
            elif greeting(user_response) is not None:
                return greeting(user_response)
            else:
                insert_into_database(user_response)
                return response(user_response)
        else:
            return "Bye! Have a great day. Please provide feedback."
    except Exception as e:
        print(f"Error in get_response: {e}")
        return "An error occurred"
# Create a new route to handle user feedback
@app.route('/feedback', methods=['GET', 'POST'])
def handle_feedback():
    if request.method == 'POST':
        # Retrieve feedback ratings and comment from the form
        response = int(request.form.get('response', 0))
        correctness = int(request.form.get('correctness', 0))
        clarity = int(request.form.get('clarity', 0))
        comment = request.form.get('comment', '')
        try:
            if db.is_connected():
                cursor = db.cursor()
                insert_query = "INSERT INTO feedbacked (response, correctness, clarity, comment) VALUES (%s, %s, %s, %s)"
                cursor.execute(insert_query, (response, correctness, clarity, comment))
                db.commit()
                cursor.close()
                flash('Feedback submitted successfully!', 'success')
            else:
                flash('Failed to submit feedback. Please try again later.', 'error')
        except Exception as e:
            print(f"Error: {e}")
            flash('An error occurred while submitting your feedback. Please try again later.', 'error')
    # Assumes the feedback form is served as a static page at /feedback.html
    return redirect('/feedback.html')
if __name__ == "__main__":
    app.run(debug=True)