Skip to content

Commit

Permalink
Added only necessary files and edited ignore
Browse files Browse the repository at this point in the history
  • Loading branch information
nlght committed May 9, 2019
1 parent 372ac0a commit 0e81f17
Show file tree
Hide file tree
Showing 27 changed files with 754 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
/data/
/.idea/
/__pycache__/
*.npy
*.tar.gz
Binary file added MeDown.wav
Binary file not shown.
Binary file added MeFour.wav
Binary file not shown.
Binary file added MeLeft.wav
Binary file not shown.
Binary file added MeOne.wav
Binary file not shown.
Binary file added MeRight.wav
Binary file not shown.
Binary file added MeThree.wav
Binary file not shown.
Binary file added MeTwo.wav
Binary file not shown.
Binary file added MumDown.wav
Binary file not shown.
Binary file added MumOne.wav
Binary file not shown.
Binary file added MumUp.wav
Binary file not shown.
199 changes: 199 additions & 0 deletions SPECtogram.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
import numpy
import scipy.io.wavfile
import matplotlib.pyplot as plt
#from scipy.fftpack import dct
from mfcc_bro import do_mfcc

_CLIP_SAMPLES = 16000  # every clip is padded up to this many samples


def _pad_by_repeating_head(samples, target_length, jump_threshold=200):
    """Lengthen a 1-D signal to *target_length* by re-appending its own start.

    On each pass the leading samples (at most as many as are still missing)
    are scanned, and the copy stops just before the first sample whose
    absolute value exceeds the previous sample's absolute value by more than
    *jump_threshold*. The selected prefix is appended and the process repeats
    until the signal is long enough. Signals that already have
    *target_length* samples or more are returned unchanged.
    """
    if samples.shape[0] == 0:
        # Nothing to repeat; looping would never terminate.
        raise ValueError("cannot pad an empty signal")
    while samples.shape[0] < target_length:
        head = samples[:target_length - samples.shape[0]]
        # Index j in `jumps` means |head[j + 1]| - |head[j]| > threshold.
        jumps = numpy.flatnonzero(numpy.diff(numpy.abs(head)) > jump_threshold)
        cut = int(jumps[0]) + 1 if jumps.size else head.shape[0]
        samples = numpy.append(samples, head[:cut])
    return samples


def _mel_filterbank(sample_rate, NFFT, num_filters):
    """Build a (num_filters, NFFT // 2 + 1) bank of triangular mel filters.

    The filters are spaced evenly on the mel scale between 0 Hz and
    sample_rate / 4.
    """
    low_freq_mel = 0
    # Upper edge is sample_rate / 4 (4 kHz for 16 kHz audio), not Nyquist.
    high_freq_mel = 2595 * numpy.log10(1 + (sample_rate / 4) / 700)
    mel_points = numpy.linspace(low_freq_mel, high_freq_mel, num_filters + 2)
    hz_points = 700 * (10 ** (mel_points / 2595) - 1)
    fft_bins = numpy.floor((NFFT + 1) * hz_points / sample_rate)

    fbank = numpy.zeros((num_filters, int(numpy.floor(NFFT / 2 + 1))))
    for m in range(1, num_filters + 1):
        left = int(fft_bins[m - 1])
        center = int(fft_bins[m])
        right = int(fft_bins[m + 1])
        # Rising slope, then falling slope, of the m-th triangle.
        for k in range(left, center):
            fbank[m - 1, k] = (k - fft_bins[m - 1]) / (fft_bins[m] - fft_bins[m - 1])
        for k in range(center, right):
            fbank[m - 1, k] = (fft_bins[m + 1] - k) / (fft_bins[m + 1] - fft_bins[m])
    return fbank


def gimmeDaSPECtogram(input, window_size_ms=30.0, stride_ms=10.0, pre_emphasis=0.97, NFFT=512, triangular_filters=40, magnitude_squared=False, name=None):
    """Compute MFCC features for (at most) the first second of a WAV file.

    Pipeline: read the file, keep the first second, apply pre-emphasis, pad
    short clips to 16000 samples by repeating their beginning, split into
    Hamming-windowed frames, take the power spectrum, apply a triangular mel
    filter bank, convert to a log scale and hand the result to ``do_mfcc``.

    Args:
        input: Path or file object accepted by ``scipy.io.wavfile.read``.
            NOTE(review): the signal is treated as 1-D (mono); multi-channel
            files are not handled specially - confirm inputs are mono.
        window_size_ms: Frame length in milliseconds.
        stride_ms: Hop between consecutive frames in milliseconds.
        pre_emphasis: Coefficient of the first-order pre-emphasis filter.
        NFFT: FFT size used for each frame.
        triangular_filters: Number of mel filters.
        magnitude_squared: Currently unused.
        name: Currently unused.

    Returns:
        The transpose of the array returned by ``do_mfcc``, i.e. one row per
        cepstral coefficient and one column per frame (97 frames for 16 kHz
        audio with the default window and stride).
    """
    sample_rate, signal = scipy.io.wavfile.read(input)
    signal = signal[0:int(1.0 * sample_rate)]  # keep at most the first second

    # The original code also built a mean-valued padding array here but threw
    # the numpy.append result away; that no-op raised ValueError for clips
    # longer than 16000 samples, so it has been removed. Padding happens
    # below, after pre-emphasis.
    emphasized_signal = numpy.append(signal[0], signal[1:] - pre_emphasis * signal[:-1])
    emphasized_signal = _pad_by_repeating_head(emphasized_signal, _CLIP_SAMPLES)

    # Window and stride converted from milliseconds to whole samples.
    frame_length = int(round(window_size_ms / 1000 * sample_rate))
    frame_step = int(round(stride_ms / 1000 * sample_rate))
    signal_length = len(emphasized_signal)
    num_frames = int(numpy.ceil(
        float(numpy.abs(signal_length - frame_length)) / frame_step))

    # Zero-pad so every frame is complete without dropping original samples.
    pad_signal_length = num_frames * frame_step + frame_length
    pad_signal = numpy.append(
        emphasized_signal, numpy.zeros(pad_signal_length - signal_length))

    # Row r of `indices` lists the sample positions that make up frame r.
    frame_starts = numpy.arange(0, num_frames * frame_step, frame_step)
    indices = numpy.arange(frame_length)[numpy.newaxis, :] + frame_starts[:, numpy.newaxis]
    frames = pad_signal[indices.astype(numpy.int32, copy=False)]
    frames *= numpy.hamming(frame_length)

    mag_frames = numpy.absolute(numpy.fft.rfft(frames, NFFT))  # magnitude spectrum
    pow_frames = (1.0 / NFFT) * (mag_frames ** 2)  # power spectrum

    fbank = _mel_filterbank(sample_rate, NFFT, triangular_filters)
    filter_banks = numpy.dot(pow_frames, fbank.T)
    # Replace exact zeros so the logarithm below stays finite.
    filter_banks = numpy.where(filter_banks == 0, numpy.finfo(float).eps, filter_banks)
    # NOTE(review): a factor of 20 is applied to power values; kept as-is so
    # features stay compatible with already-trained models.
    filter_banks = 20 * numpy.log10(filter_banks)

    mfcc = do_mfcc(filter_banks, upper_frequency_limit=4000, lower_frequency_limit=0, dct_coefficient_count=12)
    return mfcc.T

# NOTE(review): leftover plotting experiments that follow the function's
# `return` statement. Indentation is not visible in this view: if these lines
# belong to the function body they are unreachable; if they are module-level,
# the bare string literals are no-ops and plt.show() runs at import time.
# Confirm which applies and consider deleting the whole section.
#plt.imshow(filter_banks)

#plt.imshow(do_mfcc(filter_banks, upper_frequency_limit=4000, lower_frequency_limit=0, dct_coefficient_count=12))

#plt.show()

#mfcc plot

"""
plt.subplot(312)
filter_banks = do_mfcc(filter_banks, upper_frequency_limit=4000, lower_frequency_limit=0, dct_coefficient_count=12)
#filter_banks -= (numpy.mean(filter_banks, axis=0) + 1e-8)
plt.imshow(filter_banks.T, cmap=plt.cm.jet, aspect='auto')
plt.xticks(numpy.arange(0, (filter_banks.T).shape[1],
int((filter_banks.T).shape[1] / 4)),
['0s', '0.25s', '0.5s', '0.75s', '1s'])
plt.yticks(numpy.arange(1, (filter_banks.T).shape[0],
int((filter_banks.T).shape[0]/4)),
['0', '3', '6', '9', '12'])
ax = plt.gca()
ax.invert_yaxis()
plt.title('the mfcc image')
#Spectrum
"""
"""
filter_banks -= (numpy.mean(filter_banks, axis=0) + 1e-8)
plt.imshow(filter_banks.T, cmap=plt.cm.jet, aspect='auto')
plt.xticks(numpy.arange(0, (filter_banks.T).shape[1],
int((filter_banks.T).shape[1] / 4)),
['0s', '0.25s', '0.5s', '0.75s', '1s'])
ax = plt.gca()
ax.invert_yaxis()
plt.title('the spectrum image')
"""

plt.show()




#gimmeDaSPECtogram("samples/left.wav", window_size_ms=30.0, stride_ms=10.0, pre_emphasis=0.97)
116 changes: 116 additions & 0 deletions all_preprocessing_done.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from preprocess import *
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical
from keras import backend as K
import tensorflow as tf
from SPECtogram import gimmeDaSPECtogram

# Second dimension of each feature matrix.
feature_dim_2 = 12

# Save data to array file first (helper comes from the preprocess module).
save_data_to_array(max_len=feature_dim_2)

# Load the train and test splits.
X_train, X_test, y_train, y_test = get_train_test()

# First feature dimension, channel count and training hyper-parameters.
feature_dim_1 = 97
channel = 1
epochs = 50
batch_size = 100
verbose = 1
labels_local, _, _ = get_labels()
num_classes = len(labels_local)

# Add a trailing channel axis so the samples can be fed to 2D convolutions.
X_train = X_train.reshape(X_train.shape[0], feature_dim_1, feature_dim_2, channel)
X_test = X_test.reshape(X_test.shape[0], feature_dim_1, feature_dim_2, channel)

# One-hot encode with an explicit width. Without num_classes the width is
# inferred from the largest label present in each array, so a split that
# happens to miss the highest class would get fewer columns than the other
# split and than the network's output layer.
y_train_hot = to_categorical(y_train, num_classes=num_classes)
y_test_hot = to_categorical(y_test, num_classes=num_classes)




def get_model():
    """Build and compile the convolutional classifier.

    The network stacks three 2x2 convolutions (32, 48 and 120 filters), one
    2x2 max-pooling layer, and two dense layers (128 and 64 units) with
    dropout in between, ending in a softmax over the label set. Input shape
    is ``(feature_dim_1, feature_dim_2, channel)``.

    Returns:
        The compiled Keras ``Sequential`` model (categorical cross-entropy
        loss, Adadelta optimizer, accuracy metric).
    """
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(2, 2), activation='relu', input_shape=(feature_dim_1, feature_dim_2, channel)))
    model.add(Conv2D(48, kernel_size=(2, 2), activation='relu'))
    model.add(Conv2D(120, kernel_size=(2, 2), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.4))
    # Size the output layer from the label set instead of a hard-coded 10 so
    # it always matches the one-hot targets.
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    return model

def predict(filepath, model):
    """Return the label that *model* assigns to the audio file at *filepath*.

    The file is converted with ``wav2mfcc``, reshaped into a one-sample
    batch of shape ``(1, feature_dim_1, feature_dim_2, channel)`` and passed
    to ``model.predict``; the index of the highest score selects the label
    from the first element returned by ``get_labels()``.
    """
    features = wav2mfcc(filepath, feature_dim_2)
    single_batch = features.reshape(1, feature_dim_1, feature_dim_2, channel)
    label_names = get_labels()[0]
    scores = model.predict(single_batch)
    best_index = np.argmax(scores)
    return label_names[best_index]


# Build the network and train it, using the test split as validation data.
model = get_model()
model.fit(
    X_train,
    y_train_hot,
    batch_size=batch_size,
    epochs=epochs,
    verbose=verbose,
    validation_data=(X_test, y_test_hot),
)

# Persist the architecture as JSON and the weights as HDF5, both relative to
# the current working directory.
model_json = model.to_json()
with open("model_4.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("model_4.h5")
print("Saved model to disk")









def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """Freeze the state of a session into a pruned computation graph.

    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph is
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.

    Args:
        session: The TensorFlow session to be frozen.
        keep_var_names: Names of variables that should not be frozen, or
            None to freeze all the variables in the graph.
        output_names: Names of the relevant graph outputs. The passed list
            is not modified.
        clear_devices: Remove the device directives from the graph for
            better portability.

    Returns:
        The frozen graph definition.
    """
    graph = session.graph
    with graph.as_default():
        global_var_names = [v.op.name for v in tf.global_variables()]
        freeze_var_names = list(
            set(global_var_names).difference(keep_var_names or []))
        # Build a fresh list: the previous `output_names += [...]` extended
        # the caller's list in place.
        all_output_names = list(output_names or []) + global_var_names
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = tf.graph_util.convert_variables_to_constants(
            session, input_graph_def, all_output_names, freeze_var_names)
        return frozen_graph



# Freeze the trained Keras model's session, keeping the model outputs as the
# graph outputs.
frozen_graph = freeze_session(K.get_session(), output_names=[out.op.name for out in model.outputs])

# Write the frozen graph as a binary protobuf.
# NOTE(review): the target directory is an absolute path inside one user's
# home directory, while the JSON/HDF5 files above are written relative to the
# working directory - this will not work unchanged on another machine.
tf.train.write_graph(frozen_graph, "/home/night/PycharmProjects/APMiniProject/", "my_model_4.pb", as_text=False)
16 changes: 16 additions & 0 deletions mfcc_bro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import numpy
from scipy.fftpack import dct

def do_mfcc(spectrogram, upper_frequency_limit=4000, lower_frequency_limit=0, dct_coefficient_count=12, cep_lifter=22):
    """Turn log filter-bank energies into mean-normalized, liftered MFCCs.

    Args:
        spectrogram: 2-D array with one row per frame and one column per
            filter-bank channel. It is not modified.
        upper_frequency_limit: Currently unused; kept for call compatibility.
        lower_frequency_limit: Currently unused; kept for call compatibility.
        dct_coefficient_count: Number of cepstral coefficients to keep.
            Coefficient 0 is always discarded, so columns
            1..dct_coefficient_count of the DCT are returned.
        cep_lifter: Sinusoidal lifter parameter applied to the final
            coefficients. 0 disables liftering. The default of 22 reproduces
            the previously hard-coded behavior.

    Returns:
        Array of shape (frames, kept coefficients).
    """
    cepstra = dct(spectrogram, type=2, axis=1, norm='ortho')
    mfcc = cepstra[:, 1:(dct_coefficient_count + 1)]

    # Per-coefficient mean normalization across frames.
    mfcc -= (numpy.mean(mfcc, axis=0) + 1e-8)

    if cep_lifter:
        n = numpy.arange(mfcc.shape[1])
        lift = 1 + (cep_lifter / 2) * numpy.sin(numpy.pi * n / cep_lifter)
        mfcc *= lift

    return mfcc
Binary file added model.h5
Binary file not shown.
1 change: 1 addition & 0 deletions model.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"class_name": "Sequential", "config": {"name": "sequential_1", "layers": [{"class_name": "Conv2D", "config": {"name": "conv2d_1", "trainable": true, "batch_input_shape": [null, 97, 12, 1], "dtype": "float32", "filters": 32, "kernel_size": [2, 2], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Conv2D", "config": {"name": "conv2d_2", "trainable": true, "filters": 48, "kernel_size": [2, 2], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Conv2D", "config": {"name": "conv2d_3", "trainable": true, "filters": 120, "kernel_size": [2, 2], "strides": [1, 1], "padding": "valid", "data_format": "channels_last", "dilation_rate": [1, 1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "MaxPooling2D", "config": {"name": "max_pooling2d_1", "trainable": true, 
"pool_size": [2, 2], "padding": "valid", "strides": [2, 2], "data_format": "channels_last"}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.25, "noise_shape": null, "seed": null}}, {"class_name": "Flatten", "config": {"name": "flatten_1", "trainable": true, "data_format": "channels_last"}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 128, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_2", "trainable": true, "rate": 0.25, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_3", "trainable": true, "rate": 0.4, "noise_shape": null, "seed": null}}, {"class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "units": 10, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, 
"keras_version": "2.2.4", "backend": "tensorflow"}
Binary file added model_2.h5
Binary file not shown.
Loading

0 comments on commit 0e81f17

Please sign in to comment.