-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess.py
124 lines (86 loc) · 3.3 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import librosa
import os
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import numpy as np
from tqdm import tqdm
from SPECtogram import gimmeDaSPECtogram
# Default dataset root; the loaders below read audio files from one sub-folder per label under it.
DATA_PATH = "./data/"
# Input: Folder path (accepted but not used; the label list is hard-coded)
# Output: Tuple (labels, indices of the labels, one-hot encoded labels)
def get_labels(path=DATA_PATH):
    """Return the fixed set of command labels together with their encodings.

    Args:
        path: Dataset folder. Accepted for interface compatibility but not
            read; the label list is hard-coded below.

    Returns:
        A tuple ``(labels, label_indices, one_hot)``: the list of label
        names, ``np.arange`` over that list, and
        ``to_categorical(label_indices)``.
    """
    labels = [
        'up', 'down', 'left', 'right', 'one',
        'two', 'three', 'four', 'stop', 'go',
    ]
    label_indices = np.arange(len(labels))
    one_hot = to_categorical(label_indices)
    return labels, label_indices, one_hot
# Convert a wav file to an MFCC matrix padded or cut to max_len frames
def wav2mfcc(file_path, max_len=11):
    """Load an audio file and return its MFCC matrix with ``max_len`` columns.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        max_len: Number of columns (axis 1) in the returned matrix.

    Returns:
        The MFCC array, zero-padded on the right or truncated along axis 1
        so that ``mfcc.shape[1] == max_len``.
    """
    # sr=None keeps the file's native sampling rate; the returned rate is
    # not used because the MFCC call below is given a fixed one.
    wave, _ = librosa.load(file_path, mono=True, sr=None)

    # Keep every third sample (plain slicing, no filtering beforehand).
    wave = wave[::3]

    # Pass the signal by keyword: recent librosa releases make ``y``
    # keyword-only, so the positional form raises TypeError there.
    # NOTE(review): sr=16000 is hard-coded and does not depend on the rate
    # the file was actually loaded at - confirm this is intended.
    mfcc = librosa.feature.mfcc(y=wave, sr=16000)

    frames = mfcc.shape[1]
    if max_len > frames:
        # Too short: pad the missing columns on the right.
        mfcc = np.pad(
            mfcc,
            pad_width=((0, 0), (0, max_len - frames)),
            mode='constant',
        )
    else:
        # Long enough: cut off everything past max_len columns.
        mfcc = mfcc[:, :max_len]
    return mfcc
def save_data_to_array(path=DATA_PATH, max_len=12):
    """Compute one feature array per audio file and save one ``.npy`` per label.

    For every label from ``get_labels`` the files found in ``<path>/<label>``
    are passed to ``gimmeDaSPECtogram`` and the collected results are written
    to ``<label>.npy`` in the current working directory.

    Args:
        path: Dataset root containing one sub-folder per label.
        max_len: Not used by this function; kept so existing callers that
            pass it keep working.
    """
    labels, _, _ = get_labels(path)

    for label in labels:
        # os.path.join builds the same location for the directory listing and
        # for the file paths, whether or not ``path`` ends with a separator.
        label_dir = os.path.join(path, label)
        wavfiles = [os.path.join(label_dir, name) for name in os.listdir(label_dir)]

        progress = tqdm(wavfiles, "Saving vectors of label - '{}'".format(label))
        vectors = [gimmeDaSPECtogram(wavfile) for wavfile in progress]

        np.save(label + '.npy', vectors)
# Run this after save_data_to_array to build the train/test arrays for tensorflow feeding
def get_train_test(split_ratio=0.92, random_state=42):
    """Load the saved per-label arrays and split them into train and test sets.

    Reads ``<label>.npy`` from the current working directory for every label
    returned by ``get_labels``. Samples belonging to the label at position
    ``i`` of that list receive the class id ``i``.

    Args:
        split_ratio: Fraction of the samples placed in the training set; the
            remaining ``1 - split_ratio`` is passed on as ``test_size``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split``.
    """
    labels, _, _ = get_labels(DATA_PATH)

    # Load every label first and stack once; growing X with vstack inside a
    # loop re-copies all previously loaded samples on each iteration.
    per_label = [np.load(label + '.npy') for label in labels]
    X = np.vstack(per_label)

    # Class ids are stored as floats, one id per sample of the matching label.
    y = np.concatenate([
        np.full(samples.shape[0], fill_value=class_id, dtype=np.float64)
        for class_id, samples in enumerate(per_label)
    ])

    assert X.shape[0] == len(y)

    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=(1 - split_ratio),
        random_state=random_state,
        shuffle=True,
    )
    return X_train, X_test, y_train, y_test
def prepare_dataset(path=DATA_PATH):
    """Build an in-memory mapping from each label to its files and MFCCs.

    Args:
        path: Dataset root containing one sub-folder per label.

    Returns:
        A dict keyed by label name. Each value is a dict with ``'path'``
        (list of audio file paths) and ``'mfcc'`` (list of MFCC arrays in
        the same order, neither padded nor truncated).
    """
    labels, _, _ = get_labels(path)

    data = {}
    for label in labels:
        # os.path.join builds the same location for the directory listing and
        # for the file paths, whether or not ``path`` ends with a separator.
        label_dir = os.path.join(path, label)
        wavfiles = [os.path.join(label_dir, name) for name in os.listdir(label_dir)]

        vectors = []
        for wavfile in wavfiles:
            # sr=None keeps the file's native sampling rate.
            wave, _ = librosa.load(wavfile, mono=True, sr=None)
            # Downsampling: keep every third sample.
            wave = wave[::3]
            # ``y`` is passed by keyword because recent librosa releases
            # reject the positional form with TypeError.
            vectors.append(librosa.feature.mfcc(y=wave, sr=16000))

        data[label] = {'path': wavfiles, 'mfcc': vectors}
    return data
def load_dataset(path=DATA_PATH, limit=100):
    """Return ``(label, mfcc)`` pairs built from ``prepare_dataset``.

    Args:
        path: Dataset root forwarded to ``prepare_dataset``.
        limit: Maximum number of pairs to return (non-negative). ``None``
            returns every pair. Defaults to 100.

    Returns:
        A list of ``(label, mfcc)`` tuples, grouped by label in the order
        ``prepare_dataset`` yields them, cut to at most ``limit`` entries.
    """
    # NOTE: prepare_dataset processes every file before the cut is applied,
    # so a small ``limit`` does not reduce the loading work.
    data = prepare_dataset(path)

    dataset = [
        (label, mfcc)
        for label, entry in data.items()
        for mfcc in entry['mfcc']
    ]
    return dataset[:limit]
# print(prepare_dataset(DATA_PATH))