-
Notifications
You must be signed in to change notification settings - Fork 33
/
script_recording.py
143 lines (119 loc) · 4.35 KB
/
script_recording.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#! /usr/bin/env python
# encoding: utf-8
from __future__ import unicode_literals
import argparse
from codecs import open
import json
import datetime
import os
import time
import pyaudio
import wave
from utils import Audio
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
RECORD_SECONDS = 2.2
SNR_TRIM = 18
FOLDER_BASE = ""
MAX_RECORD_DURATION = 1.6
MAX_DIFFERENCE_DURATION = 0.3
DATE_FORMAT = '%Y_%m_%dT%H_%M_%S'
def record_one(directory, i):
dest_path = os.path.join(directory, "{0}.wav".format(i))
audio = pyaudio.PyAudio()
raw_input(
"""\n\nPress enter to record one sample, say your hotword when "recording..." shows up""")
time.sleep(0.5)
stream = audio.open(format=FORMAT, channels=CHANNELS,
rate=RATE, input=True,
frames_per_buffer=CHUNK)
print "recording..."
frames = []
for j in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
print "finished recording\n"
stream.stop_stream()
stream.close()
audio.terminate()
waveFile = wave.open(dest_path, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(2)
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()
def check_audios(durations):
if any([d > MAX_RECORD_DURATION for d in durations]):
print "WARNING: at least one your record seems to have a too long " \
"duration, you are going to have"
def record_and_trim(hotword_key, nb_records=3):
raw_input("Your will have to record your personal hotword." \
" Please be sure to be in a quiet environment." \
" Press enter once you are ready.\n".format(
nb_records))
directory = os.path.join(FOLDER_BASE, "personal_{0}".format(str(datetime.datetime.now().strftime(DATE_FORMAT))))
os.makedirs(directory)
is_validated = False
while not is_validated:
audios = []
for i in range(nb_records):
record_one(directory, i)
dest_path = os.path.join(directory, "{0}.wav".format(i))
audio = Audio.from_file(dest_path)
audio.trim_silences(SNR_TRIM)
while audio.duration() > MAX_RECORD_DURATION:
print "WARNING: there seems to be too much noise in your" \
" environment please retry to record this sample by " \
"following the instructions."
record_one(directory, i)
audio = Audio.from_file(dest_path)
audio.trim_silences(SNR_TRIM)
audios.append(audio)
if any([abs(
audio_1.duration() - audio_2.duration()) > MAX_DIFFERENCE_DURATION
for i, audio_1 in enumerate(audios) for j, audio_2 in
enumerate(audios) if i < j]):
print "WARNING: there seems to be too much difference between " \
"your records please retry to record all of them by following " \
"the instructions."
else:
is_validated = True
for i, audio in enumerate(audios):
dest_path = os.path.join(directory, "{0}.wav".format(i))
audio.write(dest_path)
config = {
"hotword_key": hotword_key,
"kind": "personal",
"dtw_ref": 0.22,
"from_mfcc": 1,
"to_mfcc": 13,
"band_radius": 10,
"shift": 10,
"window_size": 10,
"sample_rate": RATE,
"frame_length_ms": 25.0,
"frame_shift_ms": 10.0,
"num_mfcc": 13,
"num_mel_bins": 13,
"mel_low_freq": 20,
"cepstral_lifter": 22.0,
"dither": 0.0,
"window_type": "povey",
"use_energy": False,
"energy_floor": 0.0,
"raw_energy": True,
"preemphasis_coefficient": 0.97,
"model_version": 1
}
with open(os.path.join(directory, "config.json"), "wb") as f:
json.dump(config, f, indent=4)
print "Your model has been saved in {0}".format(
os.path.join(os.path.dirname(os.path.realpath(__file__)), directory))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('hotword_key', type=str,
help="the name of your personal hotword (no special characters)")
args = parser.parse_args()
record_and_trim(args.hotword_key)