-
Notifications
You must be signed in to change notification settings - Fork 2
/
mic_streaming.py
147 lines (130 loc) · 4.75 KB
/
mic_streaming.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import time
import argparse
import threading
import collections
import sounddevice as sd
import numpy as np
import logging
from kws import TFLiteKWS, SILENCE, NOT_KW, VERBOSE
def int_or_str(text):
    """Convert a command-line value to ``int`` when it parses as one.

    Used as the ``type=`` converter for ``--input-device`` so a device can be
    given either as a numeric ID or as a name substring.

    Returns ``int(text)`` on success; if ``int`` raises ``ValueError`` the
    original value is returned unchanged.
    """
    try:
        number = int(text)
    except ValueError:
        # Not numeric: hand the text back untouched.
        return text
    else:
        return number
# First pass: a minimal parser that only knows --list-devices, so the device
# list can be printed without supplying the other (required) options.
# Everything it does not recognise is kept in `remaining` for the second pass.
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
    '-l',
    '--list-devices',
    action='store_true',
    help='show list of audio devices and exit',
)
args, remaining = parser.parse_known_args()
if args.list_devices:
    print(sd.query_devices())
    parser.exit(0)
# Second pass: the full command-line interface. It inherits --list-devices from
# the first-pass parser (via `parents`) and parses only the arguments that
# parser left over in `remaining`.
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    parents=[parser])
parser.add_argument(
    '-m', '--model', type=str, required=True,
    help='tflite model')
parser.add_argument(
    '-n', '--num-keywords', type=int, default=1,
    help='number of keywords (default 1)')
parser.add_argument(
    '-i', '--input-device', type=int_or_str,
    help='input device (numeric ID or substring)')
parser.add_argument(
    '-c', '--channel', type=int, default=None,
    help='specify the channel index of input device (start from 0)')
parser.add_argument(
    '-r', '--sample-rate', type=int, default=16000,
    help='input sample rate')
parser.add_argument(
    '-b', '--block-len-ms', type=int, default=20,
    help='input block (window stride) length (ms)')
# NOTE: the original statement ended with a stray trailing comma, which turned
# it into a discarded one-element tuple; removed.
parser.add_argument(
    '-s', '--score-strategy', choices=['smoothed_confidence', 'hit_ratio'], default='hit_ratio',
    help='score strategy, choose between "smoothed_confidence" or "hit_ratio" (default)')
parser.add_argument(
    '--score-threshold', type=float,
    help='score threshold, if not specified, this is automatically determined by strategy and softmax options')
parser.add_argument(
    '--hit-threshold', type=float, default=7,
    help='hit threshold')
parser.add_argument(
    '--tailroom-ms', type=int, default=100,
    help='tail room in ms')
# The flag is store_true and is forwarded as add_softmax=args.add_softmax, so
# passing it enables the softmax layer; the help text now says so.
parser.add_argument(
    '--add-softmax', action='store_true',
    help='add softmax layer to output')
parser.add_argument(
    '--silence-on', action='store_true',
    help='turn on silence detection')
parser.add_argument(
    '--delay-trigger', action='store_true',
    help='only trigger after utterance end')
parser.add_argument(
    '--max-kw', type=int, default=1,
    help='max number of kw in one utterance')
parser.add_argument(
    '--measure', action='store_true',
    help='measure and report processing time')
parser.add_argument(
    '-v', '--verbose', type=int, default=1,
    help='verbose level: 0 - quiet, 1 - info, 2 - debug, 3 - verbose'
)
args = parser.parse_args(remaining)
# Configure logging from -v/--verbose. Level 0 leaves logging unconfigured;
# levels 1-3 install the timestamped format and pick the root logger level.
# Any other positive value installs the format but leaves the level untouched.
if args.verbose > 0:
    logging.basicConfig(
        format='%(asctime)s.%(msecs)03d %(levelname)s:\t%(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )
    if args.verbose == 1:
        logging.getLogger().setLevel(logging.INFO)
    elif args.verbose == 2:
        logging.getLogger().setLevel(logging.DEBUG)
    elif args.verbose == 3:
        # VERBOSE is the custom level imported from kws.
        logging.getLogger().setLevel(VERBOSE)
# Resolve the score threshold handed to the detector. An explicit
# --score-threshold always wins. Compare against None rather than relying on
# truthiness so that an explicit 0 / 0.0 is honoured instead of being silently
# replaced by a strategy default.
if args.score_threshold is None:
    # Per-strategy defaults used when the user gave no threshold.
    if args.score_strategy == 'hit_ratio':
        threshold = 0.01
    else:
        threshold = 0.8
else:
    threshold = args.score_threshold
# Label list given to the model wrapper: the SILENCE and NOT_KW labels imported
# from kws first, followed by kw1 .. kwN for the requested number of keywords.
labels = [SILENCE, NOT_KW]
labels.extend('kw%s' % index for index in range(1, args.num_keywords + 1))

# Keyword-spotting engine, configured entirely from the command line. The
# "on"/"delay" style flags are inverted to match the constructor's
# silence_off / immediate_trigger parameters.
gkws = TFLiteKWS(
    args.model,
    labels,
    add_softmax=args.add_softmax,
    silence_off=not args.silence_on,
    score_strategy=args.score_strategy,
    score_threshold=threshold,
    hit_threshold=args.hit_threshold,
    tailroom_ms=args.tailroom_ms,
    immediate_trigger=not args.delay_trigger,
    max_kw_cnt=args.max_kw,
)

# Bounded buffer of the 128 most recent per-callback durations; appended to by
# callback() when --measure is set.
t_ring = collections.deque(maxlen=128)
def callback(indata, frames, buf_time, status):
    """Run keyword spotting on one block of captured audio.

    Passed as the ``callback`` of ``sd.InputStream`` in ``open_stream``.

    Args:
        indata: Captured audio block. Indexed here as a 2-D array with the
            channel on the second axis (assumes the NumPy array that
            sounddevice hands to stream callbacks -- confirm against its docs).
        frames: Unused.
        buf_time: Unused.
        status: Stream status; logged as a warning when truthy.

    Side effects:
        Logs any non-empty result from ``gkws.process`` and, when
        ``--measure`` is set, appends the elapsed processing time in seconds
        to ``t_ring``.
    """
    # No `global` statement needed: the module-level names are only read or
    # mutated in place, never rebound.
    measuring = args.measure
    if measuring:
        # perf_counter() is monotonic and high-resolution; time.time() is
        # wall-clock and can jump or be too coarse for sub-block durations.
        started = time.perf_counter()
    if status:
        logging.warning(status)
    if args.channel is not None:
        # Indexing with a list keeps the selected channel as a 2-D column.
        indata = indata[:, [args.channel]]
    kw = gkws.process(indata)
    if len(kw) > 0:
        logging.info("API returned: %s", kw)
    if measuring:
        t_ring.append(time.perf_counter() - started)
def open_stream():
    """Open the input stream and block until the process is interrupted.

    The block size in samples is derived from ``--sample-rate`` and
    ``--block-len-ms``. While the stream is open, ``callback`` is invoked by
    sounddevice for each block. With ``--measure`` the average of the
    durations collected in ``t_ring`` is printed once per second; otherwise
    the calling thread simply waits forever.

    Does not return normally; it ends when an exception (typically
    ``KeyboardInterrupt``) propagates out of the wait.
    """
    block_shift = int(np.round(args.sample_rate * (args.block_len_ms / 1000)))
    with sd.InputStream(device=args.input_device, samplerate=args.sample_rate,
                        blocksize=block_shift,
                        dtype=np.float32,
                        # A single channel is requested unless a specific
                        # channel index was given, in which case the choice is
                        # left to sounddevice (channels=None).
                        channels=1 if args.channel is None else None,
                        callback=callback):
        print('#' * 80)
        print('Ctrl-C to exit')
        print('#' * 80)
        if args.measure:
            while True:
                time.sleep(1)
                # Skip the report until at least one block has been timed:
                # averaging an empty buffer yields nan plus a RuntimeWarning.
                if t_ring:
                    print('Processing time: {:.2f} ms'.format(1000 * np.average(t_ring)), end='\r')
        else:
            # Nothing to do on this thread; wait on an event that is never set.
            threading.Event().wait()
# Script entry: stream until interrupted. Ctrl-C exits through parser.exit
# with an empty string; any other exception exits through parser.exit with
# "<ExceptionType>: <message>" as its first positional argument.
try:
    open_stream()
except KeyboardInterrupt:
    parser.exit('')
except Exception as exc:
    parser.exit('{}: {}'.format(type(exc).__name__, exc))