forked from speechlab-ntu/asr-client-code
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclient2.py
153 lines (127 loc) · 6.77 KB
/
client2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import argparse
from ws4py.client.threadedclient import WebSocketClient
import time
import threading
import sys
import urllib
import Queue
import json
import time
import os
import pyaudio
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = int(RATE / 10) # 100ms
# audio = pyaudio.PyAudio()
def rate_limited(maxPerSecond):
minInterval = 1.0 / float(maxPerSecond)
def decorate(func):
lastTimeCalled = [0.0]
def rate_limited_function(*args,**kargs):
elapsed = time.clock() - lastTimeCalled[0]
leftToWait = minInterval - elapsed
if leftToWait>0:
time.sleep(leftToWait)
ret = func(*args,**kargs)
lastTimeCalled[0] = time.clock()
return ret
return rate_limited_function
return decorate
class MyClient(WebSocketClient):
def __init__(self, mode, audiofile, url, protocols=None, extensions=None, heartbeat_freq=None, byterate=32000,
save_adaptation_state_filename=None, send_adaptation_state_filename=None):
super(MyClient, self).__init__(url, protocols, extensions, heartbeat_freq)
self.final_hyps = []
self.byterate = byterate
self.final_hyp_queue = Queue.Queue()
self.save_adaptation_state_filename = save_adaptation_state_filename
self.send_adaptation_state_filename = send_adaptation_state_filename
self.isStop = False
self.audio = pyaudio.PyAudio()
self.audiofile = audiofile
self.mode = mode
@rate_limited(4)
def send_data(self, data):
self.send(data, binary=True)
def opened(self):
#print "Socket opened!"
def send_data_to_ws():
if self.send_adaptation_state_filename is not None:
print >> sys.stderr, "Sending adaptation state from %s" % self.send_adaptation_state_filename
try:
adaptation_state_props = json.load(open(self.send_adaptation_state_filename, "r"))
self.send(json.dumps(dict(adaptation_state=adaptation_state_props)))
except:
e = sys.exc_info()[0]
print >> sys.stderr, "Failed to send adaptation state: ", e
print("Start transcribing...")
if self.mode == 'stream':
stream = self.audio.open(format=FORMAT, channels=CHANNELS,
rate=RATE, input=True,
frames_per_buffer=CHUNK)
while not self.isStop:
data = stream.read(int(self.byterate / 8))
self.send_data(data) # send data
stream.stop_stream()
stream.close()
self.audio.terminate()
elif self.mode == 'file':
with self.audiofile as audiostream:
for block in iter(lambda: audiostream.read(int(self.byterate/4)), ""):
self.send_data(block)
print >> sys.stderr, "Audio sent, now sending EOS"
self.send("EOS")
t = threading.Thread(target=send_data_to_ws)
t.start()
def received_message(self, m):
response = json.loads(str(m))
if response['status'] == 0:
if 'result' in response:
trans = response['result']['hypotheses'][0]['transcript']
if response['result']['final']:
#print >> sys.stderr, trans,
self.final_hyps.append(trans)
print >> sys.stderr, '\r%s' % trans.replace("\n", "\\n")
else:
print_trans = trans.replace("\n", "\\n")
if len(print_trans) > 80:
print_trans = "... %s" % print_trans[-76:]
print >> sys.stderr, '\r%s' % print_trans,
if 'adaptation_state' in response:
if self.save_adaptation_state_filename:
print >> sys.stderr, "Saving adaptation state to %s" % self.save_adaptation_state_filename
with open(self.save_adaptation_state_filename, "w") as f:
f.write(json.dumps(response['adaptation_state']))
else:
print >> sys.stderr, "Received error from server (status %d)" % response['status']
if 'message' in response:
print >> sys.stderr, "Error message:", response['message']
def get_full_hyp(self, timeout=60):
return self.final_hyp_queue.get(timeout)
def closed(self, code, reason=None):
self.final_hyp_queue.put(" ".join(self.final_hyps))
def main():
parser = argparse.ArgumentParser(description='Command line client for kaldigstserver')
parser.add_argument('-m', '--mode', default="file", dest="mode", help="Mode of transcribing: audio file or streaming")
parser.add_argument('-u', '--uri', default="ws://localhost:8888/client/ws/speech", dest="uri", help="Server websocket URI")
parser.add_argument('-r', '--rate', default=32000, dest="rate", type=int, help="Rate in bytes/sec at which audio should be sent to the server. NB! For raw 16-bit audio it must be 2*samplerate!")
parser.add_argument('-t', '--token', default="eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VyX2lkIjoiNWNkMjM2YThiZWNlMWE4ZmI5MTNlNTlmIiwiZXhwIjoxNTU5ODcyNDI0fQ.aBfqru_0T_kg2x7kaxvGTrpJiwChAfJnvmKGz27gcF8", dest="token", help="User token")
parser.add_argument('--save-adaptation-state', help="Save adaptation state to file")
parser.add_argument('--send-adaptation-state', help="Send adaptation state from file")
parser.add_argument('--content-type', default='', help="Use the specified content type (empty by default, for raw files the default is audio/x-raw, layout=(string)interleaved, rate=(int)<rate>, format=(string)S16LE, channels=(int)1")
parser.add_argument('audiofile', nargs='?', help="Audio file to be sent to the server", type=argparse.FileType('rb'), default=sys.stdin)
args = parser.parse_args()
if args.mode == 'file' or args.mode == 'stream':
content_type = args.content_type
if content_type == '' and args.audiofile.name.endswith(".raw") or args.mode == 'stream':
content_type = "audio/x-raw, layout=(string)interleaved, rate=(int)%d, format=(string)S16LE, channels=(int)1" %(args.rate/2)
ws = MyClient(args.mode, args.audiofile, args.uri + '?%s' % (urllib.urlencode([("content-type", content_type)])) + '?%s' % (urllib.urlencode([("token", args.token)])), byterate=args.rate,
save_adaptation_state_filename=args.save_adaptation_state, send_adaptation_state_filename=args.send_adaptation_state)
ws.connect()
result = ws.get_full_hyp()
print result.encode('utf-8')
else:
print('\nTranscribe mode must be file or stream!\n')
if __name__ == "__main__":
main()