-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_dialog.py
141 lines (119 loc) · 5.11 KB
/
generate_dialog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# replica_dialog_generator
# Auto-generate dialog audio files using the Replica Studios 'AI Voices' API.
#
# Copyright (C) 2021 Ben Ackland (@bfackland)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import requests
import sys
from loguru import logger
from dialog import Dialog, DIALOG_PATH
import json
from os.path import exists
# Location of the JSON file holding the Replica API credentials.
# Prefer a copy in the current working directory; if that path does not exist,
# fall back to the one under ./replica_dialog_generator/ (the fallback itself
# is not checked for existence here).
REPLICA_API_CREDENTIALS_FILENAME = (
    "./replica_api_credentials.json"
    if exists("./replica_api_credentials.json")
    else "./replica_dialog_generator/replica_api_credentials.json"
)
def make_api_request(url, request_type='get', data=None, params=None, headers=None):
    """
    Send a GET or POST request to `url` and return its decoded JSON reply.

    GET and POST requests send data in different formats, so this common
    helper is used for both: for 'get' the `params` mapping is passed to
    `requests.get`, for 'post' the `data` mapping is passed to
    `requests.post`. `headers` is sent with either kind of request.

    Returns a `(status_code, json_body)` tuple.

    Only HTTP 200 and 202 are accepted. Any other status code, or a body
    that cannot be decoded as JSON, is logged and the program is aborted via
    `sys.exit(1)` (i.e. `SystemExit` with a non-zero status).

    Raises ValueError if `request_type` is neither 'get' nor 'post'.
    """
    if request_type == 'get':
        api_response = requests.get(url, params=params, headers=headers)
    elif request_type == 'post':
        api_response = requests.post(url, data=data, headers=headers)
    else:
        # Without this branch `api_response` would be unbound below and the
        # caller would get a confusing UnboundLocalError instead.
        raise ValueError(
            f"Unsupported request_type {request_type!r}; expected 'get' or 'post'."
        )

    if api_response.status_code not in (200, 202):
        logger.error("Received unknown response code from API; aborting.")
        # Non-zero status so shells/CI can tell that the run failed.
        sys.exit(1)

    try:
        api_response_json = api_response.json()
    except ValueError:
        # The JSON decode errors raised by Response.json() are ValueError
        # subclasses; a bare `except:` would also swallow KeyboardInterrupt.
        logger.error("Couldn't get JSON from API response; aborting.")
        sys.exit(1)

    return api_response.status_code, api_response_json
def get_access_token():
    """
    Authenticate against the Replica API and return an access token.

    In order to use the Replica API we must first have an access token, which
    is generated by authenticating with our Replica credentials.
    More info: https://docs.replicastudios.com/?python#replica-api-api-endpoints

    The credentials are loaded from the local JSON file named by
    REPLICA_API_CREDENTIALS_FILENAME and POSTed as-is to the /auth endpoint
    through `make_api_request`.

    Returns the value stored under 'access_token' in the API's JSON reply.
    If the reply has no such entry the error is logged and the program is
    aborted via `sys.exit(1)`. Errors opening or parsing the credentials
    file are not handled here and propagate to the caller.
    """
    logger.info(f"Using Replica API credentials from {REPLICA_API_CREDENTIALS_FILENAME}")
    with open(REPLICA_API_CREDENTIALS_FILENAME, 'r', encoding='utf-8') as rapifile:
        replica_api_credentials = json.load(rapifile)

    _, api_response_json = make_api_request(
        "https://api.replicastudios.com/auth", request_type='post',
        data=replica_api_credentials,
    )

    try:
        access_token = api_response_json['access_token']
    except (KeyError, TypeError):
        # KeyError: JSON object without the key; TypeError: the reply was a
        # JSON value that cannot be indexed by a string (list, number, null).
        logger.error("Couldn't get an access_token from remote API; aborting.")
        # Non-zero status so the failure is visible to the calling shell.
        sys.exit(1)

    return access_token
def get_speech(access_token, text_key, text, voice_uid):
    """
    Ask the Replica API to generate one line of dialog and return the audio.

    Using an access token that has already been obtained, a GET request is
    made to the /speech endpoint asking for `text` spoken by `voice_uid` as
    a 128 kbit/s, 44.1 kHz OGG file. If that succeeds, the JSON reply carries
    a download URL from which the speech file is then fetched.

    `text_key` is part of the signature but is not used by this function.

    Returns the downloaded file content as bytes, or None when no audio was
    obtained: the API answered 202, the reply contained no 'url' entry, or
    the download itself returned an HTTP error status.
    """
    # TODO: could be looked up automatically by 'make_api_request'
    #       e.g. stored on a class
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    params = {
        'txt': text, 'speaker_id': voice_uid,
        'extension': 'ogg', 'bit_rate': '128', 'sample_rate': '44100',
    }

    api_response_status_code, api_response_json = make_api_request(
        "https://api.replicastudios.com/speech", request_type='get',
        params=params, headers=headers
    )

    if api_response_status_code == 202:
        # TODO: fetch speech file already generated (not implemented yet).
        # NOTE(review): confirm against the API docs what a 202 reply means.
        return None

    try:
        download_url = api_response_json['url']
    except (KeyError, TypeError):
        # The caller treats None as "nothing to write", so skip this line
        # instead of crashing the whole run on one malformed reply.
        logger.error("API response did not include a download URL; skipping.")
        return None

    logger.info(f"Downloading generated audio from {download_url}...")
    download_response = requests.get(download_url)
    if not download_response.ok:
        # Never hand an HTTP error body back as if it were audio data; the
        # caller would write it to disk as an .ogg file.
        logger.error(
            f"Download failed with HTTP status {download_response.status_code}; skipping."
        )
        return None

    return download_response.content
def generate_dialog():
    """
    Generate and store audio for every response line that has no file yet.

    Obtains an API access token, takes the first (voice, uid) pair from
    `Dialog.replica_voice_uids`, and walks all loaded responses. Keys that
    start with "PA_" or "CLERK_" are skipped, as are texts for which
    `Dialog.get_dialog_file_for_text` already returns a file. For each
    remaining text the speech is requested and, if data comes back, written
    to DIALOG_PATH as "<voice>_<text_key>_<md5>.ogg". A summary is logged at
    the end.
    """
    token = get_access_token()

    dialog = Dialog()
    voice, speaker_uid = list(dialog.replica_voice_uids.items())[0]
    responses = dialog.load_responses()

    count = 0
    for text_key, text in responses.items():
        # These key families are not generated by this script.
        if text_key.startswith(("PA_", "CLERK_")):
            continue

        # Audio for this text is already present.
        if dialog.get_dialog_file_for_text(text):
            continue

        logger.info(f"""Generating dialog for text "{text}"...""")
        audio_bytes = get_speech(token, text_key, text, speaker_uid)
        if not audio_bytes:
            continue

        text_md5 = dialog.get_text_md5(text)
        dialog_filename = f"{voice}_{text_key}_{text_md5}.ogg"
        with open(f"{DIALOG_PATH}/{dialog_filename}", 'wb') as out_file:
            out_file.write(audio_bytes)
        logger.info(f"Wrote {dialog_filename} to disk")
        count += 1

    if count <= 0:
        logger.info("Didn't need to generate any new dialog files.")
    else:
        logger.info(f"Generated {count} dialog files; total = {len(responses)}")
# Script entry point: generate any missing dialog files when this module is
# executed directly rather than imported.
if __name__ == "__main__":
    generate_dialog()