Skip to content

Commit

Permalink
test options
Browse files Browse the repository at this point in the history
  • Loading branch information
gorkemgoknar committed Oct 30, 2023
1 parent 84f9970 commit d76c97c
Showing 1 changed file with 35 additions and 7 deletions.
42 changes: 35 additions & 7 deletions test/test_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,14 @@ def stream_ffplay(audio_stream, output_file, save=True):
ffplay_proc.wait()


def tts(text, speaker) -> Iterator[bytes]:
def tts(text, speaker,language, server_url , decoder, stream_chunk_size) -> Iterator[bytes]:
start = time.perf_counter()
speaker["text"] = text
speaker["language"] = "en"
speaker["language"] = language
speaker["decoder"] = decoder # "hifigan" or "ne_hifigan" for TTS>0.19.0
speaker["stream_chunk_size"] = stream_chunk_size # you can reduce it to get faster response, but degrade quality
res = requests.post(
"http://localhost:8000/tts_stream",
f"{server_url}/tts_stream",
json=speaker,
stream=True,
)
Expand All @@ -66,9 +68,9 @@ def tts(text, speaker) -> Iterator[bytes]:
print("⏱️ response.elapsed:", res.elapsed)


def get_speaker(ref_audio):
def get_speaker(ref_audio,server_url):
files = {"wav_file": ("reference.wav", open(ref_audio, "rb"))}
response = requests.post("http://localhost:8000/clone_speaker", files=files)
response = requests.post(f"{server_url}/clone_speaker", files=files)
return response.json()


Expand All @@ -77,22 +79,48 @@ def get_speaker(ref_audio):
parser.add_argument(
"--text",
default="It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
help="text input for TTS"
)
parser.add_argument(
"--language",
default="en",
help="Language to use default is 'en' (English)"
)

parser.add_argument(
"--output_file",
default=None,
help="Save TTS output to given filename"
)
parser.add_argument(
"--ref_file",
default=None,
help="Reference audio file to use, when not given will use default"
)
parser.add_argument(
"--server_url",
default="http://localhost:8000",
help="Server url http://localhost:8000 default, change to your server location "
)
parser.add_argument(
"--decoder",
default="ne_hifigan",
help="Decoder for vocoder, ne_hifigan default, options ne_hifigan or hifigan"
)

parser.add_argument(
"--stream_chunk_size",
default="20",
help="Stream chunk size , 20 default, reducing will get faster latency but may degrade quality"
)

args = parser.parse_args()

with open("./default_speaker.json", "r") as file:
speaker = json.load(file)

if args.ref_file is not None:
print("Computing the latents for a new reference...")
speaker = get_speaker(args.ref_file)
speaker = get_speaker(args.ref_file,args.server_url)

audio = stream_ffplay(tts(args.text, speaker), args.output_file, save=bool(args.output_file))
audio = stream_ffplay(tts(args.text, speaker,args.language,args.server_url,args.decoder,args.stream_chunk_size), args.output_file, save=bool(args.output_file))

0 comments on commit d76c97c

Please sign in to comment.