Skip to content

Commit

Permalink
makes synthesizer methods static
Browse files (browse the repository at this point in the history)
  • Loading branch information
ajar98 committed Sep 12, 2023
1 parent 4575006 commit a8133f7
Show file tree
Hide file tree
Showing 12 changed files with 37 additions and 19 deletions.
2 changes: 1 addition & 1 deletion vocode/streaming/synthesizer/azure_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def get_message_up_to(
self,
message: str,
ssml: str,
seconds: int,
seconds: float,
word_boundary_event_pool: WordBoundaryEventPool,
) -> str:
events = word_boundary_event_pool.get_events_sorted()
Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/bark_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ async def create_speech(
write_wav(output_bytes_io, self.SAMPLE_RATE, int_audio_arr)

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down
35 changes: 20 additions & 15 deletions vocode/streaming/synthesizer/base_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def __init__(self, chunk: bytes, is_last_chunk: bool):
def __init__(
self,
chunk_generator: AsyncGenerator[ChunkResult, None],
get_message_up_to: Callable[[int], str],
get_message_up_to: Callable[[float], str],
):
self.chunk_generator = chunk_generator
self.get_message_up_to = get_message_up_to
Expand Down Expand Up @@ -172,20 +172,23 @@ def ready_synthesizer(self):
pass

# given the number of seconds the message was allowed to go until, where did we get in the message?
@staticmethod
def get_message_cutoff_from_total_response_length(
self, message: BaseMessage, seconds: int, size_of_output: int
synthesizer_config: SynthesizerConfig,
message: BaseMessage,
seconds: float,
size_of_output: int,
) -> str:
estimated_output_seconds = (
size_of_output / self.synthesizer_config.sampling_rate
)
estimated_output_seconds = size_of_output / synthesizer_config.sampling_rate
if not message.text:
return message.text

estimated_output_seconds_per_char = estimated_output_seconds / len(message.text)
return message.text[: int(seconds / estimated_output_seconds_per_char)]

@staticmethod
def get_message_cutoff_from_voice_speed(
self, message: BaseMessage, seconds: int, words_per_minute: int
message: BaseMessage, seconds: float, words_per_minute: int
) -> str:
words_per_second = words_per_minute / 60
estimated_words_spoken = math.floor(words_per_second * seconds)
Expand All @@ -203,19 +206,21 @@ async def create_speech(
raise NotImplementedError

# @param file - a file-like object in wav format
@staticmethod
def create_synthesis_result_from_wav(
self, file: Any, message: BaseMessage, chunk_size: int
synthesizer_config: SynthesizerConfig,
file: Any,
message: BaseMessage,
chunk_size: int,
) -> SynthesisResult:
output_bytes = convert_wav(
file,
output_sample_rate=self.synthesizer_config.sampling_rate,
output_encoding=self.synthesizer_config.audio_encoding,
output_sample_rate=synthesizer_config.sampling_rate,
output_encoding=synthesizer_config.audio_encoding,
)

if self.synthesizer_config.should_encode_as_wav:
chunk_transform = lambda chunk: encode_as_wav(
chunk, self.synthesizer_config
)
if synthesizer_config.should_encode_as_wav:
chunk_transform = lambda chunk: encode_as_wav(chunk, synthesizer_config)
else:
chunk_transform = lambda chunk: chunk

Expand All @@ -232,8 +237,8 @@ async def chunk_generator(output_bytes):

return SynthesisResult(
chunk_generator(output_bytes),
lambda seconds: self.get_message_cutoff_from_total_response_length(
message, seconds, len(output_bytes)
lambda seconds: BaseSynthesizer.get_message_cutoff_from_total_response_length(
synthesizer_config, message, seconds, len(output_bytes)
),
)

Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/coqui_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ async def create_speech(
)

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=io.BytesIO(read_response),
message=message,
chunk_size=chunk_size,
Expand Down
5 changes: 4 additions & 1 deletion vocode/streaming/synthesizer/coqui_tts_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ async def create_speech(
audio_segment.export(output_bytes_io, format="wav") # type: ignore

result = self.create_synthesis_result_from_wav(
file=output_bytes_io, message=message, chunk_size=chunk_size
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
)

convert_span.end()
Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/eleven_labs_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ async def create_speech(
output_bytes_io = decode_mp3(audio_data)

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/google_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ async def create_speech(
output_bytes_io.seek(0)

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/gtts_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def thread():
audio_segment.export(output_bytes_io, format="wav") # type: ignore

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down
1 change: 1 addition & 0 deletions vocode/streaming/synthesizer/play_ht_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ async def create_speech(
output_bytes_io = decode_mp3(read_response)

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down
2 changes: 1 addition & 1 deletion vocode/streaming/synthesizer/polly_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def get_speech_marks(self, message: str) -> Any:
def get_message_up_to(
self,
message: str,
seconds: int,
seconds: float,
word_events,
) -> str:
for event in word_events:
Expand Down
5 changes: 4 additions & 1 deletion vocode/streaming/synthesizer/rime_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,10 @@ async def create_speech(
audio_file = io.BytesIO(base64.b64decode(data.get("audioContent")))

result = self.create_synthesis_result_from_wav(
file=audio_file, message=message, chunk_size=chunk_size
synthesizer_config=self.synthesizer_config,
file=audio_file,
message=message,
chunk_size=chunk_size,
)
convert_span.end()
return result
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ async def create_speech(
audio_segment.export(output_bytes_io, format="wav") # type: ignore

result = self.create_synthesis_result_from_wav(
synthesizer_config=self.synthesizer_config,
file=output_bytes_io,
message=message,
chunk_size=chunk_size,
Expand Down

0 comments on commit a8133f7

Please sign in to comment.