Skip to content

Commit

Permalink
OpenAI TTS
Browse files Browse the repository at this point in the history
  • Loading branch information
AndraxDev committed Nov 29, 2023
1 parent e793872 commit cd2c5aa
Show file tree
Hide file tree
Showing 10 changed files with 399 additions and 32 deletions.
2 changes: 1 addition & 1 deletion app/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ dependencies {
implementation 'androidx.appcompat:appcompat:1.6.1'
implementation 'com.google.android.material:material:1.10.0'
implementation 'com.google.code.gson:gson:2.10.1'
implementation 'com.aallam.openai:openai-client:3.5.0'
implementation 'com.aallam.openai:openai-client:3.6.1'
implementation 'io.ktor:ktor-client-android:2.3.5'
implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3'
implementation 'org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,43 @@ class Preferences private constructor(private var preferences: SharedPreferences
putString("voice", model)
}

/**
 * Stores the selected text-to-speech engine under the "tts_engine" key.
 *
 * @param engine identifier of the engine to use ("google" or "openai")
 */
fun setTtsEngine(engine: String) {
    putString("tts_engine", engine)
}

/**
 * Reads the text-to-speech engine stored under the "tts_engine" key.
 *
 * "google" is passed to the lookup as its second argument (presumably the
 * fallback used when nothing has been stored yet; the helper is defined elsewhere).
 *
 * @return identifier of the engine ("google" or "openai")
 */
fun getTtsEngine(): String = getString("tts_engine", "google")

/**
 * Stores the name of the OpenAI voice under the "openai_voice" key.
 *
 * @param voice name of the voice to use for OpenAI speech synthesis
 */
fun setOpenAIVoice(voice: String) {
    putString("openai_voice", voice)
}

/**
 * Reads the name of the OpenAI voice stored under the "openai_voice" key.
 *
 * "alloy" is passed to the lookup as its second argument (presumably the
 * fallback used when nothing has been stored yet; the helper is defined elsewhere).
 *
 * @return name of the voice
 */
fun getOpenAIVoice(): String = getString("openai_voice", "alloy")

/**
* Retrieves the encrypted API key from the shared preferences.
*
Expand Down
113 changes: 95 additions & 18 deletions app/src/main/java/org/teslasoft/assistant/ui/ChatActivity.kt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import android.content.Context
import android.content.Intent
import android.content.pm.PackageManager
import android.graphics.drawable.Drawable
import android.media.MediaPlayer
import android.media.MediaRecorder
import android.net.Uri
import android.os.Bundle
Expand All @@ -46,14 +47,12 @@ import android.widget.ListView
import android.widget.ProgressBar
import android.widget.TextView
import android.widget.Toast

import androidx.activity.result.contract.ActivityResultContracts
import androidx.appcompat.content.res.AppCompatResources
import androidx.core.content.ContextCompat
import androidx.core.graphics.drawable.DrawableCompat
import androidx.fragment.app.FragmentActivity

import com.aallam.openai.api.LegacyOpenAI
import com.aallam.openai.api.audio.SpeechRequest
import com.aallam.openai.api.audio.TranscriptionRequest
import com.aallam.openai.api.chat.ChatCompletion
import com.aallam.openai.api.chat.ChatCompletionChunk
Expand All @@ -78,13 +77,11 @@ import com.aallam.openai.client.OpenAI
import com.aallam.openai.client.OpenAIConfig
import com.aallam.openai.client.OpenAIHost
import com.aallam.openai.client.RetryStrategy

import com.google.android.material.dialog.MaterialAlertDialogBuilder
import com.google.android.material.elevation.SurfaceColors
import com.google.gson.Gson
import com.google.mlkit.nl.languageid.LanguageIdentification
import com.google.mlkit.nl.languageid.LanguageIdentifier

import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.flow.Flow
Expand All @@ -95,10 +92,8 @@ import kotlinx.serialization.json.jsonPrimitive
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
import kotlinx.serialization.json.putJsonObject

import okio.FileSystem
import okio.Path.Companion.toPath

import org.jetbrains.annotations.TestOnly
import org.teslasoft.assistant.R
import org.teslasoft.assistant.preferences.ChatPreferences
Expand All @@ -108,18 +103,17 @@ import org.teslasoft.assistant.ui.onboarding.WelcomeActivity
import org.teslasoft.assistant.ui.permission.MicrophonePermissionActivity
import org.teslasoft.assistant.util.Hash
import org.teslasoft.assistant.util.LocaleParser

import java.io.File
import java.io.FileInputStream
import java.io.FileNotFoundException
import java.io.FileOutputStream
import java.io.IOException

import java.net.URL

import java.util.Base64
import java.util.Locale
import kotlin.time.Duration.Companion.seconds


class ChatActivity : FragmentActivity() {

// Init UI
Expand Down Expand Up @@ -166,6 +160,9 @@ class ChatActivity : FragmentActivity() {
private var recognizer: SpeechRecognizer? = null
private var recorder: MediaRecorder? = null

// Media player for OpenAI TTS
private var mediaPlayer: MediaPlayer? = null

private val speechListener = object : RecognitionListener {
override fun onReadyForSpeech(params: Bundle?) { /* unused */ }
override fun onBeginningOfSpeech() { /* unused */ }
Expand Down Expand Up @@ -278,6 +275,8 @@ class ChatActivity : FragmentActivity() {
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)

mediaPlayer = MediaPlayer()

setContentView(R.layout.activity_chat)
languageIdentifier = LanguageIdentification.getClient()

Expand All @@ -304,6 +303,10 @@ class ChatActivity : FragmentActivity() {
tts!!.stop()
tts!!.shutdown()
}
if (mediaPlayer!!.isPlaying) {
mediaPlayer!!.stop()
mediaPlayer!!.reset()
}
super.onDestroy()
}

Expand Down Expand Up @@ -428,7 +431,13 @@ class ChatActivity : FragmentActivity() {
btnMicro?.setOnLongClickListener {
if (isRecording) {
cancelState = true
tts!!.stop()
try {
if (mediaPlayer!!.isPlaying) {
mediaPlayer!!.stop()
mediaPlayer!!.reset()
}
tts!!.stop()
} catch (_: java.lang.Exception) {/**/}
btnMicro?.setImageResource(R.drawable.ic_microphone)
if (Preferences.getPreferences(this, chatId).getAudioModel() == "google") recognizer?.stopListening()
isRecording = false
Expand Down Expand Up @@ -684,12 +693,24 @@ class ChatActivity : FragmentActivity() {

private fun handleGoogleSpeechRecognition() {
if (isRecording) {
tts!!.stop()
try {
if (mediaPlayer!!.isPlaying) {
mediaPlayer!!.stop()
mediaPlayer!!.reset()
}
tts!!.stop()
} catch (_: java.lang.Exception) {/**/}
btnMicro?.setImageResource(R.drawable.ic_microphone)
recognizer?.stopListening()
isRecording = false
} else {
tts!!.stop()
try {
if (mediaPlayer!!.isPlaying) {
mediaPlayer!!.stop()
mediaPlayer!!.reset()
}
tts!!.stop()
} catch (_: java.lang.Exception) {/**/}
btnMicro?.setImageResource(R.drawable.ic_stop_recording)
if (ContextCompat.checkSelfPermission(
this, Manifest.permission.RECORD_AUDIO
Expand Down Expand Up @@ -820,7 +841,13 @@ class ChatActivity : FragmentActivity() {
}

private fun parseMessage(message: String) {
tts!!.stop()
try {
if (mediaPlayer!!.isPlaying) {
mediaPlayer!!.stop()
mediaPlayer!!.reset()
}
tts!!.stop()
} catch (_: java.lang.Exception) {/**/}
if (message != "") {
messageInput?.setText("")

Expand Down Expand Up @@ -916,7 +943,6 @@ class ChatActivity : FragmentActivity() {
startActivity(intent)
}

@OptIn(LegacyOpenAI::class)
private suspend fun generateResponse(request: String, shouldPronounce: Boolean) {
disableAutoScroll = false
chat?.transcriptMode = ListView.TRANSCRIPT_MODE_ALWAYS_SCROLL
Expand Down Expand Up @@ -1223,16 +1249,67 @@ class ChatActivity : FragmentActivity() {
)
}

tts!!.speak(message, TextToSpeech.QUEUE_FLUSH, null, "")
speak(message)
}.addOnFailureListener {
// Ignore auto language detection if an error is occurred
autoLangDetect = false
ttsPostInit()

tts!!.speak(message, TextToSpeech.QUEUE_FLUSH, null, "")
speak(message)
}
} else {
tts!!.speak(message, TextToSpeech.QUEUE_FLUSH, null, "")
speak(message)
}
}
}

/**
 * Reads [message] aloud with the TTS engine stored in the preferences of the current chat.
 *
 * When the engine is "google" the text is handed to the platform [TextToSpeech] instance.
 * Otherwise the audio is requested from the OpenAI speech endpoint (model "tts-1"),
 * written to a temporary file in the cache directory and played through [mediaPlayer].
 *
 * Any failure while requesting, storing or playing the audio is reported in an
 * "Audio error" dialog instead of crashing the activity.
 *
 * @param message text to pronounce
 */
private fun speak(message: String) {
    // Both lookups are kept in their original order: the factory is defined elsewhere
    // and may depend on the sequence of calls.
    val chatPreferences = Preferences.getPreferences(this, chatId)
    // Preferences obtained with an empty chat id hold the OpenAI voice setting.
    val voicePreferences = Preferences.getPreferences(this, "")

    if (chatPreferences.getTtsEngine() == "google") {
        tts!!.speak(message, TextToSpeech.QUEUE_FLUSH, null, "")
        return
    }

    CoroutineScope(Dispatchers.Main).launch {
        try {
            // The request is inside the try block so that network/API failures
            // are reported to the user instead of terminating the app.
            val rawAudio = ai!!.speech(
                request = SpeechRequest(
                    model = ModelId("tts-1"),
                    input = message,
                    voice = com.aallam.openai.api.audio.Voice(voicePreferences.getOpenAIVoice()),
                )
            )

            // Blocking file I/O is moved off the main thread; `use` closes the
            // stream even when the write fails.
            val audioFile = kotlinx.coroutines.withContext(Dispatchers.IO) {
                File.createTempFile("audio", ".mp3", cacheDir).apply {
                    deleteOnExit()
                    FileOutputStream(this).use { output -> output.write(rawAudio) }
                }
            }

            mediaPlayer?.let { player ->
                // Reset the shared player so a previous playback does not interfere.
                player.reset()

                // A file descriptor is used instead of a path (the path variant failed
                // with "Prepare failed.: status=0x1"). The descriptor may be closed as
                // soon as setDataSource returns.
                FileInputStream(audioFile).use { input -> player.setDataSource(input.fd) }
                player.prepare()
                player.start()
            }
        } catch (ex: kotlinx.coroutines.CancellationException) {
            // Never swallow coroutine cancellation.
            throw ex
        } catch (ex: Exception) {
            // Showing a dialog on a finished activity would throw, so skip it then.
            if (!isFinishing && !isDestroyed) {
                MaterialAlertDialogBuilder(this@ChatActivity, R.style.App_MaterialAlertDialog)
                    .setTitle("Audio error")
                    .setMessage(ex.stackTraceToString())
                    .setPositiveButton("Close") { _, _ -> }
                    .show()
            }
        }
    }
}
Expand Down
18 changes: 18 additions & 0 deletions app/src/main/java/org/teslasoft/assistant/ui/SettingsActivity.kt
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ class SettingsActivity : FragmentActivity() {
private var btnVoiceSelector: LinearLayout? = null
private var activitySettingsTitle: TextView? = null
private var globalSettingsTip: LinearLayout? = null
private var btnGoogleTTS: MaterialButton? = null
private var btnOpenAITTS: MaterialButton? = null

private var preferences: Preferences? = null
private var chatId = ""
Expand Down Expand Up @@ -207,6 +209,8 @@ class SettingsActivity : FragmentActivity() {
btnModelGroup = findViewById(R.id.btn_model_s_for)
btnAutoLanguageDetect = findViewById(R.id.btn_auto_lang_detect)
btnVoiceSelector = findViewById(R.id.btn_manage_voices)
btnGoogleTTS = findViewById(R.id.tts_google)
btnOpenAITTS = findViewById(R.id.tts_openai)

btnChangeApi?.background = getDarkAccentDrawable(
ContextCompat.getDrawable(this, R.drawable.t_menu_top_item_background)!!, this)
Expand Down Expand Up @@ -247,6 +251,9 @@ class SettingsActivity : FragmentActivity() {
findViewById<LinearLayout>(R.id.btn_audio_source)!!.background = getDarkAccentDrawable(
ContextCompat.getDrawable(this, R.drawable.t_menu_center_item_background_noclick)!!, this)

findViewById<LinearLayout>(R.id.btn_tts_source)!!.background = getDarkAccentDrawable(
ContextCompat.getDrawable(this, R.drawable.t_menu_center_item_background_noclick)!!, this)

findViewById<LinearLayout>(R.id.btn_model_s)!!.background = getDarkAccentDrawable(
ContextCompat.getDrawable(this, R.drawable.t_menu_center_item_background_noclick)!!, this)

Expand Down Expand Up @@ -293,6 +300,9 @@ class SettingsActivity : FragmentActivity() {
functionCallingSwitch?.isChecked = preferences?.getFunctionCalling() == true
imagineSwitch?.isChecked = preferences?.getImagineCommand() == true

btnGoogleTTS?.isChecked = preferences?.getTtsEngine() == "google"
btnOpenAITTS?.isChecked = preferences?.getTtsEngine() == "openai"

if (preferences?.getSilence() == true) {
alwaysSpeak?.isEnabled = false
}
Expand Down Expand Up @@ -431,6 +441,14 @@ class SettingsActivity : FragmentActivity() {
audioGoogle?.setOnClickListener { preferences?.setAudioModel("google") }
audioWhisper?.setOnClickListener { preferences?.setAudioModel("whisper") }

btnGoogleTTS?.setOnClickListener {
preferences?.setTtsEngine("google")
}

btnOpenAITTS?.setOnClickListener {
preferences?.setTtsEngine("openai")
}

gpt30?.setOnClickListener {
model = "gpt-3.5-turbo"
preferences?.setModel(model)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class VoiceListAdapter(private val context: Context, private val items: ArrayLis
val item = getItem(position) as String
viewHolder.textView.text = item

if (Preferences.getPreferences(context, "").getVoice() == item) {
if (Preferences.getPreferences(context, "").getVoice() == item || Preferences.getPreferences(context, "").getOpenAIVoice() == item) {
viewHolder.voiceBg.background = getDarkAccentDrawableV2(
ContextCompat.getDrawable(context, R.drawable.btn_accent_tonal_selector_v4)!!, context)

Expand Down
Loading

0 comments on commit cd2c5aa

Please sign in to comment.