Allow voice selection and personal voices (#8)
# Allow voice selection and personal voices

## ♻️ Current situation & Problem
Users can optionally specify a language for the voice used when synthesizing speech, but cannot select a specific voice. Since iOS offers many voice options, including [personal voices](https://support.apple.com/en-us/104993), voice selection would be a valuable feature to support.

## ⚙️ Release Notes 
- Allows users to specify a voice when generating speech.
- Provides an array of voices in the user's current locale.
- Provides a function for requesting permission for and accessing personal voices (see the API sketch below).
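
In brief, the new `SpeechSynthesizer` API surface looks like this (a sketch based on the diff below; it assumes `import SpeziSpeechSynthesizer` and a `SpeechSynthesizer` instance injected from the SwiftUI environment):

```swift
// Voices available in the user's current locale.
let voices: [AVSpeechSynthesisVoice] = speechSynthesizer.voices

// Speak with a specific voice, if one is available.
if let voice = voices.first {
    speechSynthesizer.speak("Hello!", voice: voice)
}

// Request authorization for and fetch the user's personal voices (iOS 17+).
// Returns an empty array if the user declines authorization.
let personalVoices = await speechSynthesizer.getPersonalVoices()
```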

## ✅ Testing
- Updates the test app with a selection of voices that can be chosen to
synthesize speech.
- Personal voices cannot be tested on a simulator.

## 📝 Documentation
- Updates the README with an example of voice selection in a SwiftUI interface.
- Adds instructions for requesting access to Personal Voices.

## 📝 Code of Conduct & Contributing Guidelines 

By creating this pull request, you agree to follow our [Code
of
Conduct](https://github.com/StanfordSpezi/.github/blob/main/CODE_OF_CONDUCT.md)
and [Contributing
Guidelines](https://github.com/StanfordSpezi/.github/blob/main/CONTRIBUTING.md):
- [X] I agree to follow the [Code of
Conduct](https://github.com/StanfordSpezi/.github/blob/main/CODE_OF_CONDUCT.md)
and [Contributing
Guidelines](https://github.com/StanfordSpezi/.github/blob/main/CONTRIBUTING.md).

---------

Co-authored-by: Paul Schmiedmayer <[email protected]>
vishnuravi and PSchmiedmayer authored Jul 1, 2024
1 parent 60b8cdb commit 0b79f72
Showing 10 changed files with 249 additions and 13 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build-and-test.yml
@@ -137,3 +137,5 @@ jobs:
uses: StanfordSpezi/.github/.github/workflows/create-and-upload-coverage-report.yml@v2
with:
coveragereports: 'SpeziSpeech-iOS.xcresult SpeziSpeech-visionOS.xcresult SpeziSpeech-macOS.xcresult TestApp-iOS.xcresult TestApp-iPad.xcresult TestApp-visionOS.xcresult'
secrets:
token: ${{ secrets.CODECOV_TOKEN }}
43 changes: 37 additions & 6 deletions Package.swift
@@ -8,9 +8,16 @@
// SPDX-License-Identifier: MIT
//

import class Foundation.ProcessInfo
import PackageDescription


#if swift(<6)
let strictConcurrency: SwiftSetting = .enableExperimentalFeature("StrictConcurrency")
#else
let strictConcurrency: SwiftSetting = .enableUpcomingFeature("StrictConcurrency")
#endif

let package = Package(
name: "SpeziSpeech",
defaultLocalization: "en",
@@ -25,32 +32,56 @@ let package = Package(
],
dependencies: [
.package(url: "https://github.com/StanfordSpezi/Spezi", from: "1.2.1")
],
] + swiftLintPackage(),
targets: [
.target(
name: "SpeziSpeechRecognizer",
dependencies: [
.product(name: "Spezi", package: "Spezi")
],
swiftSettings: [
.enableExperimentalFeature("StrictConcurrency")
]
strictConcurrency
],
plugins: [] + swiftLintPlugin()
),
.target(
name: "SpeziSpeechSynthesizer",
dependencies: [
.product(name: "Spezi", package: "Spezi")
],
swiftSettings: [
.enableExperimentalFeature("StrictConcurrency")
]
strictConcurrency
],
plugins: [] + swiftLintPlugin()
),
.testTarget(
name: "SpeziSpeechTests",
dependencies: [
.target(name: "SpeziSpeechRecognizer"),
.target(name: "SpeziSpeechSynthesizer")
]
],
swiftSettings: [
strictConcurrency
],
plugins: [] + swiftLintPlugin()
)
]
)


func swiftLintPlugin() -> [Target.PluginUsage] {
// Fully quit Xcode and open again with `open --env SPEZI_DEVELOPMENT_SWIFTLINT /Applications/Xcode.app`
if ProcessInfo.processInfo.environment["SPEZI_DEVELOPMENT_SWIFTLINT"] != nil {
[.plugin(name: "SwiftLintBuildToolPlugin", package: "SwiftLint")]
} else {
[]
}
}

func swiftLintPackage() -> [PackageDescription.Package.Dependency] {
if ProcessInfo.processInfo.environment["SPEZI_DEVELOPMENT_SWIFTLINT"] != nil {
[.package(url: "https://github.com/realm/SwiftLint.git", .upToNextMinor(from: "0.55.1"))]
} else {
[]
}
}
39 changes: 39 additions & 0 deletions README.md
@@ -130,6 +130,45 @@ struct SpeechTestView: View {
}
```

SpeziSpeech also supports selecting voices, including [personal voices](https://support.apple.com/en-us/104993).

The following example shows how to offer the user a choice of voices in their current locale and use the selected voice to synthesize speech.

```swift
struct SpeechVoiceSelectionExample: View {
@Environment(SpeechSynthesizer.self) private var speechSynthesizer
@State private var selectedVoiceIndex = 0
@State private var message = ""


var body: some View {
VStack {
TextField("Enter text to be spoken", text: $message)
.textFieldStyle(RoundedBorderTextFieldStyle())
.padding()
Picker("Voice", selection: $selectedVoiceIndex) {
ForEach(speechSynthesizer.voices.indices, id: \.self) { index in
Text(speechSynthesizer.voices[index].name)
.tag(index)
}
}
.pickerStyle(.inline)
.accessibilityIdentifier("voicePicker")
.padding()
Button("Speak") {
speechSynthesizer.speak(
message,
voice: speechSynthesizer.voices[selectedVoiceIndex]
)
}
}
.padding()
}
}
```

Personal voices are supported on iOS 17 and above. Users must first [create a personal voice](https://support.apple.com/en-us/104993). Using personal voices also requires obtaining authorization from the user. To request access to any available personal voices, you can use the `getPersonalVoices()` method of the `SpeechSynthesizer`. Personal voices will then become available alongside system voices.
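
As a minimal sketch (assuming the user has already created a personal voice and grants authorization; otherwise `getPersonalVoices()` returns an empty array):

```swift
// Request authorization for and fetch any personal voices on the device.
let personalVoices = await speechSynthesizer.getPersonalVoices()

// Speak with the first personal voice, if one is available.
if let personalVoice = personalVoices.first {
    speechSynthesizer.speak("Hello from my personal voice!", voice: personalVoice)
}
```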

For more information, please refer to the [API documentation](https://swiftpackageindex.com/StanfordSpezi/SpeziSpeech/documentation).


35 changes: 34 additions & 1 deletion Sources/SpeziSpeechSynthesizer/SpeechSynthesizer.swift
@@ -67,7 +67,12 @@ public final class SpeechSynthesizer: NSObject, Module, DefaultInitializable, En
public private(set) var isSpeaking = false
/// A Boolean value that indicates whether a speech synthesizer is in a paused state.
public private(set) var isPaused = false

/// An array of voices in the user's current locale.
public var voices: [AVSpeechSynthesisVoice] {
AVSpeechSynthesisVoice.speechVoices().filter {
$0.language == AVSpeechSynthesisVoice.currentLanguageCode()
}
}

override public required init() {
super.init()
@@ -89,6 +94,16 @@ public final class SpeechSynthesizer: NSObject, Module, DefaultInitializable, En
speak(utterance)
}

/// Adds the text to the speech synthesizer's queue.
/// - Parameters:
/// - text: A string that contains the text to speak.
/// - voice: The `AVSpeechSynthesisVoice` to use.
public func speak(_ text: String, voice: AVSpeechSynthesisVoice) {
let utterance = AVSpeechUtterance(string: text)
utterance.voice = voice
speak(utterance)
}

/// Adds the utterance to the speech synthesizer’s queue.
/// - Parameter utterance: An `AVSpeechUtterance` instance that contains text to speak.
public func speak(_ utterance: AVSpeechUtterance) {
@@ -121,6 +136,24 @@ public final class SpeechSynthesizer: NSObject, Module, DefaultInitializable, En
}
}

/// Requests permission for and fetches any personal voices the user may have created on the device.
/// - Returns: An array of personal voices, or an empty array if authorization was not granted.
public func getPersonalVoices() async -> [AVSpeechSynthesisVoice] {
await withCheckedContinuation { continuation in
AVSpeechSynthesizer.requestPersonalVoiceAuthorization { status in
switch status {
case .authorized:
let personalVoices = AVSpeechSynthesisVoice.speechVoices().filter {
$0.voiceTraits == .isPersonalVoice
}
continuation.resume(returning: personalVoices)
default:
continuation.resume(returning: [])
}
}
}
}


// MARK: - AVSpeechSynthesizerDelegate
@_documentation(visibility: internal)
@@ -75,6 +75,46 @@ struct SpeechTestView: View {
}
```

SpeziSpeech also supports selecting voices, including [personal voices](https://support.apple.com/en-us/104993).

The following example shows how to offer the user a choice of voices in their current locale and use the selected voice to synthesize speech.

```swift
struct SpeechVoiceSelectionExample: View {
@Environment(SpeechSynthesizer.self) private var speechSynthesizer
@State private var selectedVoiceIndex = 0
@State private var message = ""


var body: some View {
VStack {
TextField("Enter text to be spoken", text: $message)
.textFieldStyle(RoundedBorderTextFieldStyle())
.padding()
Picker("Voice", selection: $selectedVoiceIndex) {
ForEach(speechSynthesizer.voices.indices, id: \.self) { index in
Text(speechSynthesizer.voices[index].name)
.tag(index)
}
}
.pickerStyle(.inline)
.accessibilityIdentifier("voicePicker")
.padding()
Button("Speak") {
speechSynthesizer.speak(
message,
voice: speechSynthesizer.voices[selectedVoiceIndex]
)
}
}
.padding()
}
}
```

Personal voices are supported on iOS 17 and above. Users must first [create a personal voice](https://support.apple.com/en-us/104993). Using personal voices also requires obtaining authorization from the user. To request access to any available personal voices, you can use the `getPersonalVoices()` method of the `SpeechSynthesizer`. Personal voices will then become available alongside system voices.
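
As in the README, a minimal sketch (assuming the user has already created a personal voice and grants authorization; otherwise `getPersonalVoices()` returns an empty array):

```swift
// Request authorization for and fetch any personal voices on the device.
let personalVoices = await speechSynthesizer.getPersonalVoices()

// Speak with the first personal voice, if one is available.
if let personalVoice = personalVoices.first {
    speechSynthesizer.speak("Hello from my personal voice!", voice: personalVoice)
}
```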


## Topics

- ``SpeechSynthesizer``
2 changes: 0 additions & 2 deletions Tests/UITests/TestApp/SpeechTestView.swift
@@ -21,8 +21,6 @@ struct SpeechTestView: View {

var body: some View {
VStack {
Text("SpeziSpeech")

ScrollView {
Text(message)
.padding()
44 changes: 44 additions & 0 deletions Tests/UITests/TestApp/SpeechVoiceSelectionTestView.swift
@@ -0,0 +1,44 @@
//
// This source file is part of the Stanford Spezi open-source project
//
// SPDX-FileCopyrightText: 2024 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//

import Speech
import SpeziSpeechRecognizer
import SpeziSpeechSynthesizer
import SwiftUI

struct SpeechVoiceSelectionTestView: View {
@Environment(SpeechSynthesizer.self) private var speechSynthesizer
@State private var selectedVoiceIndex = 0
@State private var message = ""

var body: some View {
VStack {
TextField("Enter text to be spoken", text: $message)
.textFieldStyle(RoundedBorderTextFieldStyle())
.padding()

Picker("Voice", selection: $selectedVoiceIndex) {
ForEach(speechSynthesizer.voices.indices, id: \.self) { index in
Text(speechSynthesizer.voices[index].name)
.tag(index)
}
}
.pickerStyle(.inline)
.accessibilityIdentifier("voicePicker")
.padding()

Button("Speak") {
speechSynthesizer.speak(
message,
voice: speechSynthesizer.voices[selectedVoiceIndex]
)
}
}
.padding()
}
}
20 changes: 18 additions & 2 deletions Tests/UITests/TestApp/TestApp.swift
@@ -17,8 +17,24 @@ struct UITestsApp: App {

var body: some Scene {
WindowGroup {
SpeechTestView()
.spezi(appDelegate)
NavigationStack {
MenuView()
}
.spezi(appDelegate)
}
}
}

struct MenuView: View {
var body: some View {
List {
NavigationLink(destination: SpeechTestView()) {
Text("Speech Test View")
}
NavigationLink(destination: SpeechVoiceSelectionTestView()) {
Text("Speech Voice Selection Test View")
}
}
.navigationTitle("Spezi Speech Tests")
}
}
33 changes: 31 additions & 2 deletions Tests/UITests/TestAppUITests/TestAppUITests.swift
@@ -16,10 +16,39 @@ class TestAppUITests: XCTestCase {
continueAfterFailure = false
}


func testSpeziSpeech() throws {
let app = XCUIApplication()
app.launch()
XCTAssert(app.staticTexts["SpeziSpeech"].waitForExistence(timeout: 1))

XCTAssertTrue(app.staticTexts["Spezi Speech Tests"].waitForExistence(timeout: 1))
}

func testSynthesisWithVoiceSelection() throws {
let app = XCUIApplication()
app.launch()

let voiceSelectionTestViewButton = app.staticTexts["Speech Voice Selection Test View"]

XCTAssertTrue(voiceSelectionTestViewButton.waitForExistence(timeout: 1))
voiceSelectionTestViewButton.tap()

#if !os(visionOS)
let picker = app.pickers["voicePicker"]
let optionToSelect = picker.pickerWheels.element(boundBy: 0)
optionToSelect.adjust(toPickerWheelValue: "Kathy")
#endif

let textField = app.textFields["Enter text to be spoken"]
XCTAssertTrue(textField.waitForExistence(timeout: 1))

textField.tap()
textField.typeText("Hello, this is a test of the Spezi Speech module.")

let speakButton = app.buttons["Speak"]
XCTAssertTrue(speakButton.waitForExistence(timeout: 1))
speakButton.tap()

// Give the synthesizer time to speak the text.
sleep(5)
}
}
4 changes: 4 additions & 0 deletions Tests/UITests/UITests.xcodeproj/project.pbxproj
@@ -10,6 +10,7 @@
2F6D139A28F5F386007C25D6 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 2F6D139928F5F386007C25D6 /* Assets.xcassets */; };
2F8A431329130A8C005D2B8F /* TestAppUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2F8A431229130A8C005D2B8F /* TestAppUITests.swift */; };
2FA7382C290ADFAA007ACEB9 /* TestApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2FA7382B290ADFAA007ACEB9 /* TestApp.swift */; };
63E92BD82C32293F0070D826 /* SpeechVoiceSelectionTestView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 63E92BD72C32293F0070D826 /* SpeechVoiceSelectionTestView.swift */; };
979087112AFF07FF00F78FA4 /* SpeechTestView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 979087102AFF07FF00F78FA4 /* SpeechTestView.swift */; };
97E117752AFF0A89002EA48A /* SpeziSpeechRecognizer in Frameworks */ = {isa = PBXBuildFile; productRef = 97E117742AFF0A89002EA48A /* SpeziSpeechRecognizer */; };
97E117772AFF0A89002EA48A /* SpeziSpeechSynthesizer in Frameworks */ = {isa = PBXBuildFile; productRef = 97E117762AFF0A89002EA48A /* SpeziSpeechSynthesizer */; };
@@ -47,6 +48,7 @@
2F8A431229130A8C005D2B8F /* TestAppUITests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TestAppUITests.swift; sourceTree = "<group>"; };
2FA7382B290ADFAA007ACEB9 /* TestApp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestApp.swift; sourceTree = "<group>"; };
2FB0758A299DDB9000C0B37F /* TestApp.xctestplan */ = {isa = PBXFileReference; lastKnownFileType = text; path = TestApp.xctestplan; sourceTree = "<group>"; };
63E92BD72C32293F0070D826 /* SpeechVoiceSelectionTestView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpeechVoiceSelectionTestView.swift; sourceTree = "<group>"; };
973B3CE42AFC725B00FBC8B1 /* Speech.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Speech.framework; path = System/Library/Frameworks/Speech.framework; sourceTree = SDKROOT; };
979087102AFF07FF00F78FA4 /* SpeechTestView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SpeechTestView.swift; sourceTree = "<group>"; };
97FC62772B02AEDF0025D933 /* TestAppDelegate.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestAppDelegate.swift; sourceTree = "<group>"; };
@@ -98,6 +100,7 @@
children = (
97FC62772B02AEDF0025D933 /* TestAppDelegate.swift */,
2FA7382B290ADFAA007ACEB9 /* TestApp.swift */,
63E92BD72C32293F0070D826 /* SpeechVoiceSelectionTestView.swift */,
979087102AFF07FF00F78FA4 /* SpeechTestView.swift */,
2F6D139928F5F386007C25D6 /* Assets.xcassets */,
);
@@ -226,6 +229,7 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
63E92BD82C32293F0070D826 /* SpeechVoiceSelectionTestView.swift in Sources */,
979087112AFF07FF00F78FA4 /* SpeechTestView.swift in Sources */,
2FA7382C290ADFAA007ACEB9 /* TestApp.swift in Sources */,
97FC62782B02AEDF0025D933 /* TestAppDelegate.swift in Sources */,
