Offline Speech Recognition #2089 #2242

Open · wants to merge 12 commits into base: main
@@ -31,6 +31,17 @@
SelectedItem="{Binding CurrentLocale}"
ItemDisplayBinding="{Binding ., Converter={StaticResource PickerLocaleDisplayConverter}}"/>

<Label
Text="State"
FontAttributes="Bold"/>

<Label
Text="{Binding State}"
FontSize="18"
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
MinimumHeightRequest="100" />

<Label
Text="Language Output"
FontAttributes="Bold"/>
@@ -53,7 +64,6 @@
StrokeShape="RoundRectangle 8,8,8,8"
Padding="12">
<Border.Content>

<Grid RowDefinitions="*,60"
ColumnDefinitions="*,*"
RowSpacing="12"
@@ -62,21 +72,21 @@
<Button
Grid.Row="0"
Grid.Column="0"
Text="ListenAsync"
Command="{Binding ListenCommand}"
Text="StartListenAsync"
Command="{Binding StartListenCommand}"
HorizontalOptions="End" />

<Button
Grid.Row="0"
Grid.Column="1"
Text="Cancel Token"
Command="{Binding ListenCancelCommand}"
Text="StopListenAsync"
Command="{Binding StopListenCommand}"
HorizontalOptions="Start" />

<Label
Grid.Row="1"
Grid.ColumnSpan="2"
Text="The `ListenAsync` API allows you to await the final speech recognition results using async/await. `ListenAsync` is cancelled via CancellationToken."
Text="The `StartListenAsync` API starts the speech-to-text service and shares the results using `RecognitionResultUpdated` event and `RecognitionResultCompleted` event."
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
FontSize="12"/>
@@ -99,21 +109,21 @@
<Button
Grid.Row="0"
Grid.Column="0"
Text="StartListenAsync"
Command="{Binding StartListenCommand}"
Text="StartOfflineListenAsync"
Command="{Binding StartOfflineListenCommand}"
HorizontalOptions="End" />

<Button
Grid.Row="0"
Grid.Column="1"
Text="StopListenAsync"
Command="{Binding StopListenCommand}"
Text="StopOfflineListenAsync"
Command="{Binding StopOfflineListenCommand}"
HorizontalOptions="Start" />

<Label
Grid.Row="1"
Grid.ColumnSpan="2"
Text="The `StartListenAsync` API starts the speech-to-text service and shares the results using `RecognitionResultUpdated` event and `RecognitionResultCompleted` event."
Text="The `StartOfflineListenAsync` API starts the speech-to-text service and shares the results using `RecognitionResultUpdated` event and `RecognitionResultCompleted` event."
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
FontSize="12"/>
@@ -20,18 +20,23 @@ public partial class SpeechToTextViewModel : BaseViewModel
[ObservableProperty]
Locale? currentLocale;

public SpeechToTextState? State => speechToText.CurrentState;

[ObservableProperty]
string? recognitionText = "Welcome to .NET MAUI Community Toolkit!";

[ObservableProperty, NotifyCanExecuteChangedFor(nameof(ListenCommand))]
bool canListenExecute = true;


[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StartListenCommand))]
bool canStartListenExecute = true;

[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StopListenCommand))]
bool canStopListenExecute = false;

[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StartOfflineListenCommand))]
bool canStartOfflineListenExecute = true;

[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StopOfflineListenCommand))]
bool canStopOfflineListenExecute = false;

public SpeechToTextViewModel(ITextToSpeech textToSpeech, ISpeechToText speechToText)
{
this.textToSpeech = textToSpeech;
@@ -82,94 +87,94 @@ async Task Play(CancellationToken cancellationToken)
}
}

[RelayCommand(IncludeCancelCommand = true, CanExecute = nameof(CanListenExecute))]
async Task Listen(CancellationToken cancellationToken)
[RelayCommand(CanExecute = nameof(CanStartListenExecute))]
async Task StartListen()
{
CanStartListenExecute = false;
CanStartOfflineListenExecute = false;
CanStopOfflineListenExecute = false;
CanStopListenExecute = true;

try
var isGranted = await speechToText.RequestPermissions(CancellationToken.None);
if (!isGranted)
{
var isGranted = await speechToText.RequestPermissions(cancellationToken);
if (!isGranted)
{
await Toast.Make("Permission not granted").Show(cancellationToken);
return;
}

const string beginSpeakingPrompt = "Begin speaking...";
await Toast.Make("Permission not granted").Show(CancellationToken.None);
return;
}

RecognitionText = beginSpeakingPrompt;
if (Connectivity.NetworkAccess != NetworkAccess.Internet)
{
await Toast.Make("Internet connection is required").Show(CancellationToken.None);
return;
}

var recognitionResult = await speechToText.ListenAsync(
CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage),
new Progress<string>(partialText =>
{
if (RecognitionText is beginSpeakingPrompt)
{
RecognitionText = string.Empty;
}
const string beginSpeakingPrompt = "Begin speaking...";

RecognitionText += partialText + " ";
}), cancellationToken);
RecognitionText = beginSpeakingPrompt;

if (recognitionResult.IsSuccessful)
{
RecognitionText = recognitionResult.Text;
}
else
{
await Toast.Make(recognitionResult.Exception?.Message ?? "Unable to recognize speech").Show(CancellationToken.None);
}
speechToText.RecognitionResultUpdated += HandleRecognitionResultUpdated;

if (RecognitionText is beginSpeakingPrompt)
{
RecognitionText = string.Empty;
}
}
finally
await speechToText.StartListenAsync(CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage), CancellationToken.None);

if (RecognitionText is beginSpeakingPrompt)
{
CanStartListenExecute = true;
RecognitionText = string.Empty;
}
}

[RelayCommand(CanExecute = nameof(CanStartListenExecute))]
async Task StartListen(CancellationToken cancellationToken)
[RelayCommand(CanExecute = nameof(CanStopListenExecute))]
Task StopListen()
{
CanStartListenExecute = true;
CanStartOfflineListenExecute = true;
CanStopOfflineListenExecute = false;
CanStopListenExecute = false;

speechToText.RecognitionResultUpdated -= HandleRecognitionResultUpdated;

return speechToText.StopListenAsync(CancellationToken.None);
}

[RelayCommand(CanExecute = nameof(CanStartOfflineListenExecute))]
async Task StartOfflineListen()
{
CanListenExecute = false;
CanStartListenExecute = false;
CanStopListenExecute = true;
CanStopListenExecute = false;
CanStartOfflineListenExecute = false;
CanStopOfflineListenExecute = true;

var isGranted = await speechToText.RequestPermissions(cancellationToken);
var isGranted = await speechToText.RequestPermissions(CancellationToken.None);
if (!isGranted)
{
await Toast.Make("Permission not granted").Show(cancellationToken);
await Toast.Make("Permission not granted").Show(CancellationToken.None);
return;
}

const string beginSpeakingPrompt = "Begin speaking...";

RecognitionText = beginSpeakingPrompt;

await speechToText.StartListenAsync(CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage), cancellationToken);

speechToText.RecognitionResultUpdated += HandleRecognitionResultUpdated;

await speechToText.StartOfflineListenAsync(CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage), CancellationToken.None);

if (RecognitionText is beginSpeakingPrompt)
{
RecognitionText = string.Empty;
}
}

[RelayCommand(CanExecute = nameof(CanStopListenExecute))]
Task StopListen(CancellationToken cancellationToken)
[RelayCommand(CanExecute = nameof(CanStopOfflineListenExecute))]
Task StopOfflineListen()
{
CanListenExecute = true;
CanStartOfflineListenExecute = true;
CanStartListenExecute = true;
CanStopListenExecute = false;
CanStopOfflineListenExecute = false;

speechToText.RecognitionResultUpdated -= HandleRecognitionResultUpdated;

return speechToText.StopListenAsync(cancellationToken);
return speechToText.StopOfflineListenAsync(CancellationToken.None);
}

void HandleRecognitionResultUpdated(object? sender, SpeechToTextRecognitionResultUpdatedEventArgs e)
@@ -179,12 +184,12 @@ void HandleRecognitionResultUpdated(object? sender, SpeechToTextRecognitionResul

void HandleRecognitionResultCompleted(object? sender, SpeechToTextRecognitionResultCompletedEventArgs e)
{
RecognitionText = e.RecognitionResult;
RecognitionText = e.RecognitionResult.IsSuccessful ? e.RecognitionResult.Text : e.RecognitionResult.Exception.Message;
}

async void HandleSpeechToTextStateChanged(object? sender, SpeechToTextStateChangedEventArgs e)
void HandleSpeechToTextStateChanged(object? sender, SpeechToTextStateChangedEventArgs e)
{
await Toast.Make($"State Changed: {e.State}").Show(CancellationToken.None);
OnPropertyChanged(nameof(State));
}

void HandleLocalesCollectionChanged(object? sender, NotifyCollectionChangedEventArgs e)
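
One change worth calling out in this view model: the Toast shown on every state change is replaced by a bindable `State` property that simply forwards `speechToText.CurrentState` and re-raises `PropertyChanged` from the state-changed handler, which is what drives the new `{Binding State}` label in the page. Below is a condensed, hypothetical sketch of that pattern, assuming CommunityToolkit.Mvvm's `ObservableObject` and assuming the service's state event is named `StateChanged` (the event the sample presumably wires to `HandleSpeechToTextStateChanged`).

```csharp
// Condensed, hypothetical sketch of the State-notification pattern used above.
// Assumes the service exposes a StateChanged event alongside CurrentState.
using CommunityToolkit.Maui.Media;
using CommunityToolkit.Mvvm.ComponentModel;

sealed class SpeechStateViewModel : ObservableObject
{
    readonly ISpeechToText speechToText;

    public SpeechStateViewModel(ISpeechToText speechToText)
    {
        this.speechToText = speechToText;
        this.speechToText.StateChanged += HandleStateChanged;
    }

    // No backing field: the service owns the state and the view model only forwards it,
    // so the XAML "{Binding State}" label always reflects speechToText.CurrentState.
    public SpeechToTextState? State => speechToText.CurrentState;

    void HandleStateChanged(object? sender, SpeechToTextStateChangedEventArgs e) =>
        OnPropertyChanged(nameof(State));
}
```

The `CanStart*`/`CanStop*` flags above follow the same idea for command state: each flag change re-evaluates the matching command's `CanExecute` via `NotifyCanExecuteChangedFor`.
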
@@ -8,13 +8,13 @@ public class SpeechToTextRecognitionResultCompletedEventArgs : EventArgs
/// <summary>
/// Initialize a new instance of <see cref="SpeechToTextRecognitionResultCompletedEventArgs"/>
/// </summary>
public SpeechToTextRecognitionResultCompletedEventArgs(string recognitionResult)
public SpeechToTextRecognitionResultCompletedEventArgs(SpeechToTextResult recognitionResult)
{
RecognitionResult = recognitionResult;
}

/// <summary>
/// Speech recognition result
/// </summary>
public string RecognitionResult { get; }
public SpeechToTextResult RecognitionResult { get; }
}
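
This is the breaking part of the change: the completed event now carries the full `SpeechToTextResult` rather than a plain string, so subscribers can distinguish success from failure. A short, hypothetical handler showing how the new payload might be consumed (the `OnRecognitionCompleted` name is illustrative only):

```csharp
// Hypothetical handler for the updated event signature. SpeechToTextResult exposes
// IsSuccessful, Text and Exception, as used in the sample view model above.
void OnRecognitionCompleted(object? sender, SpeechToTextRecognitionResultCompletedEventArgs e)
{
    if (e.RecognitionResult.IsSuccessful)
    {
        Console.WriteLine($"Recognized: {e.RecognitionResult.Text}");
    }
    else
    {
        // Exception describes why recognition failed.
        Console.WriteLine($"Recognition failed: {e.RecognitionResult.Exception?.Message}");
    }
}
```
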
@@ -28,32 +28,42 @@ public interface ISpeechToText : IAsyncDisposable
SpeechToTextState CurrentState { get; }

/// <summary>
/// Converts speech to text in real time.
/// Starts the SpeechToText service
/// </summary>
/// <remarks>
/// Real time speech recognition results will be surfaced via <see cref="RecognitionResultUpdated"/> and <see cref="RecognitionResultCompleted"/>
/// </remarks>
/// <param name="culture">Speak language</param>
/// <param name="recognitionResult">Intermediate recognition result.</param>
/// <param name="cancellationToken"><see cref="CancellationToken"/></param>
/// <returns>Final recognition result</returns>
Task<SpeechToTextResult> ListenAsync(CultureInfo culture, IProgress<string>? recognitionResult, CancellationToken cancellationToken = default);
Task StartListenAsync(CultureInfo culture, CancellationToken cancellationToken = default);

/// <summary>
/// Starts the SpeechToText service
/// Stops the SpeechToText service
/// </summary>
/// <remarks>
/// Speech recognition results will be surfaced via <see cref="RecognitionResultCompleted"/>
/// </remarks>
/// <param name="cancellationToken"><see cref="CancellationToken"/></param>
Task StopListenAsync(CancellationToken cancellationToken = default);

/// <summary>
/// Starts the Offline SpeechToText service
/// </summary>
/// <remarks>
/// Real time speech recognition results will be surfaced via <see cref="RecognitionResultUpdated"/> and <see cref="RecognitionResultCompleted"/>
/// </remarks>
/// <param name="culture">Speak language</param>
/// <param name="cancellationToken"><see cref="CancellationToken"/></param>
Task StartListenAsync(CultureInfo culture, CancellationToken cancellationToken = default);
Task StartOfflineListenAsync(CultureInfo culture, CancellationToken cancellationToken = default);

/// <summary>
/// Stops the SpeechToText service
/// Stops the OfflineSpeechToText service
/// </summary>
/// <remarks>
/// Speech recognition results will be surfaced via <see cref="RecognitionResultCompleted"/>
/// </remarks>
/// <param name="cancellationToken"><see cref="CancellationToken"/></param>
Task StopListenAsync(CancellationToken cancellationToken = default);
Task StopOfflineListenAsync(CancellationToken cancellationToken = default);

/// <summary>
/// Request permissions for speech to text.
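
For completeness, a rough end-to-end sketch of the new offline pair, `StartOfflineListenAsync`/`StopOfflineListenAsync`, again not taken from this PR: the `OfflineListenExample` helper is hypothetical, and it assumes the device has an on-device recognition model for the requested culture, which is why, unlike the online path in the sample view model, no connectivity check is made.

```csharp
// Hypothetical offline usage sketch. Per the interface remarks above, results still
// flow through the same RecognitionResultUpdated / RecognitionResultCompleted events.
using System.Globalization;
using CommunityToolkit.Maui.Media;

static class OfflineListenExample
{
    public static async Task RunAsync(ISpeechToText speechToText, CancellationToken token)
    {
        if (!await speechToText.RequestPermissions(token))
        {
            return; // microphone/speech permission denied
        }

        speechToText.RecognitionResultUpdated += (_, e) =>
            Console.WriteLine($"Partial: {e.RecognitionResult}");

        speechToText.RecognitionResultCompleted += (_, e) =>
            Console.WriteLine(e.RecognitionResult.IsSuccessful
                ? $"Final: {e.RecognitionResult.Text}"
                : $"Failed: {e.RecognitionResult.Exception?.Message}");

        // On-device recognition: no network connection is required.
        await speechToText.StartOfflineListenAsync(CultureInfo.GetCultureInfo("en-US"), token);

        // ...later, once the user has finished speaking:
        await speechToText.StopOfflineListenAsync(token);
    }
}
```
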