Skip to content

Commit

Permalink
feat: <voice /> and MSTTS viseme extension
Browse files Browse the repository at this point in the history
  • Loading branch information
decahedron1 committed Oct 16, 2023
1 parent fa91fe9 commit f64b567
Show file tree
Hide file tree
Showing 6 changed files with 243 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/audio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub enum AudioRepeat {

/// An SSML `<audio />` element. [`Audio`] supports the insertion of recorded audio files and the insertion of other
/// audio formats in conjunction with synthesized speech output.
#[derive(Default, Clone)]
#[derive(Debug, Default, Clone)]
pub struct Audio {
src: String,
desc: Option<String>,
Expand Down
18 changes: 17 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,20 @@ use std::{error::Error, fmt::Debug, io::Write};

mod audio;
mod error;
pub mod mstts;
mod speak;
mod text;
mod unit;
mod util;
mod voice;

pub(crate) use self::error::{error, GenericError};
pub use self::{
audio::{audio, Audio, AudioRepeat},
speak::{speak, Speak, SpeakableElement},
text::{text, Text},
unit::{Decibels, DecibelsError, TimeDesignation, TimeDesignationError}
unit::{Decibels, DecibelsError, TimeDesignation, TimeDesignationError},
voice::{voice, Voice, VoiceConfig, VoiceGender}
};

/// Vendor-specific flavor of SSML. Specifying this can be used to enable compatibility checks & add additional
Expand Down Expand Up @@ -75,3 +78,16 @@ pub trait Serialize {
Ok(std::str::from_utf8(&write)?.to_owned())
}
}

/// A [`SpeakableElement`] that outputs a simple string.
///
/// It differs from [`Text`] in that the contents of `Meta` are not escaped, meaning `Meta` can be used to write raw
/// XML into the document.
///
/// The wrapped string is written to the output byte-for-byte, so the caller is responsible for making sure it is
/// well-formed markup.
#[derive(Debug, Clone)]
pub struct Meta(pub String);

impl Serialize for Meta {
fn serialize<W: Write>(&self, writer: &mut W, _: Flavor) -> Result<(), Box<dyn Error>> {
Ok(writer.write_all(self.0.as_bytes())?)
}
}
53 changes: 53 additions & 0 deletions src/mstts/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
use std::fmt::Display;

use crate::{voice::Voice, Meta, SpeakableElement};

/// The format in which viseme events are requested through the `<mstts:viseme />` element.
///
/// The [`Display`] implementation yields the value that goes into the element's `type` attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MicrosoftViseme {
    /// Receive visemes as an ID. (equivalent to `<mstts:viseme type="redlips_front" />`)
    ById,
    /// Receive visemes as blend shapes. (equivalent to `<mstts:viseme type="FacialExpression" />`)
    FacialExpression
}

impl Display for MicrosoftViseme {
    /// Writes the `type` attribute value for this viseme format; width and padding flags are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let type_attr = match *self {
            Self::ById => "redlips_front",
            Self::FacialExpression => "FacialExpression"
        };
        f.write_str(type_attr)
    }
}

/// Extension methods that add Microsoft-specific (`mstts`) markup to a [`Voice`] section.
pub trait MicrosoftVoiceExt {
    /// For ACSS, configures a [`Voice`] section to send back viseme animations in the specified format.
    ///
    /// This is a consuming builder method: it takes `self` by value and returns the updated value, so the result
    /// must be used (hence `#[must_use]`) — dropping it discards the whole section.
    ///
    /// ```
    /// # use ssml::{Flavor, mstts::{MicrosoftVoiceExt, MicrosoftViseme}, Serialize};
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let doc = ssml::Speak::new(
    ///     Some("en-US"),
    ///     [ssml::Voice::new(
    ///         "en-US-JennyNeural",
    ///         ["Rainbow has seven colors: Red, orange, yellow, green, blue, indigo, and violet."]
    ///     )
    ///     .with_mstts_viseme(MicrosoftViseme::FacialExpression)]
    /// );
    ///
    /// assert_eq!(
    ///     doc.serialize_to_string(Flavor::MicrosoftAzureCognitiveSpeechServices)?,
    ///     r#"<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US" xmlns:mstts="http://www.w3.org/2001/mstts"><voice name="en-US-JennyNeural"><mstts:viseme type="FacialExpression" />Rainbow has seven colors: Red, orange, yellow, green, blue, indigo, and violet. </voice></speak>"#
    /// );
    /// # Ok(())
    /// # }
    /// ```
    #[must_use = "this returns the updated value instead of modifying it in place"]
    fn with_mstts_viseme(self, config: MicrosoftViseme) -> Self;
}

impl MicrosoftVoiceExt for Voice {
    /// Prepends a raw `<mstts:viseme />` element so that it becomes the first child of this voice section.
    ///
    /// The element is inserted as unescaped [`Meta`] markup; `config`'s `Display` output fills the `type` attribute.
    fn with_mstts_viseme(mut self, config: MicrosoftViseme) -> Self {
        let directive = Meta(format!("<mstts:viseme type=\"{config}\" />"));
        // Index 0: the viseme request goes ahead of every element already in the section.
        self.elements.insert(0, SpeakableElement::Meta(directive));
        self
    }
}
14 changes: 8 additions & 6 deletions src/speak.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use std::{error::Error, io::Write};

use crate::{util, Audio, Flavor, Serialize, Text};
use crate::{util, Audio, Flavor, Meta, Serialize, Text, Voice};

macro_rules! el {
(
$(#[$outer:meta])*
pub enum $name:ident {
$(
$(#[$innermeta:meta])*
$variant:ident($inner:ident)
$variant:ident($inner:ty)
),*
}
) => {
Expand Down Expand Up @@ -37,10 +37,12 @@ macro_rules! el {
}

el! {
#[derive(Clone)]
#[derive(Debug, Clone)]
pub enum SpeakableElement {
Text(Text),
Audio(Audio)
Audio(Audio),
Voice(Voice),
Meta(Meta)
// Break(BreakElement),
// Emphasis(EmphasisElement),
// Lang(LangElement),
Expand All @@ -63,7 +65,7 @@ impl<T: ToString> From<T> for SpeakableElement {
}

/// The root element of an SSML document.
#[derive(Default)]
#[derive(Default, Debug, Clone)]
pub struct Speak {
elements: Vec<SpeakableElement>,
marks: (Option<String>, Option<String>),
Expand Down Expand Up @@ -134,7 +136,7 @@ impl Serialize for Speak {

// Include `mstts` namespace for ACSS.
if flavor == Flavor::MicrosoftAzureCognitiveSpeechServices {
util::write_attr(writer, "xmlns:mstts", "https://www.w3.org/2001/mstts")?;
util::write_attr(writer, "xmlns:mstts", "http://www.w3.org/2001/mstts")?;
}

if let Some(start_mark) = &self.marks.0 {
Expand Down
2 changes: 1 addition & 1 deletion src/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{error::Error, io::Write};
use crate::{Flavor, Serialize};

/// A non-marked-up string of text for use as a spoken element.
#[derive(Clone)]
#[derive(Default, Debug, Clone)]
pub struct Text(pub String);

impl<T: ToString> From<T> for Text {
Expand Down
163 changes: 163 additions & 0 deletions src/voice.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
use std::{error::Error, fmt::Display, io::Write};

use crate::{util, Flavor, Serialize, SpeakableElement};

/// Preferred gender of a voice.
///
/// The [`Display`] implementation produces the text used as the value of the `gender` attribute.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub enum VoiceGender {
    /// No preference; formats as the empty string. This is the default.
    #[default]
    Unspecified,
    /// Formats as `neutral`.
    Neutral,
    /// Formats as `female`.
    Female,
    /// Formats as `male`.
    Male
}

impl Display for VoiceGender {
    /// Writes the lowercase attribute value for this gender; width and padding flags are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let attr_value = match self {
            Self::Unspecified => "",
            Self::Neutral => "neutral",
            Self::Female => "female",
            Self::Male => "male"
        };
        f.write_str(attr_value)
    }
}

/// Attribute set for the [`Voice`] element.
///
/// Every field is optional; a field left as `None` produces no attribute when the configuration is serialized.
#[derive(Default, Debug, Clone)]
pub struct VoiceConfig {
    /// Value of the `gender` attribute.
    pub gender: Option<VoiceGender>,
    /// Value of the `age` attribute.
    pub age: Option<u8>,
    /// Value of the `name` attribute.
    pub name: Option<String>,
    /// Value of the `variant` attribute.
    pub variant: Option<String>
}

impl VoiceConfig {
/// Creates a new [`VoiceConfig`] with the specified voice name and no other attributes.
///
/// ```
/// let doc = ssml::VoiceConfig::named("en-US-JennyNeural");
/// ```
pub fn named(name: impl ToString) -> Self {
Self {
name: Some(name.to_string()),
..VoiceConfig::default()
}
}
}

impl<S: ToString> From<S> for VoiceConfig {
fn from(value: S) -> Self {
VoiceConfig::named(value)
}
}

impl Serialize for VoiceConfig {
    /// Writes each attribute that is set, in the order `gender`, `age`, `name`, `variant`.
    ///
    /// Fields that are `None` are skipped. Only attributes are written here; the surrounding element tag is
    /// not. The flavor argument is ignored.
    fn serialize<W: Write>(&self, writer: &mut W, _: Flavor) -> Result<(), Box<dyn Error>> {
        let Self { gender, age, name, variant } = self;

        if let Some(value) = gender {
            util::write_attr(writer, "gender", value.to_string())?;
        }
        if let Some(value) = age {
            util::write_attr(writer, "age", value.to_string())?;
        }
        if let Some(value) = name {
            util::write_attr(writer, "name", value)?;
        }
        if let Some(value) = variant {
            util::write_attr(writer, "variant", value)?;
        }

        Ok(())
    }
}

/// The [`Voice`] element allows you to specify a voice or use multiple different voices in one document.
///
/// It serializes as a `<voice>` tag carrying the attributes of its [`VoiceConfig`] and wrapping its child elements.
#[derive(Default, Debug, Clone)]
pub struct Voice {
    /// Child elements, serialized in order between the opening and closing tags.
    pub(crate) elements: Vec<SpeakableElement>,
    /// Attributes written on the opening `<voice>` tag.
    config: VoiceConfig
}

impl Voice {
    /// Creates a new `voice` element to change the voice of a section of spoken elements.
    ///
    /// `config` is anything convertible into a [`VoiceConfig`] (for example, a voice name as a string), and
    /// `elements` are the children of the section, each converted into a [`SpeakableElement`].
    ///
    /// ```
    /// # use ssml::{Flavor, Serialize};
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let doc = ssml::Speak::new(None, [ssml::Voice::new("en-US-Neural2-F", ["Hello, world!"])]);
    ///
    /// assert_eq!(
    ///     doc.serialize_to_string(Flavor::GoogleCloudTextToSpeech)?,
    ///     r#"<speak><voice name="en-US-Neural2-F">Hello, world! </voice></speak>"#
    /// );
    /// # Ok(())
    /// # }
    /// ```
    pub fn new<S: Into<SpeakableElement>, I: IntoIterator<Item = S>>(config: impl Into<VoiceConfig>, elements: I) -> Self {
        Self {
            elements: elements.into_iter().map(Into::into).collect(),
            config: config.into()
        }
    }

    /// Extend this `voice` section with additional spoken elements.
    ///
    /// The new elements are appended after the existing ones. This method consumes the section and returns the
    /// extended one, so the return value must be used.
    ///
    /// ```
    /// # use ssml::{Flavor, Serialize};
    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// let mut voice = ssml::voice("en-US-Neural2-F", ["Hello, world!"]);
    /// voice = voice.with_elements(["This is an SSML document."]);
    /// let doc = ssml::Speak::new(None, [voice]);
    ///
    /// assert_eq!(
    ///     doc.serialize_to_string(Flavor::GoogleCloudTextToSpeech)?,
    ///     r#"<speak><voice name="en-US-Neural2-F">Hello, world! This is an SSML document. </voice></speak>"#
    /// );
    /// # Ok(())
    /// # }
    /// ```
    #[must_use = "this returns the extended `Voice` instead of modifying it in place"]
    pub fn with_elements<S: Into<SpeakableElement>, I: IntoIterator<Item = S>>(mut self, elements: I) -> Self {
        self.elements.extend(elements.into_iter().map(Into::into));
        self
    }

    /// Modifies the voice configuration of this `voice` section.
    ///
    /// The previous configuration is replaced as a whole; the child elements are left untouched. This method
    /// consumes the section and returns the updated one, so the return value must be used.
    ///
    /// ```
    /// let mut voice = ssml::Voice::default();
    /// voice = voice.with_voice(ssml::VoiceConfig { age: Some(42), ..Default::default() });
    /// ```
    #[must_use = "this returns the reconfigured `Voice` instead of modifying it in place"]
    pub fn with_voice(mut self, config: impl Into<VoiceConfig>) -> Self {
        self.config = config.into();
        self
    }
}

impl Serialize for Voice {
    /// Writes the section as `<voice …>…</voice>`.
    ///
    /// The configuration's attributes go on the opening tag, then every child element is serialized in order with
    /// the same `flavor`. The first error from the writer or from a child aborts serialization.
    fn serialize<W: Write>(&self, writer: &mut W, flavor: Flavor) -> Result<(), Box<dyn Error>> {
        // Opening tag, with attributes supplied by the configuration.
        writer.write_all(b"<voice")?;
        self.config.serialize(writer, flavor)?;
        writer.write_all(b">")?;

        // Children, stopping at the first failure.
        self.elements
            .iter()
            .try_for_each(|child| child.serialize(writer, flavor))?;

        Ok(writer.write_all(b"</voice>")?)
    }
}

/// Creates a new `voice` element to change the voice of a section of spoken elements.
///
/// This is a free-function shorthand for [`Voice::new`] and takes the same arguments.
///
/// ```
/// # use ssml::{Flavor, Serialize};
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let doc = ssml::speak(None, [ssml::voice("en-US-Neural2-F", ["Hello, world!"])]);
///
/// assert_eq!(
///     doc.serialize_to_string(Flavor::GoogleCloudTextToSpeech)?,
///     r#"<speak><voice name="en-US-Neural2-F">Hello, world! </voice></speak>"#
/// );
/// # Ok(())
/// # }
/// ```
pub fn voice<S: Into<SpeakableElement>, I: IntoIterator<Item = S>>(config: impl Into<VoiceConfig>, elements: I) -> Voice {
    Voice::new(config, elements)
}

0 comments on commit f64b567

Please sign in to comment.