From 8654f4a6a54af17489b98452f1fd77002425a0f2 Mon Sep 17 00:00:00 2001 From: Thaza_Kun <61819672+Thaza-Kun@users.noreply.github.com> Date: Sun, 7 Apr 2024 02:44:29 +0800 Subject: [PATCH 01/11] Mengasingkan kod teras dalam subcrate 'onc' Fixes #3 --- wasm-rs/Cargo.lock | 11 ++- wasm-rs/Cargo.toml | 3 +- wasm-rs/onc/Cargo.toml | 11 +++ wasm-rs/onc/src/lib.rs | 3 + wasm-rs/onc/src/phonotactics/mod.rs | 91 ++++++++++++++++++ wasm-rs/{ => onc}/src/phonotactics/tags.rs | 12 +-- wasm-rs/src/imbuhan.rs | 5 +- wasm-rs/src/phonotactics.rs | 105 ++------------------- 8 files changed, 131 insertions(+), 110 deletions(-) create mode 100644 wasm-rs/onc/Cargo.toml create mode 100644 wasm-rs/onc/src/lib.rs create mode 100644 wasm-rs/onc/src/phonotactics/mod.rs rename wasm-rs/{ => onc}/src/phonotactics/tags.rs (94%) diff --git a/wasm-rs/Cargo.lock b/wasm-rs/Cargo.lock index bb28b9e..af0206e 100644 --- a/wasm-rs/Cargo.lock +++ b/wasm-rs/Cargo.lock @@ -169,6 +169,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "onc" +version = "0.1.0" +dependencies = [ + "itertools", + "nom", + "serde", +] + [[package]] name = "once_cell" version = "1.19.0" @@ -357,7 +366,7 @@ version = "0.1.0" dependencies = [ "clap", "itertools", - "nom", + "onc", "serde", "serde-wasm-bindgen", "toml", diff --git a/wasm-rs/Cargo.toml b/wasm-rs/Cargo.toml index 387980c..7b89a99 100644 --- a/wasm-rs/Cargo.toml +++ b/wasm-rs/Cargo.toml @@ -1,3 +1,4 @@ +workspace = { members = ["onc"] } [package] name = "wasm-rs" version = "0.1.0" @@ -10,7 +11,7 @@ crate-type = ["cdylib"] [dependencies] clap = { version = "4.5.4", features = ["cargo"] } itertools = "0.12.1" -nom = "7.1.3" +onc = { version = "0.1.0", path = "onc" } serde = { version = "1.0.197", features = ["derive"] } serde-wasm-bindgen = "0.6.5" toml = "0.8.11" diff --git a/wasm-rs/onc/Cargo.toml b/wasm-rs/onc/Cargo.toml new file mode 100644 index 0000000..19c4eb4 --- /dev/null +++ b/wasm-rs/onc/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "onc" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +itertools = "0.12.1" +nom = "7.1.3" +serde = "1.0.197" diff --git a/wasm-rs/onc/src/lib.rs b/wasm-rs/onc/src/lib.rs new file mode 100644 index 0000000..c0bfc40 --- /dev/null +++ b/wasm-rs/onc/src/lib.rs @@ -0,0 +1,3 @@ +pub mod phonotactics; + +pub use nom::IResult; diff --git a/wasm-rs/onc/src/phonotactics/mod.rs b/wasm-rs/onc/src/phonotactics/mod.rs new file mode 100644 index 0000000..bbd3b75 --- /dev/null +++ b/wasm-rs/onc/src/phonotactics/mod.rs @@ -0,0 +1,91 @@ +pub mod tags; + +use itertools::Itertools; +use std::fmt::Display; + +use crate::phonotactics::tags::SyllableTags; + +pub struct Phrase { + pub syllables: Vec>, +} + +impl<'a> Phrase<&'a str> { + pub fn with_postprocessing(mut self, tags: &'a SyllableTags) -> Self { + let cloned = self.syllables.clone(); + for (index, (lead, lag)) in cloned.iter().tuple_windows().enumerate() { + match (lead.coda, lag.onset) { + (Some(s), None) => { + self.syllables[index].coda = None; + self.syllables[index + 1].onset = Some(s); + } + (Some(s), Some(t)) => { + let t: Vec<&'a String> = Vec::from_iter( + tags.onset + .items + .iter() + .filter(|a| a == &&format!("{}{}", &s, &t)), + ); + if t.len() != 0 { + self.syllables[index].coda = None; + self.syllables[index + 1].onset = t.first().map(|a| a.as_str()); + } + } + (_, _) => {} + } + } + self + } +} + +impl Phrase +where + SyllableUnit: Display, +{ + pub fn as_separated(&self, separator: &Option) -> String { + let default_string = &String::from("·"); + let separator = match separator { + Some(val) => val, + None => default_string, + }; + self.syllables.iter().join(&separator) + } + + pub fn as_contiguous(&self) -> String { + self.syllables.iter().join(&"") + } +} + +impl From>> for Phrase { + fn from(value: Vec>) -> Self { + Self { syllables: value } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct SyllableUnit { + pub onset: Option, + pub nucleus: O, + pub coda: Option, +} + +impl<'a> Display for SyllableUnit<&'a str> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}{}{}", + self.onset.unwrap_or(""), + self.nucleus, + self.coda.unwrap_or("") + ) + } +} + +impl From<(Option, O, Option)> for SyllableUnit { + fn from(value: (Option, O, Option)) -> Self { + Self { + onset: value.0, + nucleus: value.1, + coda: value.2, + } + } +} diff --git a/wasm-rs/src/phonotactics/tags.rs b/wasm-rs/onc/src/phonotactics/tags.rs similarity index 94% rename from wasm-rs/src/phonotactics/tags.rs rename to wasm-rs/onc/src/phonotactics/tags.rs index 128b855..07b3319 100644 --- a/wasm-rs/src/phonotactics/tags.rs +++ b/wasm-rs/onc/src/phonotactics/tags.rs @@ -1,5 +1,3 @@ -use std::io::Stdout; - use nom::{ bytes::complete::tag_no_case, combinator::opt, error::ParseError, multi::many0, sequence::tuple, Compare, IResult, InputLength, InputTake, Parser, @@ -9,17 +7,17 @@ use serde::Deserialize; use super::{Phrase, SyllableUnit}; #[derive(Clone, Deserialize, Default)] -pub(crate) struct AltTagVec { - pub(crate) items: Vec, +pub struct AltTagVec { + pub items: Vec, #[serde(skip)] index: usize, } #[derive(Clone, Deserialize, Default)] pub struct SyllableTags { - pub(crate) onset: AltTagVec, - pub(crate) nucleus: AltTagVec, - pub(crate) coda: AltTagVec, + pub onset: AltTagVec, + pub nucleus: AltTagVec, + pub coda: AltTagVec, } impl AltTagVec { diff --git a/wasm-rs/src/imbuhan.rs b/wasm-rs/src/imbuhan.rs index bf71597..686669d 100644 --- a/wasm-rs/src/imbuhan.rs +++ b/wasm-rs/src/imbuhan.rs @@ -1,6 +1,5 @@ use std::collections::HashMap; -use itertools::Itertools; use serde::{Deserialize, Serialize}; use wasm_bindgen::prelude::wasm_bindgen; @@ -59,7 +58,7 @@ impl Imbuhan { awal: Some(a), akhir: _, }) => { - if let Ok((rest, phrase)) = phonotactic.parse_syllables(&text) { + if let Ok((_rest, phrase)) = phonotactic.parse_syllables(&text) { if let Some(first) = phrase.syllables.first() { let mut offset = 0; let default = &"".to_string(); @@ -91,7 +90,7 @@ impl Imbuhan { akhir: Some(a), awal: _, }) => { - if let Ok((rest, phrase)) = phonotactic.parse_syllables(&text) { + if let Ok((_rest, phrase)) = phonotactic.parse_syllables(&text) { if let Some(first) = phrase.syllables.first() { let mut offset = 0; let default = &"".to_string(); diff --git a/wasm-rs/src/phonotactics.rs b/wasm-rs/src/phonotactics.rs index 1a44850..94c4d74 100644 --- a/wasm-rs/src/phonotactics.rs +++ b/wasm-rs/src/phonotactics.rs @@ -1,18 +1,12 @@ -pub mod tags; +use onc::phonotactics::tags::SyllableTags; +use onc::phonotactics::Phrase; +use onc::IResult; -use itertools::Itertools; -use nom::IResult; use serde::Deserialize; -use std::{ - fmt::{format, Display}, - option, -}; use wasm_bindgen::prelude::*; use crate::functions::alert; -use self::tags::SyllableTags; - #[wasm_bindgen] #[derive(Deserialize, Clone, Default)] pub struct Phonotactic { @@ -26,7 +20,7 @@ impl Phonotactic { } pub fn parse_syllables<'a>(&'a self, input: &'a String) -> IResult<&'a str, Phrase<&'a str>> { self.definition - .as_str() + .as_str .parse_tags(&input) .map(|(r, p)| (r, p.with_postprocessing(&self.definition))) } @@ -51,9 +45,9 @@ impl PhonotacticToml { #[wasm_bindgen] pub struct SyllableTagsJson { - onset: Vec, - nucleus: Vec, - coda: Vec, + pub(crate) onset: Vec, + pub(crate) nucleus: Vec, + pub(crate) coda: Vec, } #[wasm_bindgen] @@ -225,91 +219,6 @@ impl Phonotactic { } } -pub struct Phrase { - pub(crate) syllables: Vec>, -} - -impl<'a> Phrase<&'a str> { - fn with_postprocessing(mut self, tags: &'a SyllableTags) -> Self { - let cloned = self.syllables.clone(); - for (index, (lead, lag)) in cloned.iter().tuple_windows().enumerate() { - match (lead.coda, lag.onset) { - (Some(s), None) => { - self.syllables[index].coda = None; - self.syllables[index + 1].onset = Some(s); - } - (Some(s), Some(t)) => { - let t: Vec<&'a String> = Vec::from_iter( - tags.onset - .items - .iter() - .filter(|a| a == &&format!("{}{}", &s, &t)), - ); - if t.len() != 0 { - self.syllables[index].coda = None; - self.syllables[index + 1].onset = t.first().map(|a| a.as_str()); - } - } - (_, _) => {} - } - } - self - } -} - -impl Phrase -where - SyllableUnit: Display, -{ - pub fn as_separated(&self, separator: &Option) -> String { - let default_string = &String::from("·"); - let separator = match separator { - Some(val) => val, - None => default_string, - }; - self.syllables.iter().join(&separator) - } - - pub fn as_contiguous(&self) -> String { - self.syllables.iter().join(&"") - } -} - -impl From>> for Phrase { - fn from(value: Vec>) -> Self { - Self { syllables: value } - } -} - -#[derive(Clone, Debug, PartialEq)] -pub(crate) struct SyllableUnit { - pub(crate) onset: Option, - pub(crate) nucleus: O, - pub(crate) coda: Option, -} - -impl<'a> Display for SyllableUnit<&'a str> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}{}{}", - self.onset.unwrap_or(""), - self.nucleus, - self.coda.unwrap_or("") - ) - } -} - -impl From<(Option, O, Option)> for SyllableUnit { - fn from(value: (Option, O, Option)) -> Self { - Self { - onset: value.0, - nucleus: value.1, - coda: value.2, - } - } -} - #[cfg(test)] mod test { use crate::phonotactics::Phonotactic; From 0cf076016f6e489d4350b600e8772e95782a04af Mon Sep 17 00:00:00 2001 From: Thaza_Kun <61819672+Thaza-Kun@users.noreply.github.com> Date: Sun, 7 Apr 2024 02:46:48 +0800 Subject: [PATCH 02/11] =?UTF-8?q?=E2=9C=8F=20Typo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wasm-rs/src/phonotactics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wasm-rs/src/phonotactics.rs b/wasm-rs/src/phonotactics.rs index 94c4d74..5e6feeb 100644 --- a/wasm-rs/src/phonotactics.rs +++ b/wasm-rs/src/phonotactics.rs @@ -20,7 +20,7 @@ impl Phonotactic { } pub fn parse_syllables<'a>(&'a self, input: &'a String) -> IResult<&'a str, Phrase<&'a str>> { self.definition - .as_str + .as_str() .parse_tags(&input) .map(|(r, p)| (r, p.with_postprocessing(&self.definition))) } From 17db0a11bc1b367806a116c54cd404c163a381d4 Mon Sep 17 00:00:00 2001 From: Thaza_Kun <61819672+Thaza-Kun@users.noreply.github.com> Date: Mon, 15 Apr 2024 20:22:03 +0800 Subject: [PATCH 03/11] :recycle: Decoupled PhonotacticRules logic from Phonotactic config --- wasm-rs/onc/src/phonotactics/mod.rs | 18 ++++++++++++ wasm-rs/src/imbuhan.rs | 10 +++++-- wasm-rs/src/phonotactics.rs | 45 +++++++++++++---------------- 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/wasm-rs/onc/src/phonotactics/mod.rs b/wasm-rs/onc/src/phonotactics/mod.rs index bbd3b75..1b02669 100644 --- a/wasm-rs/onc/src/phonotactics/mod.rs +++ b/wasm-rs/onc/src/phonotactics/mod.rs @@ -1,10 +1,28 @@ pub mod tags; use itertools::Itertools; +use nom::IResult; use std::fmt::Display; use crate::phonotactics::tags::SyllableTags; +#[derive(Clone, Default)] +pub struct PhonotacticRule { + definition: SyllableTags, +} + +impl PhonotacticRule { + pub fn with_definitions(definition: SyllableTags) -> Self { + PhonotacticRule { definition } + } + pub fn parse_syllables<'a>(&'a self, input: &'a String) -> IResult<&'a str, Phrase<&'a str>> { + self.definition + .as_str() + .parse_tags(&input) + .map(|(r, p)| (r, p.with_postprocessing(&self.definition))) + } +} + pub struct Phrase { pub syllables: Vec>, } diff --git a/wasm-rs/src/imbuhan.rs b/wasm-rs/src/imbuhan.rs index 686669d..b38849b 100644 --- a/wasm-rs/src/imbuhan.rs +++ b/wasm-rs/src/imbuhan.rs @@ -1,3 +1,5 @@ +use onc::phonotactics::PhonotacticRule; + use std::collections::HashMap; use serde::{Deserialize, Serialize}; @@ -58,7 +60,9 @@ impl Imbuhan { awal: Some(a), akhir: _, }) => { - if let Ok((_rest, phrase)) = phonotactic.parse_syllables(&text) { + if let Ok((_rest, phrase)) = + PhonotacticRule::from(phonotactic.clone()).parse_syllables(&text) + { if let Some(first) = phrase.syllables.first() { let mut offset = 0; let default = &"".to_string(); @@ -90,7 +94,9 @@ impl Imbuhan { akhir: Some(a), awal: _, }) => { - if let Ok((_rest, phrase)) = phonotactic.parse_syllables(&text) { + if let Ok((_rest, phrase)) = + PhonotacticRule::from(phonotactic.clone()).parse_syllables(&text) + { if let Some(first) = phrase.syllables.first() { let mut offset = 0; let default = &"".to_string(); diff --git a/wasm-rs/src/phonotactics.rs b/wasm-rs/src/phonotactics.rs index 5e6feeb..505902c 100644 --- a/wasm-rs/src/phonotactics.rs +++ b/wasm-rs/src/phonotactics.rs @@ -1,5 +1,5 @@ use onc::phonotactics::tags::SyllableTags; -use onc::phonotactics::Phrase; +use onc::phonotactics::{PhonotacticRule, Phrase}; use onc::IResult; use serde::Deserialize; @@ -14,15 +14,27 @@ pub struct Phonotactic { definition: SyllableTags, } +impl From for PhonotacticRule { + fn from(value: Phonotactic) -> Self { + PhonotacticRule::with_definitions(value.definition) + } +} + +#[wasm_bindgen] impl Phonotactic { - pub fn new(name: String, definition: SyllableTags) -> Self { - Self { name, definition } + pub fn parse_string(&mut self, input: String, options: ParseResultOptions) -> ParseResults { + let text = input.to_lowercase(); + let rule = PhonotacticRule::from(self.clone()); + let s = rule.parse_syllables(&text); + InnerParseResult::from(s).render(options) } - pub fn parse_syllables<'a>(&'a self, input: &'a String) -> IResult<&'a str, Phrase<&'a str>> { - self.definition - .as_str() - .parse_tags(&input) - .map(|(r, p)| (r, p.with_postprocessing(&self.definition))) + #[wasm_bindgen(getter)] + pub fn name(&self) -> String { + self.name.clone() + } + #[wasm_bindgen(getter)] + pub fn tags(&self) -> SyllableTagsJson { + self.definition.clone().into() } } @@ -202,23 +214,6 @@ impl<'a> From>> for InnerParseResult<'a> { } } -#[wasm_bindgen] -impl Phonotactic { - pub fn parse_string(&mut self, input: String, options: ParseResultOptions) -> ParseResults { - let text = input.to_lowercase(); - let s = self.parse_syllables(&text); - InnerParseResult::from(s).render(options) - } - #[wasm_bindgen(getter)] - pub fn name(&self) -> String { - self.name.clone() - } - #[wasm_bindgen(getter)] - pub fn tags(&self) -> SyllableTagsJson { - self.definition.clone().into() - } -} - #[cfg(test)] mod test { use crate::phonotactics::Phonotactic; From d4b8f914bfc11cf8564498ddd590c2e5d553c8e2 Mon Sep 17 00:00:00 2001 From: Thaza_Kun <61819672+Thaza-Kun@users.noreply.github.com> Date: Tue, 16 Apr 2024 15:52:28 +0800 Subject: [PATCH 04/11] :recycle: Remove impl From in exchange for Phonotactic.to_rule() method --- wasm-rs/src/imbuhan.rs | 10 ++-------- wasm-rs/src/phonotactics.rs | 17 +++++++++++------ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/wasm-rs/src/imbuhan.rs b/wasm-rs/src/imbuhan.rs index b38849b..5fe2cac 100644 --- a/wasm-rs/src/imbuhan.rs +++ b/wasm-rs/src/imbuhan.rs @@ -1,5 +1,3 @@ -use onc::phonotactics::PhonotacticRule; - use std::collections::HashMap; use serde::{Deserialize, Serialize}; @@ -60,9 +58,7 @@ impl Imbuhan { awal: Some(a), akhir: _, }) => { - if let Ok((_rest, phrase)) = - PhonotacticRule::from(phonotactic.clone()).parse_syllables(&text) - { + if let Ok((_rest, phrase)) = phonotactic.as_rule().parse_syllables(&text) { if let Some(first) = phrase.syllables.first() { let mut offset = 0; let default = &"".to_string(); @@ -94,9 +90,7 @@ impl Imbuhan { akhir: Some(a), awal: _, }) => { - if let Ok((_rest, phrase)) = - PhonotacticRule::from(phonotactic.clone()).parse_syllables(&text) - { + if let Ok((_rest, phrase)) = phonotactic.as_rule().parse_syllables(&text) { if let Some(first) = phrase.syllables.first() { let mut offset = 0; let default = &"".to_string(); diff --git a/wasm-rs/src/phonotactics.rs b/wasm-rs/src/phonotactics.rs index 505902c..c3595a9 100644 --- a/wasm-rs/src/phonotactics.rs +++ b/wasm-rs/src/phonotactics.rs @@ -14,20 +14,25 @@ pub struct Phonotactic { definition: SyllableTags, } -impl From for PhonotacticRule { - fn from(value: Phonotactic) -> Self { - PhonotacticRule::with_definitions(value.definition) - } -} +// impl From for PhonotacticRule { +// fn from(value: Phonotactic) -> Self { +// PhonotacticRule::with_definitions(value.definition) +// } +// } #[wasm_bindgen] impl Phonotactic { pub fn parse_string(&mut self, input: String, options: ParseResultOptions) -> ParseResults { let text = input.to_lowercase(); - let rule = PhonotacticRule::from(self.clone()); + let rule = self.as_rule(); let s = rule.parse_syllables(&text); InnerParseResult::from(s).render(options) } + + pub(crate) fn as_rule(&self) -> PhonotacticRule { + PhonotacticRule::with_definitions(self.definition.clone()) + } + #[wasm_bindgen(getter)] pub fn name(&self) -> String { self.name.clone() From e6da59de21bb7a42d63b92f74de42b8d02db992f Mon Sep 17 00:00:00 2001 From: Thaza_Kun <61819672+Thaza-Kun@users.noreply.github.com> Date: Wed, 24 Apr 2024 02:12:48 +0800 Subject: [PATCH 05/11] =?UTF-8?q?=F0=9F=8E=A8=20Add=20example=20+=20Move?= =?UTF-8?q?=20tests=20to=20onc=20crate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wasm-rs/Cargo.lock | 160 --------------- wasm-rs/Cargo.toml | 4 +- wasm-rs/examples/simple.rs | 52 +++++ wasm-rs/onc/Cargo.toml | 2 +- .../{phonotactics/mod.rs => phonotactics.rs} | 35 ++++ wasm-rs/src/functions.rs | 8 +- wasm-rs/src/lib.rs | 12 +- wasm-rs/src/phonotactics.rs | 194 ++---------------- 8 files changed, 120 insertions(+), 347 deletions(-) create mode 100644 wasm-rs/examples/simple.rs rename wasm-rs/onc/src/{phonotactics/mod.rs => phonotactics.rs} (71%) diff --git a/wasm-rs/Cargo.lock b/wasm-rs/Cargo.lock index af0206e..b6f65e1 100644 --- a/wasm-rs/Cargo.lock +++ b/wasm-rs/Cargo.lock @@ -2,54 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "anstream" -version = "0.6.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "utf8parse", -] - -[[package]] -name = "anstyle" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" - -[[package]] -name = "anstyle-parse" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "anstyle-wincon" -version = "3.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" -dependencies = [ - "anstyle", - "windows-sys", -] - [[package]] name = "bumpalo" version = "3.15.4" @@ -62,39 +14,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "clap" -version = "4.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" -dependencies = [ - "anstream", - "anstyle", - "clap_lex", - "strsim", -] - -[[package]] -name = "clap_lex" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" - -[[package]] -name = "colorchoice" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" - [[package]] name = "either" version = "1.10.0" @@ -242,12 +161,6 @@ dependencies = [ "serde", ] -[[package]] -name = "strsim" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" - [[package]] name = "syn" version = "2.0.58" @@ -299,12 +212,6 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "utf8parse" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" - [[package]] name = "wasm-bindgen" version = "0.2.92" @@ -364,7 +271,6 @@ checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" name = "wasm-rs" version = "0.1.0" dependencies = [ - "clap", "itertools", "onc", "serde", @@ -373,72 +279,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" - [[package]] name = "winnow" version = "0.6.5" diff --git a/wasm-rs/Cargo.toml b/wasm-rs/Cargo.toml index 7b89a99..cebfd9e 100644 --- a/wasm-rs/Cargo.toml +++ b/wasm-rs/Cargo.toml @@ -1,4 +1,5 @@ workspace = { members = ["onc"] } + [package] name = "wasm-rs" version = "0.1.0" @@ -6,10 +7,9 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] -crate-type = ["cdylib"] +crate-type = ["lib", "cdylib"] [dependencies] -clap = { version = "4.5.4", features = ["cargo"] } itertools = "0.12.1" onc = { version = "0.1.0", path = "onc" } serde = { version = "1.0.197", features = ["derive"] } diff --git a/wasm-rs/examples/simple.rs b/wasm-rs/examples/simple.rs new file mode 100644 index 0000000..3b01de6 --- /dev/null +++ b/wasm-rs/examples/simple.rs @@ -0,0 +1,52 @@ +use wasm_rs::{ + functions::parse_default_tatabunyi_toml, + phonotactics::{ParseResultOptions, PhonotacticToml}, +}; + +const INPUT: [&'static str; 5] = ["susyi", "Penerangan", "English", "Ramadhan", "ramadan"]; +const CONFIG: &'static str = r#" +default = "Melayu Moden" + +[[phonotactic]] +name = "Melayu Lama" +definition.onset = [["ny", "ng", "sw", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", "w", "r"]] +definition.nucleus = [["a", "e", "i", "o", "u"]] +definition.coda = [["ng", "m", "n", "p", "t", "k", "s", "h", "l", "r"]] + +[[phonotactic]] +name = "Melayu Klasik" +definition.onset = [["kh", "sy", "gh", "ny", "ng", "dh", "q", "f", "sw", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", "w", "r"]] +definition.nucleus = [["a", "e", "i", "o", "u"]] +definition.coda = [["kh", "sy", "gh", "ng", "q", "f", "b", "m", "n", "p", "t", "k", "s", "h", "l", "r"]] + +[[phonotactic]] +name = "Melayu Moden" +definition.onset = [["sp", "spr", "sw", "sk", "skr", "st", "str", "kl", "fl", "bl", "pl", "pr", "kr", "gr", "tr", "dr", "kh", "sy", "gh", "ny", "ng", "v", "x", "q", "f", "y", "w", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "r"]] +definition.nucleus = [["a", "e", "i", "o", "u"]] +definition.coda = [["ks", "ns", "nk", "lf", "rt", "rd", "rt", "kh", "sy", "gh", "ng", "q", "f", "b", "m", "n", "p", "t", "k", "s", "h", "l", "r"]] +"#; + +fn main() { + for (n, &i) in INPUT.iter().enumerate() { + println!("\n[E.g. {}] ========", n + 1); + let input = String::from(i); + println!("Input\t\t: {}", &input); + + let mut phonotactic = parse_default_tatabunyi_toml(CONFIG.to_string()); + + let result = phonotactic.parse_string(input, ParseResultOptions::new(Some("/".into()))); + + println!( + "{}", + match result.error() { + true => format!( + "Err parse\t: {}{}", + result.head().unwrap(), + result.mid().unwrap(), + result.tail().unwrap() + ), + false => format!("Ok parse\t: {}", result.full().unwrap()), + } + ) + } +} diff --git a/wasm-rs/onc/Cargo.toml b/wasm-rs/onc/Cargo.toml index 19c4eb4..1dd8667 100644 --- a/wasm-rs/onc/Cargo.toml +++ b/wasm-rs/onc/Cargo.toml @@ -8,4 +8,4 @@ edition = "2021" [dependencies] itertools = "0.12.1" nom = "7.1.3" -serde = "1.0.197" +serde = { version = "1.0.197", features = ["derive"] } diff --git a/wasm-rs/onc/src/phonotactics/mod.rs b/wasm-rs/onc/src/phonotactics.rs similarity index 71% rename from wasm-rs/onc/src/phonotactics/mod.rs rename to wasm-rs/onc/src/phonotactics.rs index 1b02669..8d21334 100644 --- a/wasm-rs/onc/src/phonotactics/mod.rs +++ b/wasm-rs/onc/src/phonotactics.rs @@ -107,3 +107,38 @@ impl From<(Option, O, Option)> for SyllableUnit { } } } + +// TODO: BUG PLASTIK SEPATUTNYA PLAS/TIK BUKAN PLA/STIK +// TODO: BUG SWASTA SEPATUTNYA SWAS/TA BUKAN SWA/STA + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn main() { + // TODO Make a list of string to test and iterate through all. + let word = "penerangan".to_string(); + let definition = SyllableTags::new_ordered( + vec![ + "ny", "ng", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", + "w", "r", + ], + vec!["a", "e", "i", "o", "u"], + vec!["ng", "m", "n", "p", "t", "k", "s", "h", "l", "r"], + ) + .as_string(); + let phonotactic = PhonotacticRule::with_definitions(definition.to_owned()); + + let (_rest, word) = phonotactic.parse_syllables(&word).expect("Error"); + let w = word.with_postprocessing(&definition); + assert_eq!( + w.syllables, + vec![ + SyllableUnit::from((Some("p"), "e", None)), + SyllableUnit::from((Some("n"), "e", None)), + SyllableUnit::from((Some("r"), "a", None)), + SyllableUnit::from((Some("ng"), "a", Some("n"))) + ] + ); + } +} diff --git a/wasm-rs/src/functions.rs b/wasm-rs/src/functions.rs index 5709f18..b996520 100644 --- a/wasm-rs/src/functions.rs +++ b/wasm-rs/src/functions.rs @@ -38,12 +38,8 @@ pub fn parse_tatabunyi_toml(data: String) -> Vec { pub fn parse_default_tatabunyi_toml(data: String) -> Phonotactic { match PhonotacticToml::from_toml_str(data) { Ok(v) => v - .get_phonotactics() - .iter() - .filter(|p| p.name() == v.default) - .collect::>() - .first() - .unwrap_or(&&Phonotactic::default()) + .get_default_phonotactic() + .unwrap_or(Phonotactic::default()) .to_owned() .clone(), Err(e) => { diff --git a/wasm-rs/src/lib.rs b/wasm-rs/src/lib.rs index 262d887..9095dcf 100644 --- a/wasm-rs/src/lib.rs +++ b/wasm-rs/src/lib.rs @@ -1,7 +1,7 @@ -mod functions; -mod imbuhan; -mod phonology; -mod phonotactics; +pub mod functions; +pub mod imbuhan; +pub mod phonology; +pub mod phonotactics; -// TODO: BUG PLASTIK SEPATUTNYA PLAS/TIK BUKAN PLA/STIK -// TODO: BUG SWASTA SEPATUTNYA SWAS/TA BUKAN SWA/STA +// TODO: Pindahkan Imbuhan ke onc +// TODO: Pindahkan Phonology ke onc diff --git a/wasm-rs/src/phonotactics.rs b/wasm-rs/src/phonotactics.rs index c3595a9..4598386 100644 --- a/wasm-rs/src/phonotactics.rs +++ b/wasm-rs/src/phonotactics.rs @@ -20,6 +20,16 @@ pub struct Phonotactic { // } // } +impl Phonotactic { + pub fn as_rule(&self) -> PhonotacticRule { + PhonotacticRule::with_definitions(self.definition.clone()) + } + + pub fn definitions(&self) -> SyllableTags { + self.definition.clone() + } +} + #[wasm_bindgen] impl Phonotactic { pub fn parse_string(&mut self, input: String, options: ParseResultOptions) -> ParseResults { @@ -29,10 +39,6 @@ impl Phonotactic { InnerParseResult::from(s).render(options) } - pub(crate) fn as_rule(&self) -> PhonotacticRule { - PhonotacticRule::with_definitions(self.definition.clone()) - } - #[wasm_bindgen(getter)] pub fn name(&self) -> String { self.name.clone() @@ -45,7 +51,7 @@ impl Phonotactic { #[derive(Deserialize)] pub struct PhonotacticToml { - pub(crate) default: String, + pub default: String, phonotactic: Vec, } @@ -58,6 +64,16 @@ impl PhonotacticToml { pub fn get_phonotactics(&self) -> Vec { self.phonotactic.clone() } + + pub fn get_default_phonotactic(&self) -> Option { + self.phonotactic + .iter() + .filter(|p| p.name == self.default) + .collect::>() + .first() + .cloned() + .cloned() + } } #[wasm_bindgen] @@ -99,12 +115,12 @@ struct InnerParseResult<'a> { phrase: Phrase<&'a str>, } +// TODO: Proper Result #[wasm_bindgen] pub struct ParseResults { options: ParseResultOptions, full: Option, partial: Option<(String, String, String)>, - // details: String, } #[wasm_bindgen] @@ -218,169 +234,3 @@ impl<'a> From>> for InnerParseResult<'a> { } } } - -#[cfg(test)] -mod test { - use crate::phonotactics::Phonotactic; - - use super::{tags::SyllableTags, ParseResultOptions, Phrase, SyllableUnit}; - - #[test] - fn test_panic_condition() { - // Test some combination of words here to get the compiler to panic - let word = "byi".to_string(); - let definition = SyllableTags::new_ordered( - vec![ - "ny", "ng", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", - "w", "r", - ], - vec!["a", "e", "i", "o", "u"], - vec!["ng", "m", "n", "p", "t", "k", "s", "h", "l", "r"], - ) - .as_string(); - let mut malay_phonotactic = Phonotactic::new("Melayu Klasik".into(), definition.clone()); - let result = - malay_phonotactic.parse_string(word, ParseResultOptions::new(Some("/".into()))); - // let w = word.with_postprocessing(&definition); - // assert_eq!( - // w.syllables, - // vec![ - // SyllableUnit::from((Some("p"), "e", None)), - // SyllableUnit::from((Some("n"), "e", None)), - // SyllableUnit::from((Some("r"), "a", None)), - // SyllableUnit::from((Some("ng"), "a", Some("n"))) - // ] - // ); - } - #[test] - fn test_penerangan_melayu_lama() { - let word = "penerangan".to_string(); - let definition = SyllableTags::new_ordered( - vec![ - "ny", "ng", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", - "w", "r", - ], - vec!["a", "e", "i", "o", "u"], - vec!["ng", "m", "n", "p", "t", "k", "s", "h", "l", "r"], - ) - .as_string(); - let mut malay_phonotactic = Phonotactic::new("Melayu Klasik".into(), definition.clone()); - - let (_rest, word) = malay_phonotactic.parse_syllables(&word).expect("Error"); - let w = word.with_postprocessing(&definition); - assert_eq!( - w.syllables, - vec![ - SyllableUnit::from((Some("p"), "e", None)), - SyllableUnit::from((Some("n"), "e", None)), - SyllableUnit::from((Some("r"), "a", None)), - SyllableUnit::from((Some("ng"), "a", Some("n"))) - ] - ); - } - #[test] - fn test_menyanyi_melayu_lama() { - let word = "menyanyi".to_string(); - let definition = SyllableTags::new_ordered( - vec![ - "ny", "ng", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", - "w", "r", - ], - vec!["a", "e", "i", "o", "u"], - vec!["ng", "m", "n", "p", "t", "k", "s", "h", "l", "r"], - ) - .as_string(); - let mut malay_phonotactic = Phonotactic::new("Melayu Klasik".into(), definition.clone()); - - let (_rest, word) = malay_phonotactic.parse_syllables(&word).expect("Error"); - let w = word.with_postprocessing(&definition); - assert_eq!( - w.syllables, - vec![ - SyllableUnit::from((Some("m"), "e", None)), - SyllableUnit::from((Some("ny"), "a", None)), - SyllableUnit::from((Some("ny"), "i", None)), - ] - ); - } - #[test] - fn test_mesyuarat_melayu_klasik() { - let word = "mesyuarat".to_string(); - let definition = SyllableTags::new_ordered( - vec![ - "kh", "sy", "gh", "ny", "ng", "q", "f", "m", "n", "p", "t", "c", "k", "b", "d", - "j", "g", "s", "h", "l", "y", "w", "r", - ], - vec!["a", "e", "i", "o", "u"], - vec![ - "kh", "sy", "gh", "ng", "q", "f", "b", "m", "n", "p", "t", "k", "s", "h", "l", "r", - ], - ) - .as_string(); - let mut arab_phonotactic = Phonotactic::new("Melayu Klasik".into(), definition.clone()); - let (_rem, word) = arab_phonotactic.parse_syllables(&word).expect("Error"); - let w = word.with_postprocessing(&definition); - assert_eq!( - w.syllables, - vec![ - SyllableUnit::from((Some("m"), "e", None)), - SyllableUnit::from((Some("sy"), "u", None)), - SyllableUnit::from((None, "a", None)), - SyllableUnit::from((Some("r"), "a", Some("t"))) - ] - ); - } - - #[test] - fn test_musytari_melayu_klasik() { - let word = "musytari".to_string(); - let definition = SyllableTags::new_ordered( - vec![ - "kh", "sy", "gh", "ny", "ng", "q", "f", "m", "n", "p", "t", "c", "k", "b", "d", - "j", "g", "s", "h", "l", "y", "w", "r", - ], - vec!["a", "e", "i", "o", "u"], - vec![ - "kh", "sy", "gh", "ng", "q", "f", "b", "m", "n", "p", "t", "k", "s", "h", "l", "r", - ], - ) - .as_string(); - let mut arab_phonotactic = Phonotactic::new("Melayu Klasik".into(), definition.clone()); - let (_rem, word) = arab_phonotactic.parse_syllables(&word).expect("Error"); - let w = word.with_postprocessing(&definition); - assert_eq!( - w.syllables, - vec![ - SyllableUnit::from((Some("m"), "u", Some("sy"))), - SyllableUnit::from((Some("t"), "a", None)), - SyllableUnit::from((Some("r"), "i", None)) - ] - ); - } - - #[test] - fn test_ghaib_melayu_klasik() { - let word = "ghaib".to_string(); - let definition = SyllableTags::new_ordered( - vec![ - "kh", "sy", "gh", "ny", "ng", "q", "f", "m", "n", "p", "t", "c", "k", "b", "d", - "j", "g", "s", "h", "l", "y", "w", "r", - ], - vec!["a", "e", "i", "o", "u"], - vec![ - "kh", "sy", "gh", "ng", "q", "f", "b", "m", "n", "p", "t", "k", "s", "h", "l", "r", - ], - ) - .as_string(); - let mut arab_phonotactic = Phonotactic::new("Melayu Klasik".into(), definition.clone()); - let (_rem, word) = arab_phonotactic.parse_syllables(&word).expect("Error"); - let w = word.with_postprocessing(&definition); - assert_eq!( - w.syllables, - vec![ - SyllableUnit::from((Some("gh"), "a", None)), - SyllableUnit::from((None, "i", Some("b"))), - ] - ); - } -} From ed5734cac16b6de16faa95a491a93094b4f96f61 Mon Sep 17 00:00:00 2001 From: Thaza_Kun <61819672+Thaza-Kun@users.noreply.github.com> Date: Wed, 24 Apr 2024 02:18:07 +0800 Subject: [PATCH 06/11] :art: Convert to test --- wasm-rs/onc/examples/website.rs | 1 + 1 file changed, 1 insertion(+) create mode 100644 wasm-rs/onc/examples/website.rs diff --git a/wasm-rs/onc/examples/website.rs b/wasm-rs/onc/examples/website.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/wasm-rs/onc/examples/website.rs @@ -0,0 +1 @@ + From a15f7b782405f6f4d5108c98546ce673cd7343cf Mon Sep 17 00:00:00 2001 From: Thaza_Kun <61819672+Thaza-Kun@users.noreply.github.com> Date: Wed, 24 Apr 2024 16:21:36 +0800 Subject: [PATCH 07/11] :pencil2: Typo --- static/Imbuhan.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/Imbuhan.toml b/static/Imbuhan.toml index 7688421..13980cc 100644 --- a/static/Imbuhan.toml +++ b/static/Imbuhan.toml @@ -5,7 +5,7 @@ awal = "me" [[imbuhan]] ganti.awal = { "" = "ng", "k" = "ng", "g" = "ngg", "h" = "ng", "q" = "ngq", "b" = "mb", "v" = "mv", "f" = "mf", "p" = "m", "c" = "nc", "d" = "nd", "j" = "nj", "sy" = "nsy", "t" = "n", "z" = "z", "s" = "ny" } -untuk = ["kata nama"] +untuk = ["kata kerja"] awal = "pe" [[imbuhan]] From e5565b452e8acd30982d8247e904d10a7cbe1d97 Mon Sep 17 00:00:00 2001 From: Thaza_Kun <61819672+Thaza-Kun@users.noreply.github.com> Date: Wed, 24 Apr 2024 16:23:18 +0800 Subject: [PATCH 08/11] =?UTF-8?q?=E2=99=BB=20Moving=20things=20to=20onc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wasm-rs/Cargo.lock | 7 ++ wasm-rs/Cargo.toml | 8 ++ wasm-rs/examples/simple.rs | 52 --------- wasm-rs/onc/examples/website.rs | 1 - wasm-rs/onc/src/affixes.rs | 184 ++++++++++++++++++++++++++++++++ wasm-rs/onc/src/lib.rs | 27 +++++ wasm-rs/src/imbuhan.rs | 174 ++---------------------------- wasm-rs/src/lib.rs | 3 - wasm-rs/src/phonology.rs | 2 + wasm-rs/src/phonotactics.rs | 109 ++++++------------- 10 files changed, 271 insertions(+), 296 deletions(-) delete mode 100644 wasm-rs/examples/simple.rs delete mode 100644 wasm-rs/onc/examples/website.rs create mode 100644 wasm-rs/onc/src/affixes.rs diff --git a/wasm-rs/Cargo.lock b/wasm-rs/Cargo.lock index b6f65e1..9f61bc7 100644 --- a/wasm-rs/Cargo.lock +++ b/wasm-rs/Cargo.lock @@ -60,6 +60,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "log" version = "0.4.21" @@ -272,6 +278,7 @@ name = "wasm-rs" version = "0.1.0" dependencies = [ "itertools", + "lazy_static", "onc", "serde", "serde-wasm-bindgen", diff --git a/wasm-rs/Cargo.toml b/wasm-rs/Cargo.toml index cebfd9e..ac4cad3 100644 --- a/wasm-rs/Cargo.toml +++ b/wasm-rs/Cargo.toml @@ -11,8 +11,16 @@ crate-type = ["lib", "cdylib"] [dependencies] itertools = "0.12.1" +lazy_static = { version = "1.4.0", optional = true } onc = { version = "0.1.0", path = "onc" } serde = { version = "1.0.197", features = ["derive"] } serde-wasm-bindgen = "0.6.5" toml = "0.8.11" wasm-bindgen = { version = "0.2.92", features = ["serde"] } + +[features] +lazystatic = ["dep:lazy_static"] + +[[example]] +name = "imbuhan" +required-features = ["lazystatic"] diff --git a/wasm-rs/examples/simple.rs b/wasm-rs/examples/simple.rs deleted file mode 100644 index 3b01de6..0000000 --- a/wasm-rs/examples/simple.rs +++ /dev/null @@ -1,52 +0,0 @@ -use wasm_rs::{ - functions::parse_default_tatabunyi_toml, - phonotactics::{ParseResultOptions, PhonotacticToml}, -}; - -const INPUT: [&'static str; 5] = ["susyi", "Penerangan", "English", "Ramadhan", "ramadan"]; -const CONFIG: &'static str = r#" -default = "Melayu Moden" - -[[phonotactic]] -name = "Melayu Lama" -definition.onset = [["ny", "ng", "sw", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", "w", "r"]] -definition.nucleus = [["a", "e", "i", "o", "u"]] -definition.coda = [["ng", "m", "n", "p", "t", "k", "s", "h", "l", "r"]] - -[[phonotactic]] -name = "Melayu Klasik" -definition.onset = [["kh", "sy", "gh", "ny", "ng", "dh", "q", "f", "sw", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", "w", "r"]] -definition.nucleus = [["a", "e", "i", "o", "u"]] -definition.coda = [["kh", "sy", "gh", "ng", "q", "f", "b", "m", "n", "p", "t", "k", "s", "h", "l", "r"]] - -[[phonotactic]] -name = "Melayu Moden" -definition.onset = [["sp", "spr", "sw", "sk", "skr", "st", "str", "kl", "fl", "bl", "pl", "pr", "kr", "gr", "tr", "dr", "kh", "sy", "gh", "ny", "ng", "v", "x", "q", "f", "y", "w", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "r"]] -definition.nucleus = [["a", "e", "i", "o", "u"]] -definition.coda = [["ks", "ns", "nk", "lf", "rt", "rd", "rt", "kh", "sy", "gh", "ng", "q", "f", "b", "m", "n", "p", "t", "k", "s", "h", "l", "r"]] -"#; - -fn main() { - for (n, &i) in INPUT.iter().enumerate() { - println!("\n[E.g. {}] ========", n + 1); - let input = String::from(i); - println!("Input\t\t: {}", &input); - - let mut phonotactic = parse_default_tatabunyi_toml(CONFIG.to_string()); - - let result = phonotactic.parse_string(input, ParseResultOptions::new(Some("/".into()))); - - println!( - "{}", - match result.error() { - true => format!( - "Err parse\t: {}{}", - result.head().unwrap(), - result.mid().unwrap(), - result.tail().unwrap() - ), - false => format!("Ok parse\t: {}", result.full().unwrap()), - } - ) - } -} diff --git a/wasm-rs/onc/examples/website.rs b/wasm-rs/onc/examples/website.rs deleted file mode 100644 index 8b13789..0000000 --- a/wasm-rs/onc/examples/website.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/wasm-rs/onc/src/affixes.rs b/wasm-rs/onc/src/affixes.rs new file mode 100644 index 0000000..dadcb9a --- /dev/null +++ b/wasm-rs/onc/src/affixes.rs @@ -0,0 +1,184 @@ +use crate::phonotactics::PhonotacticRule; +use serde::{Deserialize, Serialize}; +use std::collections::{HashMap, HashSet}; +use std::hash::Hash; + +#[derive(Serialize, Deserialize, Debug)] +pub struct ReplacementRule { + #[serde(rename = "awal")] + prefix: Option>, + #[serde(rename = "akhir")] + postfix: Option>, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct AffixRule +where + POS: Hash + Eq, +{ + #[serde(rename = "untuk")] + apply_to: HashSet, + #[serde(rename = "awal")] + prefix: Option, + #[serde(rename = "akhir")] + postfix: Option, + #[serde(rename = "ganti")] + replace: Option, +} + +impl AffixRule +where + POS: Hash + Eq, +{ + pub fn new() -> Self { + Self { + apply_to: HashSet::new(), + prefix: None, + postfix: None, + replace: None, + } + } + + pub fn pos(&self) -> HashSet<&POS> { + HashSet::<&POS>::from_iter(self.apply_to.iter()) + } + + pub fn transform_string_with(&self, input: &str, phonotactic: &PhonotacticRule) -> String { + let text = input.to_string(); + let head = match &self.prefix { + Some(awal) => match &self.replace { + Some(ReplacementRule { + prefix: Some(a), .. + }) => { + if let Ok((_rest, phrase)) = phonotactic.parse_syllables(&text) { + if let Some(first) = phrase.syllables.first() { + let mut offset = 0; + let default = &"".to_string(); + let inbetween = if let Some(onset) = first.onset { + offset = onset.len(); + match a.get(onset) { + Some(v) => v.clone(), + None => String::from(onset), + } + } else { + a.get("").unwrap_or(default).clone() + }; + let tail = text[offset..].to_string(); + format!("{}{}{}", awal.clone(), inbetween, tail) + } else { + String::new() + } + } else { + String::new() + } + } + &Some(ReplacementRule { prefix: None, .. }) | None => { + format!("{}{}", awal.clone(), text) + } + }, + None => format!("{}", text), + }; + match &self.postfix { + Some(akhir) => match &self.replace { + Some(ReplacementRule { + postfix: Some(a), .. + }) => { + if let Ok((_rest, phrase)) = phonotactic.parse_syllables(&text) { + if let Some(first) = phrase.syllables.first() { + let mut offset = 0; + let default = &"".to_string(); + let inbetween = if let Some(onset) = first.onset { + offset = onset.len(); + match a.get(onset) { + Some(v) => v.clone(), + None => String::from(onset), + } + } else { + a.get("").unwrap_or(default).clone() + }; + let tail = text[offset..].to_string(); + format!("{}{}{}", tail, inbetween, akhir.clone()) + } else { + String::new() + } + } else { + String::new() + } + } + &Some(ReplacementRule { postfix: None, .. }) | None => { + format!("{}{}", head, akhir.clone()) + } + }, + None => head, + } + .to_lowercase() + } +} +#[cfg(test)] +mod test { + use super::*; + use crate::phonotactics::tags::SyllableTags; + + #[derive(Hash, PartialEq, Eq)] + enum ExamplePOS { + Verb, + } + + #[test] + fn test_imbuhan_awal_mengawal() { + let word = "bintang".to_string(); + let definition = SyllableTags::new_ordered( + vec![ + "ny", "ng", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", + "w", "r", + ], + vec!["a", "e", "i", "o", "u"], + vec!["ng", "m", "n", "p", "t", "k", "s", "h", "l", "r"], + ) + .as_string(); + let impbuhan = AffixRule { + apply_to: HashSet::from([ExamplePOS::Verb]), + prefix: Some("me".into()), + postfix: None, + replace: Some(ReplacementRule { + prefix: Some(HashMap::from_iter(vec![("b".into(), "mb".into())])), + postfix: None, + }), + }; + let malay_phonotactic = PhonotacticRule::with_definitions(definition.clone()); + + assert_eq!( + impbuhan.transform_string_with(&word, &malay_phonotactic), + String::from("membintang") + ) + } + + #[test] + fn test_imbuhan_awal_mengambil() { + let word = "ambil".to_string(); + let definition = SyllableTags::new_ordered( + vec![ + "ny", "ng", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", + "w", "r", + ], + vec!["a", "e", "i", "o", "u"], + vec!["ng", "m", "n", "p", "t", "k", "s", "h", "l", "r"], + ) + .as_string(); + let impbuhan = AffixRule { + apply_to: HashSet::from([ExamplePOS::Verb]), + prefix: Some("me".into()), + postfix: None, + replace: Some(ReplacementRule { + prefix: Some(HashMap::from_iter(vec![("".into(), "ng".into())])), + postfix: None, + }), + }; + let malay_phonotactic = PhonotacticRule::with_definitions(definition.clone()); + + assert_eq!( + impbuhan.transform_string_with(&word, &malay_phonotactic), + String::from("mengambil") + ) + } +} diff --git a/wasm-rs/onc/src/lib.rs b/wasm-rs/onc/src/lib.rs index c0bfc40..e1d055b 100644 --- a/wasm-rs/onc/src/lib.rs +++ b/wasm-rs/onc/src/lib.rs @@ -1,3 +1,30 @@ pub mod phonotactics; pub use nom::IResult; + +pub mod affixes; + +use crate::phonotactics::Phrase; + +pub struct ParseResult<'a> { + pub full: bool, + pub rest: String, + pub phrase: Phrase<&'a str>, +} + +impl<'a> From>> for ParseResult<'a> { + fn from(value: IResult<&'a str, Phrase<&'a str>>) -> Self { + match value { + Ok((rest, phrase)) => Self { + full: rest.is_empty(), + rest: String::from(rest), + phrase, + }, + Err(_e) => Self { + full: false, + rest: "".into(), + phrase: Phrase { syllables: vec![] }, + }, + } + } +} diff --git a/wasm-rs/src/imbuhan.rs b/wasm-rs/src/imbuhan.rs index 5fe2cac..ee9ebdb 100644 --- a/wasm-rs/src/imbuhan.rs +++ b/wasm-rs/src/imbuhan.rs @@ -1,12 +1,10 @@ -use std::collections::HashMap; - +use crate::phonotactics::Phonotactic; +use onc::affixes::AffixRule; use serde::{Deserialize, Serialize}; use wasm_bindgen::prelude::wasm_bindgen; -use crate::phonotactics::Phonotactic; - #[wasm_bindgen] -#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[derive(Serialize, Deserialize, Debug, PartialEq, Hash, Eq)] pub enum JenisKata { #[serde(rename = "kata nama")] Nama, @@ -16,107 +14,24 @@ pub enum JenisKata { Sifat, } -#[derive(Serialize, Deserialize, Debug)] -pub struct Ganti { - awal: Option>, - akhir: Option>, -} - #[wasm_bindgen] #[derive(Serialize, Deserialize, Debug)] -pub struct Imbuhan { - untuk: Vec, - awal: Option, - akhir: Option, - #[serde(default)] - ganti: Option, -} +#[serde(transparent)] +pub struct Imbuhan(AffixRule); #[wasm_bindgen] impl Imbuhan { pub fn new() -> Self { - Self { - untuk: vec![], - awal: None, - akhir: None, - ganti: None, - } + Self(AffixRule::::new()) } - #[wasm_bindgen] pub fn contains(&self, kata_nama: bool, kata_kerja: bool, kata_sifat: bool) -> bool { - (kata_nama && self.untuk.contains(&JenisKata::Nama)) - | (kata_kerja && self.untuk.contains(&JenisKata::Kerja)) - | (kata_sifat && self.untuk.contains(&JenisKata::Sifat)) + (kata_nama && self.0.pos().contains(&JenisKata::Nama)) + | (kata_kerja && self.0.pos().contains(&JenisKata::Kerja)) + | (kata_sifat && self.0.pos().contains(&JenisKata::Sifat)) } - pub fn transform_string_with(&self, input: &str, phonotactic: &Phonotactic) -> String { - let text = input.to_string(); - let head = match &self.awal { - Some(awal) => match &self.ganti { - Some(Ganti { - awal: Some(a), - akhir: _, - }) => { - if let Ok((_rest, phrase)) = phonotactic.as_rule().parse_syllables(&text) { - if let Some(first) = phrase.syllables.first() { - let mut offset = 0; - let default = &"".to_string(); - let inbetween = if let Some(onset) = first.onset { - offset = onset.len(); - match a.get(onset) { - Some(v) => v.clone(), - None => String::from(onset), - } - } else { - a.get("").unwrap_or(default).clone() - }; - let tail = text[offset..].to_string(); - format!("{}{}{}", awal.clone(), inbetween, tail) - } else { - String::new() - } - } else { - String::new() - } - } - &Some(Ganti { awal: None, .. }) | None => format!("{}{}", awal.clone(), text), - }, - None => format!("{}", text), - }; - match &self.akhir { - Some(akhir) => match &self.ganti { - Some(Ganti { - akhir: Some(a), - awal: _, - }) => { - if let Ok((_rest, phrase)) = phonotactic.as_rule().parse_syllables(&text) { - if let Some(first) = phrase.syllables.first() { - let mut offset = 0; - let default = &"".to_string(); - let inbetween = if let Some(onset) = first.onset { - offset = onset.len(); - match a.get(onset) { - Some(v) => v.clone(), - None => String::from(onset), - } - } else { - a.get("").unwrap_or(default).clone() - }; - let tail = text[offset..].to_string(); - format!("{}{}{}", tail, inbetween, akhir.clone()) - } else { - String::new() - } - } else { - String::new() - } - } - &Some(Ganti { akhir: None, .. }) | None => format!("{}{}", head, akhir.clone()), - }, - None => head, - } - .to_lowercase() + self.0.transform_string_with(input, &phonotactic.as_rule()) } } @@ -132,72 +47,3 @@ impl ImbuhanToml { Ok(d.imbuhan) } } - -#[cfg(test)] -mod test { - use std::collections::HashMap; - - use crate::{ - imbuhan::{Ganti, Imbuhan, JenisKata}, - phonotactics::{tags::SyllableTags, Phonotactic}, - }; - #[test] - fn test_imbuhan_awal_mengawal() { - let word = "bintang".to_string(); - let definition = SyllableTags::new_ordered( - vec![ - "ny", "ng", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", - "w", "r", - ], - vec!["a", "e", "i", "o", "u"], - vec!["ng", "m", "n", "p", "t", "k", "s", "h", "l", "r"], - ) - .as_string(); - let impbuhan = Imbuhan { - untuk: vec![JenisKata::Kerja], - awal: Some("me".into()), - akhir: None, - ganti: Some(Ganti { - awal: Some(HashMap::from_iter(vec![("b".into(), "mb".into())])), - akhir: None, - }), - }; - let malay_phonotactic = Phonotactic::new("Melayu Klasik".into(), definition.clone()); - - let (_rest, phrase) = malay_phonotactic.parse_syllables(&word).expect("Error"); - assert_eq!( - impbuhan.transform_string_with(&phrase.as_contiguous(), &malay_phonotactic), - String::from("membintang") - ) - } - - #[test] - fn test_imbuhan_awal_mengambil() { - let word = "ambil".to_string(); - let definition = SyllableTags::new_ordered( - vec![ - "ny", "ng", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", - "w", "r", - ], - vec!["a", "e", "i", "o", "u"], - vec!["ng", "m", "n", "p", "t", "k", "s", "h", "l", "r"], - ) - .as_string(); - let impbuhan = Imbuhan { - untuk: vec![JenisKata::Kerja], - awal: Some("me".into()), - akhir: None, - ganti: Some(Ganti { - awal: Some(HashMap::from_iter(vec![("".into(), "ng".into())])), - akhir: None, - }), - }; - let malay_phonotactic = Phonotactic::new("Melayu Klasik".into(), definition.clone()); - - let (_rest, phrase) = malay_phonotactic.parse_syllables(&word).expect("Error"); - assert_eq!( - impbuhan.transform_string_with(&word, &malay_phonotactic), - String::from("mengambil") - ) - } -} diff --git a/wasm-rs/src/lib.rs b/wasm-rs/src/lib.rs index 9095dcf..2a236f5 100644 --- a/wasm-rs/src/lib.rs +++ b/wasm-rs/src/lib.rs @@ -2,6 +2,3 @@ pub mod functions; pub mod imbuhan; pub mod phonology; pub mod phonotactics; - -// TODO: Pindahkan Imbuhan ke onc -// TODO: Pindahkan Phonology ke onc diff --git a/wasm-rs/src/phonology.rs b/wasm-rs/src/phonology.rs index 55d59ac..2fec66d 100644 --- a/wasm-rs/src/phonology.rs +++ b/wasm-rs/src/phonology.rs @@ -4,6 +4,8 @@ use serde::{Deserialize, Serialize}; use toml; use wasm_bindgen::prelude::*; +// COMMENT: Perhaps no need to move this to onc crate because all these are just representations of data with no attached logic. + #[wasm_bindgen] #[derive(Deserialize)] pub struct Bunyian { diff --git a/wasm-rs/src/phonotactics.rs b/wasm-rs/src/phonotactics.rs index 4598386..4d7502a 100644 --- a/wasm-rs/src/phonotactics.rs +++ b/wasm-rs/src/phonotactics.rs @@ -1,12 +1,10 @@ use onc::phonotactics::tags::SyllableTags; -use onc::phonotactics::{PhonotacticRule, Phrase}; -use onc::IResult; +use onc::phonotactics::PhonotacticRule; +use onc::ParseResult as InnerParseResult; use serde::Deserialize; use wasm_bindgen::prelude::*; -use crate::functions::alert; - #[wasm_bindgen] #[derive(Deserialize, Clone, Default)] pub struct Phonotactic { @@ -14,29 +12,18 @@ pub struct Phonotactic { definition: SyllableTags, } -// impl From for PhonotacticRule { -// fn from(value: Phonotactic) -> Self { -// PhonotacticRule::with_definitions(value.definition) -// } -// } - +#[wasm_bindgen] impl Phonotactic { - pub fn as_rule(&self) -> PhonotacticRule { + pub(crate) fn as_rule(&self) -> PhonotacticRule { PhonotacticRule::with_definitions(self.definition.clone()) } - pub fn definitions(&self) -> SyllableTags { - self.definition.clone() - } -} - -#[wasm_bindgen] -impl Phonotactic { + #[wasm_bindgen] pub fn parse_string(&mut self, input: String, options: ParseResultOptions) -> ParseResults { let text = input.to_lowercase(); let rule = self.as_rule(); let s = rule.parse_syllables(&text); - InnerParseResult::from(s).render(options) + ParseResults::from_inner(&InnerParseResult::from(s), options) } #[wasm_bindgen(getter)] @@ -109,12 +96,6 @@ impl From> for SyllableTagsJson { } } -struct InnerParseResult<'a> { - full: bool, - rest: String, - phrase: Phrase<&'a str>, -} - // TODO: Proper Result #[wasm_bindgen] pub struct ParseResults { @@ -157,49 +138,24 @@ impl ParseResults { options, full: None, partial: None, - // details: "".into(), } } - - pub fn with_full(&mut self, input: String) { - self.full = Some(input); - } - - pub fn with_partial(&mut self, head: String, mid: String, tail: String) { - self.partial = Some((head, mid, tail)); - } -} - -#[wasm_bindgen] -pub struct ParseResultOptions { - separator: Option, -} - -#[wasm_bindgen] -impl ParseResultOptions { - #[wasm_bindgen(constructor)] - pub fn new(separator: Option) -> Self { - Self { separator } - } -} - -impl<'a> InnerParseResult<'a> { - pub fn render(&self, options: ParseResultOptions) -> ParseResults { + pub(crate) fn from_inner(inner: &InnerParseResult, options: ParseResultOptions) -> Self { let mut res = ParseResults::new(options); - if self.full { - res.with_full(self.phrase.as_separated(&res.options.separator)); + if inner.full { + res.with_full(inner.phrase.as_separated(&res.options.separator)); } else { - let mid_tail = if &self.rest.len() > &1 { - &self.rest[0..2] + let mid_tail = if &inner.rest.len() > &1 { + &inner.rest[0..2] } else { - self.rest.as_str() + inner.rest.as_str() }; - let tail_rest = if &self.rest.len() > &1 { - self.rest[2..self.rest.len()].to_string() + let tail_rest = if &inner.rest.len() > &1 { + inner.rest[2..inner.rest.len()].to_string() } else { "".into() }; - let head = self.phrase.as_contiguous(); + let head = inner.phrase.as_contiguous(); let mid = format!( "{head}{tail}", head = head.chars().last().unwrap_or(' '), @@ -213,24 +169,25 @@ impl<'a> InnerParseResult<'a> { } res } + + pub fn with_full(&mut self, input: String) { + self.full = Some(input); + } + + pub fn with_partial(&mut self, head: String, mid: String, tail: String) { + self.partial = Some((head, mid, tail)); + } } -impl<'a> From>> for InnerParseResult<'a> { - fn from(value: IResult<&'a str, Phrase<&'a str>>) -> Self { - match value { - Ok((rest, phrase)) => Self { - full: rest.is_empty(), - rest: String::from(rest), - phrase: phrase, - }, - Err(e) => { - alert(&format!("{}", e)); - Self { - full: false, - rest: "".into(), - phrase: Phrase { syllables: vec![] }, - } - } - } +#[wasm_bindgen] +pub struct ParseResultOptions { + separator: Option, +} + +#[wasm_bindgen] +impl ParseResultOptions { + #[wasm_bindgen(constructor)] + pub fn new(separator: Option) -> Self { + Self { separator } } } From db341b7622b671082840731b4505c3009a58cd6b Mon Sep 17 00:00:00 2001 From: Thaza_Kun <61819672+Thaza-Kun@users.noreply.github.com> Date: Wed, 24 Apr 2024 16:23:29 +0800 Subject: [PATCH 09/11] =?UTF-8?q?=F0=9F=8E=A8=20Add=20examples?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wasm-rs/examples/imbuhan.rs | 80 +++++++++++++++++++++++++++++++++ wasm-rs/examples/phonotactic.rs | 58 ++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 wasm-rs/examples/imbuhan.rs create mode 100644 wasm-rs/examples/phonotactic.rs diff --git a/wasm-rs/examples/imbuhan.rs b/wasm-rs/examples/imbuhan.rs new file mode 100644 index 0000000..c542c66 --- /dev/null +++ b/wasm-rs/examples/imbuhan.rs @@ -0,0 +1,80 @@ +use std::collections::HashSet; + +use wasm_rs::functions::{parse_default_tatabunyi_toml, parse_imbuhan_toml}; +use wasm_rs::imbuhan::JenisKata; + +lazy_static::lazy_static! { +static ref INPUTS: [(&'static str, HashSet); 3] = [ + ("susyi", HashSet::from([JenisKata::Nama])), + ("terang", HashSet::from([JenisKata::Nama, JenisKata::Kerja])), + ("merah", HashSet::from([JenisKata::Sifat])) + ]; +} +const IMBUHAN_CONFIG: &'static str = r#" +[[imbuhan]] +ganti.awal = { "" = "ng", "k" = "ng", "g" = "ngg", "h" = "ng", "q" = "ngq", "b" = "mb", "v" = "mv", "f" = "mf", "p" = "m", "c" = "nc", "d" = "nd", "j" = "nj", "sy" = "nsy", "t" = "n", "z" = "z", "s" = "ny" } +untuk = ["kata kerja"] +awal = "me" + +[[imbuhan]] +untuk = ["kata nama"] +awal = "per" + +[[imbuhan]] +untuk = ["kata nama", "kata kerja"] +awal = "juru" + +[[imbuhan]] +untuk = ["kata kerja", "kata sifat"] +awal = "ter" + +[[imbuhan]] +untuk = ["kata sifat"] +awal = "se" + +[[imbuhan]] +ganti.awal = { "" = "ng", "k" = "ng", "g" = "ngg", "h" = "ng", "q" = "ngq", "b" = "mb", "v" = "mv", "f" = "mf", "p" = "m", "c" = "nc", "d" = "nd", "j" = "nj", "sy" = "nsy", "t" = "n", "z" = "z", "s" = "ny" } +untuk = ["kata kerja"] +awal = "me" +akhir = "kan" + +[[imbuhan]] +untuk = ["kata nama", "kata kerja", "kata sifat"] +awal = "ke" +akhir = "an" +"#; +const PHONOTACTIC_CONFIG: &'static str = r#" +default = "Melayu Moden" + +[[phonotactic]] +name = "Melayu Moden" +definition.onset = [["sp", "spr", "sw", "sk", "skr", "st", "str", "kl", "fl", "bl", "pl", "pr", "kr", "gr", "tr", "dr", "kh", "sy", "gh", "ny", "ng", "v", "x", "q", "f", "y", "w", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "r"]] +definition.nucleus = [["a", "e", "i", "o", "u"]] +definition.coda = [["ks", "ns", "nk", "lf", "rt", "rd", "rt", "kh", "sy", "gh", "ng", "q", "f", "b", "m", "n", "p", "t", "k", "s", "h", "l", "r"]] +"#; + +fn main() { + println!("<< Example: Imbuhan >>"); + let imbuhans = parse_imbuhan_toml(IMBUHAN_CONFIG.to_string()); + let phonotactic = parse_default_tatabunyi_toml(PHONOTACTIC_CONFIG.to_string()); + + for (n, (input, set)) in INPUTS.iter().enumerate() { + println!("\n[E.g. {}] ========", n + 1); + let input = String::from(*input); + println!("Input\t\t: {}", &input); + println!("Golongan Kata\t: {:?}", &set); + + let mut items = Vec::::new(); + + for imbuhan in &imbuhans { + if imbuhan.contains( + set.contains(&JenisKata::Nama), + set.contains(&JenisKata::Kerja), + set.contains(&JenisKata::Sifat), + ) { + items.push(imbuhan.transform_string_with(&input, &phonotactic)); + } + } + println!("Results\t\t: {}", items.join(", ")) + } +} diff --git a/wasm-rs/examples/phonotactic.rs b/wasm-rs/examples/phonotactic.rs new file mode 100644 index 0000000..1692169 --- /dev/null +++ b/wasm-rs/examples/phonotactic.rs @@ -0,0 +1,58 @@ +use wasm_rs::functions::parse_default_tatabunyi_toml; +use wasm_rs::phonotactics::ParseResultOptions; + +const INPUTS: [&'static str; 6] = [ + "susyi", + "Penerangan", + "English", + "Ramadhan", + "ramadan", + "kupu-kupu", +]; +const CONFIG: &'static str = r#" +default = "Melayu Moden" + +[[phonotactic]] +name = "Melayu Lama" +definition.onset = [["ny", "ng", "sw", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", "w", "r"]] +definition.nucleus = [["a", "e", "i", "o", "u"]] +definition.coda = [["ng", "m", "n", "p", "t", "k", "s", "h", "l", "r"]] + +[[phonotactic]] +name = "Melayu Klasik" +definition.onset = [["kh", "sy", "gh", "ny", "ng", "dh", "q", "f", "sw", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "y", "w", "r"]] +definition.nucleus = [["a", "e", "i", "o", "u"]] +definition.coda = [["kh", "sy", "gh", "ng", "q", "f", "b", "m", "n", "p", "t", "k", "s", "h", "l", "r"]] + +[[phonotactic]] +name = "Melayu Moden" +definition.onset = [["sp", "spr", "sw", "sk", "skr", "st", "str", "kl", "fl", "bl", "pl", "pr", "kr", "gr", "tr", "dr", "kh", "sy", "gh", "ny", "ng", "v", "x", "q", "f", "y", "w", "m", "n", "p", "t", "c", "k", "b", "d", "j", "g", "s", "h", "l", "r"]] +definition.nucleus = [["a", "e", "i", "o", "u"]] +definition.coda = [["ks", "ns", "nk", "lf", "rt", "rd", "rt", "kh", "sy", "gh", "ng", "q", "f", "b", "m", "n", "p", "t", "k", "s", "h", "l", "r"]] +"#; + +fn main() { + println!("<< Example: Phonotactic>>"); + let mut phonotactic = parse_default_tatabunyi_toml(CONFIG.to_string()); + + for (n, &input) in INPUTS.iter().enumerate() { + println!("\n[E.g. {}] ========", n + 1); + let input = String::from(input); + println!("Input\t\t: {}", &input); + + let result = phonotactic.parse_string(input, ParseResultOptions::new(Some("/".into()))); + + println!( + "{}", + match result.error() { + true => format!( + "Err parse\t: {}{}", + result.head().unwrap(), + result.mid().unwrap(), + result.tail().unwrap() + ), + false => format!("Ok parse\t: {}", result.full().unwrap()), + } + ) + } +} From 75a78dbaee30e09b616985a7f692b651ce1731ff Mon Sep 17 00:00:00 2001 From: Thaza_Kun <61819672+Thaza-Kun@users.noreply.github.com> Date: Wed, 24 Apr 2024 16:45:45 +0800 Subject: [PATCH 10/11] =?UTF-8?q?=F0=9F=92=9A=20Test=20rust?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 17 +++++++++++++++++ package.json | 5 +++-- 2 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..24c7238 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,17 @@ +on: [push, pull_request] + +jobs: + test-rust: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Install Wasm-pack + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh + + - name: Set Node.js 20.x + uses: actions/setup-node@v4 + with: + node-version: '20' + + - run: yarn test \ No newline at end of file diff --git a/package.json b/package.json index 4a9b1ec..b5a3bea 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,8 @@ "lint": "prettier --check .", "format": "prettier --write .", "modulize-wasm": "node ./wasm-rs/modulize.js", - "wasm": "wasm-pack build ./wasm-rs --target web && yarn run modulize-wasm" + "wasm": "wasm-pack build ./wasm-rs --target web && yarn run modulize-wasm", + "test": "cargo test --manifest-path ./wasm-rs/Cargo.toml --workspace" }, "devDependencies": { "@sveltejs/adapter-auto": "^3.0.0", @@ -42,4 +43,4 @@ "tailwind-variants": "^0.2.1", "vite-plugin-wasm-pack": "^0.1.12" } -} +} \ No newline at end of file From 10d5475a5a5c15d2c5fe6000f53f5823e3cc1a28 Mon Sep 17 00:00:00 2001 From: Thaza_Kun <61819672+Thaza-Kun@users.noreply.github.com> Date: Wed, 24 Apr 2024 16:47:07 +0800 Subject: [PATCH 11/11] =?UTF-8?q?=F0=9F=9A=80=20Bump=20v0.2.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wasm-rs/Cargo.lock | 2 +- wasm-rs/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wasm-rs/Cargo.lock b/wasm-rs/Cargo.lock index 9f61bc7..f9a0e3a 100644 --- a/wasm-rs/Cargo.lock +++ b/wasm-rs/Cargo.lock @@ -275,7 +275,7 @@ checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" [[package]] name = "wasm-rs" -version = "0.1.0" +version = "0.2.0" dependencies = [ "itertools", "lazy_static", diff --git a/wasm-rs/Cargo.toml b/wasm-rs/Cargo.toml index ac4cad3..61241cf 100644 --- a/wasm-rs/Cargo.toml +++ b/wasm-rs/Cargo.toml @@ -2,7 +2,7 @@ workspace = { members = ["onc"] } [package] name = "wasm-rs" -version = "0.1.0" +version = "0.2.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html