diff --git a/Cargo.toml b/Cargo.toml index f264d93..ae49871 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,4 @@ read-fonts = "0" skrifa = "0" itertools = "0.13.0" google-fonts-languages = "*" +toml = "0.8.19" diff --git a/shaperglot-cli/Cargo.toml b/shaperglot-cli/Cargo.toml index 5b05a8e..b46c974 100644 --- a/shaperglot-cli/Cargo.toml +++ b/shaperglot-cli/Cargo.toml @@ -14,4 +14,5 @@ skrifa = { workspace = true } read-fonts = { workspace = true } itertools = { workspace = true } clap = { version = "4.5.21", features = ["derive"] } -serde_json = "1.0.70" \ No newline at end of file +serde_json = "1.0.70" +toml = {workspace = true } \ No newline at end of file diff --git a/shaperglot-cli/src/describe.rs b/shaperglot-cli/src/describe.rs new file mode 100644 index 0000000..fe29d52 --- /dev/null +++ b/shaperglot-cli/src/describe.rs @@ -0,0 +1,28 @@ +use clap::Args; + +// Arguments for the `describe` subcommand; clap shows each field's `///` comment as its help. +#[derive(Args)] +pub struct DescribeArgs { + /// Output check definition as JSON + #[arg(long)] + json: bool, + /// Language name or ID to describe + language: String, +} + +/// Print a language's checks: pretty-printed JSON with `--json`, else one description per line. +/// Prints "Language not found" if `args.language` is not in the database. +pub fn describe_command(args: &DescribeArgs, language_database: shaperglot::Languages) { + if let Some(language) = language_database.get_language(&args.language) { + if args.json { + let json = serde_json::to_string_pretty(&language.checks).unwrap(); + println!("{}", json); + } else { + for check in language.checks.iter() { + println!("{}", check.description); + } + } + } else { + println!("Language not found"); + } +} diff --git a/shaperglot-cli/src/main.rs b/shaperglot-cli/src/main.rs index 3127578..74f94b7 100644 --- a/shaperglot-cli/src/main.rs +++ b/shaperglot-cli/src/main.rs @@ -1,8 +1,10 @@ use check::{check_command, CheckArgs}; use clap::{Parser, Subcommand}; +use describe::{describe_command, DescribeArgs}; use report::{report_command, ReportArgs}; mod check; +mod describe; mod report; #[derive(Parser)] @@ -19,6 +21,8 @@ enum Commands { Check(CheckArgs), /// Report language support Report(ReportArgs), + /// 
Describe what is needed to support a language + Describe(DescribeArgs), } fn main() { @@ -32,5 +36,8 @@ fn main() { Commands::Report(args) => { report_command(args, language_database); } + Commands::Describe(args) => { + describe_command(args, language_database); + } } } diff --git a/shaperglot-cli/src/report.rs b/shaperglot-cli/src/report.rs index 3f16908..9947870 100644 --- a/shaperglot-cli/src/report.rs +++ b/shaperglot-cli/src/report.rs @@ -27,7 +27,12 @@ pub struct ReportArgs { } pub fn report_command(args: &ReportArgs, language_database: shaperglot::Languages) { - let font_binary = std::fs::read(args.font.as_path()).expect("Failed to read font file"); + let font_binary = std::fs::read(args.font.as_path()) + .map_err(|e| { + eprintln!("Failed to read font file {}: {}", args.font.display(), e); + std::process::exit(1); + }) + .unwrap(); let checker = Checker::new(&font_binary).expect("Failed to load font"); for language in language_database.iter() { if let Some(filter) = &args.filter { diff --git a/shaperglot-lib/Cargo.toml b/shaperglot-lib/Cargo.toml index 60ad3fd..7e06698 100644 --- a/shaperglot-lib/Cargo.toml +++ b/shaperglot-lib/Cargo.toml @@ -23,6 +23,6 @@ unicode-properties = "0.1.3" unicode-joining-type = "1.0.0" indexmap = "2" log = "0.4" -toml = "0.8.19" +toml = { workspace = true } serde = "1" ambassador = "0.4.1" \ No newline at end of file diff --git a/shaperglot-lib/manual_checks.toml b/shaperglot-lib/manual_checks.toml new file mode 100644 index 0000000..f7d3806 --- /dev/null +++ b/shaperglot-lib/manual_checks.toml @@ -0,0 +1,14 @@ +[[tr_Latn]] +name = "Small caps i should be dotted" +severity = "Warn" +description = "When the letter 'i' is in small caps, it should be dotted" +scoring_strategy = "Continuous" +weight = 10 + +[[tr_Latn.implementations]] +type = "ShapingDiffers" +features_optional = true +pairs = [[ + { text = "i", features = ["smcp"] }, + { text = "i", features = ["smcp"], language = "tr" }, +]] \ No newline at end of file diff 
--git a/shaperglot-lib/src/checks/codepoint_coverage.rs b/shaperglot-lib/src/checks/codepoint_coverage.rs index 83a77e9..bd83434 100644 --- a/shaperglot-lib/src/checks/codepoint_coverage.rs +++ b/shaperglot-lib/src/checks/codepoint_coverage.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use serde_json::json; use std::collections::HashSet; -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, Clone)] pub struct CodepointCoverage { strings: HashSet, code: String, diff --git a/shaperglot-lib/src/checks/mod.rs b/shaperglot-lib/src/checks/mod.rs index cb74536..f83384f 100644 --- a/shaperglot-lib/src/checks/mod.rs +++ b/shaperglot-lib/src/checks/mod.rs @@ -21,13 +21,13 @@ pub trait CheckImplementation { fn execute(&self, checker: &Checker) -> (Vec, usize); } -#[derive(Serialize, Deserialize, PartialEq, Debug)] +#[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] pub enum ScoringStrategy { Continuous, AllOrNothing, } -#[derive(Delegate, Serialize, Deserialize, Debug)] +#[derive(Delegate, Serialize, Deserialize, Debug, Clone)] #[delegate(CheckImplementation)] #[serde(tag = "type")] pub enum CheckType { @@ -36,7 +36,7 @@ pub enum CheckType { ShapingDiffers(ShapingDiffers), } -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, Clone)] pub struct Check { pub name: String, pub severity: ResultCode, diff --git a/shaperglot-lib/src/checks/no_orphaned_marks.rs b/shaperglot-lib/src/checks/no_orphaned_marks.rs index c441b61..4b3a853 100644 --- a/shaperglot-lib/src/checks/no_orphaned_marks.rs +++ b/shaperglot-lib/src/checks/no_orphaned_marks.rs @@ -8,7 +8,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use unicode_properties::{GeneralCategory, UnicodeGeneralCategory}; -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, Clone)] pub struct NoOrphanedMarks { test_strings: Vec, has_orthography: bool, diff --git a/shaperglot-lib/src/checks/shaping_differs.rs 
b/shaperglot-lib/src/checks/shaping_differs.rs index 441ffd1..cb427ce 100644 --- a/shaperglot-lib/src/checks/shaping_differs.rs +++ b/shaperglot-lib/src/checks/shaping_differs.rs @@ -10,7 +10,7 @@ use itertools::Itertools; use rustybuzz::SerializeFlags; use serde::{Deserialize, Serialize}; -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, Clone)] pub struct ShapingDiffers { pairs: Vec<(ShapingInput, ShapingInput)>, features_optional: bool, diff --git a/shaperglot-lib/src/lib.rs b/shaperglot-lib/src/lib.rs index d9fa574..16ad237 100644 --- a/shaperglot-lib/src/lib.rs +++ b/shaperglot-lib/src/lib.rs @@ -1,3 +1,5 @@ +// #![deny(missing_docs)] +// #![deny(clippy::missing_docs_in_private_items)] mod checker; mod checks; mod font; diff --git a/shaperglot-lib/src/providers/mod.rs b/shaperglot-lib/src/providers/mod.rs index 5e2d5a3..275fbfe 100644 --- a/shaperglot-lib/src/providers/mod.rs +++ b/shaperglot-lib/src/providers/mod.rs @@ -3,9 +3,12 @@ use crate::{checks::Check, language::Language}; mod orthographies; mod positional; mod small_caps; +mod toml; + use orthographies::OrthographiesProvider; use positional::PositionalProvider; use small_caps::SmallCapsProvider; +use toml::TomlProvider; pub trait Provider { fn checks_for(&self, language: &Language) -> Vec<Check>; @@ -20,9 +23,7 @@ impl Provider for BaseCheckProvider { checks.extend(OrthographiesProvider.checks_for(language)); checks.extend(SmallCapsProvider.checks_for(language)); checks.extend(PositionalProvider.checks_for(language)); - - // And any manually coded checks - + checks.extend(TomlProvider.checks_for(language)); checks } } diff --git a/shaperglot-lib/src/providers/toml.rs b/shaperglot-lib/src/providers/toml.rs new file mode 100644 index 0000000..db105e3 --- /dev/null +++ b/shaperglot-lib/src/providers/toml.rs @@ -0,0 +1,28 @@ +//! Provider for hand-written checks bundled in `manual_checks.toml`. + +use std::collections::HashMap; +use std::sync::LazyLock; + +use crate::{checks::Check, language::Language, Provider}; + +/// Contents of `manual_checks.toml`, embedded at compile time by `include_str!`. +const TOML_PROFILE: &str = 
include_str!("../../manual_checks.toml"); + +/// Manual checks parsed from [`TOML_PROFILE`] on first access, keyed by language ID. +/// Panics on first access if the embedded TOML fails to deserialize. +static MANUAL_CHECKS: LazyLock<HashMap<String, Vec<Check>>> = + LazyLock::new(|| toml::from_str(TOML_PROFILE).expect("Could not parse manual checks file")); + +/// Provider that supplies the checks listed for a language in `manual_checks.toml`. +pub struct TomlProvider; + +impl Provider for TomlProvider { + /// Returns clones of the manual checks stored under `language.id()`, + /// or an empty list when that language has no entry. + fn checks_for(&self, language: &Language) -> Vec<Check> { + MANUAL_CHECKS + .get(language.id()) + .cloned() + .unwrap_or_default() + } +} diff --git a/shaperglot-lib/src/shaping.rs b/shaperglot-lib/src/shaping.rs index 13ec0b1..323b115 100644 --- a/shaperglot-lib/src/shaping.rs +++ b/shaperglot-lib/src/shaping.rs @@ -8,10 +8,12 @@ use serde::{Deserialize, Serialize}; use crate::Checker; -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, Clone)] pub struct ShapingInput { pub text: String, + #[serde(skip_serializing_if = "Vec::is_empty")] pub features: Vec<String>, + #[serde(skip_serializing_if = "Option::is_none")] pub language: Option<String>, }