diff --git a/font-test-data/src/lib.rs b/font-test-data/src/lib.rs
index 0b6242363..b825beab9 100644
--- a/font-test-data/src/lib.rs
+++ b/font-test-data/src/lib.rs
@@ -236,3 +236,25 @@ pub mod colrv1_json {
 pub mod ttc {
     pub static TTC: &[u8] = include_bytes!("../test_data/ttc/TTC.ttc");
 }
+
+pub mod meta {
+    // The 'meta' table taken from the binary for 'Sankofa': a 16-byte header, two 12-byte data map records, then the data.
+    #[rustfmt::skip]
+    pub static SIMPLE_META_TABLE: &[u8] = &[
+        0x00, 0x00, 0x00, 0x01, // version 1
+        0x00, 0x00, 0x00, 0x00, // flags 0
+        0x00, 0x00, 0x00, 0x28, // reserved; holds 0x28 here, same as the first data offset (NOTE(review): Apple's spec appears to call this field dataOffset - confirm)
+        0x00, 0x00, 0x00, 0x02, // data_maps_count 2
+        0x64, 0x6c, 0x6e, 0x67, // tag: dlng
+        0x00, 0x00, 0x00, 0x28, // data offset 40 (16-byte header + 2 * 12-byte records)
+        0x00, 0x00, 0x00, 0x0d, // data length 13: "en-latn, latn"
+        0x73, 0x6c, 0x6e, 0x67, // tag: slng
+        0x00, 0x00, 0x00, 0x35, // data offset 53 (0x28 + 0x0d)
+        0x00, 0x00, 0x00, 0x04, // data length 4: "latn"
+        0x65, 0x6e, 0x2d, 0x6c, // "en-l"  (dlng data starts here, at offset 0x28)
+        0x61, 0x74, 0x6e, 0x2c, // "atn,"
+        0x20, 0x6c, 0x61, 0x74, // " lat"
+        0x6e, 0x6c, 0x61, 0x74, // "n" ends dlng; "lat" starts slng (offset 0x35)
+        0x6e, 0x00, 0x00, 0x00, // "n" ends slng; three zero bytes follow the data
+    ];
+}
diff --git a/read-fonts/generated/generated_meta.rs b/read-fonts/generated/generated_meta.rs
new file mode 100644
index 000000000..c98acd6f2
--- /dev/null
+++ b/read-fonts/generated/generated_meta.rs
@@ -0,0 +1,178 @@
+// THIS FILE IS AUTOGENERATED.
+// Any changes to this file will be overwritten.
+// For more information about how codegen works, see font-codegen/README.md
+
+#[allow(unused_imports)]
+use crate::codegen_prelude::*;
+
+/// [`meta`](https://docs.microsoft.com/en-us/typography/opentype/spec/meta)
+#[derive(Debug, Clone, Copy)]
+#[doc(hidden)]
+pub struct MetaMarker {
+    data_maps_byte_len: usize,
+}
+
+impl MetaMarker {
+    pub fn version_byte_range(&self) -> Range<usize> {
+        let start = 0;
+        start..start + u32::RAW_BYTE_LEN
+    }
+
+    pub fn flags_byte_range(&self) -> Range<usize> {
+        let start = self.version_byte_range().end;
+        start..start + u32::RAW_BYTE_LEN
+    }
+
+    pub fn reserved_byte_range(&self) -> Range<usize> {
+        let start = self.flags_byte_range().end;
+        start..start + u32::RAW_BYTE_LEN
+    }
+
+    pub fn data_maps_count_byte_range(&self) -> Range<usize> {
+        let start = self.reserved_byte_range().end;
+        start..start + u32::RAW_BYTE_LEN
+    }
+
+    pub fn data_maps_byte_range(&self) -> Range<usize> {
+        let start = self.data_maps_count_byte_range().end;
+        start..start + self.data_maps_byte_len
+    }
+}
+
+impl TopLevelTable for Meta<'_> {
+    /// `meta`
+    const TAG: Tag = Tag::new(b"meta");
+}
+
+impl<'a> FontRead<'a> for Meta<'a> {
+    fn read(data: FontData<'a>) -> Result<Self, ReadError> {
+        let mut cursor = data.cursor();
+        cursor.advance::<u32>();
+        cursor.advance::<u32>();
+        cursor.advance::<u32>();
+        let data_maps_count: u32 = cursor.read()?;
+        let data_maps_byte_len = (data_maps_count as usize)
+            .checked_mul(DataMapRecord::RAW_BYTE_LEN)
+            .ok_or(ReadError::OutOfBounds)?;
+        cursor.advance_by(data_maps_byte_len);
+        cursor.finish(MetaMarker { data_maps_byte_len })
+    }
+}
+
+/// [`meta`](https://docs.microsoft.com/en-us/typography/opentype/spec/meta)
+pub type Meta<'a> = TableRef<'a, MetaMarker>;
+
+impl<'a> Meta<'a> {
+    /// Version number of the metadata table — set to 1.
+    pub fn version(&self) -> u32 {
+        let range = self.shape.version_byte_range();
+        self.data.read_at(range.start).unwrap()
+    }
+
+    /// Flags — currently unused; set to 0.
+    pub fn flags(&self) -> u32 {
+        let range = self.shape.flags_byte_range();
+        self.data.read_at(range.start).unwrap()
+    }
+
+    /// The number of data maps in the table.
+    pub fn data_maps_count(&self) -> u32 {
+        let range = self.shape.data_maps_count_byte_range();
+        self.data.read_at(range.start).unwrap()
+    }
+
+    /// Array of data map records.
+    pub fn data_maps(&self) -> &'a [DataMapRecord] {
+        let range = self.shape.data_maps_byte_range();
+        self.data.read_array(range).unwrap()
+    }
+}
+
+#[cfg(feature = "experimental_traverse")]
+impl<'a> SomeTable<'a> for Meta<'a> {
+    fn type_name(&self) -> &str {
+        "Meta"
+    }
+    fn get_field(&self, idx: usize) -> Option<Field<'a>> {
+        match idx {
+            0usize => Some(Field::new("version", self.version())),
+            1usize => Some(Field::new("flags", self.flags())),
+            2usize => Some(Field::new("data_maps_count", self.data_maps_count())),
+            3usize => Some(Field::new(
+                "data_maps",
+                traversal::FieldType::array_of_records(
+                    stringify!(DataMapRecord),
+                    self.data_maps(),
+                    self.offset_data(),
+                ),
+            )),
+            _ => None,
+        }
+    }
+}
+
+#[cfg(feature = "experimental_traverse")]
+impl<'a> std::fmt::Debug for Meta<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        (self as &dyn SomeTable<'a>).fmt(f)
+    }
+}
+
+///
+#[derive(Clone, Debug, Copy, bytemuck :: AnyBitPattern)]
+#[repr(C)]
+#[repr(packed)]
+pub struct DataMapRecord {
+    /// A tag indicating the type of metadata.
+    pub tag: BigEndian<Tag>,
+    /// Offset in bytes from the beginning of the metadata table to the data for this tag.
+    pub data_offset: BigEndian<Offset32>,
+    /// Length of the data, in bytes. The data is not required to be padded to any byte boundary.
+    pub data_length: BigEndian<u32>,
+}
+
+impl DataMapRecord {
+    /// A tag indicating the type of metadata.
+    pub fn tag(&self) -> Tag {
+        self.tag.get()
+    }
+
+    /// Offset in bytes from the beginning of the metadata table to the data for this tag.
+    pub fn data_offset(&self) -> Offset32 {
+        self.data_offset.get()
+    }
+
+    /// Offset in bytes from the beginning of the metadata table to the data for this tag.
+    ///
+    /// The `data` argument should be retrieved from the parent table
+    /// By calling its `offset_data` method.
+    pub fn data<'a>(&self, data: FontData<'a>) -> Result<Metadata<'a>, ReadError> {
+        let args = (self.tag(), self.data_length());
+        self.data_offset().resolve_with_args(data, &args)
+    }
+
+    /// Length of the data, in bytes. The data is not required to be padded to any byte boundary.
+    pub fn data_length(&self) -> u32 {
+        self.data_length.get()
+    }
+}
+
+impl FixedSize for DataMapRecord {
+    const RAW_BYTE_LEN: usize = Tag::RAW_BYTE_LEN + Offset32::RAW_BYTE_LEN + u32::RAW_BYTE_LEN;
+}
+
+#[cfg(feature = "experimental_traverse")]
+impl<'a> SomeRecord<'a> for DataMapRecord {
+    fn traverse(self, data: FontData<'a>) -> RecordResolver<'a> {
+        RecordResolver {
+            name: "DataMapRecord",
+            get_field: Box::new(move |idx, _data| match idx {
+                0usize => Some(Field::new("tag", self.tag())),
+                1usize => Some(Field::new("data_offset", traversal::FieldType::Unknown)),
+                2usize => Some(Field::new("data_length", self.data_length())),
+                _ => None,
+            }),
+            data,
+        }
+    }
+}
diff --git a/read-fonts/src/array.rs b/read-fonts/src/array.rs
index da0dac986..cc9cc7422 100644
--- a/read-fonts/src/array.rs
+++ b/read-fonts/src/array.rs
@@ -142,7 +142,8 @@ impl<'a, T: FontRead<'a> + VarSize> VarLenArray<'a, T> {
             }
 
             let item_len = T::read_len_at(data, 0)?;
-            let next = T::read(data);
+            let item_data = data.slice(..item_len)?;
+            let next = T::read(item_data);
             data = data.split_off(item_len)?;
             Some(next)
         })
diff --git a/read-fonts/src/table_provider.rs b/read-fonts/src/table_provider.rs
index efb043911..6a9191a86 100644
--- a/read-fonts/src/table_provider.rs
+++ b/read-fonts/src/table_provider.rs
@@ -212,6 +212,10 @@ pub trait TableProvider<'a> {
         self.expect_data_for_tag(tables::ift::IFTX_TAG)
             .and_then(FontRead::read)
     }
+
+    fn meta(&self) -> Result<tables::meta::Meta<'a>, ReadError> {
+        self.expect_table()
+    }
 }
 
 #[cfg(test)]
diff --git a/read-fonts/src/tables.rs b/read-fonts/src/tables.rs
index 9825cb332..9153ef8d1 100644
--- a/read-fonts/src/tables.rs
+++ b/read-fonts/src/tables.rs
@@ -33,6 +33,7 @@ pub mod layout;
 pub mod loca;
 pub mod ltag;
 pub mod maxp;
+pub mod meta;
 pub mod mvar;
 pub mod name;
 pub mod os2;
diff --git a/read-fonts/src/tables/meta.rs b/read-fonts/src/tables/meta.rs
new file mode 100644
index 000000000..13aaf6dcb
--- /dev/null
+++ b/read-fonts/src/tables/meta.rs
@@ -0,0 +1,121 @@
+//! The [meta (Metadata)](https://docs.microsoft.com/en-us/typography/opentype/spec/meta) table
+
+include!("../../generated/generated_meta.rs");
+
+pub const DLNG: Tag = Tag::new(b"dlng");
+pub const SLNG: Tag = Tag::new(b"slng");
+
+/// Data stored in the 'meta' table.
+pub enum Metadata<'a> {
+    /// Used for the 'dlng' and 'slng' metadata
+    ScriptLangTags(VarLenArray<'a, ScriptLangTag<'a>>),
+    /// Other metadata, which may exist in certain apple fonts
+    Other(&'a [u8]),
+}
+
+impl ReadArgs for Metadata<'_> {
+    type Args = (Tag, u32);
+}
+
+impl<'a> FontReadWithArgs<'a> for Metadata<'a> {
+    fn read_with_args(data: FontData<'a>, args: &Self::Args) -> Result<Self, ReadError> {
+        let (tag, len) = *args;
+        let data = data.slice(0..len as usize).ok_or(ReadError::OutOfBounds)?;
+        if [DLNG, SLNG].contains(&tag) {
+            VarLenArray::read(data).map(Metadata::ScriptLangTags)
+        } else {
+            Ok(Metadata::Other(data.as_bytes()))
+        }
+    }
+}
+
+/// One entry of 'dlng'/'slng' metadata, borrowed from the font data.
+#[derive(Clone, Debug)]
+pub struct ScriptLangTag<'a>(&'a str);
+
+impl<'a> ScriptLangTag<'a> {
+    pub fn as_str(&self) -> &'a str {
+        self.0
+    }
+}
+
+impl AsRef<str> for ScriptLangTag<'_> {
+    fn as_ref(&self) -> &str {
+        self.0
+    }
+}
+
+#[cfg(feature = "std")]
+impl From<ScriptLangTag<'_>> for String {
+    fn from(value: ScriptLangTag<'_>) -> Self {
+        value.0.into()
+    }
+}
+
+impl VarSize for ScriptLangTag<'_> {
+    type Size = u32;
+
+    /// Returns the length in bytes of the entry that starts at `pos`.
+    ///
+    /// An entry runs up to and including the next `,`, or to the end of `data`.
+    fn read_len_at(data: FontData, pos: usize) -> Option<usize> {
+        let bytes = data.split_off(pos)?.as_bytes();
+        if bytes.is_empty() {
+            return None;
+        }
+        // search only the bytes at/after `pos`; scanning `data` from 0 is wrong when pos > 0
+        let end = bytes
+            .iter()
+            .position(|b| *b == b',')
+            .map(|pos| pos + 1) // include comma
+            .unwrap_or(bytes.len());
+        Some(end)
+    }
+}
+
+impl<'a> FontRead<'a> for ScriptLangTag<'a> {
+    /// Decodes one entry as UTF-8, trimming surrounding spaces and commas.
+    fn read(data: FontData<'a>) -> Result<Self, ReadError> {
+        std::str::from_utf8(data.as_bytes())
+            .map_err(|_| ReadError::MalformedData("ScriptLangTag must be utf8"))
+            .map(|s| ScriptLangTag(s.trim_matches([' ', ','])))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use font_test_data::meta as test_data;
+
+    impl PartialEq<&str> for ScriptLangTag<'_> {
+        fn eq(&self, other: &&str) -> bool {
+            self.as_ref() == *other
+        }
+    }
+
+    fn expect_script_lang_tags(table: Metadata, expected: &[&str]) -> bool {
+        let Metadata::ScriptLangTags(langs) = table else {
+            panic!("wrong metadata");
+        };
+        let result = langs.iter().map(|x| x.unwrap()).collect::<Vec<_>>();
+        result == expected
+    }
+
+    #[test]
+    fn parse_simple() {
+        let table = Meta::read(test_data::SIMPLE_META_TABLE.into()).unwrap();
+        let rec1 = table.data_maps()[0];
+        let rec2 = table.data_maps()[1];
+
+        assert_eq!(rec1.tag(), Tag::new(b"dlng"));
+        assert_eq!(rec2.tag(), Tag::new(b"slng"));
+        assert!(expect_script_lang_tags(
+            rec1.data(table.offset_data()).unwrap(),
+            &["en-latn", "latn"]
+        ));
+        assert!(expect_script_lang_tags(
+            rec2.data(table.offset_data()).unwrap(),
+            &["latn"]
+        ));
+    }
+}
diff --git a/resources/codegen_inputs/meta.rs b/resources/codegen_inputs/meta.rs
new file mode 100644
index 000000000..1077eb834
--- /dev/null
+++ b/resources/codegen_inputs/meta.rs
@@ -0,0 +1,38 @@
+#![parse_module(read_fonts::tables::meta)]
+
+/// [`meta`](https://docs.microsoft.com/en-us/typography/opentype/spec/meta)
+#[tag = "meta"]
+table Meta {
+    /// Version number of the metadata table — set to 1.
+    #[compile(1)]
+    version: u32,
+    /// Flags — currently unused; set to 0.
+    #[compile(0)]
+    flags: u32,
+    /// Not used; set to 0.
+    #[skip_getter]
+    #[compile(0)]
+    reserved: u32,
+    /// The number of data maps in the table.
+    #[compile(array_len($data_maps))]
+    data_maps_count: u32,
+    /// Array of data map records.
+    #[count($data_maps_count)]
+    data_maps: [DataMapRecord],
+}
+
+///
+#[skip_from_obj]
+record DataMapRecord {
+    /// A tag indicating the type of metadata.
+    tag: Tag,
+    /// Offset in bytes from the beginning of the metadata table to the data for this tag.
+    #[read_offset_with($tag, $data_length)]
+    #[traverse_with(skip)]
+    #[validate(validate_data_type)]
+    data_offset: Offset32<Metadata>,
+    /// Length of the data, in bytes. The data is not required to be padded to any byte boundary.
+    #[compile(self.compute_data_len())]
+    data_length: u32,
+}
+
diff --git a/resources/codegen_plan.toml b/resources/codegen_plan.toml
index ee74d746c..533c2ff1e 100644
--- a/resources/codegen_plan.toml
+++ b/resources/codegen_plan.toml
@@ -228,6 +228,16 @@ mode = "compile"
 source = "resources/codegen_inputs/maxp.rs"
 target = "write-fonts/generated/generated_maxp.rs"
 
+[[generate]]
+mode = "parse"
+source = "resources/codegen_inputs/meta.rs"
+target = "read-fonts/generated/generated_meta.rs"
+
+[[generate]]
+mode = "compile"
+source = "resources/codegen_inputs/meta.rs"
+target = "write-fonts/generated/generated_meta.rs"
+
 [[generate]]
 mode = "parse"
 source = "resources/codegen_inputs/stat.rs"
diff --git a/write-fonts/generated/generated_meta.rs b/write-fonts/generated/generated_meta.rs
new file mode 100644
index 000000000..1d883e9b9
--- /dev/null
+++ b/write-fonts/generated/generated_meta.rs
@@ -0,0 +1,113 @@
+// THIS FILE IS AUTOGENERATED.
+// Any changes to this file will be overwritten.
+// For more information about how codegen works, see font-codegen/README.md
+
+#[allow(unused_imports)]
+use crate::codegen_prelude::*;
+
+/// [`meta`](https://docs.microsoft.com/en-us/typography/opentype/spec/meta)
+#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct Meta {
+    /// Array of data map records.
+    pub data_maps: Vec<DataMapRecord>,
+}
+
+impl Meta {
+    /// Construct a new `Meta`
+    pub fn new(data_maps: Vec<DataMapRecord>) -> Self {
+        Self {
+            data_maps: data_maps.into_iter().map(Into::into).collect(),
+        }
+    }
+}
+
+impl FontWrite for Meta {
+    #[allow(clippy::unnecessary_cast)]
+    fn write_into(&self, writer: &mut TableWriter) {
+        (1 as u32).write_into(writer);
+        (0 as u32).write_into(writer);
+        (0 as u32).write_into(writer);
+        (array_len(&self.data_maps).unwrap() as u32).write_into(writer);
+        self.data_maps.write_into(writer);
+    }
+    fn table_type(&self) -> TableType {
+        TableType::TopLevel(Meta::TAG)
+    }
+}
+
+impl Validate for Meta {
+    fn validate_impl(&self, ctx: &mut ValidationCtx) {
+        ctx.in_table("Meta", |ctx| {
+            ctx.in_field("data_maps", |ctx| {
+                if self.data_maps.len() > (u32::MAX as usize) {
+                    ctx.report("array exceeds max length");
+                }
+                self.data_maps.validate_impl(ctx);
+            });
+        })
+    }
+}
+
+impl TopLevelTable for Meta {
+    const TAG: Tag = Tag::new(b"meta");
+}
+
+impl<'a> FromObjRef<read_fonts::tables::meta::Meta<'a>> for Meta {
+    fn from_obj_ref(obj: &read_fonts::tables::meta::Meta<'a>, _: FontData) -> Self {
+        let offset_data = obj.offset_data();
+        Meta {
+            data_maps: obj.data_maps().to_owned_obj(offset_data),
+        }
+    }
+}
+
+impl<'a> FromTableRef<read_fonts::tables::meta::Meta<'a>> for Meta {}
+
+impl<'a> FontRead<'a> for Meta {
+    fn read(data: FontData<'a>) -> Result<Self, ReadError> {
+        <read_fonts::tables::meta::Meta as FontRead>::read(data).map(|x| x.to_owned_table())
+    }
+}
+
+///
+#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct DataMapRecord {
+    /// A tag indicating the type of metadata.
+    pub tag: Tag,
+    /// Offset in bytes from the beginning of the metadata table to the data for this tag.
+    pub data: OffsetMarker<Metadata, WIDTH_32>,
+}
+
+impl DataMapRecord {
+    /// Construct a new `DataMapRecord`
+    pub fn new(tag: Tag, data: Metadata) -> Self {
+        Self {
+            tag,
+            data: data.into(),
+        }
+    }
+}
+
+impl FontWrite for DataMapRecord {
+    #[allow(clippy::unnecessary_cast)]
+    fn write_into(&self, writer: &mut TableWriter) {
+        self.tag.write_into(writer);
+        self.data.write_into(writer);
+        (self.compute_data_len() as u32).write_into(writer);
+    }
+    fn table_type(&self) -> TableType {
+        TableType::Named("DataMapRecord")
+    }
+}
+
+impl Validate for DataMapRecord {
+    fn validate_impl(&self, ctx: &mut ValidationCtx) {
+        ctx.in_table("DataMapRecord", |ctx| {
+            ctx.in_field("data", |ctx| {
+                self.validate_data_type(ctx);
+            });
+        })
+    }
+}
diff --git a/write-fonts/src/tables.rs b/write-fonts/src/tables.rs
index 1444b797d..5ec0b99fc 100644
--- a/write-fonts/src/tables.rs
+++ b/write-fonts/src/tables.rs
@@ -20,6 +20,7 @@ pub mod ift;
 pub mod layout;
 pub mod loca;
 pub mod maxp;
+pub mod meta;
 pub mod mvar;
 pub mod name;
 pub mod os2;
@@ -51,6 +52,7 @@ fn do_we_even_serde() {
         hvar: hvar::Hvar,
         loca: loca::Loca,
         maxp: maxp::Maxp,
+        meta: meta::Meta,
         name: name::Name,
         os2: os2::Os2,
         post: post::Post,
diff --git a/write-fonts/src/tables/meta.rs b/write-fonts/src/tables/meta.rs
new file mode 100644
index 000000000..96079d404
--- /dev/null
+++ b/write-fonts/src/tables/meta.rs
@@ -0,0 +1,171 @@
+//! The [meta (Metadata)](https://docs.microsoft.com/en-us/typography/opentype/spec/meta) table
+
+use std::fmt::Display;
+
+include!("../../generated/generated_meta.rs");
+
+pub const DLNG: Tag = Tag::new(b"dlng");
+pub const SLNG: Tag = Tag::new(b"slng");
+
+/// Metadata in the `meta` table, associated with some tag.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub enum Metadata {
+    /// For the 'dlng' and 'slng' tags
+    ScriptLangTags(Vec<ScriptLangTag>),
+    /// For other tags
+    Other(Vec<u8>),
+}
+
+/// A [`ScriptLangTag`] value.
+///
+/// This is currently just a string and we do not perform any validation,
+/// but we should do that (TK open issue)
+///
+/// [`ScriptLangTag`]: https://learn.microsoft.com/en-us/typography/opentype/spec/meta#scriptlangtag-values
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
+pub struct ScriptLangTag(String);
+
+/// An error for if a [`ScriptLangTag`] does not conform to the specification.
+#[derive(Clone, Debug)]
+#[non_exhaustive] // so we can flesh this out later without breaking anything
+pub struct InvalidScriptLangTag;
+
+impl ScriptLangTag {
+    /// Wraps `raw` without validating it; currently this always returns `Ok`.
+    pub fn new(raw: String) -> Result<Self, InvalidScriptLangTag> {
+        Ok(Self(raw))
+    }
+
+    /// Returns the tag as a string slice.
+    pub fn as_str(&self) -> &str {
+        self.0.as_str()
+    }
+}
+
+impl Display for InvalidScriptLangTag {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("ScriptLangTag was malformed")
+    }
+}
+
+impl std::error::Error for InvalidScriptLangTag {}
+
+impl DataMapRecord {
+    /// Reports a validation error when a 'slng'/'dlng' record holds `Metadata::Other`.
+    fn validate_data_type(&self, ctx: &mut ValidationCtx) {
+        if matches!(
+            (self.tag, self.data.as_ref()),
+            (SLNG | DLNG, Metadata::Other(_))
+        ) {
+            ctx.report("'slng' or 'dlng' tags use ScriptLangTag data");
+        }
+    }
+
+    /// Byte length of the data: for tag lists, the tag lengths plus one comma between tags.
+    fn compute_data_len(&self) -> usize {
+        match self.data.as_ref() {
+            Metadata::ScriptLangTags(items) => {
+                let sum_len: usize = items.iter().map(|tag| tag.as_str().len()).sum();
+                let toss_some_commas_in_there = items.len().saturating_sub(1);
+                sum_len + toss_some_commas_in_there
+            }
+            Metadata::Other(vec) => vec.len(),
+        }
+    }
+}
+
+impl FontWrite for Metadata {
+    fn write_into(&self, writer: &mut TableWriter) {
+        match self {
+            Metadata::ScriptLangTags(langs) => {
+                let mut first = true;
+                for lang in langs {
+                    if !first {
+                        b','.write_into(writer);
+                    }
+                    first = false;
+                    lang.0.as_bytes().write_into(writer);
+                }
+            }
+            Metadata::Other(vec) => {
+                vec.write_into(writer);
+            }
+        };
+    }
+}
+
+impl Validate for Metadata {
+    fn validate_impl(&self, _ctx: &mut ValidationCtx) {}
+}
+
+impl FromObjRef<read_fonts::tables::meta::Metadata<'_>> for Metadata {
+    fn from_obj_ref(from: &read_fonts::tables::meta::Metadata<'_>, _: FontData) -> Self {
+        match from {
+            read_fonts::tables::meta::Metadata::ScriptLangTags(var_len_array) => {
+                Self::ScriptLangTags(
+                    var_len_array
+                        .iter()
+                        .filter_map(|x| {
+                            x.ok()
+                                .and_then(|x| ScriptLangTag::new(x.as_str().into()).ok())
+                        })
+                        .collect(),
+                )
+            }
+            read_fonts::tables::meta::Metadata::Other(bytes) => Self::Other(bytes.to_vec()),
+        }
+    }
+}
+
+impl FromTableRef<read_fonts::tables::meta::Metadata<'_>> for Metadata {}
+
+// Note: This is required because of generated trait bounds, but we don't really
+// want to use it because we want our metadata to match our tag...
+impl Default for Metadata {
+    fn default() -> Self {
+        Metadata::ScriptLangTags(Vec::new())
+    }
+}
+
+impl FromObjRef<read_fonts::tables::meta::DataMapRecord> for DataMapRecord {
+    fn from_obj_ref(obj: &read_fonts::tables::meta::DataMapRecord, offset_data: FontData) -> Self {
+        let data = obj
+            .data(offset_data)
+            .map(|meta| meta.to_owned_table())
+            .unwrap_or_else(|_| match obj.tag() {
+                DLNG | SLNG => Metadata::ScriptLangTags(Vec::new()),
+                _ => Metadata::Other(Vec::new()),
+            });
+        DataMapRecord {
+            tag: obj.tag(),
+            data: OffsetMarker::new(data),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+    use font_test_data::meta as test_data;
+
+    #[test]
+    fn convert_from_read() {
+        let table = Meta::read(test_data::SIMPLE_META_TABLE.into()).unwrap();
+        let rec1 = &table.data_maps[0];
+        assert_eq!(
+            rec1.data.as_ref(),
+            &Metadata::ScriptLangTags(vec![
+                ScriptLangTag::new("en-latn".into()).unwrap(),
+                ScriptLangTag::new("latn".into()).unwrap()
+            ])
+        );
+
+        let round_trip = crate::dump_table(&table).unwrap();
+        let read_back = Meta::read(round_trip.as_slice().into()).unwrap();
+
+        assert_eq!(table, read_back);
+    }
+}