Skip to content

Commit

Permalink
Add basic custom types for the 'meta' table
Browse files Browse the repository at this point in the history
This adds types and the required impls that can represent the expected
contents of the 'meta' table.

This has to be handwritten, because the 'meta' table contains different
payloads depending on the included tag for each record.

I think having these as real types will provide a significantly better
API though, and avoid us writing invalid data.

The actual implementations for these types are very spartan; eventually
I imagine having parsing & constructing code for the ScriptLangTag
struct that ensures it conforms to the spec.
  • Loading branch information
cmyr committed Nov 14, 2024
1 parent ef7f1f2 commit 122c333
Show file tree
Hide file tree
Showing 5 changed files with 221 additions and 45 deletions.
16 changes: 11 additions & 5 deletions read-fonts/generated/generated_meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ impl<'a> std::fmt::Debug for Meta<'a> {
}
}

/// https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats
/// <https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats>
#[derive(Clone, Debug, Copy, bytemuck :: AnyBitPattern)]
#[repr(C)]
#[repr(packed)]
Expand All @@ -138,6 +138,15 @@ impl DataMapRecord {
self.data_offset.get()
}

/// Resolves this record's `data_offset` and reads the [`Metadata`] it points to.
///
/// The record's tag and data length are passed along as the read arguments.
///
/// The `data` argument should be retrieved from the parent table
/// by calling its `offset_data` method.
pub fn data<'a>(&self, data: FontData<'a>) -> Result<Metadata<'a>, ReadError> {
    self.data_offset()
        .resolve_with_args(data, &(self.tag(), self.data_length()))
}

/// Length of the data, in bytes. The data is not required to be padded to any byte boundary.
pub fn data_length(&self) -> u32 {
self.data_length.get()
Expand All @@ -155,10 +164,7 @@ impl<'a> SomeRecord<'a> for DataMapRecord {
name: "DataMapRecord",
get_field: Box::new(move |idx, _data| match idx {
0usize => Some(Field::new("tag", self.tag())),
1usize => Some(Field::new(
"data_offset",
FieldType::offset_to_array_of_scalars(self.data_offset(), self.data(_data)),
)),
1usize => Some(Field::new("data_offset", traversal::FieldType::Unknown)),
2usize => Some(Field::new("data_length", self.data_length())),
_ => None,
}),
Expand Down
80 changes: 71 additions & 9 deletions read-fonts/src/tables/meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,76 @@

include!("../../generated/generated_meta.rs");

impl DataMapRecord {
/// The data under this record, interpreted from length and offset.
pub fn data<'a>(&self, data: FontData<'a>) -> Result<&'a [u8], ReadError> {
let start = self.data_offset().to_usize();
let end = start + self.data_length() as usize;

data.as_bytes()
.get(start..end)
.ok_or(ReadError::OutOfBounds)
/// The `dlng` tag (design languages, per the OpenType `meta` spec).
///
/// Records with this tag are read as [`Metadata::ScriptLangTags`].
pub const DLNG: Tag = Tag::new(b"dlng");
/// The `slng` tag (supported languages, per the OpenType `meta` spec).
///
/// Records with this tag are read as [`Metadata::ScriptLangTags`].
pub const SLNG: Tag = Tag::new(b"slng");

/// Data stored in the 'meta' table.
///
/// Which variant is produced when reading depends on the tag of the record
/// that points at the data.
pub enum Metadata<'a> {
/// Used for the 'dlng' and 'slng' metadata
ScriptLangTags(VarLenArray<'a, LangScriptTag<'a>>),
/// Other metadata, which may exist in certain Apple fonts
Other(&'a [u8]),
}

// Reading `Metadata` needs context from the record that points at it.
impl ReadArgs for Metadata<'_> {
// (tag of the record, length of the data in bytes)
type Args = (Tag, u32);
}

impl<'a> FontReadWithArgs<'a> for Metadata<'a> {
    /// Reads the metadata payload for a record.
    ///
    /// `args` is `(tag, length in bytes)`. The first `length` bytes of `data`
    /// form the payload; if they cannot be sliced, `ReadError::OutOfBounds` is
    /// returned. 'dlng' and 'slng' payloads are read as an array of tags, any
    /// other tag yields the raw bytes.
    fn read_with_args(data: FontData<'a>, args: &Self::Args) -> Result<Self, ReadError> {
        let &(tag, len) = args;
        let payload = match data.slice(0..len as usize) {
            Some(payload) => payload,
            None => return Err(ReadError::OutOfBounds),
        };
        if tag == DLNG || tag == SLNG {
            return VarLenArray::read(payload).map(Metadata::ScriptLangTags);
        }
        Ok(Metadata::Other(payload.as_bytes()))
    }
}

/// A single tag from 'dlng' or 'slng' metadata, borrowed from the font data.
///
/// This is a thin wrapper around a string slice; no validation of the tag's
/// contents is performed by this type.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LangScriptTag<'a>(&'a str);

impl<'a> LangScriptTag<'a> {
    /// Returns the tag as a string slice that lives as long as the underlying data.
    pub fn as_str(&self) -> &'a str {
        self.0
    }
}

impl AsRef<str> for LangScriptTag<'_> {
    /// Borrows the tag's text.
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}

#[cfg(feature = "std")]
impl From<LangScriptTag<'_>> for String {
    /// Copies the tag's text into an owned `String`.
    fn from(value: LangScriptTag<'_>) -> Self {
        String::from(value.0)
    }
}

impl VarSize for LangScriptTag<'_> {
    type Size = u32;

    /// Returns the length in bytes of the item starting at `pos`.
    ///
    /// An item runs up to and including the next `,`; the final item runs to
    /// the end of the data. Returns `None` if `data` cannot be split at `pos`
    /// or if no bytes remain there.
    fn read_len_at(data: FontData, pos: usize) -> Option<usize> {
        let bytes = data.split_off(pos)?.as_bytes();
        if bytes.is_empty() {
            return None;
        }
        // Search only the bytes that remain after `pos`. Searching `data` from
        // its start would find the first comma of the whole payload and report
        // a wrong length for every item after the first.
        let len = bytes
            .iter()
            .position(|b| *b == b',')
            .map_or(bytes.len(), |comma| comma + 1); // include comma
        Some(len)
    }
}

impl<'a> FontRead<'a> for LangScriptTag<'a> {
    /// Interprets `data` as UTF-8 and strips any leading or trailing commas.
    ///
    /// Returns `ReadError::MalformedData` if the bytes are not valid UTF-8.
    fn read(data: FontData<'a>) -> Result<Self, ReadError> {
        match std::str::from_utf8(data.as_bytes()) {
            Ok(text) => Ok(LangScriptTag(text.trim_matches(','))),
            Err(_) => Err(ReadError::MalformedData("LangScriptTag must be utf8")),
        }
    }
}
11 changes: 7 additions & 4 deletions resources/codegen_inputs/meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,18 @@ table Meta {
data_maps: [DataMapRecord],
}

/// https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats
/// <https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats>
#[skip_from_obj]
record DataMapRecord {
/// A tag indicating the type of metadata.
tag: Tag,
/// Offset in bytes from the beginning of the metadata table to the data for this tag.
#[offset_getter(data)]
#[compile_with(compile_map_value)]
data_offset: Offset32<[u8]>,
#[read_offset_with($tag, $data_length)]
#[traverse_with(skip)]
#[validate(validate_data_type)]
data_offset: Offset32<Metadata>,
/// Length of the data, in bytes. The data is not required to be padded to any byte boundary.
#[compile(skip)]
data_length: u32,
}

23 changes: 10 additions & 13 deletions write-fonts/generated/generated_meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,19 +70,19 @@ impl<'a> FontRead<'a> for Meta {
}
}

/// https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats
/// <https://learn.microsoft.com/en-us/typography/opentype/spec/meta#table-formats>
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DataMapRecord {
/// A tag indicating the type of metadata.
pub tag: Tag,
/// Offset in bytes from the beginning of the metadata table to the data for this tag.
pub data: OffsetMarker<Vec<u8>, WIDTH_32>,
pub data: OffsetMarker<Metadata, WIDTH_32>,
}

impl DataMapRecord {
/// Construct a new `DataMapRecord`
pub fn new(tag: Tag, data: Vec<u8>) -> Self {
pub fn new(tag: Tag, data: Metadata) -> Self {
Self {
tag,
data: data.into(),
Expand All @@ -94,22 +94,19 @@ impl FontWrite for DataMapRecord {
#[allow(clippy::unnecessary_cast)]
fn write_into(&self, writer: &mut TableWriter) {
self.tag.write_into(writer);
(self.compile_map_value()).write_into(writer);
self.data.write_into(writer);
}
fn table_type(&self) -> TableType {
TableType::Named("DataMapRecord")
}
}

impl Validate for DataMapRecord {
fn validate_impl(&self, _ctx: &mut ValidationCtx) {}
}

impl FromObjRef<read_fonts::tables::meta::DataMapRecord> for DataMapRecord {
fn from_obj_ref(obj: &read_fonts::tables::meta::DataMapRecord, offset_data: FontData) -> Self {
DataMapRecord {
tag: obj.tag(),
data: obj.data(offset_data).to_owned_obj(offset_data),
}
fn validate_impl(&self, ctx: &mut ValidationCtx) {
ctx.in_table("DataMapRecord", |ctx| {
ctx.in_field("data", |ctx| {
self.validate_data_type(ctx);
});
})
}
}
136 changes: 122 additions & 14 deletions write-fonts/src/tables/meta.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,137 @@
//! The [meta (Metadata)](https://docs.microsoft.com/en-us/typography/opentype/spec/meta) table

use std::fmt::Display;

include!("../../generated/generated_meta.rs");

impl DataMapRecord {
/// Required to append a variable length slice of bytes at the end of the
/// table, referenced by length and offset in this record.
fn compile_map_value(&self) -> MapValueAndLenWriter {
MapValueAndLenWriter(self.data.as_slice())
/// The `dlng` tag (design languages, per the OpenType `meta` spec).
///
/// Records with this tag are expected to hold [`Metadata::ScriptLangTags`].
pub const DLNG: Tag = Tag::new(b"dlng");
/// The `slng` tag (supported languages, per the OpenType `meta` spec).
///
/// Records with this tag are expected to hold [`Metadata::ScriptLangTags`].
pub const SLNG: Tag = Tag::new(b"slng");

/// Metadata in the `meta` table, associated with some tag.
///
/// The variant is expected to agree with the tag of the owning record:
/// validation reports an error when a 'slng' or 'dlng' record holds
/// [`Metadata::Other`].
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Metadata {
/// For the 'dlng' and 'slng' tags
ScriptLangTags(Vec<ScriptLangTag>),
/// For other tags
Other(Vec<u8>),
}

/// A [`ScriptLangTag`] value.
///
/// For now this only wraps a string; the contents are not validated yet.
/// TODO: open an issue to track adding validation.
///
/// [`ScriptLangTag`]: https://learn.microsoft.com/en-us/typography/opentype/spec/meta#scriptlangtag-values
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ScriptLangTag(String);

/// The error returned when a [`ScriptLangTag`] does not conform to the specification.
#[derive(Clone, Debug)]
#[non_exhaustive] // so we can flesh this out later without breaking anything
pub struct InvalidScriptLangTag;

impl ScriptLangTag {
    /// Wraps `raw` as a tag.
    ///
    /// No validation is performed yet, so this currently always returns `Ok`.
    pub fn new(raw: String) -> Result<Self, InvalidScriptLangTag> {
        Ok(ScriptLangTag(raw))
    }

    /// Returns the tag's text.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

struct MapValueAndLenWriter<'a>(&'a [u8]);
impl Display for InvalidScriptLangTag {
    /// Writes a fixed, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const MESSAGE: &str = "ScriptLangTag was malformed";
        f.write_str(MESSAGE)
    }
}

impl FontWrite for MapValueAndLenWriter<'_> {
fn write_into(&self, writer: &mut TableWriter) {
let length = u32::try_from(self.0.len()).expect("meta record data too long: exceeds u32");
// Lets `InvalidScriptLangTag` be used as a standard error type; no methods are overridden.
impl std::error::Error for InvalidScriptLangTag {}

writer.write_offset(&self.0, 4);
length.write_into(writer);
impl DataMapRecord {
    /// Reports a validation error when a 'slng' or 'dlng' record carries
    /// [`Metadata::Other`] instead of script/language tags.
    fn validate_data_type(&self, ctx: &mut ValidationCtx) {
        let expects_lang_tags = matches!(self.tag, SLNG | DLNG);
        let holds_raw_bytes = matches!(self.data.as_ref(), Metadata::Other(_));
        if expects_lang_tags && holds_raw_bytes {
            ctx.report("'slng' or 'dlng' tags use ScriptLangTag data");
        }
    }
}

// TODO: is this necessary?
impl FontWrite for &[u8] {
impl FontWrite for Metadata {
    /// Writes the metadata payload followed by its length as a `u32`.
    ///
    /// Script/language tags are written comma-separated with no trailing
    /// comma; other metadata is written as raw bytes.
    ///
    /// NOTE(review): the length is written into the same writer, directly
    /// after the payload. Confirm this is where the record's `data_length`
    /// is expected to land.
    ///
    /// # Panics
    ///
    /// Panics if the payload is longer than `u32::MAX` bytes.
    fn write_into(&self, writer: &mut TableWriter) {
        let len = match self {
            Metadata::ScriptLangTags(langs) => {
                let mut len = 0usize;
                for (idx, lang) in langs.iter().enumerate() {
                    // Separate on item position, not on bytes written so far,
                    // so an empty leading tag still gets its separator.
                    if idx > 0 {
                        b','.write_into(writer);
                        len += 1;
                    }
                    let bytes = lang.0.as_bytes();
                    bytes.write_into(writer);
                    len += bytes.len();
                }
                len
            }
            Metadata::Other(vec) => {
                vec.write_into(writer);
                vec.len()
            }
        };

        let len = u32::try_from(len).expect("meta record data too long: exceeds u32");
        len.write_into(writer);
    }
}

// `Metadata` performs no validation of its own (the body below is empty).
// Agreement between a record's tag and its data is checked by
// `DataMapRecord::validate_data_type`.
impl Validate for Metadata {
fn validate_impl(&self, _ctx: &mut ValidationCtx) {}
}

impl FromObjRef<read_fonts::tables::meta::Metadata<'_>> for Metadata {
fn from_obj_ref(from: &read_fonts::tables::meta::Metadata<'_>, _: FontData) -> Self {
match from {
read_fonts::tables::meta::Metadata::ScriptLangTags(var_len_array) => {
Self::ScriptLangTags(
var_len_array
.iter()
.flat_map(|x| {
x.ok()
.and_then(|x| ScriptLangTag::new(x.as_str().into()).ok())
})
.collect(),
)
}
read_fonts::tables::meta::Metadata::Other(bytes) => Self::Other(bytes.to_vec()),
}
}
}

// Marker impl with no overridden items.
// NOTE(review): presumably this is what makes `to_owned_table` available on
// the read-side `Metadata` — confirm against the trait definition.
impl FromTableRef<read_fonts::tables::meta::Metadata<'_>> for Metadata {}

// Note: This is required because of generated trait bounds, but we don't really
// want to use it because we want our metadata to match our tag...
impl Default for Metadata {
fn default() -> Self {
Metadata::ScriptLangTags(Vec::new())
}
}

impl FromObjRef<read_fonts::tables::meta::DataMapRecord> for DataMapRecord {
    /// Builds an owned record from its read-side counterpart.
    ///
    /// If the record's data cannot be read, an empty value of the variant that
    /// matches the tag is used instead of failing.
    fn from_obj_ref(obj: &read_fonts::tables::meta::DataMapRecord, offset_data: FontData) -> Self {
        let tag = obj.tag();
        let data = match obj.data(offset_data) {
            Ok(meta) => meta.to_owned_table(),
            Err(_) => match tag {
                DLNG | SLNG => Metadata::ScriptLangTags(Vec::new()),
                _ => Metadata::Other(Vec::new()),
            },
        };
        DataMapRecord {
            tag,
            data: OffsetMarker::new(data),
        }
    }
}

0 comments on commit 122c333

Please sign in to comment.