Skip to content

Commit

Permalink
Refactor tag matcher (#831)
Browse files Browse the repository at this point in the history
Signed-off-by: Nico Wagner <[email protected]>
  • Loading branch information
nwagner84 authored Sep 19, 2024
1 parent 6fbe186 commit f12ab91
Show file tree
Hide file tree
Showing 5 changed files with 258 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pub use subfield::{
RegexSetMatcher, RelationMatcher, SingletonMatcher,
SubfieldMatcher,
};
pub use tag::TagMatcher;

mod error;
mod occurrence;
Expand All @@ -19,3 +20,4 @@ mod parse;
mod quantifier;
mod string;
mod subfield;
mod tag;
1 change: 1 addition & 0 deletions src/matcher/occurrence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use super::parse::parse_occurrence_matcher;
use super::ParseMatcherError;
use crate::primitives::{Occurrence, OccurrenceRef};

/// A matcher that checks for occurrences (or no occurrence).
#[derive(Debug, Clone, PartialEq)]
pub enum OccurrenceMatcher {
Exact(Occurrence),
Expand Down
1 change: 1 addition & 0 deletions src/matcher/subfield.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use super::{
use crate::matcher::parse::parse_exists_matcher;
use crate::primitives::{SubfieldCode, SubfieldRef};

/// A matcher that checks for the existence of subfields.
#[derive(Debug, Clone, PartialEq)]
pub struct ExistsMatcher {
pub(crate) codes: SmallVec<[SubfieldCode; 4]>,
Expand Down
204 changes: 204 additions & 0 deletions src/matcher/tag.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
use std::fmt::{self, Display};

use bstr::ByteSlice;
use winnow::combinator::{alt, delimited, repeat, separated_pair};
use winnow::error::ParserError;
use winnow::prelude::*;
use winnow::token::one_of;

use super::ParseMatcherError;
use crate::primitives::parse::parse_tag_ref;
use crate::primitives::{Tag, TagRef};

/// A matcher that matches against a TagRef.
///
/// A matcher is either an exact tag or a pattern that lists, for each
/// of the four tag positions, the bytes accepted at that position.
#[derive(Debug, Clone, PartialEq)]
pub enum TagMatcher {
/// Matches a tag that compares equal to the contained [Tag].
Tag(Tag),
/// Matches a tag position by position: the byte at position `n` of
/// the tag must be contained in the `n`-th vector. The string holds
/// the expression the pattern was parsed from and is used when the
/// matcher is displayed.
Pattern([Vec<u8>; 4], String),
}

impl TagMatcher {
    /// Parses the given expression into a [TagMatcher].
    ///
    /// # Errors
    ///
    /// Returns a [ParseMatcherError] if the expression cannot be
    /// parsed as a tag matcher.
    ///
    /// # Example
    ///
    /// ```rust
    /// use pica_record::matcher::TagMatcher;
    ///
    /// let _matcher = TagMatcher::new("041[A@]")?;
    /// let _matcher = TagMatcher::new("003@")?;
    /// let _matcher = TagMatcher::new("00.@")?;
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn new(matcher: &str) -> Result<Self, ParseMatcherError> {
        match parse_tag_matcher.parse(matcher.as_bytes()) {
            Ok(parsed) => Ok(parsed),
            // The parser error is dropped on purpose; callers only get
            // the offending expression back.
            Err(_) => Err(ParseMatcherError(format!(
                "invalid tag matcher '{matcher}'"
            ))),
        }
    }

    /// Checks whether the given tag is accepted by this matcher.
    ///
    /// An exact matcher accepts a tag that compares equal to it. A
    /// pattern matcher accepts a tag if, at each of the four
    /// positions, the tag's byte is one of the bytes listed for that
    /// position.
    ///
    /// # Example
    ///
    /// ```rust
    /// use pica_record::matcher::TagMatcher;
    /// use pica_record::primitives::TagRef;
    ///
    /// let matcher = TagMatcher::new("00[3-5]@")?;
    /// assert!(!matcher.is_match(&TagRef::new("002@")?));
    /// assert!(matcher.is_match(&TagRef::new("003@")?));
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    pub fn is_match(&self, tag: &TagRef) -> bool {
        match self {
            Self::Tag(expected) => expected == tag,
            // `all` stops at the first position that does not match,
            // just like a chain of `&&` would.
            Self::Pattern(fragments, _) => fragments
                .iter()
                .enumerate()
                .all(|(pos, allowed)| allowed.contains(&tag[pos])),
        }
    }
}

impl Display for TagMatcher {
    /// Writes the matcher in its textual form.
    ///
    /// An exact matcher is written as its tag; a pattern matcher is
    /// written as the expression it was parsed from.
    ///
    /// # Example
    ///
    /// ```rust
    /// use pica_record::matcher::TagMatcher;
    ///
    /// let matcher = TagMatcher::new("00[3-5]@")?;
    /// assert_eq!(matcher.to_string(), "00[3-5]@");
    ///
    /// let matcher = TagMatcher::new("003@")?;
    /// assert_eq!(matcher.to_string(), "003@");
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Tag(tag) => write!(f, "{tag}"),
            // The stored expression is emitted verbatim.
            Self::Pattern(_, expression) => f.write_str(expression),
        }
    }
}

/// Parses an exact tag and wraps it in [TagMatcher::Tag].
#[inline]
fn parse_tag_matcher_tag(i: &mut &[u8]) -> PResult<TagMatcher> {
    // Convert the borrowed tag into an owned one so that the matcher
    // does not borrow from the input.
    parse_tag_ref
        .parse_next(i)
        .map(|tag_ref| TagMatcher::Tag(Tag::from(tag_ref)))
}

/// Builds a parser for a single position of a tag pattern.
///
/// `allowed` is the set of bytes accepted at this position. The
/// returned parser yields the bytes a tag may contain at the position
/// and understands three notations:
///
/// - a single byte from `allowed`, yielding just that byte,
/// - a `.`, yielding a copy of all `allowed` bytes,
/// - a group in square brackets with at least one item, where an item
///   is either a range `min-max` or a single byte from `allowed`; the
///   bytes of all items are concatenated in the order they appear.
///
/// Both endpoints of a range must be contained in `allowed` and `min`
/// must be strictly less than `max`.
fn parse_tag_matcher_pattern_fragment<'a, E: ParserError<&'a [u8]>>(
allowed: &[u8],
) -> impl Parser<&'a [u8], Vec<u8>, E> + '_ {
move |i: &mut &'a [u8]| {
alt((
// A literal byte stands only for itself.
one_of(|c: u8| allowed.contains(&c)).map(|c| vec![c]),
// The wildcard stands for every allowed byte.
'.'.value(allowed.to_vec()),
// A bracketed group of ranges and/or single bytes.
delimited(
'[',
repeat(
1..,
alt((
// The range alternative is tried first, so that the
// leading byte of `a-b` is not taken as a single byte.
separated_pair(
one_of(|c| allowed.contains(&c)),
'-',
one_of(|c| allowed.contains(&c)),
)
// Empty (`b-a`) and single-element (`a-a`) ranges are
// rejected.
.verify(|(min, max)| min < max)
// Expands to every byte value from `min` to `max`
// inclusive; the values in between are not checked
// against `allowed`.
.map(|(min, max)| (min..=max).collect()),
one_of(|c| allowed.contains(&c))
.map(|c| vec![c]),
)),
)
// Concatenates the items; duplicates are kept as they are.
.fold(Vec::new, |mut acc, item| {
acc.extend(&item);
acc
}),
']',
),
))
.parse_next(i)
}
}

/// Parses a four-position tag pattern into [TagMatcher::Pattern].
///
/// Each position is parsed by its own fragment parser; the consumed
/// input is stored alongside the fragments as the pattern's textual
/// form.
#[inline]
fn parse_tag_matcher_pattern(i: &mut &[u8]) -> PResult<TagMatcher> {
    // Byte sets accepted at the first, the two middle and the last
    // position of a tag.
    const FIRST: &[u8] = b"012";
    const MIDDLE: &[u8] = b"0123456789";
    const LAST: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ@";

    let mut pattern = (
        parse_tag_matcher_pattern_fragment(FIRST),
        parse_tag_matcher_pattern_fragment(MIDDLE),
        parse_tag_matcher_pattern_fragment(MIDDLE),
        parse_tag_matcher_pattern_fragment(LAST),
    )
        .with_taken()
        .map(|(fragments, taken)| {
            let (p0, p1, p2, p3) = fragments;
            let raw_data = taken.to_str().unwrap().to_string();
            TagMatcher::Pattern([p0, p1, p2, p3], raw_data)
        });

    pattern.parse_next(i)
}

/// Parses a tag matcher, which is either an exact tag or a pattern.
///
/// The exact-tag parser is tried first; the pattern parser is only
/// consulted if the former fails.
fn parse_tag_matcher(i: &mut &[u8]) -> PResult<TagMatcher> {
    let mut matcher =
        alt((parse_tag_matcher_tag, parse_tag_matcher_pattern));

    matcher.parse_next(i)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_tag_matcher() -> anyhow::Result<()> {
        // Runs the parser over the complete input and panics if the
        // input is rejected.
        fn parsed(input: &str) -> TagMatcher {
            parse_tag_matcher.parse(input.as_bytes()).unwrap()
        }

        // Plain tags end up in the `Tag` variant.
        for tag in ["003@", "002@"] {
            assert_eq!(parsed(tag), TagMatcher::Tag(Tag::new(tag)?));
        }

        // Wildcard in the first and a range in the third position.
        assert_eq!(
            parsed(".0[2-4]@"),
            TagMatcher::Pattern(
                [
                    b"012".to_vec(),
                    b"0".to_vec(),
                    b"234".to_vec(),
                    b"@".to_vec(),
                ],
                String::from(".0[2-4]@"),
            )
        );

        // A group that enumerates single bytes.
        assert_eq!(
            parsed("00[23]@"),
            TagMatcher::Pattern(
                [
                    b"0".to_vec(),
                    b"0".to_vec(),
                    b"23".to_vec(),
                    b"@".to_vec(),
                ],
                String::from("00[23]@"),
            )
        );

        Ok(())
    }
}
50 changes: 50 additions & 0 deletions src/primitives/tag.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use std::fmt::{self, Display};
use std::io::{self, Write};
use std::ops::Index;

use bstr::{BStr, BString};
#[cfg(feature = "serde")]
Expand Down Expand Up @@ -138,6 +140,35 @@ impl<'a> TagRef<'a> {
}
}

impl Index<usize> for TagRef<'_> {
    type Output = u8;

    /// Returns the byte at the given zero-based position of a
    /// [TagRef].
    ///
    /// # Panics
    ///
    /// This function panics if the position is out of bounds, i.e.
    /// greater than three for a four-byte tag such as `003@`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use pica_record::primitives::TagRef;
    ///
    /// let tag = TagRef::new("003@")?;
    /// assert_eq!(tag[0], b'0');
    /// assert_eq!(tag[1], b'0');
    /// assert_eq!(tag[2], b'3');
    /// assert_eq!(tag[3], b'@');
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    fn index(&self, index: usize) -> &Self::Output {
        let bytes = &self.0;
        // Only checked in debug builds; the indexing below panics on
        // an out-of-bounds position in every build.
        debug_assert!(index < bytes.len());
        &bytes[index]
    }
}

impl PartialEq<&str> for TagRef<'_> {
/// Compare a [TagRef] with a string slice.
///
Expand Down Expand Up @@ -320,6 +351,25 @@ impl PartialEq<Tag> for TagRef<'_> {
}
}

impl Display for Tag {
    /// Writes the tag in its textual form.
    ///
    /// # Example
    ///
    /// ```rust
    /// use pica_record::primitives::Tag;
    ///
    /// let tag = Tag::new("003@")?;
    /// assert_eq!(tag.to_string(), "003@");
    ///
    /// # Ok::<(), Box<dyn std::error::Error>>(())
    /// ```
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegates to the `Display` implementation of the wrapped
        // value.
        let inner = &self.0;
        write!(f, "{inner}")
    }
}

#[cfg(test)]
impl quickcheck::Arbitrary for Tag {
fn arbitrary(g: &mut quickcheck::Gen) -> Self {
Expand Down

0 comments on commit f12ab91

Please sign in to comment.