diff --git a/crates/pica-path/src/lib.rs b/crates/pica-path/src/lib.rs index 5692c8b84..516643c50 100644 --- a/crates/pica-path/src/lib.rs +++ b/crates/pica-path/src/lib.rs @@ -362,7 +362,7 @@ impl<'a> PathExt for RecordRef<'a> { .flat_map(FieldRef::subfields) .filter_map(|subfield| { if path.codes_flat().contains(subfield.code()) { - Some(subfield.value()) + Some(subfield.value().as_bstr()) } else { None } diff --git a/pica-record/src/error.rs b/pica-record/src/error.rs index c3ffda5a1..0bb592b87 100644 --- a/pica-record/src/error.rs +++ b/pica-record/src/error.rs @@ -5,6 +5,9 @@ use thiserror::Error; pub enum PicaError { #[error("'{0}' is not a valid subfield code.")] InvalidSubfieldCode(char), + + #[error("'{0}' is not a valid subfield value.")] + InvalidSubfieldValue(String), } /// -----{ TODO }----------------------------------------- diff --git a/pica-record/src/lib.rs b/pica-record/src/lib.rs index 881869eb6..04a6144d6 100644 --- a/pica-record/src/lib.rs +++ b/pica-record/src/lib.rs @@ -24,9 +24,17 @@ //! select). pub use error::PicaError; -pub use subfield::SubfieldCode; +pub use subfield::{SubfieldCode, SubfieldValue, SubfieldValueRef}; + +/// Parsers recognizing low-level primitives (e.g. subfield codes). +#[rustfmt::skip] +pub mod parser_v2 { + pub use super::subfield::parse_subfield_code; + pub use super::subfield::parse_subfield_value_ref; +} + +// -----{ TODO }----------------------------------------- -/// -----{ TODO }----------------------------------------- mod error; mod field; pub mod io; diff --git a/pica-record/src/subfield.rs b/pica-record/src/subfield.rs index 001e54448..dacb5652b 100644 --- a/pica-record/src/subfield.rs +++ b/pica-record/src/subfield.rs @@ -2,6 +2,9 @@ //! subfields. use std::fmt::{self, Display}; +use std::ops::Deref; + +use bstr::BStr; use crate::PicaError; @@ -122,6 +125,215 @@ pub fn parse_subfield_code(i: &mut &[u8]) -> PResult { .parse_next(i) } +/// An immutable PICA+ subfield value. 
+/// +/// This type behaves like a byte slice but guarantees that the subfield +/// value contains neither '\x1e' nor '\x1f'. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] +pub struct SubfieldValueRef<'a>(&'a [u8]); + +impl<'a> SubfieldValueRef<'a> { + /// Create a new subfield value reference from a byte slice. + /// + /// # Errors + /// + /// This function fails if the subfield value contains either the + /// field separator '\x1f' or the record separator '\x1e'. + /// + /// # Example + /// + /// ```rust + /// use pica_record::SubfieldValueRef; + /// + /// let value = SubfieldValueRef::new(b"abc")?; + /// assert_eq!(value, "abc"); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn new(value: &'a T) -> Result + where + T: AsRef<[u8]>, + { + let value = value.as_ref(); + if value.find_byteset(b"\x1f\x1e").is_some() { + return Err(PicaError::InvalidSubfieldValue( + value.to_str_lossy().to_string(), + )); + } + + Ok(Self(value)) + } + + /// Create a new subfield value reference from a byte slice without + /// checking for validity. + /// + /// # Safety + /// + /// The caller *must* ensure that the value neither contains the + /// record separator '\x1e' nor the field separator '\x1f'. 
+ /// +/// # Example + /// + /// ```rust + /// use pica_record::SubfieldValueRef; + /// + /// let value = SubfieldValueRef::from_unchecked("abc"); + /// assert_eq!(value, "abc"); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn from_unchecked(value: &'a T) -> Self + where + T: AsRef<[u8]> + ?Sized, + { + Self(value.as_ref()) + } +} + +impl<'a> Deref for SubfieldValueRef<'a> { + type Target = BStr; + + fn deref(&self) -> &Self::Target { + self.0.as_bstr() + } +} + +impl Display for SubfieldValueRef<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0.as_bstr()) + } +} + +impl PartialEq for SubfieldValueRef<'_> { + fn eq(&self, value: &str) -> bool { + self.0 == value.as_bytes() + } +} + +impl PartialEq<&str> for SubfieldValueRef<'_> { + fn eq(&self, value: &&str) -> bool { + self.0 == value.as_bytes() + } +} + +impl PartialEq> for SubfieldValueRef<'_> { + fn eq(&self, other: &Vec) -> bool { + self.0 == other + } +} + +/// Parse a PICA+ subfield value reference. +pub fn parse_subfield_value_ref<'a>( + i: &mut &'a [u8], +) -> PResult> { + take_till(0.., |c| c == b'\x1f' || c == b'\x1e') + .map(SubfieldValueRef) + .parse_next(i) +} + +/// A mutable PICA+ subfield value. +/// +/// This type behaves like a byte slice but guarantees that the subfield +/// value contains neither '\x1e' nor '\x1f'. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] +pub struct SubfieldValue(Vec); + +impl SubfieldValue { + /// Create a new subfield value from a byte slice. + /// + /// # Errors + /// + /// This function fails if the subfield value contains either the + /// field separator '\x1f' or the record separator '\x1e'. 
+ /// +/// # Example + /// + /// ```rust + /// use pica_record::SubfieldValue; + /// + /// let value = SubfieldValue::new(b"abc")?; + /// assert_eq!(value, "abc"); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn new(value: &T) -> Result + where + T: AsRef<[u8]>, + { + let value = value.as_ref(); + if value.find_byteset(b"\x1f\x1e").is_some() { + return Err(PicaError::InvalidSubfieldValue( + value.to_str_lossy().to_string(), + )); + } + + Ok(Self(value.to_vec())) + } + + /// Create a new subfield value from a byte slice without checking + /// for validity. + /// + /// # Safety + /// + /// The caller *must* ensure that the value neither contains the + /// record separator '\x1e' nor the field separator '\x1f'. + /// + /// # Example + /// + /// ```rust + /// use pica_record::SubfieldValue; + /// + /// let value = SubfieldValue::from_unchecked("abc"); + /// assert_eq!(value, "abc"); + /// + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn from_unchecked(value: &T) -> Self + where + T: AsRef<[u8]> + ?Sized, + { + Self(value.as_ref().to_vec()) + } +} + +impl Display for SubfieldValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0.as_bstr()) + } +} + +impl From> for SubfieldValue { + fn from(value: SubfieldValueRef<'_>) -> Self { + Self(value.to_vec()) + } +} + +impl PartialEq> for SubfieldValue { + fn eq(&self, other: &SubfieldValueRef<'_>) -> bool { + self.0 == other.0 + } +} + +impl PartialEq for SubfieldValueRef<'_> { + fn eq(&self, other: &SubfieldValue) -> bool { + self.0 == other.0 + } +} + +impl PartialEq<&str> for SubfieldValue { + fn eq(&self, other: &&str) -> bool { + self.0 == other.as_bytes() + } +} + +#[cfg(feature = "arbitrary")] +impl quickcheck::Arbitrary for SubfieldValue { + fn arbitrary(g: &mut quickcheck::Gen) -> Self { + let value = String::arbitrary(g).replace(['\x1f', '\x1e'], ""); + Self::from_unchecked(&value) + } +} + #[cfg(test)] mod tests { use super::*; @@ -202,7 +414,7 @@ use std::io::{self, Write}; use 
std::iter; use std::str::Utf8Error; -use bstr::{BStr, BString, ByteSlice}; +use bstr::ByteSlice; use winnow::combinator::preceded; use winnow::token::{one_of, take_till}; use winnow::{PResult, Parser}; @@ -213,22 +425,14 @@ use crate::error::ParsePicaError; #[derive(Debug, Clone, PartialEq, Eq)] pub struct SubfieldRef<'a> { code: SubfieldCode, - value: &'a BStr, + value: SubfieldValueRef<'a>, } /// A mutable PICA+ subfield. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Subfield { code: SubfieldCode, - value: BString, -} - -/// Parse a PICA+ subfield value. -#[inline] -fn parse_subfield_value<'a>(i: &mut &'a [u8]) -> PResult<&'a BStr> { - take_till(0.., |c| c == b'\x1f' || c == b'\x1e') - .map(ByteSlice::as_bstr) - .parse_next(i) + value: SubfieldValue, } /// Parse a PICA+ subfield. @@ -236,7 +440,7 @@ fn parse_subfield_value<'a>(i: &mut &'a [u8]) -> PResult<&'a BStr> { pub(crate) fn parse_subfield<'a>( i: &mut &'a [u8], ) -> PResult> { - preceded(b'\x1f', (parse_subfield_code, parse_subfield_value)) + preceded(b'\x1f', (parse_subfield_code, parse_subfield_value_ref)) .map(|(code, value)| SubfieldRef { code, value }) .parse_next(i) } @@ -329,8 +533,8 @@ impl<'a> SubfieldRef<'a> { /// Ok(()) /// } /// ``` - pub fn value(&self) -> &BStr { - self.value + pub fn value(&self) -> &SubfieldValueRef { + &self.value } /// Returns true if the subfield value is empty. @@ -380,7 +584,7 @@ impl<'a> SubfieldRef<'a> { return Ok(()); } - std::str::from_utf8(self.value)?; + std::str::from_utf8(&self.value)?; Ok(()) } @@ -458,7 +662,7 @@ where Ok(Self { code: SubfieldCode(code), - value, + value: SubfieldValueRef::from_unchecked(value), }) } } @@ -521,12 +725,9 @@ impl From> for Subfield { #[cfg(feature = "arbitrary")] impl quickcheck::Arbitrary for Subfield { fn arbitrary(g: &mut quickcheck::Gen) -> Self { - let value = - String::arbitrary(g).replace(['\x1f', '\x1e'], "").into(); - Self { code: SubfieldCode::arbitrary(g), - value, + value: SubfieldValue::arbitrary(g), } } }