diff --git a/examples/publicapi.rs b/examples/publicapi.rs index 1864d9f..6ebb21c 100644 --- a/examples/publicapi.rs +++ b/examples/publicapi.rs @@ -17,7 +17,7 @@ fn main() { { continue; } - println!(" {} {}", field.name, field.descriptor); + println!(" {} {:?}", field.name, field.descriptor); } for method in class.methods { if method @@ -26,7 +26,7 @@ fn main() { { continue; } - println!(" {} {}", method.name, method.descriptor); + println!(" {} {:?}", method.name, method.descriptor); } } Err(e) => eprintln!("Error: {} when parsing {:?}", e, arg), diff --git a/src/attributes.rs b/src/attributes.rs index a8d1e5a..c320f57 100644 --- a/src/attributes.rs +++ b/src/attributes.rs @@ -11,8 +11,8 @@ use crate::constant_pool::{ use crate::constant_pool::{ BootstrapArgument, ConstantPoolEntry, LiteralConstant, MethodHandle, NameAndType, }; -use crate::descriptor::FieldType; -use crate::names::{is_return_descriptor, is_unqualified_name}; +use crate::descriptors::{is_return_descriptor, parse_field_descriptor, FieldDescriptor}; +use crate::names::is_unqualified_name; use crate::{read_u1, read_u2, read_u4, AccessFlags, CafeRc, ParseError, ParseOptions}; #[derive(Debug)] @@ -104,7 +104,7 @@ pub struct LocalVariableEntry<'a> { pub start_pc: u16, pub length: u16, pub name: Cow<'a, str>, - pub descriptor: FieldType<'a>, + pub descriptor: FieldDescriptor<'a>, pub index: u16, } @@ -129,7 +129,7 @@ pub enum AnnotationElementValue<'a> { BooleanConstant(i32), StringConstant(Cow<'a, str>), EnumConstant { - type_name: FieldType<'a>, + type_name: FieldDescriptor<'a>, const_name: Cow<'a, str>, }, ClassLiteral { @@ -147,7 +147,7 @@ pub struct AnnotationElement<'a> { #[derive(Debug)] pub struct Annotation<'a> { - pub type_descriptor: FieldType<'a>, + pub type_descriptor: FieldDescriptor<'a>, pub elements: Vec>, } @@ -309,7 +309,7 @@ pub struct ModuleData<'a> { #[derive(Debug)] pub struct RecordComponentEntry<'a> { pub name: Cow<'a, str>, - pub descriptor: FieldType<'a>, + pub descriptor: FieldDescriptor<'a>, pub attributes: Vec>, } @@ -602,7 +602,7 @@ fn read_localvariable_data<'a>( fail!("Invalid unqualified name for variable {}", i); } let descriptor = read_cp_utf8(bytes, ix, pool) - .and_then(|descriptor| FieldType::parse(&descriptor)) + .and_then(|descriptor| parse_field_descriptor(&descriptor, 0)) .map_err(|e| err!(e, "descriptor for variable {}", i))?; let index = read_u2(bytes, ix)?; localvariables.push(LocalVariableEntry { @@ -661,7 +661,7 @@ fn read_annotation_element_value<'a>( 's' => AnnotationElementValue::StringConstant(read_cp_utf8(bytes, ix, pool)?), 'e' => { let type_name = read_cp_utf8(bytes, ix, pool) - .and_then(|descriptor| FieldType::parse(&descriptor)) + .and_then(|descriptor| parse_field_descriptor(&descriptor, 0)) .map_err(|e| err!(e, "annotation element value enum descriptor"))?; let const_name = read_cp_utf8(bytes, ix, pool)?; AnnotationElementValue::EnumConstant { @@ -699,7 +699,7 @@ fn read_annotation<'a>( pool: &[CafeRc>], ) -> Result, ParseError> { let type_descriptor = read_cp_utf8(bytes, ix, pool) - .and_then(|descriptor| FieldType::parse(&descriptor)) + .and_then(|descriptor| parse_field_descriptor(&descriptor, 0)) .map_err(|e| err!(e, "type descriptor field"))?; let element_count = read_u2(bytes, ix)?; let mut elements = Vec::with_capacity(element_count.into()); @@ -1035,7 +1035,7 @@ fn read_record_data<'a>( fail!("Invalid unqualified name for entry {}", i); } let descriptor = read_cp_utf8(bytes, ix, pool) - .and_then(|descriptor| FieldType::parse(&descriptor)) + .and_then(|descriptor| parse_field_descriptor(&descriptor, 0)) .map_err(|e| err!(e, "descriptor of entry {}", i))?; let attributes = read_attributes(bytes, ix, pool, opts).map_err(|e| err!(e, "entry {}", i))?; diff --git a/src/constant_pool.rs b/src/constant_pool.rs index 2770fa6..c79f780 100644 --- a/src/constant_pool.rs +++ b/src/constant_pool.rs @@ -7,9 +7,9 @@ use std::ops::DerefMut; #[cfg(feature = "threadsafe")] use std::sync::Mutex; +use crate::descriptors::{is_array_descriptor, is_field_descriptor, is_method_descriptor}; use crate::names::{ - is_array_descriptor, is_binary_name, is_field_descriptor, is_method_descriptor, is_module_name, - is_unqualified_method_name, is_unqualified_name, + is_binary_name, is_module_name, is_unqualified_method_name, is_unqualified_name, }; use crate::{read_u1, read_u2, read_u4, read_u8, CafeRc, ParseError}; diff --git a/src/descriptor.rs b/src/descriptor.rs deleted file mode 100644 index 0a31f23..0000000 --- a/src/descriptor.rs +++ /dev/null @@ -1,500 +0,0 @@ -#![allow(clippy::ptr_arg)] - -use std::{ - borrow::Cow, - fmt::{self, Write}, - str::CharIndices, -}; - -use crate::ParseError; - -/// MethodDescriptor as described in section 4.3.3 of the [JVM 18 specification](https://docs.oracle.com/javase/specs/jvms/se18/html/jvms-4.html#jvms-4.3.3) -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub struct MethodDescriptor<'a> { - pub parameters: Vec>, - pub result: ReturnDescriptor<'a>, -} - -impl<'a> MethodDescriptor<'a> { - pub(crate) fn parse(chars: &Cow<'a, str>) -> Result { - let mut chars_idx = chars.char_indices(); - match chars_idx.next().map(|(_, ch)| ch) { - Some('(') => (), - Some(c) => fail!("Invalid start of method descriptor {}", c), - None => fail!("Invalid start of method descriptor, missing ("), - }; - - let mut parameters: Vec = Vec::new(); - - 'done: loop { - // preserve the next item for use in the FieldType parser - let field = match chars_idx.as_str().chars().next() { - Some(')') => { - chars_idx.next(); // consume the final ')' - break 'done; - } - Some(_) => FieldType::parse_from_chars_idx(chars, &mut chars_idx)?, - None => fail!("Invalid method descriptor, missing end )"), - }; - - parameters.push(field); - } - - let result = ReturnDescriptor::parse(chars, &mut chars_idx)?; - - Ok(MethodDescriptor { parameters, result }) - } -} - -impl<'a> fmt::Display for MethodDescriptor<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - f.write_char('(')?; - - for param in &self.parameters { - write!(f, "{}", param)?; - } - - write!(f, "){}", self.result) - } -} - -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub enum Ty<'a> { - Base(BaseType), - Object(Cow<'a, str>), -} - -impl<'a> fmt::Display for Ty<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - match self { - Self::Base(base) => write!(f, "{}", base), - Self::Object(obj) => write!(f, "L{};", obj), - } - } -} - -/// FieldType as described in section 4.3.2 of the [JVM 18 specification](https://docs.oracle.com/javase/specs/jvms/se18/html/jvms-4.html#jvms-4.3.2) -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub enum FieldType<'a> { - Ty(Ty<'a>), - Array { dimensions: usize, ty: Ty<'a> }, -} - -impl<'a> FieldType<'a> { - pub(crate) fn parse(chars: &Cow<'a, str>) -> Result { - let mut chars_idx = chars.char_indices(); - Self::parse_from_chars_idx(chars, &mut chars_idx) - } - - fn parse_from_chars_idx( - chars: &Cow<'a, str>, - chars_idx: &mut CharIndices, - ) -> Result { - let mut field = None::; - let mut array_depth = 0; - - while let Some(ch) = chars_idx.next().map(|(_, ch)| ch) { - match ch { - 'L' => { - field = Some(Ty::Object(parse_object(chars, chars_idx)?)); - break; - } - '[' => { - array_depth += 1; - - // A field descriptor representing an array type is valid only if it represents a type with 255 or fewer dimensions. - // see: https://docs.oracle.com/javase/specs/jvms/se18/html/jvms-4.html#jvms-4.3.2 - if array_depth > 255 { - fail!("Array exceeds 255 dimensions"); - } - } - ch => { - field = Some(Ty::Base(BaseType::parse(ch)?)); - break; - } - }; - } - - let field = field.ok_or_else(|| err!("FieldType not specified"))?; - if array_depth > 0 { - Ok(FieldType::Array { - dimensions: array_depth, - ty: field, - }) - } else { - Ok(FieldType::Ty(field)) - } - } -} - -impl<'a> fmt::Display for FieldType<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - match self { - Self::Ty(ty) => write!(f, "{}", ty), - Self::Array { dimensions, ty } => write!(f, "{}{}", "[".repeat(*dimensions), ty), - } - } -} - -/// BaseType as described in Table 4.3-A. of the [JVM 18 specification](https://docs.oracle.com/javase/specs/jvms/se18/html/jvms-4.html#jvms-4.3.2-200) -#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub enum BaseType { - /// B, byte, signed byte - Byte, - /// C, char, Unicode character code point in the Basic Multilingual Plane, encoded with UTF-16 - Char, - /// D, double, double-precision floating-point value - Double, - /// F, float, single-precision floating-point value - Float, - /// I, int, integer - Int, - /// J, long, long integer - Long, - /// S, short, signed short - Short, - /// Z, boolean, true or false - Boolean, -} - -impl BaseType { - fn parse(ch: char) -> Result { - let this = match ch { - 'B' => Self::Byte, - 'C' => Self::Char, - 'D' => Self::Double, - 'F' => Self::Float, - 'I' => Self::Int, - 'J' => Self::Long, - 'S' => Self::Short, - 'Z' => Self::Boolean, - _ => fail!("Invalid base type {}", ch), - }; - - Ok(this) - } -} - -impl fmt::Display for BaseType { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - let c = match self { - Self::Byte => 'B', - Self::Char => 'C', - Self::Double => 'D', - Self::Float => 'F', - Self::Int => 'I', - Self::Long => 'J', - Self::Short => 'S', - Self::Boolean => 'Z', - }; - - f.write_char(c) - } -} - -/// ReturnDescriptor -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub enum ReturnDescriptor<'a> { - Return(FieldType<'a>), - Void, -} - -impl<'a> ReturnDescriptor<'a> { - fn parse(chars: &Cow<'a, str>, chars_idx: &mut CharIndices) -> Result { - // preserve the next item for use in the FieldType parser - let result = match chars_idx.as_str().chars().next() { - Some('V') => { - chars_idx.next(); // for correctness - Self::Void - } - Some(_) => Self::Return(FieldType::parse_from_chars_idx(chars, chars_idx)?), - None => fail!("Invalid return descriptor, missing value"), - }; - - Ok(result) - } -} - -impl<'a> fmt::Display for ReturnDescriptor<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - match self { - Self::Void => f.write_char('V'), - Self::Return(field) => write!(f, "{}", field), - } - } -} - -/// Parses the object less the beginning L, e.g. this expects `java/lang/Object;` -fn parse_object<'a>( - chars: &Cow<'a, str>, - chars_idx: &mut CharIndices, -) -> Result, ParseError> { - let start_idx = chars_idx - .next() - .map(|ch_idx| ch_idx.0) - .ok_or_else(|| err!("Invalid object descriptor, expected ;"))?; - - let end_idx = chars_idx - .find_map(|(idx, ch)| if ch == ';' { Some(idx) } else { None }) - .ok_or_else(|| err!("Invalid object descriptor, expected ;"))?; - - // Because a Cow can be either Borrowed or Owned, we need to create an Owned String in the case that it's not a reference. - // This should be rare, if ever. - let object = match *chars { - Cow::Borrowed(chars) => { - let object = chars - .get(start_idx..end_idx) - .ok_or_else(|| err!("Invalid object descriptor, out of bounds"))?; - Cow::Borrowed(object) - } - Cow::Owned(ref chars) => { - let object = chars - .get(start_idx..end_idx) - .ok_or_else(|| err!("Invalid object descriptor, out of bounds"))?; - Cow::Owned(object.to_string()) - } - }; - - Ok(object) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_void_void() { - let chars = Cow::from("()V"); - - let descriptor = MethodDescriptor::parse(&chars).unwrap(); - let mut parameters = descriptor.parameters.into_iter(); - let result = descriptor.result; - - assert!(parameters.next().is_none()); - assert_eq!(result, ReturnDescriptor::Void); - } - - #[test] - fn test_single_param() { - let chars = Cow::from("(J)V"); - - let descriptor = MethodDescriptor::parse(&chars).unwrap(); - let mut parameters = descriptor.parameters.into_iter(); - let result = descriptor.result; - - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Base(BaseType::Long)) - ); - assert!(parameters.next().is_none()); - assert_eq!(result, ReturnDescriptor::Void); - } - - #[test] - fn test_basetype_return() { - let chars = Cow::from("()J"); - - let descriptor = MethodDescriptor::parse(&chars).unwrap(); - let mut parameters = descriptor.parameters.into_iter(); - let result = descriptor.result; - - assert!(parameters.next().is_none()); - assert_eq!( - result, - ReturnDescriptor::Return(FieldType::Ty(Ty::Base(BaseType::Long))) - ); - } - - #[test] - fn test_all_basetype_params() { - let chars = Cow::from("(BCDFIJSZ)V"); - - let descriptor = MethodDescriptor::parse(&chars).unwrap(); - let mut parameters = descriptor.parameters.into_iter(); - let result = descriptor.result; - - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Base(BaseType::Byte)) - ); - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Base(BaseType::Char)) - ); - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Base(BaseType::Double)) - ); - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Base(BaseType::Float)) - ); - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Base(BaseType::Int)) - ); - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Base(BaseType::Long)) - ); - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Base(BaseType::Short)) - ); - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Base(BaseType::Boolean)) - ); - assert!(parameters.next().is_none()); - assert_eq!(result, ReturnDescriptor::Void); - } - - #[test] - fn test_object_param() { - let chars = Cow::from("(Ljava/lang/Object;)V"); - - let descriptor = MethodDescriptor::parse(&chars).unwrap(); - let mut parameters = descriptor.parameters.into_iter(); - let result = descriptor.result; - - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Object(Cow::Borrowed("java/lang/Object"))) - ); - assert!(parameters.next().is_none()); - assert_eq!(result, ReturnDescriptor::Void); - } - - #[test] - fn test_owned_cow() { - let chars = Cow::from("(Ljava/lang/Object;)V".to_string()); - - let descriptor = MethodDescriptor::parse(&chars).unwrap(); - let mut parameters = descriptor.parameters.into_iter(); - let result = descriptor.result; - - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Object(Cow::Borrowed("java/lang/Object"))) - ); - assert!(parameters.next().is_none()); - assert_eq!(result, ReturnDescriptor::Void); - } - - #[test] - fn test_multi_object_param() { - let chars = Cow::from("(Ljava/lang/Object;Ljava/lang/String;)V"); - - let descriptor = MethodDescriptor::parse(&chars).unwrap(); - let mut parameters = descriptor.parameters.into_iter(); - let result = descriptor.result; - - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Object(Cow::Borrowed("java/lang/Object"))) - ); - assert_eq!( - parameters.next().unwrap(), - FieldType::Ty(Ty::Object(Cow::Borrowed("java/lang/String"))) - ); - assert!(parameters.next().is_none()); - assert_eq!(result, ReturnDescriptor::Void); - } - - #[test] - fn test_object_return() { - let chars = Cow::from("()Ljava/lang/Object;"); - - let descriptor = MethodDescriptor::parse(&chars).unwrap(); - let mut parameters = descriptor.parameters.into_iter(); - let result = descriptor.result; - - assert!(parameters.next().is_none()); - assert_eq!( - result, - ReturnDescriptor::Return(FieldType::Ty(Ty::Object(Cow::Borrowed("java/lang/Object")))) - ); - } - - #[test] - fn test_array_basetype_param() { - let chars = Cow::from("([J)V"); - - let descriptor = MethodDescriptor::parse(&chars).unwrap(); - let mut parameters = descriptor.parameters.into_iter(); - let result = descriptor.result; - - assert_eq!( - parameters.next().unwrap(), - FieldType::Array { - dimensions: 1, - ty: Ty::Base(BaseType::Long) - } - ); - assert!(parameters.next().is_none()); - assert_eq!(result, ReturnDescriptor::Void); - } - - #[test] - fn test_multi_array_param() { - let chars = Cow::from("([[J)V"); - - let descriptor = MethodDescriptor::parse(&chars).unwrap(); - let mut parameters = descriptor.parameters.into_iter(); - let result = descriptor.result; - - assert_eq!( - parameters.next().unwrap(), - FieldType::Array { - dimensions: 2, - ty: Ty::Base(BaseType::Long) - } - ); - assert!(parameters.next().is_none()); - assert_eq!(result, ReturnDescriptor::Void); - } - - #[test] - fn test_array_return() { - let chars = Cow::from("()[J"); - - let descriptor = MethodDescriptor::parse(&chars).unwrap(); - let mut parameters = descriptor.parameters.into_iter(); - let result = descriptor.result; - - assert!(parameters.next().is_none()); - assert_eq!( - result, - ReturnDescriptor::Return(FieldType::Array { - dimensions: 1, - ty: Ty::Base(BaseType::Long) - }) - ); - } - - #[test] - fn test_display() { - let descriptor = MethodDescriptor { - parameters: vec![ - FieldType::Ty(Ty::Base(BaseType::Long)), - FieldType::Ty(Ty::Object(Cow::Borrowed("java/lang/Object"))), - FieldType::Array { - dimensions: 2, - ty: Ty::Base(BaseType::Byte), - }, - ], - result: ReturnDescriptor::Void, - }; - - assert_eq!(descriptor.to_string(), "(JLjava/lang/Object;[[B)V"); - } - - #[test] - fn test_max_array_depth() { - let chars_ok = Cow::from(format!("({}J)V", "[".repeat(255))); - let chars_bad = Cow::from(format!("({}J)V", "[".repeat(256))); - - assert!(MethodDescriptor::parse(&chars_ok).is_ok()); - assert!(MethodDescriptor::parse(&chars_bad).is_err()); - } -} diff --git a/src/descriptors.rs b/src/descriptors.rs new file mode 100644 index 0000000..ee99f61 --- /dev/null +++ b/src/descriptors.rs @@ -0,0 +1,603 @@ +use std::borrow::Cow; + +use crate::ParseError; + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct UnqualifiedSegment<'a> { + pub name: Cow<'a, str>, +} + +// Returns the unqualified segment and the following char (either '/' or ';') +// or an error. This only extracts the unqualified segment at the start of +// the given data, and ignores anything following. +fn parse_unqualified_segment<'a>( + data: &Cow<'a, str>, + start_index: usize, +) -> Result<(UnqualifiedSegment<'a>, char), ParseError> { + for (ix, c) in data[start_index..].char_indices() { + match c { + '/' if ix == 0 => fail!("Unexpected / at start of unqualified segment"), + ';' if ix == 0 => fail!("Unexpected ; at start of unqualified segment"), + '/' | ';' => { + let name = match data { + Cow::Borrowed(borrowed_str) => { + Cow::Borrowed(&borrowed_str[start_index..start_index + ix]) + } + Cow::Owned(ref owned_str) => { + Cow::Owned(owned_str[start_index..start_index + ix].to_string()) + } + }; + let segment = UnqualifiedSegment { name }; + return Ok((segment, c)); + } + '.' | '[' | '<' | '>' => fail!("Disallowed character in unqualified segment"), + _ => (), + }; + } + fail!("Unterminated unqualified segment"); +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct ClassName<'a> { + pub segments: Vec>, +} + +impl<'a> ClassName<'a> { + fn byte_len(&self) -> usize { + self.segments + .iter() + .fold(0, |sum, segment| sum + segment.name.len() + 1) + } +} + +// Returns the classname descriptor at the start of the given data, and ignores anything following. +// Returns an error if there was no such classname. +fn parse_class_descriptor<'a>( + data: &Cow<'a, str>, + index: usize, +) -> Result, ParseError> { + let mut segments = vec![]; + let mut remaining_index = index; + loop { + match parse_unqualified_segment(data, remaining_index)? { + (segment, ';') => { + segments.push(segment); + return Ok(ClassName { segments }); + } + (segment, '/') => { + remaining_index += segment.name.len() + 1; + segments.push(segment); + continue; + } + _ => panic!("Got unexpected return value from parse_unqualified_segment"), + } + } +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub enum FieldType<'a> { + Byte, + Char, + Double, + Float, + Integer, + Long, + Short, + Boolean, + Object(ClassName<'a>), +} + +impl<'a> FieldType<'a> { + fn byte_len(&self) -> usize { + match self { + FieldType::Object(class_name) => 1 + class_name.byte_len(), + _ => 1, + } + } +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct FieldDescriptor<'a> { + pub dimensions: u8, + pub field_type: FieldType<'a>, +} + +impl<'a> FieldDescriptor<'a> { + fn byte_len(&self) -> usize { + (self.dimensions as usize) + self.field_type.byte_len() + } +} + +// Parse the field descriptor at the start of the given data, and ignores anything +// following. Returns an error if the data don't start with a field descriptor. +pub(crate) fn parse_field_descriptor<'a>( + data: &Cow<'a, str>, + index: usize, +) -> Result, ParseError> { + let mut dimensions: usize = 0; + for c in data[index..].chars() { + if c == '[' { + dimensions += 1; + if dimensions > 255 { + fail!("Dimensions in field descriptor exceeded allowed limit"); + } + continue; + } + let field_type = match c { + 'B' => FieldType::Byte, + 'C' => FieldType::Char, + 'D' => FieldType::Double, + 'F' => FieldType::Float, + 'I' => FieldType::Integer, + 'J' => FieldType::Long, + 'S' => FieldType::Short, + 'Z' => FieldType::Boolean, + 'L' => FieldType::Object(parse_class_descriptor(data, index + dimensions + 1)?), + _ => fail!("Unexpected field type"), + }; + return Ok(FieldDescriptor { + dimensions: dimensions as u8, + field_type, + }); + } + fail!("Empty string is not a field descriptor"); +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub enum ReturnDescriptor<'a> { + Return(FieldDescriptor<'a>), + Void, +} + +impl<'a> ReturnDescriptor<'a> { + fn byte_len(&self) -> usize { + match self { + Self::Return(d) => d.byte_len(), + Self::Void => 1, + } + } +} + +fn parse_return_descriptor<'a>( + data: &Cow<'a, str>, + index: usize, +) -> Result, ParseError> { + if &data[index..] == "V" { + Ok(ReturnDescriptor::Void) + } else { + Ok(ReturnDescriptor::Return(parse_field_descriptor( + data, index, + )?)) + } +} + +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct MethodDescriptor<'a> { + pub parameters: Vec>, + pub return_type: ReturnDescriptor<'a>, +} + +impl<'a> MethodDescriptor<'a> { + fn byte_len(&self) -> usize { + 1 + self + .parameters + .iter() + .fold(0, |sum, param| sum + param.byte_len()) + + 1 + + self.return_type.byte_len() + } +} + +pub(crate) fn parse_method_descriptor<'a>( + data: &Cow<'a, str>, + mut index: usize, +) -> Result, ParseError> { + let bytes = data.as_bytes(); + if bytes.len() <= index || bytes[index] != b'(' { + fail!("Method descriptor must start with '('") + } + index += 1; + let mut parameters = vec![]; + loop { + if bytes.len() > index && bytes[index] == b')' { + break; + } + let parameter = parse_field_descriptor(data, index)?; + index += parameter.byte_len(); + parameters.push(parameter); + } + index += 1; + let return_type = parse_return_descriptor(data, index)?; + Ok(MethodDescriptor { + parameters, + return_type, + }) +} + +pub(crate) fn is_field_descriptor(name: &str) -> bool { + match parse_field_descriptor(&Cow::Borrowed(name), 0) { + Ok(desc) => name.len() == desc.byte_len(), + Err(_) => false, + } +} + +pub(crate) fn is_array_descriptor(name: &str) -> bool { + is_field_descriptor(name) && name.as_bytes()[0] == b'[' +} + +pub(crate) fn is_method_descriptor(name: &str) -> bool { + match parse_method_descriptor(&Cow::Borrowed(name), 0) { + Ok(desc) => name.len() == desc.byte_len(), + Err(_) => false, + } +} + +pub(crate) fn is_return_descriptor(name: &str) -> bool { + match parse_return_descriptor(&Cow::Borrowed(name), 0) { + Ok(desc) => name.len() == desc.byte_len(), + Err(_) => false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_field_descriptors() { + assert!(is_field_descriptor("I")); + assert!(is_field_descriptor("[Ljava;")); + assert!(is_field_descriptor("[Ljava/lang/Object;")); + assert!(is_field_descriptor("[[Z")); + + assert!(!is_field_descriptor("M")); + assert!(!is_field_descriptor("[[L;")); + assert!(!is_field_descriptor("[[Ljava/;")); + assert!(!is_field_descriptor("[[L/java;")); + assert!(!is_field_descriptor("[[Ljava")); + assert!(!is_field_descriptor("[[Ljava/lang/Object;stuff")); + assert!(!is_field_descriptor("Istuff")); + } + + #[test] + fn test_method_descriptors() { + assert!(is_method_descriptor("()V")); + assert!(is_method_descriptor("(II)V")); + assert!(is_method_descriptor("([Ljava/lang/Object;)V")); + assert!(is_method_descriptor("(Ljava/lang/Object;I)V")); + assert!(is_method_descriptor("()Ljava/lang/Obejct;")); + assert!(is_method_descriptor("()I")); + + assert!(!is_method_descriptor("(V)V")); + assert!(!is_method_descriptor("(")); + assert!(!is_method_descriptor(")")); + assert!(!is_method_descriptor("()")); + assert!(!is_method_descriptor("()VV")); + assert!(!is_method_descriptor("()II")); + assert!(!is_method_descriptor("()ILjava/lang/Object;")); + } + + #[test] + fn test_void_void() { + let chars = Cow::from("()V"); + + let descriptor = parse_method_descriptor(&chars, 0).unwrap(); + let mut parameters = descriptor.parameters.into_iter(); + let return_type = descriptor.return_type; + + assert!(parameters.next().is_none()); + assert_eq!(return_type, ReturnDescriptor::Void); + } + + #[test] + fn test_single_param() { + let chars = Cow::from("(J)V"); + + let descriptor = parse_method_descriptor(&chars, 0).unwrap(); + let mut parameters = descriptor.parameters.into_iter(); + let return_type = descriptor.return_type; + + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Long + }, + ); + assert!(parameters.next().is_none()); + assert_eq!(return_type, ReturnDescriptor::Void); + } + + #[test] + fn test_basetype_return() { + let chars = Cow::from("()J"); + + let descriptor = parse_method_descriptor(&chars, 0).unwrap(); + let mut parameters = descriptor.parameters.into_iter(); + let return_type = descriptor.return_type; + + assert!(parameters.next().is_none()); + assert_eq!( + return_type, + ReturnDescriptor::Return(FieldDescriptor { + dimensions: 0, + field_type: FieldType::Long + }), + ); + } + + #[test] + fn test_all_basetype_params() { + let chars = Cow::from("(BCDFIJSZ)V"); + + let descriptor = parse_method_descriptor(&chars, 0).unwrap(); + let mut parameters = descriptor.parameters.into_iter(); + let return_type = descriptor.return_type; + + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Byte + }, + ); + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Char + }, + ); + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Double + }, + ); + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Float + }, + ); + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Integer + }, + ); + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Long + }, + ); + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Short + }, + ); + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Boolean + }, + ); + assert!(parameters.next().is_none()); + assert_eq!(return_type, ReturnDescriptor::Void); + } + + #[test] + fn test_object_param() { + let chars = Cow::from("(Ljava/lang/Object;)V"); + + let descriptor = parse_method_descriptor(&chars, 0).unwrap(); + let mut parameters = descriptor.parameters.into_iter(); + let return_type = descriptor.return_type; + + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Object(ClassName { + segments: vec![ + UnqualifiedSegment { + name: Cow::Borrowed("java") + }, + UnqualifiedSegment { + name: Cow::Borrowed("lang") + }, + UnqualifiedSegment { + name: Cow::Borrowed("Object") + }, + ], + }), + }, + ); + assert!(parameters.next().is_none()); + assert_eq!(return_type, ReturnDescriptor::Void); + } + + #[test] + fn test_owned_cow() { + let chars = Cow::from("(Ljava/lang/Object;)V".to_string()); + + let descriptor = parse_method_descriptor(&chars, 0).unwrap(); + let mut parameters = descriptor.parameters.into_iter(); + let return_type = descriptor.return_type; + + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Object(ClassName { + segments: vec![ + UnqualifiedSegment { + name: Cow::Borrowed("java") + }, + UnqualifiedSegment { + name: Cow::Borrowed("lang") + }, + UnqualifiedSegment { + name: Cow::Borrowed("Object") + }, + ], + }), + }, + ); + assert!(parameters.next().is_none()); + assert_eq!(return_type, ReturnDescriptor::Void); + } + + #[test] + fn test_multi_object_param() { + let chars = Cow::from("(Ljava/lang/Object;Ljava/lang/String;)V"); + + let descriptor = parse_method_descriptor(&chars, 0).unwrap(); + let mut parameters = descriptor.parameters.into_iter(); + let return_type = descriptor.return_type; + + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Object(ClassName { + segments: vec![ + UnqualifiedSegment { + name: Cow::Borrowed("java") + }, + UnqualifiedSegment { + name: Cow::Borrowed("lang") + }, + UnqualifiedSegment { + name: Cow::Borrowed("Object") + }, + ], + }), + }, + ); + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 0, + field_type: FieldType::Object(ClassName { + segments: vec![ + UnqualifiedSegment { + name: Cow::Borrowed("java") + }, + UnqualifiedSegment { + name: Cow::Borrowed("lang") + }, + UnqualifiedSegment { + name: Cow::Borrowed("String") + }, + ], + }), + }, + ); + assert!(parameters.next().is_none()); + assert_eq!(return_type, ReturnDescriptor::Void); + } + + #[test] + fn test_object_return() { + let chars = Cow::from("()Ljava/lang/Object;"); + + let descriptor = parse_method_descriptor(&chars, 0).unwrap(); + let mut parameters = descriptor.parameters.into_iter(); + let return_type = descriptor.return_type; + + assert!(parameters.next().is_none()); + assert_eq!( + return_type, + ReturnDescriptor::Return(FieldDescriptor { + dimensions: 0, + field_type: FieldType::Object(ClassName { + segments: vec![ + UnqualifiedSegment { + name: Cow::Borrowed("java") + }, + UnqualifiedSegment { + name: Cow::Borrowed("lang") + }, + UnqualifiedSegment { + name: Cow::Borrowed("Object") + }, + ], + }), + }), + ); + } + + #[test] + fn test_array_basetype_param() { + let chars = Cow::from("([J)V"); + + let descriptor = parse_method_descriptor(&chars, 0).unwrap(); + let mut parameters = descriptor.parameters.into_iter(); + let return_type = descriptor.return_type; + + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 1, + field_type: FieldType::Long, + } + ); + assert!(parameters.next().is_none()); + assert_eq!(return_type, ReturnDescriptor::Void); + } + + #[test] + fn test_multi_array_param() { + let chars = Cow::from("([[J)V"); + + let descriptor = parse_method_descriptor(&chars, 0).unwrap(); + let mut parameters = descriptor.parameters.into_iter(); + let return_type = descriptor.return_type; + + assert_eq!( + parameters.next().unwrap(), + FieldDescriptor { + dimensions: 2, + field_type: FieldType::Long, + } + ); + assert!(parameters.next().is_none()); + assert_eq!(return_type, ReturnDescriptor::Void); + } + + #[test] + fn test_array_return() { + let chars = Cow::from("()[J"); + + let descriptor = parse_method_descriptor(&chars, 0).unwrap(); + let mut parameters = descriptor.parameters.into_iter(); + let return_type = descriptor.return_type; + + assert!(parameters.next().is_none()); + assert_eq!( + return_type, + ReturnDescriptor::Return(FieldDescriptor { + dimensions: 1, + field_type: FieldType::Long, + }) + ); + } + + #[test] + fn test_max_array_depth() { + let chars_ok = Cow::from(format!("({}J)V", "[".repeat(255))); + let chars_bad = Cow::from(format!("({}J)V", "[".repeat(256))); + + assert!(parse_method_descriptor(&chars_ok, 0).is_ok()); + assert!(parse_method_descriptor(&chars_bad, 0).is_err()); + } +} diff --git a/src/lib.rs b/src/lib.rs index 234c6df..db4c2eb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,7 @@ pub mod error; pub mod attributes; pub mod bytecode; pub mod constant_pool; -pub mod descriptor; +pub mod descriptors; pub mod names; use std::borrow::Cow; @@ -29,7 +29,10 @@ use crate::constant_pool::{ read_constant_pool, read_cp_classinfo, read_cp_classinfo_opt, read_cp_utf8, ConstantPoolEntry, ConstantPoolIter, }; -use crate::descriptor::{FieldType, MethodDescriptor, ReturnDescriptor}; +use crate::descriptors::{ + parse_field_descriptor, parse_method_descriptor, FieldDescriptor, MethodDescriptor, + ReturnDescriptor, +}; pub use crate::error::ParseError; use crate::names::{is_unqualified_method_name, is_unqualified_name}; @@ -147,7 +150,7 @@ bitflags! { pub struct FieldInfo<'a> { pub access_flags: FieldAccessFlags, pub name: Cow<'a, str>, - pub descriptor: FieldType<'a>, + pub descriptor: FieldDescriptor<'a>, pub attributes: Vec>, } @@ -159,7 +162,7 @@ fn read_fields<'a>( ) -> Result>, ParseError> { let count = read_u2(bytes, ix)?; let mut fields = Vec::with_capacity(count.into()); - let mut unique_ids: HashSet<(Cow<'a, str>, FieldType<'a>)> = HashSet::new(); + let mut unique_ids: HashSet<(Cow<'a, str>, FieldDescriptor<'a>)> = HashSet::new(); for i in 0..count { let access_flags = FieldAccessFlags::from_bits_truncate(read_u2(bytes, ix)?); let name = @@ -169,7 +172,7 @@ fn read_fields<'a>( } let descriptor = read_cp_utf8(bytes, ix, pool) .map_err(|e| err!(e, "descriptor of class field {}", i))?; - let descriptor = FieldType::parse(&descriptor)?; + let descriptor = parse_field_descriptor(&descriptor, 0)?; let unique_id = (name.clone(), descriptor.clone()); if !unique_ids.insert(unique_id) { @@ -235,14 +238,14 @@ fn read_methods<'a>( fail!("Invalid unqualified name for class method {}", i); } let descriptor = read_cp_utf8(bytes, ix, pool) + .and_then(|d| parse_method_descriptor(&d, 0)) .map_err(|e| err!(e, "descriptor of class method {}", i))?; - let descriptor = MethodDescriptor::parse(&descriptor)?; - if allow_init && name == "" && descriptor.result != ReturnDescriptor::Void { + if allow_init && name == "" && descriptor.return_type != ReturnDescriptor::Void { fail!("Non-void method descriptor for init method {}", i); } if name == "" { - if descriptor.result != ReturnDescriptor::Void { + if descriptor.return_type != ReturnDescriptor::Void { fail!("Non-void method descriptor for clinit method {}", i); } if major_version >= 51 && !descriptor.parameters.is_empty() { diff --git a/src/names.rs b/src/names.rs index 4708859..5af8ac6 100644 --- a/src/names.rs +++ b/src/names.rs @@ -1,5 +1,3 @@ -use std::str::Chars; - pub(crate) fn is_binary_name(name: &str) -> bool { for segment in name.split('/') { if !is_unqualified_name(segment) { @@ -50,140 +48,3 @@ pub(crate) fn is_module_name(name: &str) -> bool { } true } - -/// Returns None if there was a parse error. -/// Returns Some('/') for an unqualified segment followed by / -/// Returns Some(';') for an unqualified segment followed by ; -fn consume_unqualified_segment(chars: &mut Chars) -> Option { - let mut first = true; - for c in chars { - match c { - '/' if first => return None, - ';' if first => return None, - '/' | ';' => return Some(c), - '.' | '[' | '<' | '>' => return None, - _ => first = false, - }; - } - None -} - -/// Returns false if there was a parse error. -/// Returns true if exactly one class descriptor (including terminating semicolon) was consumed. -fn consume_class_descriptor(chars: &mut Chars) -> bool { - loop { - match consume_unqualified_segment(chars) { - None => return false, - Some(';') => return true, - Some('/') => continue, - _ => panic!("Got unexpected return value from consume_unqualified_segment"), - }; - } -} - -/// Returns (false, None) if there was a parse error. -/// Returns (false, Some(x)) if a field descriptor was not read, but a character x was consumed. -/// Returns (true, None) after consuming exactly one field descriptor. -fn consume_field_descriptor(chars: &mut Chars) -> (bool, Option) { - let mut dimensions = 0; - while let Some(c) = chars.next() { - match c { - '[' => { - dimensions += 1; - if dimensions > 255 { - return (false, None); - } - continue; - } - 'B' | 'C' | 'D' | 'F' | 'I' | 'J' | 'S' | 'Z' => (), - 'L' => { - if !consume_class_descriptor(chars) { - return (false, None); - } - } - _ => return (false, Some(c)), - }; - return (true, None); - } - (false, None) -} - -pub(crate) fn is_field_descriptor(name: &str) -> bool { - let mut chars = name.chars(); - match consume_field_descriptor(&mut chars) { - (false, _) => false, - (true, _) => chars.next().is_none(), - } -} - -pub(crate) fn is_array_descriptor(name: &str) -> bool { - is_field_descriptor(name) && name.as_bytes()[0] == b'[' -} - -pub(crate) fn is_method_descriptor(name: &str) -> bool { - let mut chars = name.chars(); - if chars.next() != Some('(') { - return false; - } - loop { - match consume_field_descriptor(&mut chars) { - (false, Some(')')) => break, - (false, _) => return false, - (true, _) => continue, - } - } - // ')' was already consumed, so now consume the return descriptor - match consume_field_descriptor(&mut chars) { - (false, Some('V')) => chars.next().is_none(), - (false, _) => false, - (true, _) => chars.next().is_none(), - } -} - -pub(crate) fn is_return_descriptor(name: &str) -> bool { - let mut chars = name.chars(); - match consume_field_descriptor(&mut chars) { - (false, Some('V')) => chars.next().is_none(), - (false, _) => false, - (true, _) => chars.next().is_none(), - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_field_descriptors() { - assert!(is_field_descriptor("I")); - assert!(is_field_descriptor("[Ljava;")); - assert!(is_field_descriptor("[Ljava/lang/Object;")); - assert!(is_field_descriptor("[[Z")); - - assert!(!is_field_descriptor("M")); - assert!(!is_field_descriptor("[[L;")); - assert!(!is_field_descriptor("[[Ljava/;")); - assert!(!is_field_descriptor("[[L/java;")); - assert!(!is_field_descriptor("[[Ljava")); - assert!(!is_field_descriptor("[[Ljava/lang/Object;stuff")); - assert!(!is_field_descriptor("Istuff")); - } - - #[test] - fn test_method_descriptors() { - assert!(is_method_descriptor("()V")); - assert!(is_method_descriptor("(II)V")); - assert!(is_method_descriptor("([Ljava/lang/Object;)V")); - assert!(is_method_descriptor("(Ljava/lang/Object;I)V")); - assert!(is_method_descriptor("()Ljava/lang/Obejct;")); - assert!(is_method_descriptor("()I")); - - assert!(!is_method_descriptor("(V)V")); - assert!(!is_method_descriptor("(")); - assert!(!is_method_descriptor(")")); - assert!(!is_method_descriptor("()")); - assert!(!is_method_descriptor("()VV")); - assert!(!is_method_descriptor("()II")); - assert!(!is_method_descriptor("()ILjava/lang/Object;")); - } -}