From e583b53e1b051620c01a5c043039cff7c7285197 Mon Sep 17 00:00:00 2001 From: Kartikaya Gupta Date: Wed, 28 Aug 2024 18:31:31 -0400 Subject: [PATCH] Add stronger typing for opcodes with classinfo This is from section 4.4.1 which allows classinfo to reference binary names or array type descriptors, except for the new opcode which is only allowed binary names. --- src/bytecode.rs | 33 ++++++++++++++++++++------------- src/constant_pool.rs | 29 ++++++++++++++++++++++++++++- src/descriptors.rs | 13 +++++++++++++ 3 files changed, 61 insertions(+), 14 deletions(-) diff --git a/src/bytecode.rs b/src/bytecode.rs index b9d67e1..cbacad5 100644 --- a/src/bytecode.rs +++ b/src/bytecode.rs @@ -1,11 +1,10 @@ -use std::borrow::Cow; use std::convert::TryFrom; use crate::constant_pool::{ - get_cp_loadable, read_cp_classinfo, read_cp_invokedynamic, read_cp_memberref, + get_cp_loadable, read_cp_invokedynamic, read_cp_memberref, read_cp_object_array_type, }; use crate::constant_pool::{ - ConstantPoolEntry, ConstantPoolEntryTypes, InvokeDynamic, Loadable, MemberRef, + ConstantPoolEntry, ConstantPoolEntryTypes, InvokeDynamic, Loadable, MemberRef, ObjectArrayType, }; use crate::{read_u1, read_u2, read_u4, CafeRc, ParseError}; @@ -43,7 +42,7 @@ pub enum Opcode<'a> { Aastore, AconstNull, Aload(u16), // both wide and narrow - Anewarray(Cow<'a, str>), + Anewarray(ObjectArrayType<'a>), Areturn, Arraylength, Astore(u16), // both wide and narrow @@ -54,7 +53,7 @@ pub enum Opcode<'a> { Breakpoint, Caload, Castore, - Checkcast(Cow<'a, str>), + Checkcast(ObjectArrayType<'a>), D2f, D2i, D2l, @@ -141,7 +140,7 @@ pub enum Opcode<'a> { Impdep2, Imul, Ineg, - Instanceof(Cow<'a, str>), + Instanceof(ObjectArrayType<'a>), Invokedynamic(InvokeDynamic<'a>), Invokeinterface(MemberRef<'a>, u8), Invokespecial(MemberRef<'a>), @@ -186,8 +185,8 @@ pub enum Opcode<'a> { Lxor, Monitorenter, Monitorexit, - Multianewarray(Cow<'a, str>, u8), - New(Cow<'a, str>), + Multianewarray(ObjectArrayType<'a>, u8), + New(ObjectArrayType<'a>), Newarray(PrimitiveArrayType), Nop, Pop, @@ -614,7 +613,15 @@ fn read_opcodes<'a>( } Opcode::Invokedynamic(invokedynamic) } - 0xbb => Opcode::New(read_cp_classinfo(code, &mut ix, pool)?), + 0xbb => { + let object_array_type = match read_cp_object_array_type(code, &mut ix, pool)? { + ObjectArrayType::ArrayType(_) => { + fail!("Array types not allowed for new opcode at index {}", ix - 2) + } + ObjectArrayType::BinaryName(name) => ObjectArrayType::BinaryName(name), + }; + Opcode::New(object_array_type) + } 0xbc => { let primitive_type = match read_u1(code, &mut ix)? { 4 => PrimitiveArrayType::Boolean, @@ -632,11 +639,11 @@ fn read_opcodes<'a>( }; Opcode::Newarray(primitive_type) } - 0xbd => Opcode::Anewarray(read_cp_classinfo(code, &mut ix, pool)?), + 0xbd => Opcode::Anewarray(read_cp_object_array_type(code, &mut ix, pool)?), 0xbe => Opcode::Arraylength, 0xbf => Opcode::Athrow, - 0xc0 => Opcode::Checkcast(read_cp_classinfo(code, &mut ix, pool)?), - 0xc1 => Opcode::Instanceof(read_cp_classinfo(code, &mut ix, pool)?), + 0xc0 => Opcode::Checkcast(read_cp_object_array_type(code, &mut ix, pool)?), + 0xc1 => Opcode::Instanceof(read_cp_object_array_type(code, &mut ix, pool)?), 0xc2 => Opcode::Monitorenter, 0xc3 => Opcode::Monitorexit, 0xc4 => { @@ -662,7 +669,7 @@ fn read_opcodes<'a>( } } 0xc5 => Opcode::Multianewarray( - read_cp_classinfo(code, &mut ix, pool)?, + read_cp_object_array_type(code, &mut ix, pool)?, read_u1(code, &mut ix)?, ), 0xc6 => Opcode::Ifnull((read_u2(code, &mut ix)? as i16).into()), diff --git a/src/constant_pool.rs b/src/constant_pool.rs index c79f780..3721b02 100644 --- a/src/constant_pool.rs +++ b/src/constant_pool.rs @@ -7,7 +7,10 @@ use std::ops::DerefMut; #[cfg(feature = "threadsafe")] use std::sync::Mutex; -use crate::descriptors::{is_array_descriptor, is_field_descriptor, is_method_descriptor}; +use crate::descriptors::FieldDescriptor; +use crate::descriptors::{ + is_array_descriptor, is_field_descriptor, is_method_descriptor, parse_array_descriptor, +}; use crate::names::{ is_binary_name, is_module_name, is_unqualified_method_name, is_unqualified_name, }; @@ -1071,6 +1074,30 @@ pub(crate) fn read_cp_bootstrap_argument<'a>( } } +#[derive(Clone, Debug)] +pub enum ObjectArrayType<'a> { + ArrayType(FieldDescriptor<'a>), + BinaryName(Cow<'a, str>), +} + +pub(crate) fn read_cp_object_array_type<'a>( + bytes: &'a [u8], + ix: &mut usize, + pool: &[CafeRc>], +) -> Result, ParseError> { + let cp_ref = read_cp_ref_any(bytes, ix, pool)?; + match cp_ref.deref() { + ConstantPoolEntry::ClassInfo(x) => { + let name = peel!(x).utf8(); + match parse_array_descriptor(&name)? { + Some(desc) => Ok(ObjectArrayType::ArrayType(desc)), + None => Ok(ObjectArrayType::BinaryName(name)), + } + } + _ => fail!("Unexpected constant pool reference type"), + } +} + #[derive(Debug)] pub enum ConstantPoolItem<'a> { LiteralConstant(LiteralConstant<'a>), diff --git a/src/descriptors.rs b/src/descriptors.rs index ee99f61..96f543b 100644 --- a/src/descriptors.rs +++ b/src/descriptors.rs @@ -214,6 +214,19 @@ pub(crate) fn parse_method_descriptor<'a>( }) } +pub(crate) fn parse_array_descriptor<'a>( + data: &Cow<'a, str>, +) -> Result>, ParseError> { + if data.len() == 0 || data.as_bytes()[0] != b'[' { + return Ok(None); + } + let desc = parse_field_descriptor(data, 0)?; + if data.len() != desc.byte_len() { + fail!("Not a field descriptor") + } + Ok(Some(desc)) +} + pub(crate) fn is_field_descriptor(name: &str) -> bool { match parse_field_descriptor(&Cow::Borrowed(name), 0) { Ok(desc) => name.len() == desc.byte_len(),