Skip to content

Commit

Permalink
Merge pull request #13 from FuzzingLabs/dev/antonin
Browse files Browse the repository at this point in the history
Improve the decompiler output for remote contracts
  • Loading branch information
Rog3rSm1th authored Jun 3, 2024
2 parents 491bcb0 + a088638 commit 0f5d153
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 13 deletions.
20 changes: 12 additions & 8 deletions lib/src/decompiler/decompiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use crate::decompiler::cfg::EdgeType;
use crate::decompiler::function::Function;
use crate::decompiler::function::SierraStatement;
use crate::decompiler::libfuncs_patterns::{IS_ZERO_REGEX, USER_DEFINED_FUNCTION_REGEX};
use crate::decompiler::utils::replace_types_id;
use crate::parse_element_name;
use crate::parse_element_name_with_fallback;
use crate::sierra_program::SierraProgram;
Expand All @@ -30,9 +31,9 @@ pub struct Decompiler<'a> {
/// The function we are currently working on
current_function: Option<Function<'a>>,
/// Names of all declared types (in order)
declared_types_names: Vec<String>,
pub declared_types_names: Vec<String>,
/// Names of all declared libfuncs (in order)
declared_libfuncs_names: Vec<String>,
pub declared_libfuncs_names: Vec<String>,
/// Enable / disable the verbose output
/// Some statements are not included in the regular output to improve the readability
verbose: bool,
Expand Down Expand Up @@ -552,11 +553,13 @@ impl<'a> Decompiler<'a> {
// Add the formatted statements to the block
// Some statements are only included in the verbose output
//
// We pass it the declared libfunc names to allow the method to reconstruct function calls
// For remote contracts
if let Some(formatted_statement) = statement
.formatted_statement(self.verbose, self.declared_libfuncs_names.clone())
{
// We pass it the declared libfunc names & types names to allow the method
// to reconstruct function calls & used types for remote contracts
if let Some(formatted_statement) = statement.formatted_statement(
self.verbose,
self.declared_libfuncs_names.clone(),
self.declared_types_names.clone(),
) {
decompiled_basic_block += &format!("{}{}\n", indentation, formatted_statement);
}
}
Expand Down Expand Up @@ -589,7 +592,8 @@ impl<'a> Decompiler<'a> {
format!(
"{}if ({}({}) == 0) {}{}\n",
indentation_str,
function_name,
// Recover the type from type_id if it's a remote contract
replace_types_id(&self.declared_types_names, function_name).blue(),
function_arguments,
bold_brace_open,
"\t".repeat(indentation + 1) // Adjust for nested content indentation
Expand Down
15 changes: 14 additions & 1 deletion lib/src/decompiler/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::decompiler::libfuncs_patterns::{
VARIABLE_ASSIGNMENT_REGEX,
};
use crate::decompiler::utils::decode_hex_bigint;
use crate::decompiler::utils::replace_types_id;
use crate::extract_parameters;
use crate::parse_element_name;
use crate::parse_element_name_with_fallback;
Expand Down Expand Up @@ -55,6 +56,7 @@ impl SierraStatement {
&self,
verbose: bool,
declared_libfuncs_names: Vec<String>,
declared_types_names: Vec<String>,
) -> Option<String> {
match &self.statement {
// Return statements
Expand Down Expand Up @@ -110,6 +112,7 @@ impl SierraStatement {
&libfunc_id_str,
&parameters,
&verbose,
&declared_types_names,
))
}
}
Expand All @@ -125,6 +128,7 @@ impl SierraStatement {
match function_name {
"branch_align"
| "disable_ap_tracking"
| "enable_ap_tracking"
| "finalize_locals"
| "revoke_ap_tracking"
| "get_builtin_costs" => false,
Expand All @@ -145,7 +149,12 @@ impl SierraStatement {
libfunc_id_str: &str,
parameters: &[String],
verbose: &bool,
declared_types_names: &Vec<String>,
) -> String {
// For remote contracts, replace the type IDs in libfunc names with their corresponding type names
let binding = replace_types_id(declared_types_names, &libfunc_id_str);
let libfunc_id_str = binding.as_str();

// Join parameters for general use
let parameters_str = parameters.join(", ");

Expand Down Expand Up @@ -205,11 +214,15 @@ impl SierraStatement {
if let Some(captures) = NEW_ARRAY_REGEX.captures(libfunc_id_str) {
if let Some(array_type) = captures.get(1) {
let formatted_array_type = array_type.as_str();

let final_array_type = formatted_array_type;

// Return the formatted array declaration string
return format!(
"{} = {}<{}>::{}()",
assigned_variables_str,
"Array".blue(),
formatted_array_type,
final_array_type,
"new".blue()
);
}
Expand Down
10 changes: 8 additions & 2 deletions lib/src/decompiler/libfuncs_patterns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,17 @@ lazy_static! {
// Variable renaming
pub static ref VARIABLE_ASSIGNMENT_REGEX: Vec<Regex> = vec![
Regex::new(r"rename<.+>").unwrap(),
Regex::new(r"store_temp<.+>").unwrap()
Regex::new(r"store_temp<.+>").unwrap(),
Regex::new(r"store_local<.+>").unwrap(),
Regex::new(r"unbox<.+>").unwrap()
];

// Check if an integer is 0
pub static ref IS_ZERO_REGEX: Regex = Regex::new(r"(felt|u)_?(8|16|32|64|128|252)_is_zero").unwrap();

// Consts declarations
pub static ref CONST_REGEXES: Vec<Regex> = vec![
Regex::new(r"const_as_immediate<Const<.+, (?P<const>-?[0-9]+)>>").unwrap(),
Regex::new(r"const_as_immediate<Const<.*, (?P<const>-?[0-9]+)>>").unwrap(),
Regex::new(r"storage_base_address_const<(?P<const>-?[0-9]+)>").unwrap(),
Regex::new(r"(felt|u)_?(8|16|32|64|128|252)_const<(?P<const>-?[0-9]+)>").unwrap(),
];
Expand All @@ -48,4 +50,8 @@ lazy_static! {
// Array declarations & mutations
pub static ref NEW_ARRAY_REGEX: Regex = Regex::new(r"array_new<(?P<array_type>.+)>").unwrap();
pub static ref ARRAY_APPEND_REGEX: Regex = Regex::new(r"array_append<(.+)>").unwrap();

// Regex of a type ID
// Used to match and replace them in remote contracts
pub static ref TYPE_ID_REGEX: Regex = Regex::new(r"(?<type_id>\[[0-9]+\])").unwrap();
}
39 changes: 39 additions & 0 deletions lib/src/decompiler/utils.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::decompiler::libfuncs_patterns::TYPE_ID_REGEX;
use num_bigint::BigInt;
use std::str;

Expand All @@ -18,3 +19,41 @@ pub fn decode_hex_bigint(bigint: &BigInt) -> Option<String> {

string
}

/// Replaces type IDs (e.g. `[12]`) in `invocation` with the corresponding names from
/// `declared_types_names`.
///
/// Every substring matched by `TYPE_ID_REGEX` is parsed as an index into
/// `declared_types_names`. A match is left untouched when:
/// - it is immediately preceded by `user@` (to avoid mistakes with user-defined functions),
/// - its digits cannot be parsed as a `usize`,
/// - the parsed index is out of bounds for `declared_types_names`.
///
/// Returns a new `String`; when nothing is replaced it is equal to `invocation`.
pub fn replace_types_id(declared_types_names: &[String], invocation: &str) -> String {
    TYPE_ID_REGEX
        .replace_all(invocation, |caps: &regex::Captures| {
            let whole_match = caps
                .get(0)
                .expect("capture group 0 is always present on a match");

            // Look at the text *before* the match instead of slicing `[start - 5..start]`:
            // a match start is always a char boundary, whereas `start - 5` may land in the
            // middle of a multi-byte character and make the slice panic.
            let follows_user_prefix = invocation[..whole_match.start()].ends_with("user@");

            let type_name = if follows_user_prefix {
                None
            } else {
                caps.name("type_id")
                    // Strip the surrounding brackets and parse the ID as an index
                    .and_then(|type_id| {
                        type_id
                            .as_str()
                            .trim_matches(|c| c == '[' || c == ']')
                            .parse::<usize>()
                            .ok()
                    })
                    // Use the parsed ID as an index into the declared types names
                    .and_then(|index| declared_types_names.get(index))
            };

            // Fall back to the original matched text when no type name was found
            match type_name {
                Some(name) => name.clone(),
                None => whole_match.as_str().to_string(),
            }
        })
        .into_owned()
}
23 changes: 21 additions & 2 deletions lib/src/detectors/strings_detector.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::decompiler::decompiler::Decompiler;
use crate::decompiler::libfuncs_patterns::CONST_REGEXES;
use crate::decompiler::utils::decode_hex_bigint;
use crate::decompiler::utils::replace_types_id;
use crate::detectors::detector::{Detector, DetectorType};
use crate::parse_element_name;

Expand Down Expand Up @@ -58,6 +59,20 @@ impl Detector for StringsDetector {
// Parse the ID of the invoked library function
let libfunc_id_str = parse_element_name!(invocation.libfunc_id);

// If the libfunc id is an integer
let libfunc_id_str = if let Ok(index) = libfunc_id_str.parse::<usize>() {
// If it's a remote contract, we try to convert the type IDs to their equivalent type names
if let Some(libfunc_name) =
decompiler.declared_libfuncs_names.get(index)
{
replace_types_id(&decompiler.declared_types_names, libfunc_name)
} else {
continue;
}
} else {
parse_element_name!(invocation.libfunc_id)
};

// Iterate over the CONST_REGEXES and check if the input string matches
for regex in CONST_REGEXES.iter() {
if let Some(captures) = regex.captures(&libfunc_id_str) {
Expand All @@ -72,8 +87,12 @@ impl Detector for StringsDetector {
if let Some(decoded_string) =
decode_hex_bigint(&const_value_bigint)
{
// Add the decoded string to the set
extracted_strings.insert(decoded_string);
// Check if the string is not empty, not whitespace, and contains printable characters
if !decoded_string.trim().is_empty()
&& decoded_string.chars().any(|c| c.is_ascii_graphic())
{
extracted_strings.insert(decoded_string);
}
}
}
}
Expand Down

0 comments on commit 0f5d153

Please sign in to comment.