Skip to content

Commit

Permalink
fix b2s wide/utf-8 string handling via workaround
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-tz committed Nov 9, 2023
1 parent d49c6cf commit a7b7ea3
Showing 1 changed file with 48 additions and 7 deletions.
55 changes: 48 additions & 7 deletions floss/language/rust/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pathlib
import argparse
import itertools
from typing import List, Tuple, Iterable
from typing import List, Tuple, Iterable, Optional

import pefile
import binary2strings as b2s
Expand All @@ -25,6 +25,41 @@ def get_rdata_section(pe: pefile.PE) -> pefile.SectionStructure:
raise ValueError("no .rdata section found")


def fix_b2s_wide_strings(
    strings: List[Tuple[str, str, Tuple[int, int], bool]], min_length: int, buffer: bytes
) -> List[Tuple[str, str, Tuple[int, int], bool]]:
    """
    work around b2s reporting a wide string where there really is a utf-8 string

    entries typed "WIDE_STRING" are never copied to the result. when the utf-16le
    encoding of such an entry starts with a zero byte, a string is re-extracted
    from `buffer` one byte after the entry's start offset and remembered as a fixup,
    provided it has at least `min_length` characters.
    the next entry that is not a wide string is replaced by the remembered fixup if
    its text is contained in the fixup's text, otherwise it is kept unchanged.
    any entry that is not a wide string clears the remembered fixup.

    args:
      strings: b2s result tuples, indexed here as (string, type, (start, end), flag)
      min_length: minimum number of characters a re-extracted string needs to be used
      buffer: the bytes that the start offsets in `strings` index into

    returns: new list holding the kept and fixed-up entries, `strings` is not modified
    """
    # TODO(mr-tz): b2s may parse wide strings where there really should be utf-8 strings
    #  handle special cases here until fixed
    #  https://github.com/mandiant/flare-floss/issues/867
    fixed_strings: List[Tuple[str, str, Tuple[int, int], bool]] = []
    last_fixup: Optional[Tuple[str, str, Tuple[int, int], bool]] = None
    for string in strings:
        s = string[0]
        string_type = string[1]
        start = string[2][0]

        if string_type == "WIDE_STRING":
            sd = s.encode("utf-16le", "ignore")
            # utf-8 strings will not start with \x00
            # compare a one-byte slice instead of indexing: the encoded data is empty for
            # an empty string (or when every character is ignored) and sd[0] would raise
            if sd[:1] != b"\x00":
                continue

            # retry one byte further into the buffer
            # NOTE(review): assumes b2s.extract_string returns a tuple shaped like the
            #  entries of `strings`, with offsets relative to the slice passed in - confirm
            new_start = start + 1
            new_string = b2s.extract_string(buffer[new_start:])
            last_fixup = (
                new_string[0],
                new_string[1],
                # shift the offsets back so they are relative to `buffer` again
                (new_string[2][0] + new_start, new_string[2][1] + new_start),
                new_string[3],
            )
            if len(last_fixup[0]) < min_length:
                last_fixup = None
            continue

        if last_fixup and s in last_fixup[0]:
            fixed_strings.append(last_fixup)
        else:
            fixed_strings.append(string)
        # a fixup is only considered for the entry directly following it
        last_fixup = None
    return fixed_strings


def filter_and_transform_utf8_strings(
strings: List[Tuple[str, str, Tuple[int, int], bool]],
start_rdata: int,
Expand All @@ -46,7 +81,7 @@ def filter_and_transform_utf8_strings(
return transformed_strings


def split_strings(static_strings: List[StaticString], address: int) -> None:
def split_strings(static_strings: List[StaticString], address: int, min_length: int) -> None:
"""
if address is in between start and end of a string in ref data then split the string
this modifies the elements of the static strings list directly
Expand All @@ -57,8 +92,12 @@ def split_strings(static_strings: List[StaticString], address: int) -> None:
rust_string = string.string[0 : address - string.offset]
rest = string.string[address - string.offset :]

static_strings.append(StaticString(string=rust_string, offset=string.offset, encoding=StringEncoding.UTF8))
static_strings.append(StaticString(string=rest, offset=address, encoding=StringEncoding.UTF8))
if len(rust_string) >= min_length:
static_strings.append(
StaticString(string=rust_string, offset=string.offset, encoding=StringEncoding.UTF8)
)
if len(rest) >= min_length:
static_strings.append(StaticString(string=rest, offset=address, encoding=StringEncoding.UTF8))

# remove string from static_strings
for static_string in static_strings:
Expand Down Expand Up @@ -97,12 +136,14 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
end_rdata = start_rdata + rdata_section.SizeOfRawData
virtual_address = rdata_section.VirtualAddress
pointer_to_raw_data = rdata_section.PointerToRawData
buffer_rdata = rdata_section.get_data()

# extract utf-8 and wide strings, latter not needed here
strings = b2s.extract_all_strings(rdata_section.get_data(), min_length)
strings = b2s.extract_all_strings(buffer_rdata, min_length)
fixed_strings = fix_b2s_wide_strings(strings, min_length, buffer_rdata)

# select only UTF-8 strings and adjust offset
static_strings = filter_and_transform_utf8_strings(strings, start_rdata)
static_strings = filter_and_transform_utf8_strings(fixed_strings, start_rdata)

struct_string_addrs = map(lambda c: c.address, get_struct_string_candidates(pe))

Expand All @@ -126,7 +167,7 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt
if not (start_rdata <= address < end_rdata):
continue

split_strings(static_strings, address)
split_strings(static_strings, address, min_length)

return static_strings

Expand Down

0 comments on commit a7b7ea3

Please sign in to comment.