Skip to content

Commit

Permalink
Revert "Optimize pattern sub-matches"
Browse files Browse the repository at this point in the history
This reverts commit 6f4badc.
  • Loading branch information
stevemk14ebr committed Aug 19, 2024
1 parent e3a2297 commit 1b96965
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 59 deletions.
62 changes: 8 additions & 54 deletions objfile/patterns.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package objfile

import (
"errors"
"sort"
"strconv"
"strings"

Expand Down Expand Up @@ -239,48 +238,11 @@ func RegexpPatternFromYaraPattern(pattern string) (*RegexAndNeedle, error) {
return &RegexAndNeedle{patLen, regex_pattern, r, needleOffset, needle}, nil
}

// getOrSetRegion reports whether the region [start, end] has already been
// recorded as true in regionMap. If it has not, the region is recorded and
// false is returned, so the next call with the same bounds returns true.
//
// regionMap is keyed by region start; each value is the set of end offsets
// seen for that start.
func getOrSetRegion(regionMap map[int]map[int]bool, start, end int) bool {
	knownEnds, seenStart := regionMap[start]
	if !seenStart {
		// first region beginning at this start offset
		regionMap[start] = map[int]bool{end: true}
		return false
	}
	if knownEnds[end] {
		return true
	}
	knownEnds[end] = true
	return false
}

// regionMapToSlices flattens regionMap into a list of [start, end] pairs,
// ordered by ascending start and then by ascending end.
//
// Every end key present in an inner map yields a pair, regardless of the
// boolean stored for it. The returned slice is never nil.
func regionMapToSlices(regionMap map[int]map[int]bool) [][]int {
	pairCount := 0
	for _, ends := range regionMap {
		pairCount += len(ends)
	}

	pairs := make([][]int, 0, pairCount)
	for start, ends := range regionMap {
		for end := range ends {
			pairs = append(pairs, []int{start, end})
		}
	}

	// map iteration order is random; every pair is distinct, so ordering by
	// (start, end) yields a fully deterministic result
	sort.Slice(pairs, func(i, j int) bool {
		if pairs[i][0] != pairs[j][0] {
			return pairs[i][0] < pairs[j][0]
		}
		return pairs[i][1] < pairs[j][1]
	})
	return pairs
}

func FindRegex(data []byte, regexInfo *RegexAndNeedle) [][]int {
func FindRegex(data []byte, regexInfo *RegexAndNeedle) []int {
data_len := len(data)
matchMap := make(map[int]map[int]bool)
cacheMap := make(map[int]map[int]bool)
matches := make([]int, 0)

// use an optimized memscan to find all candidate chunks from the much larger haystack
// use an optimized memscan to find some candidate chunks from the much larger haystack
needleMatches := findAllOccurrences(data, [][]byte{regexInfo.needle})
for _, needleMatch := range needleMatches {
// adjust the window to the pattern start and end
Expand All @@ -296,16 +258,13 @@ func FindRegex(data []byte, regexInfo *RegexAndNeedle) [][]int {
data_end = data_len
}

// don't repeat previously scanned chunks
if getOrSetRegion(cacheMap, data_start, data_end) {
continue
}
// do the full regex scan on a very small chunk
for _, reMatch := range regexInfo.re.FindAllIndex(data[data_start:data_end], -1) {
// the match offset is the start index of the chunk + reMatch index
start := reMatch[0] + data_start
end := reMatch[1] + data_start
getOrSetRegion(matchMap, start, end)

//end := reMatch[1] + data_start
matches = append(matches, start)

// special case to handle sub-matches, which are skipped by regex but matched by YARA:
// AA AA BB CC
Expand All @@ -315,23 +274,18 @@ func FindRegex(data []byte, regexInfo *RegexAndNeedle) [][]int {
// AA BB CC
subStart := start + 1
for {
// don't repeat previously scanned chunks
if getOrSetRegion(cacheMap, subStart, data_end) {
break
}
subMatches := regexInfo.re.FindAllIndex(data[subStart:data_end], -1)
if len(subMatches) == 0 {
break
}
for _, match := range subMatches {
getOrSetRegion(matchMap, match[0]+subStart, match[1]+subStart)
matches = append(matches, match[0]+subStart)
}
subStart += subMatches[0][0] + 1
}
}
}

return regionMapToSlices(matchMap)
return matches
}

type RegexAndNeedle struct {
Expand Down
10 changes: 5 additions & 5 deletions objfile/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func findModuleInitPCHeader(data []byte, sectionBase uint64) []SignatureMatch {
}

for _, match := range FindRegex(data, x64reg) {
sigPtr := uint64(match[0]) // from int
sigPtr := uint64(match) // from int

// this is the pointer offset stored in the instruction
// 0x44E06A: 48 8D 0D 4F F0 24 00 lea rcx, off_69D0C0 (result: 0x24f04f)
Expand All @@ -119,7 +119,7 @@ func findModuleInitPCHeader(data []byte, sectionBase uint64) []SignatureMatch {
}

for _, match := range FindRegex(data, x86reg) {
sigPtr := uint64(match[0]) // from int
sigPtr := uint64(match) // from int

moduleDataPtr := uint64(binary.LittleEndian.Uint32(data[sigPtr+x86sig.moduleDataPtrLoc:][:4]))
matches = append(matches, SignatureMatch{
Expand All @@ -138,7 +138,7 @@ func findModuleInitPCHeader(data []byte, sectionBase uint64) []SignatureMatch {
}

for _, match := range FindRegex(data, arm64reg) {
sigPtr := uint64(match[0]) // from int
sigPtr := uint64(match) // from int

adrp := binary.LittleEndian.Uint32(data[sigPtr+ARM64_sig.moduleDataPtrADRP:][:4])
add := binary.LittleEndian.Uint32(data[sigPtr+ARM64_sig.moduleDataPtrADD:][:4])
Expand Down Expand Up @@ -169,7 +169,7 @@ func findModuleInitPCHeader(data []byte, sectionBase uint64) []SignatureMatch {
}

for _, match := range FindRegex(data, arm32reg) {
sigPtr := uint64(match[0]) // from int
sigPtr := uint64(match) // from int
ldr := binary.LittleEndian.Uint32(data[sigPtr+ARM32_sig.moduleDataPtrLDR:][:4])
// ARM PC relative is always +8 due to legacy nonsense
ldr_pointer_stub := uint64((ldr & 0x00000FFF) + 8)
Expand All @@ -190,7 +190,7 @@ func findModuleInitPCHeader(data []byte, sectionBase uint64) []SignatureMatch {
}

for _, match := range FindRegex(data, ppcBEreg) {
sigPtr := uint64(match[0]) // from int
sigPtr := uint64(match) // from int
moduleDataPtrHi := int64(binary.BigEndian.Uint16(data[sigPtr+PPC_BE_sig.moduleDataPtrHi:][:2]))
// addi takes a signed immediate
moduleDataPtrLo := int64(int16(binary.BigEndian.Uint16(data[sigPtr+PPC_BE_sig.moduleDataPtrLo:][:2])))
Expand Down

0 comments on commit 1b96965

Please sign in to comment.