From 1b96965b4b7830666f47509d416b3830f283be81 Mon Sep 17 00:00:00 2001 From: Stephen Eckels Date: Mon, 19 Aug 2024 15:11:46 +0000 Subject: [PATCH] Revert "Optimize pattern sub-matches" This reverts commit 6f4badc4b7b1663c6e1968670f7c1e86e7004438. --- objfile/patterns.go | 62 ++++++--------------------------------------- objfile/scanner.go | 10 ++++---- 2 files changed, 13 insertions(+), 59 deletions(-) diff --git a/objfile/patterns.go b/objfile/patterns.go index 281653e..07ac11e 100644 --- a/objfile/patterns.go +++ b/objfile/patterns.go @@ -2,7 +2,6 @@ package objfile import ( "errors" - "sort" "strconv" "strings" @@ -239,48 +238,11 @@ func RegexpPatternFromYaraPattern(pattern string) (*RegexAndNeedle, error) { return &RegexAndNeedle{patLen, regex_pattern, r, needleOffset, needle}, nil } -func getOrSetRegion(regionMap map[int]map[int]bool, start, end int) bool { - if ends, ok := regionMap[start]; ok { - if ends[end] { - return true - } else { - ends[end] = true - return false - } - } else { - regionMap[start] = map[int]bool{end: true} - return false - } -} - -func regionMapToSlices(regionMap map[int]map[int]bool) [][]int { - totalSize := 0 - keys := make([]int, 0, len(regionMap)) - for key, valueMap := range regionMap { - keys = append(keys, key) - totalSize += len(valueMap) - } - sort.Ints(keys) - result := make([][]int, 0, totalSize) - for _, key := range keys { - values := make([]int, 0, len(regionMap[key])) - for value := range regionMap[key] { - values = append(values, value) - } - sort.Ints(values) - for _, value := range values { - result = append(result, []int{key, value}) - } - } - return result -} - -func FindRegex(data []byte, regexInfo *RegexAndNeedle) [][]int { +func FindRegex(data []byte, regexInfo *RegexAndNeedle) []int { data_len := len(data) - matchMap := make(map[int]map[int]bool) - cacheMap := make(map[int]map[int]bool) + matches := make([]int, 0) - // use an optimized memscan to find all candidates chunks from the much larger haystack + // use an optimized memscan to find some candidates chunks from the much larger haystack needleMatches := findAllOccurrences(data, [][]byte{regexInfo.needle}) for _, needleMatch := range needleMatches { // adjust the window to the pattern start and end @@ -296,16 +258,13 @@ func FindRegex(data []byte, regexInfo *RegexAndNeedle) [][]int { data_end = data_len } - // don't repeat previously scanned chunks - if getOrSetRegion(cacheMap, data_start, data_end) { - continue - } // do the full regex scan on a very small chunk for _, reMatch := range regexInfo.re.FindAllIndex(data[data_start:data_end], -1) { // the match offset is the start index of the chunk + reMatch index start := reMatch[0] + data_start - end := reMatch[1] + data_start - getOrSetRegion(matchMap, start, end) + + //end := reMatch[1] + data_start + matches = append(matches, start) // special case to handle sub-matches, which are skipped by regex but matched by YARA: // AA AA BB CC @@ -315,23 +274,18 @@ func FindRegex(data []byte, regexInfo *RegexAndNeedle) [][]int { // AA BB CC subStart := start + 1 for { - // don't repeat previously scanned chunks - if getOrSetRegion(cacheMap, subStart, data_end) { - break - } subMatches := regexInfo.re.FindAllIndex(data[subStart:data_end], -1) if len(subMatches) == 0 { break } for _, match := range subMatches { - getOrSetRegion(matchMap, match[0]+subStart, match[1]+subStart) + matches = append(matches, match[0]+subStart) } subStart += subMatches[0][0] + 1 } } } - - return regionMapToSlices(matchMap) + return matches } type RegexAndNeedle struct { diff --git a/objfile/scanner.go b/objfile/scanner.go index 359c20f..a4b33da 100644 --- a/objfile/scanner.go +++ b/objfile/scanner.go @@ -94,7 +94,7 @@ func findModuleInitPCHeader(data []byte, sectionBase uint64) []SignatureMatch { } for _, match := range FindRegex(data, x64reg) { - sigPtr := uint64(match[0]) // from int + sigPtr := uint64(match) // from int // this is the pointer offset stored in the instruction // 0x44E06A: 48 8D 0D 4F F0 24 00 lea rcx, off_69D0C0 (result: 0x24f04f) @@ -119,7 +119,7 @@ func findModuleInitPCHeader(data []byte, sectionBase uint64) []SignatureMatch { } for _, match := range FindRegex(data, x86reg) { - sigPtr := uint64(match[0]) // from int + sigPtr := uint64(match) // from int moduleDataPtr := uint64(binary.LittleEndian.Uint32(data[sigPtr+x86sig.moduleDataPtrLoc:][:4])) matches = append(matches, SignatureMatch{ @@ -138,7 +138,7 @@ func findModuleInitPCHeader(data []byte, sectionBase uint64) []SignatureMatch { } for _, match := range FindRegex(data, arm64reg) { - sigPtr := uint64(match[0]) // from int + sigPtr := uint64(match) // from int adrp := binary.LittleEndian.Uint32(data[sigPtr+ARM64_sig.moduleDataPtrADRP:][:4]) add := binary.LittleEndian.Uint32(data[sigPtr+ARM64_sig.moduleDataPtrADD:][:4]) @@ -169,7 +169,7 @@ func findModuleInitPCHeader(data []byte, sectionBase uint64) []SignatureMatch { } for _, match := range FindRegex(data, arm32reg) { - sigPtr := uint64(match[0]) // from int + sigPtr := uint64(match) // from int ldr := binary.LittleEndian.Uint32(data[sigPtr+ARM32_sig.moduleDataPtrLDR:][:4]) // ARM PC relative is always +8 due to legacy nonsense ldr_pointer_stub := uint64((ldr & 0x00000FFF) + 8) @@ -190,7 +190,7 @@ func findModuleInitPCHeader(data []byte, sectionBase uint64) []SignatureMatch { } for _, match := range FindRegex(data, ppcBEreg) { - sigPtr := uint64(match[0]) // from int + sigPtr := uint64(match) // from int moduleDataPtrHi := int64(binary.BigEndian.Uint16(data[sigPtr+PPC_BE_sig.moduleDataPtrHi:][:2])) // addi takes a signed immediate moduleDataPtrLo := int64(int16(binary.BigEndian.Uint16(data[sigPtr+PPC_BE_sig.moduleDataPtrLo:][:2])))