From c2e15d97c705431c44df72912685b7fc35e49ffb Mon Sep 17 00:00:00 2001 From: Romain Guy Date: Fri, 7 Jun 2024 13:02:29 -0700 Subject: [PATCH] Fix call references Inline calls were not decoded properly. This change also improves branch counts. The new label shows: -- 40 branches (32 + 8) The parentheses indicate how many branches are before the first function return (32 here), and how many come after (8) and are in the epilogue. --- build.gradle.kts | 2 +- .../romainguy/kotlin/explorer/code/Code.kt | 17 ++++++- .../kotlin/explorer/code/CodeBuilder.kt | 47 +++++++++++++----- .../kotlin/explorer/code/DataModels.kt | 16 +++--- .../kotlin/explorer/oat/OatDumpParser.kt | 49 +++++++++++++++---- 5 files changed, 101 insertions(+), 30 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 2688f69e..4fa594c8 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -73,7 +73,7 @@ compose.desktop { targetFormats(TargetFormat.Dmg) - packageVersion = "1.4.1" + packageVersion = "1.4.2" packageName = "Kotlin Explorer" description = "Kotlin Explorer" vendor = "Romain Guy" diff --git a/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/Code.kt b/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/Code.kt index 2dd47a26..f11ecc09 100644 --- a/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/Code.kt +++ b/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/Code.kt @@ -17,6 +17,8 @@ package dev.romainguy.kotlin.explorer.code import androidx.collection.IntIntMap +import androidx.collection.IntObjectMap +import androidx.collection.mutableIntObjectMapOf /** * A data model representing disassembled code @@ -40,15 +42,28 @@ class Code( companion object { fun fromClasses(classes: List, codeStyle: CodeStyle = CodeStyle()): Code { return buildCode(codeStyle) { + val indexedMethods = buildIndexedMethods(classes) classes.forEachIndexed { classIndex, clazz -> startClass(clazz) val notLastClass = classIndex < classes.size - 1 clazz.methods.forEachIndexed { methodIndex, 
method -> - writeMethod(method) + writeMethod(method, indexedMethods) if (methodIndex < clazz.methods.size - 1 || notLastClass) writeLine("") } } }.build() } + + private fun buildIndexedMethods(classes: List): IntObjectMap { + val map = mutableIntObjectMapOf() + classes.forEach { clazz -> + clazz.methods.forEach { method -> + if (method.index != -1) { + map[method.index] = method + } + } + } + return map + } } } diff --git a/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/CodeBuilder.kt b/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/CodeBuilder.kt index e5a9c987..bd43a7b9 100644 --- a/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/CodeBuilder.kt +++ b/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/CodeBuilder.kt @@ -17,6 +17,7 @@ package dev.romainguy.kotlin.explorer.code import androidx.collection.IntIntPair +import androidx.collection.IntObjectMap import androidx.collection.mutableIntIntMapOf fun buildCode(codeStyle: CodeStyle = CodeStyle(), builderAction: CodeBuilder.() -> Unit): CodeBuilder { @@ -45,11 +46,11 @@ class CodeBuilder(private val codeStyle: CodeStyle) { writeLine(clazz.header) } - fun writeMethod(method: Method) { + fun writeMethod(method: Method, indexedMethods: IntObjectMap) { startMethod(method) val instructionSet = method.instructionSet instructionSet.instructions.forEach { instruction -> - writeInstruction(instructionSet, instruction) + writeInstruction(instructionSet, instruction, indexedMethods) } endMethod() } @@ -62,26 +63,41 @@ class CodeBuilder(private val codeStyle: CodeStyle) { val instructionCount = method.instructionSet.instructions.size writeLine("-- $instructionCount instruction${if (instructionCount > 1) "s" else ""}") - val branches = countBranches(method.instructionSet) + val (pre, post) = countBranches(method.instructionSet) + val branches = pre + post if (branches > 0) { sb.append(" ".repeat(codeStyle.indent)) - writeLine("-- $branches branch${if (branches > 1) "es" else ""}") + writeLine("-- 
$branches branch${if (branches > 1) "es" else ""} ($pre + $post)") } } - private fun countBranches(instructionSet: InstructionSet): Int { - var count = 0 + private fun countBranches(instructionSet: InstructionSet): IntIntPair { + var preReturnCount = 0 + var postReturnCount = 0 + var returnSeen = false + val branchInstructions = instructionSet.isa.branchInstructions + val returnInstructions = instructionSet.isa.returnInstructions + instructionSet.instructions.forEach { instruction -> val code = instruction.code val start = code.indexOf(": ") + 2 val end = code.indexOfFirst(start) { c -> !c.isLetter() } val opCode = code.substring(start, end) - if (branchInstructions.contains(opCode)) { - count++ + if (returnInstructions.contains(opCode)) { + returnSeen = true + } else { + if (branchInstructions.contains(opCode)) { + if (returnSeen) { + postReturnCount++ + } else { + preReturnCount++ + } + } } } - return count + + return IntIntPair(preReturnCount, postReturnCount) } private fun endMethod() { @@ -95,7 +111,11 @@ class CodeBuilder(private val codeStyle: CodeStyle) { lastMethodLineNumber = -1 } - private fun writeInstruction(instructionSet: InstructionSet, instruction: Instruction) { + private fun writeInstruction( + instructionSet: InstructionSet, + instruction: Instruction, + indexedMethods: IntObjectMap + ) { sb.append(" ".repeat(codeStyle.indent)) methodAddresses[instruction.address] = line @@ -118,7 +138,12 @@ class CodeBuilder(private val codeStyle: CodeStyle) { sb.append(instruction.code) if (instruction.callAddress != -1) { - val callReference = instructionSet.methodReferences[instruction.callAddress] + val set = if (instruction.callAddressMethod == -1) { + instructionSet + } else { + indexedMethods[instruction.callAddressMethod]?.instructionSet + } + val callReference = set?.methodReferences?.get(instruction.callAddress) if (callReference != null) { sb.append(" → ").append(callReference.name) } diff --git 
a/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/DataModels.kt b/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/DataModels.kt index 1b4e1e13..01c3d341 100644 --- a/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/DataModels.kt +++ b/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/code/DataModels.kt @@ -18,10 +18,10 @@ package dev.romainguy.kotlin.explorer.code import androidx.collection.* -enum class ISA(val branchInstructions: ScatterSet) { - ByteCode(scatterSetOf("if")), - Dex(scatterSetOf("if")), - Oat(scatterSetOf()), +enum class ISA(val branchInstructions: ScatterSet, val returnInstructions: ScatterSet) { + ByteCode(scatterSetOf("if"), scatterSetOf("areturn", "ireturn", "lreturn", "dreturn", "freturn", "return")), + Dex(scatterSetOf("if"), scatterSetOf("return")), + Oat(scatterSetOf(), scatterSetOf()), X86_64( scatterSetOf( "je", @@ -46,14 +46,15 @@ enum class ISA(val branchInstructions: ScatterSet) { "jnae", "jbe", "jna" - ) + ), + scatterSetOf("ret") ), - Arm64(scatterSetOf("b", "bl", "cbz", "cbnz", "tbz", "tbnz")) + Arm64(scatterSetOf("b", "bl", "cbz", "cbnz", "tbz", "tbnz"), scatterSetOf("ret")) } data class Class(val header: String, val methods: List) -data class Method(val header: String, val instructionSet: InstructionSet) +data class Method(val header: String, val instructionSet: InstructionSet, val index: Int = -1) data class InstructionSet( val isa: ISA, @@ -66,6 +67,7 @@ data class Instruction( val code: String, val jumpAddress: Int, val callAddress: Int = -1, + val callAddressMethod: Int = -1, val lineNumber: Int = -1 ) diff --git a/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/oat/OatDumpParser.kt b/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/oat/OatDumpParser.kt index 9492e094..1690f225 100644 --- a/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/oat/OatDumpParser.kt +++ b/src/jvmMain/kotlin/dev/romainguy/kotlin/explorer/oat/OatDumpParser.kt @@ -24,7 +24,7 @@ import dev.romainguy.kotlin.explorer.code.CodeContent.Error 
import dev.romainguy.kotlin.explorer.code.CodeContent.Success private val ClassNameRegex = Regex("^\\d+: L(?[^;]+); \\(offset=0x$HexDigit+\\) \\(type_idx=\\d+\\).+") -private val MethodRegex = Regex("^\\s+\\d+:\\s+(?.+)\\s+\\(dex_method_idx=\\d+\\)") +private val MethodRegex = Regex("^\\s+\\d+:\\s+(?.+)\\s+\\(dex_method_idx=(?\\d+)\\)") private val CodeRegex = Regex("^\\s+0x(?
$HexDigit+):\\s+$HexDigit+\\s+(?.+)") private val DexCodeRegex = Regex("^\\s+0x(?
$HexDigit+):\\s+($HexDigit+\\s+)+\\|\\s+(?.+)") @@ -37,6 +37,7 @@ private val Arm64MethodCallRegex = Regex("^blr lr$") private val X86MethodCallRegex = Regex("^TODO$") // TODO: implement x86 private val DexMethodReferenceRegex = Regex("^\\s+StackMap.+dex_pc=0x(?$HexDigit+),.+$") +private val DexInlineInfoRegex = Regex("^\\s+InlineInfo.+dex_pc=0x(?$HexDigit+),\\s+method_index=0x(?$HexDigit+).+$") internal class OatDumpParser { private var isa = ISA.Arm64 @@ -96,22 +97,35 @@ internal class OatDumpParser { val line = peek() when { ClassNameRegex.matches(line) -> break - MethodRegex.matches(line) -> add(readMethod(jumpRegex, methodCallRegex)) - else -> next() + else -> { + // Skip to the next line first and then read the method + next() + + val match = MethodRegex.matchEntire(line) + if (match != null) { + add(readMethod(match, jumpRegex, methodCallRegex)) + } + } } } } return Class("class $className", methods) } - private fun PeekingIterator.readMethod(jumpRegex: Regex, methodCallRegex: Regex): Method { - val match = MethodRegex.matchEntire(next()) ?: throw IllegalStateException("Should not happen") - val method = match.getValue("method") + private fun PeekingIterator.readMethod( + match: MatchResult, + jumpRegex: Regex, + methodCallRegex: Regex + ): Method { consumeUntil("DEX CODE:") val methodReferences = readMethodReferences() + consumeUntil("CODE:") val instructions = readNativeInstructions(jumpRegex, methodCallRegex) - return Method(method, InstructionSet(isa, instructions, methodReferences)) + + val method = match.getValue("method") + val index = match.getValue("methodIndex").toInt(16) + return Method(method, InstructionSet(isa, instructions, methodReferences), index) } private fun PeekingIterator.readMethodReferences(): IntObjectMap { @@ -176,20 +190,35 @@ internal class OatDumpParser { methodCallRegex: Regex ): Instruction { val address = match.getValue("address") - val code = match.getValue("code") - val callAddress = if (methodCallRegex.matches(code)) { + + 
var callAddress = if (methodCallRegex.matches(code)) { DexMethodReferenceRegex.matchEntire(iterator.peek())?.getValue("callAddress")?.toInt(16) ?: -1 } else { -1 } + val callAddressMethod = if (callAddress != -1) { + // Skip the StackMap line + iterator.next() + // Check the InlineInfo if present + val methodIndex = DexInlineInfoRegex.matchEntire(iterator.peek()) + if (methodIndex != null) { + callAddress = methodIndex.getValue("callAddress").toInt(16) + methodIndex.getValue("methodIndex").toInt(16) + } + -1 + } else { + -1 + } + val jumpAddress = if (callAddress == -1) { jumpRegex.matchEntire(code)?.getValue("address")?.toInt(16) ?: -1 } else { -1 } - return Instruction(address.toInt(16), "0x$address: $code", jumpAddress, callAddress) + val codeAddress = address.toInt(16) + return Instruction(codeAddress, "0x$address: $code", jumpAddress, callAddress, callAddressMethod) } }