diff --git a/include/remill/Arch/Instruction.h b/include/remill/Arch/Instruction.h index e13ccef2d..77aa99072 100644 --- a/include/remill/Arch/Instruction.h +++ b/include/remill/Arch/Instruction.h @@ -215,6 +215,12 @@ class Instruction { // Is this instruction decoded within the context of a delay slot? bool in_delay_slot; + // For x86 it is possible to specify a prefix that overrides the default + // segment register. This attribute by itself is currently not used directly + // by the lifter - it is expeted `Operand`s will include segment reg where appropriate + // but it can be used in different applications. + const Register *segment_override = nullptr; + enum Category { kCategoryInvalid, kCategoryNormal, diff --git a/lib/Arch/Instruction.cpp b/lib/Arch/Instruction.cpp index a132da120..4fd09e149 100644 --- a/lib/Arch/Instruction.cpp +++ b/lib/Arch/Instruction.cpp @@ -692,6 +692,10 @@ std::string Instruction::Serialize(void) const { ss << " " << function; } + if (segment_override) { + ss << "(SEGMENT_OVERRIDE " << segment_override->name << ")"; + } + for (const auto &op : operands) { ss << " " << op.Serialize(); } diff --git a/lib/Arch/X86/Arch.cpp b/lib/Arch/X86/Arch.cpp index 1a655c47c..40c1162ad 100644 --- a/lib/Arch/X86/Arch.cpp +++ b/lib/Arch/X86/Arch.cpp @@ -424,7 +424,6 @@ static void DecodeMemory(Instruction &inst, const xed_decoded_inst_t *xedd, auto iform = xed_decoded_inst_get_iform_enum(xedd); auto iclass = xed_decoded_inst_get_iclass(xedd); auto op_name = xed_operand_name(xedo); - auto segment = xed_decoded_inst_get_seg_reg(xedd, mem_index); auto base = xed_decoded_inst_get_base_reg(xedd, mem_index); auto index = xed_decoded_inst_get_index_reg(xedd, mem_index); auto disp = xed_decoded_inst_get_memory_displacement(xedd, mem_index); @@ -438,24 +437,34 @@ static void DecodeMemory(Instruction &inst, const xed_decoded_inst_t *xedd, size = 16; } - // Deduce the implicit segment register if it is absent. - if (XED_REG_INVALID == segment) { - segment = XED_REG_DS; + auto raw_segment_reg = xed_decoded_inst_get_seg_reg(xedd, mem_index); + auto deduce_segment = [&](auto segment_reg) { + // Deduce the implicit segment register if it is absent. + if (XED_REG_INVALID != segment_reg) { + return segment_reg; + } if (XED_REG_RSP == base_wide || XED_REG_RBP == base_wide) { - segment = XED_REG_SS; + return XED_REG_SS; + } + return XED_REG_DS; + }; + auto ignore_segment = [&](auto segment_reg) { + // On AMD64, only the `FS` and `GS` segments are non-zero. + if (Is64Bit(inst.arch_name) && XED_REG_FS != segment_reg && + XED_REG_GS != segment_reg) { + return XED_REG_INVALID; } - } - // On AMD64, only the `FS` and `GS` segments are non-zero. - if (Is64Bit(inst.arch_name) && XED_REG_FS != segment && - XED_REG_GS != segment) { - segment = XED_REG_INVALID; + // AGEN operands, e.g. for the `LEA` instuction, can be marked with an + // explicit segment, but it is ignored. + if (XED_OPERAND_AGEN == op_name) { + return XED_REG_INVALID; + } - // AGEN operands, e.g. for the `LEA` instuction, can be marked with an - // explicit segment, but it is ignored. - } else if (XED_OPERAND_AGEN == op_name) { - segment = XED_REG_INVALID; - } + // No need to ignore it + return segment_reg; + }; + auto segment_reg = ignore_segment(deduce_segment(raw_segment_reg)); // Special case: `POP [xSP + ...] uses the value of `xSP` after incrementing // it by the stack width. @@ -470,7 +479,7 @@ static void DecodeMemory(Instruction &inst, const xed_decoded_inst_t *xedd, op.addr.address_size = xed_decoded_inst_get_memop_address_width(xedd, mem_index); - op.addr.segment_base_reg = SegBaseRegOp(segment, op.addr.address_size); + op.addr.segment_base_reg = SegBaseRegOp(segment_reg, op.addr.address_size); op.addr.base_reg = RegOp(base); op.addr.index_reg = RegOp(index); op.addr.scale = XED_REG_INVALID != index ? static_cast(scale) : 0; @@ -987,6 +996,13 @@ bool X86Arch::DecodeInstruction(uint64_t address, std::string_view inst_bytes, // instuction implementation. auto xedi = xed_decoded_inst_inst(xedd); auto num_operands = xed_decoded_inst_noperands(xedd); + + auto xedv = xed_decoded_inst_operands_const(xedd); + if (xed_operand_values_has_segment_prefix(xedv)) { + auto reg_name = xed_reg_enum_t2str(xed_operand_values_segment_prefix(xedv)); + inst.segment_override = RegisterByName(reg_name); + } + for (auto i = 0U; i < num_operands; ++i) { auto xedo = xed_inst_operand(xedi, i); if (XED_OPVIS_SUPPRESSED != xed_operand_operand_visibility(xedo)) {