| Index: syzygy/core/disassembler_util.cc
|
| diff --git a/syzygy/core/disassembler_util.cc b/syzygy/core/disassembler_util.cc
|
| index 7be5c8781853644f4d74abb1c39bed05a1411cca..b7c9a2019729bea000cb12a77568069962a4cedc 100644
|
| --- a/syzygy/core/disassembler_util.cc
|
| +++ b/syzygy/core/disassembler_util.cc
|
| @@ -24,55 +24,311 @@ namespace core {
|
|
|
| namespace {
|
|
|
| -// Return the size of a 3-byte VEX encoded instruction.
|
| +// Escape byte (0xC4) that introduces a 3-byte VEX encoded instruction.
|
| +const uint8_t kThreeByteVexOpcode = 0xC4;
|
| +
|
| +// Structure representing a Mod R/M byte, it has the following format:
|
| +// +---+---+---+---+---+---+---+---+
|
| +// | mod |reg/opcode | r/m |
|
| +// +---+---+---+---+---+---+---+---+
|
| +//
|
| +// Here's a description of the different fields (from
|
| +// https://en.wikipedia.org/wiki/VEX_prefix):
|
| +// - mod: combined with the r/m field, encodes either 8 registers or 24
|
| +// addressing modes. Also encodes opcode information for some
|
| +// instructions.
|
| +// - reg/opcode: specifies either a register or three more bits of
|
| +// opcode information, as specified in the primary opcode byte.
|
| +// - r/m: can specify a register as an operand, or combine with the mod
|
| +// field to encode an addressing mode.
|
| +//
|
| +// The |mod| field can have the following values:
|
| +// - 0b00: Register indirect addressing mode or SIB with no displacement
|
| +// (if r/m = 0b100) or displacement only addressing mode (if r/m = 0b101).
|
| +// - 0b01: One-byte signed displacement follows addressing mode byte(s).
|
| +// - 0b10: Four-byte signed displacement follows addressing mode byte(s).
|
| +// - 0b11: Register addressing mode.
|
| +struct ModRMByte {
|
| + // Constructor, stores |value| as the raw byte backing the bit fields below.
|
| + // @param value The raw value used to initialize this Mod R/M byte.
|
| + explicit ModRMByte(uint8_t value) : raw_value(value) {}
|
| +
|
| + union {  // The byte is accessible raw or through the named bit fields.
|
| + uint8_t raw_value;
|
| + struct {  // NOTE(review): assumes bit fields are allocated LSB-first - confirm.
|
| + uint8_t r_m : 3;  // r/m field.
|
| + uint8_t reg_or_opcode : 3;  // reg/opcode field.
|
| + uint8_t mod : 2;  // mod field.
|
| + };
|
| + };
|
| +};
|
| +
|
| +// Calculates the number of bytes used to encode a Mod R/M operand (the Mod R/M
|
| +// byte read from |ci->code[4]|, the optional SIB byte and the displacement).
|
| +// @param ci The code information for this instruction.
|
| +// @param has_register_addressing_mode True if |mod| == 0b11 is supported.
|
| +// @returns the total size of this Mod R/M operand (in bytes), 0 on failure.
|
| +size_t GetModRMOperandBytesSize(const _CodeInfo* ci,
|
| + bool has_register_addressing_mode) {
|
| + DCHECK_GE(ci->codeLen, 5);
|
| +
|
| + // In the memory addressing modes handled below, an |r_m| value of 0b100
|
| + // means that the operand uses an additional SIB (Scale*Index+Base) byte.
|
| + const uint8_t kSIBValue = 0b100;
|
| + ModRMByte modRM_byte(ci->code[4]);
|
| +
|
| + switch (modRM_byte.mod) {
|
| + case 0b00: {
|
| + if (modRM_byte.r_m == kSIBValue) {
|
| + CHECK_GE(ci->codeLen, 6);  // The SIB byte at |ci->code[5]| is read below.
|
| + // The SIB byte has the following layout:
|
| + // +---+---+---+---+---+---+---+---+
|
| + // | scale | index | base |
|
| + // +---+---+---+---+---+---+---+---+
|
| + //
|
| + // If |base| = 5 then there's an additional 4 bytes used to encode the
|
| + // displacement, e.g.:
|
| + // vpbroadcastd ymm0, DWORD PTR [ebp+eax*8+0x76543210]
|
| + const uint8_t kSIBBaseMask = 0b111;
|
| + if ((ci->code[5] & kSIBBaseMask) == 5)
|
| + return 6;  // Mod R/M + SIB + 4-byte displacement.
|
| + // If |base| != 5 then there's just the SIB byte, e.g.:
|
| + // vpbroadcastd ymm0, DWORD PTR [ecx+edx*1]
|
| + return 2;  // Mod R/M + SIB.
|
| + }
|
| + if (modRM_byte.r_m == 0b101) {
|
| + // Displacement only addressing mode, e.g.:
|
| + // vpbroadcastb xmm2, BYTE PTR ds:0x12345678
|
| + return 5;  // Mod R/M + 4-byte displacement.
|
| + }
|
| + // Register indirect addressing mode, e.g.:
|
| + // vpbroadcastb xmm2, BYTE PTR [eax]
|
| + return 1;  // Mod R/M only.
|
| + }
|
| + case 0b01: {
|
| + // One-byte displacement.
|
| + if (modRM_byte.r_m == kSIBValue) {
|
| + // Additional SIB byte, e.g.:
|
| + // vpbroadcastb xmm2, BYTE PTR [eax+edx*1+0x42]
|
| + return 3;  // Mod R/M + SIB + 1-byte displacement.
|
| + }
|
| + // No SIB byte, e.g.:
|
| + // vpbroadcastb xmm2, BYTE PTR [eax+0x42]
|
| + return 2;  // Mod R/M + 1-byte displacement.
|
| + }
|
| + case 0b10: {
|
| + // Four-byte displacement.
|
| + if (modRM_byte.r_m == kSIBValue) {
|
| + // Additional SIB byte, e.g.:
|
| + // vpbroadcastb xmm0, BYTE PTR [edx+edx*1+0x12345678]
|
| + return 6;  // Mod R/M + SIB + 4-byte displacement.
|
| + }
|
| + // No SIB byte, e.g.:
|
| + // vpbroadcastb xmm0, BYTE PTR [eax+0x34567812]
|
| + return 5;  // Mod R/M + 4-byte displacement.
|
| + }
|
| + case 0b11:
|
| + // Register addressing mode (the operand is a register, not memory), e.g.:
|
| + // vpbroadcastb xmm2, xmm0
|
| + if (has_register_addressing_mode)
|
| + return 1;  // Mod R/M only.
|
| + LOG(ERROR) << "Unexpected |mod| value of 0b11 for an instruction that "
|
| + << "doesn't support it.";
|
| + return 0;
|
| + default:  // |mod| is a 2-bit field and its 4 values are handled above.
|
| + NOTREACHED();
|
| + }
|
| +
|
| + return 0;
|
| +}
|
| +
|
| +// Structure representing a 3-byte VEX encoded instruction.
|
| //
|
| // The layout of these instructions is as follows, starting with a byte with
|
| // value 0xC4:
|
| -// - First byte:
|
| +// - Opcode indicating that this is a 3-byte VEX instruction:
|
| // +---+---+---+---+---+---+---+---+
|
| // | 1 1 0 0 0 1 0 0 |
|
| // +---+---+---+---+---+---+---+---+
|
| -// - Second byte:
|
| +// - First byte:
|
| // +---+---+---+---+---+---+---+---+
|
| // |~R |~X |~B | map_select |
|
| // +---+---+---+---+---+---+---+---+
|
| -// - Third byte:
|
| +// - Second byte:
|
| // +---+---+---+---+---+---+---+---+
|
| // |W/E| ~vvvv | L | pp |
|
| // +---+---+---+---+---+---+---+---+
|
| -// - Fourth byte: The opcode for this instruction.
|
| +// - Third byte: The opcode for this instruction.
|
| //
|
| -// |map_select| Indicates the opcode map that should be used for this
|
| -// instruction.
|
| +// If this instruction takes some operands then it's followed by a ModR/M byte
|
| +// and some optional bytes to represent the operand. We don't represent these
|
| +// optional bytes here.
|
| //
|
| -// See http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix
|
| +// See
|
| +// http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix
|
| // for more details.
|
| +struct ThreeBytesVexInstruction {
|
| + explicit ThreeBytesVexInstruction(const uint8_t* data) {  // Reads |data[0..3]|.
|
| + DCHECK_NE(nullptr, data);
|
| + CHECK_EQ(kThreeByteVexOpcode, data[0]);  // Must start with the 0xC4 byte.
|
| + first_byte = data[1];
|
| + second_byte = data[2];
|
| + opcode = data[3];
|
| + }
|
| +
|
| + // Checks if this instruction matches the expectations that we have for it.
|
| + //
|
| + // It compares the value of several fields that can have an impact on the
|
| + // instruction size and makes sure that they have the expected values.
|
| + // @param expected_inv_rxb The expected value for |inv_rxb|.
|
| + // @param expected_we The expected value for |w_e|.
|
| + // @param expected_l The expected value for |l|.
|
| + // @param expected_pp The expected value for |pp|.
|
| + // @param instruction The instruction name, only used in the error logs.
|
| + // @returns true if all the expectations are met, false otherwise.
|
| + bool MatchExpectations(uint8_t expected_inv_rxb,
|
| + uint8_t expected_we,
|
| + uint8_t expected_l,
|
| + uint8_t expected_pp,
|
| + const char* instruction);
|
| +
|
| + // First byte, contains the inverted R, X and B bits and map_select.
|
| + union {
|
| + uint8_t first_byte;
|
| + struct {  // NOTE(review): assumes bit fields are allocated LSB-first - confirm.
|
| + uint8_t map_select : 5;
|
| + uint8_t inv_rxb : 3;
|
| + };
|
| + };
|
| + // Second byte, contains the W/E, ~vvvv, L and pp values.
|
| + union {
|
| + uint8_t second_byte;
|
| + struct {
|
| + uint8_t pp : 2;
|
| + uint8_t l : 1;
|
| + uint8_t inv_vvvv : 4;
|
| + uint8_t w_e : 1;
|
| + };
|
| + };
|
| +
|
| + // Opcode of this instruction (the byte that follows the two VEX bytes).
|
| + uint8_t opcode;
|
| +};
|
| +
|
| +// Returns true if |value| equals |expected_value|; otherwise logs an error
|
| +// naming |field_name| and |instruction| (values in hex) and returns false.
|
| +bool CheckField(uint8_t expected_value,
|
| + uint8_t value,
|
| + const char* field_name,
|
| + const char* instruction) {
|
| + if (expected_value != value) {  // The casts below print numbers, not chars.
|
| + LOG(ERROR) << "Unexpected " << field_name << " value for the "
|
| + << instruction << " instruction, expecting 0x" << std::hex
|
| + << static_cast<size_t>(expected_value) << " but got 0x"
|
| + << static_cast<size_t>(value) << "." << std::dec;
|
| + return false;
|
| + }
|
| + return true;
|
| +}
|
| +
|
| +bool ThreeBytesVexInstruction::MatchExpectations(uint8_t expected_inv_rxb,
|
| + uint8_t expected_we,
|
| + uint8_t expected_l,
|
| + uint8_t expected_pp,
|
| + const char* instruction) {
|
| + if (!CheckField(expected_inv_rxb, inv_rxb, "inv_rxb", instruction))
|
| + return false;  // Stop at the first mismatch, CheckField has logged it.
|
| + if (!CheckField(expected_we, w_e, "we", instruction))
|
| + return false;  // NOTE(review): logged as "we", the field is |w_e| - confirm.
|
| + if (!CheckField(expected_l, l, "l", instruction))
|
| + return false;
|
| + if (!CheckField(expected_pp, pp, "pp", instruction))
|
| + return false;
|
| + return true;  // All the checked fields have the expected value.
|
| +}
|
| +
|
| +// Returns the size of a 3-byte VEX encoded instruction.
|
| +//
|
| +// NOTE: We only support the instructions that have been encountered in Chrome
|
| +// and there are some restrictions on which variants of these instructions are
|
| +// supported.
|
| size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) {
|
| - DCHECK_EQ(0xC4, ci->code[0]);
|
| - // Switch case based on the opcode map used by this instruction.
|
| - switch (ci->code[1] & 0x1F) {
|
| + // A 3-byte VEX instruction always has a size of 5 bytes or more (the C4
|
| + // constant, the 3 VEX bytes and the mod R/M byte).
|
| + DCHECK_GE(ci->codeLen, 5);
|
| +
|
| + ThreeBytesVexInstruction instruction(ci->code);
|
| +
|
| + const size_t kBaseSize = 4;
|
| + size_t operand_size = 0;
|
| + size_t constants_size = 0;
|
| +
|
| + // Switch case based on the opcode used by this instruction.
|
| + switch (instruction.map_select) {
|
| case 0x02: {
|
| - switch (ci->code[3]) {
|
| - case 0x13: return 5; // vcvtps2ps
|
| - case 0x18: return 5; // vbroadcastss
|
| - case 0x36: return 5; // vpermd
|
| - case 0x58: return 6; // vpbroadcastd
|
| - case 0x5A: return 6; // vbroadcasti128
|
| - case 0x78: return 5; // vpbroadcastb
|
| - case 0x8C: return 5; // vpmaskmovd
|
| - case 0x8E: return 5; // vpmaskmovd
|
| - case 0x90: return 6; // vpgatherdd
|
| + switch (instruction.opcode) {
|
| + case 0x13: // vcvtph2ps
|
| + if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtph2ps"))
|
| + operand_size = GetModRMOperandBytesSize(ci, true);
|
| + break;
|
| + case 0x18: // vbroadcastss
|
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcastss"))
|
| + operand_size = GetModRMOperandBytesSize(ci, true);
|
| + break;
|
| + case 0x36: // vpermd
|
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpermd"))
|
| + operand_size = GetModRMOperandBytesSize(ci, true);
|
| + break;
|
| + case 0x58: // vpbroadcastd
|
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpbroadcastd"))
|
| + operand_size = GetModRMOperandBytesSize(ci, true);
|
| + break;
|
| + case 0x5A: // vbroadcasti128
|
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcasti128"))
|
| + operand_size = GetModRMOperandBytesSize(ci, false);
|
| + break;
|
| + case 0x78: // vpbroadcastb
|
| + if (instruction.MatchExpectations(0b111, 0, 0, 1, "vpbroadcastb"))
|
| + operand_size = GetModRMOperandBytesSize(ci, true);
|
| + break;
|
| + case 0x8C: // vpmaskmovd
|
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpmaskmovd"))
|
| + operand_size = GetModRMOperandBytesSize(ci, false);
|
| + break;
|
| + case 0x90: // vpgatherdd
|
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpgatherdd"))
|
| + operand_size = GetModRMOperandBytesSize(ci, false);
|
| + break;
|
| default:
|
| break;
|
| }
|
| break;
|
| }
|
| case 0x03: {
|
| - switch (ci->code[3]) {
|
| - case 0x00: return 6; // vpermq
|
| - case 0x1D: return 6; // vcvtps2ph
|
| - case 0x38: return 7; // vinserti128
|
| - case 0x39: return 6; // vextracti128
|
| + switch (instruction.opcode) {
|
| + case 0x00: // vpermq
|
| + if (instruction.MatchExpectations(0b111, 1, 1, 1, "vpermq")) {
|
| + operand_size = GetModRMOperandBytesSize(ci, true);
|
| + constants_size = 1;
|
| + }
|
| + break;
|
| + case 0x1D: // vcvtps2ph
|
| + if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtps2ph")) {
|
| + operand_size = GetModRMOperandBytesSize(ci, true);
|
| + constants_size = 1;
|
| + }
|
| + break;
|
| + case 0x38: // vinserti128
|
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vinserti128")) {
|
| + operand_size = GetModRMOperandBytesSize(ci, true);
|
| + constants_size = 1;
|
| + }
|
| + break;
|
| + case 0x39: // vextracti128
|
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vextracti128")) {
|
| + operand_size = GetModRMOperandBytesSize(ci, true);
|
| + constants_size = 1;
|
| + }
|
| default: break;
|
| }
|
| break;
|
| @@ -81,6 +337,9 @@ size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) {
|
| break;
|
| }
|
|
|
| + if (operand_size != 0)
|
| + return kBaseSize + operand_size + constants_size;
|
| +
|
| // Print the instructions that we haven't been able to decompose in a format
|
| // that can easily be pasted into ODA (https://onlinedisassembler.com/).
|
| const int kMaxBytes = 10;
|
| @@ -150,10 +409,9 @@ bool HandleBadDecode(_CodeInfo* ci,
|
|
|
| return true;
|
| }
|
| - }
|
| -
|
| - if (ci->code[0] == 0xC4)
|
| + } else if (ci->code[0] == kThreeByteVexOpcode) {
|
| size = Get3ByteVexEncodedInstructionSize(ci);
|
| + }
|
|
|
| if (size == 0)
|
| return false;
|
|
|