Chromium Code Reviews| Index: syzygy/core/disassembler_util.cc |
| diff --git a/syzygy/core/disassembler_util.cc b/syzygy/core/disassembler_util.cc |
| index 7be5c8781853644f4d74abb1c39bed05a1411cca..d9e06e0f09979633bd074b21b022a0e43122fad0 100644 |
| --- a/syzygy/core/disassembler_util.cc |
| +++ b/syzygy/core/disassembler_util.cc |
| @@ -24,55 +24,301 @@ namespace core { |
| namespace { |
| -// Return the size of a 3-byte VEX encoded instruction. |
| +// Opcode of the 3-byte VEX instructions. |
| +const uint8_t kThreeByteVexOpcode = 0xC4; |
| + |
| +// Structure representing a Mod R/M byte, it has the following format: |
| +// +---+---+---+---+---+---+---+---+ |
| +// | mod |reg/opcode | r/m | |
| +// +---+---+---+---+---+---+---+---+ |
| +// |
| +// Here's a description of the different fields (from |
| +// https://en.wikipedia.org/wiki/VEX_prefix): |
| +// - mod: combined with the r/m field, encodes either 8 registers or 2 |
| +// addressing modes. Also encodes opcode information for some |
| +// instructions. |
| +// - reg/opcode: specifies either a register or three more bits of |
| +// opcode information, as specified in the primary opcode byte. |
| +// - r/m: can specify a register as an operand, or combine with the mod |
| +// field to encode an addressing mode. |
| +// |
| +// The |mod| field can have the following values: |
| +// - 0b00: Register indirect addressing mode or SIB with no displacement |
| +// (if R/M = 0b100) or displacement only addressing mode (if R/M = 0b101). |
| +// - 0b01: One-byte signed displacement follows addressing mode byte(s). |
| +// - 0b10: Four-byte signed displacement follows addressing mode byte(s). |
| +// - 0b11: Register addressing mode. |
| +struct ModRMByte { |
| + // Constructor. |
| + // @param value The Value used to initialize this Mod R/M byte. |
| + explicit ModRMByte(uint8_t value) : raw_value(value) {} |
| + |
| + union { |
| + uint8_t raw_value; |
| + struct { |
| + uint8_t r_m : 3; |
| + uint8_t reg_or_opcode : 3; |
| + uint8_t mod : 2; |
| + }; |
| + }; |
| +}; |
| + |
| +// Calculates the number of bytes used to encode a Mod R/M operand. |
| +// @param modRMByte The Mod R/M byte. |
| +// @param no_register_addressing_mode Indicates if the instruction supports |
| +// the register addressing mode (value of |mod| of 0b11). |
| +// @returns the total size of this Mod R/M operand (in bytes), 0 on failure. |
| +size_t GetModRMOperandBytesSize(const ModRMByte& modRMByte, |
| + bool no_register_addressing_mode) { |
| + // If the SIB (Scale*Index+Base) bit is set then the operand uses an |
|
huangs
2017/04/28 21:58:04
NIT: SIB is not a bit, but a special value. Maybe
Sébastien Marchand
2017/05/01 16:04:32
Done.
|
| + // additional SIB byte. |
| + const uint8_t kSIBMask = 0b100; |
|
huangs
2017/04/28 21:58:04
NIT: SIB is not a mask; 0b101, 0b110, 0b111 are un
Sébastien Marchand
2017/05/01 16:04:31
Ha, good point :)
|
| + |
| + switch (modRMByte.mod) { |
| + case 0b00: { |
| + if (modRMByte.r_m == kSIBMask) { |
| + // SIB with no displacement, e.g.: |
| + // vpbroadcastb xmm2, BYTE PTR [edx+eax*2] |
|
huangs
2017/04/28 21:58:04
NIT: Inconsistent tabbing for disassembly examples
Sébastien Marchand
2017/05/01 16:04:31
Done.
|
| + return 2; |
|
huangs
2017/04/28 21:58:04
Depending how far you want to go down the rabbit h
Sébastien Marchand
2017/05/01 16:04:31
Thanks! This is a simple special case and it doesn
|
| + } else if (modRMByte.r_m == 0b101) { |
|
huangs
2017/04/28 21:58:04
Don't need "else" if returning? Same below.
Sébastien Marchand
2017/05/01 16:04:32
Done.
|
| + // Displacement only addressing mode, e.g.: |
| + // vpbroadcastb xmm2, BYTE PTR ds:0x12345678 |
| + return 5; |
| + } else { |
| + // Register indirect addressing mode, e.g.: |
| + // vpbroadcastb xmm2, BYTE PTR [eax] |
| + return 1; |
| + } |
| + } |
| + case 0b01: { |
| + // One-byte displacement. |
| + if (modRMByte.r_m == kSIBMask) { |
| + // Additional SIB byte, e.g.: |
| + // vpbroadcastb xmm2, BYTE PTR [eax+edx*1+0x42] |
| + return 3; |
| + } else { |
| + // No SIB byte, e.g.: |
| + // vpbroadcastb xmm2, BYTE PTR [eax+0x42] |
| + return 2; |
| + } |
| + } |
| + case 0b10: { |
| + // One-byte displacement. |
| + if (modRMByte.r_m == kSIBMask) { |
| + // Additional SIB byte, e.g.: |
| + // vpbroadcastb xmm0, BYTE PTR [edx+edx*1+0x12345678] |
| + return 6; |
| + } else { |
| + // No SIB byte, e.g.: |
| + // vpbroadcastb xmm0, BYTE PTR [eax+0x34567812] |
| + return 5; |
| + } |
| + } |
| + case 0b11: |
| + // Register addressing mode, e.g.: |
| + // vpbroadcastb xmm2, BYTE PTR [eax] |
| + if (no_register_addressing_mode) { |
| + LOG(ERROR) << "Unexpected |mod| value of 0b11 for an instruction that " |
| + << "doesn't support it."; |
| + return 0; |
| + } |
| + return 1; |
| + default: |
| + NOTREACHED(); |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +// Structure representing a 3-byte VEX encoded instruction. |
| // |
| // The layout of these instructions is as follows, starting with a byte with |
| // value 0xC4: |
| -// - First byte: |
| +// - Opcode indicating that this is a 3-byte VEX instruction: |
| // +---+---+---+---+---+---+---+---+ |
| // | 1 1 0 0 0 1 0 0 | |
| // +---+---+---+---+---+---+---+---+ |
| -// - Second byte: |
| +// - First byte: |
| // +---+---+---+---+---+---+---+---+ |
| // |~R |~X |~B | map_select | |
| // +---+---+---+---+---+---+---+---+ |
| -// - Third byte: |
| +// - Second byte: |
| // +---+---+---+---+---+---+---+---+ |
| // |W/E| ~vvvv | L | pp | |
| // +---+---+---+---+---+---+---+---+ |
| -// - Fourth byte: The opcode for this instruction. |
| +// - Third byte: The opcode for this instruction. |
| // |
| -// |map_select| Indicates the opcode map that should be used for this |
| -// instruction. |
| +// If this instructions takes some operands then it's followed by a ModR/M byte |
| +// and some optional bytes to represent the operand. We don't represent these |
| +// optional bytes here. |
| // |
| -// See http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix |
| +// See |
| +// http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix |
| // for more details. |
| +struct ThreeBytesVexInstruction { |
| + explicit ThreeBytesVexInstruction(const uint8_t* data) { |
| + DCHECK_NE(nullptr, data); |
| + CHECK_EQ(kThreeByteVexOpcode, data[0]); |
| + first_byte = data[1]; |
| + second_byte = data[2]; |
| + opcode = data[3]; |
| + } |
| + |
| + // Check if this instruction match the expectations that we have for it. |
| + // |
| + // It compares the value of several fields that can have an impact on the |
| + // instruction size and make sure that they have the expected value. |
| + // |
| + // @param expected_rxb The expected value for |rxb|. |
| + // @param expected_we The expected value for |we|. |
| + // @param expected_l The expected value for |l|. |
| + // @param expected_pp The expected value for |pp|. |
| + // @returns true if all the expectations are met, false otherwise. |
| + bool MatchExpectations(uint8_t expected_rxb, |
| + uint8_t expected_we, |
| + uint8_t expected_l, |
| + uint8_t expected_pp, |
| + const char* instruction); |
| + |
| + // First byte, contains the RXB value and map_select. |
| + union { |
| + uint8_t first_byte; |
| + struct { |
| + uint8_t map_select : 5; |
| + uint8_t rxb : 3; |
|
huangs
2017/04/28 21:58:04
|rxb| is inverted, and should be labelled as such,
Sébastien Marchand
2017/05/01 16:04:31
Done.
|
| + }; |
| + }; |
| + // Second byte, contains the W/E, ~vvvv, L and pp values. |
| + union { |
| + uint8_t second_byte; |
| + struct { |
| + uint8_t pp : 2; |
| + uint8_t l : 1; |
| + uint8_t vvvv : 4; |
|
huangs
2017/04/28 21:58:04
|vvvv| is inverted, and should be labelled as such
Sébastien Marchand
2017/05/01 16:04:32
Done.
|
| + uint8_t w_e : 1; |
| + }; |
| + }; |
| + |
| + // Opcode of this instruction. |
| + uint8_t opcode; |
| +}; |
| + |
| +// Check if |value| is equal to |expected| value and log verbosely if it's not |
| +// the case. |
| +bool CheckField(uint8_t expected_value, |
| + uint8_t value, |
| + const char* field_name, |
| + const char* instruction) { |
| + if (expected_value != value) { |
| + LOG(ERROR) << "Unexpected " << field_name << " value for the " |
| + << instruction << " instruction, expecting 0x" << std::hex |
| + << static_cast<size_t>(expected_value) << " but got 0x" |
| + << static_cast<size_t>(value) << "."; |
|
huangs
2017/04/28 21:58:04
Add
<< std::dec
at end to restore LOG(ERROR)'s s
Sébastien Marchand
2017/05/01 16:04:31
Done.
|
| + return false; |
| + } |
| + return true; |
| +} |
| + |
| +bool ThreeBytesVexInstruction::MatchExpectations(uint8_t expected_rxb, |
| + uint8_t expected_we, |
| + uint8_t expected_l, |
| + uint8_t expected_pp, |
| + const char* instruction) { |
| + if (!CheckField(expected_rxb, rxb, "rxb", instruction)) |
| + return false; |
| + if (!CheckField(expected_we, w_e, "we", instruction)) |
| + return false; |
| + if (!CheckField(expected_l, l, "l", instruction)) |
| + return false; |
| + if (!CheckField(expected_pp, pp, "pp", instruction)) |
| + return false; |
| + return true; |
|
huangs
2017/04/28 21:58:04
Shorter to do
return CheckField(...) &&
CheckFi
Sébastien Marchand
2017/05/01 16:04:31
Splitting this into multiple checks make it easier
|
| +} |
| + |
| +// Return the size of a 3-byte VEX encoded instruction. |
| +// |
| +// NOTE: We only support the instructions that have been encountered in Chrome |
| +// and there's some restrictions on which variants of these instructions are |
| +// supported. |
| size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) { |
| - DCHECK_EQ(0xC4, ci->code[0]); |
| - // Switch case based on the opcode map used by this instruction. |
| - switch (ci->code[1] & 0x1F) { |
| + // A 3-byte VEX instructions has always a size of 5 bytes or more (the C4 |
| + // constant, the 3 VEX bytes and the opcode). |
| + DCHECK_GE(ci->codeLen, 5); |
| + |
| + ThreeBytesVexInstruction instruction(ci->code); |
| + ModRMByte modRMByte(ci->code[4]); |
| + |
| + const size_t kBaseSize = 4; |
| + size_t operand_size = 0; |
| + size_t constants_size = 0; |
| + |
| + // Switch case based on the opcode mp used by this instruction. |
| + switch (instruction.map_select) { |
| case 0x02: { |
| - switch (ci->code[3]) { |
| - case 0x13: return 5; // vcvtps2ps |
| - case 0x18: return 5; // vbroadcastss |
| - case 0x36: return 5; // vpermd |
| - case 0x58: return 6; // vpbroadcastd |
| - case 0x5A: return 6; // vbroadcasti128 |
| - case 0x78: return 5; // vpbroadcastb |
| - case 0x8C: return 5; // vpmaskmovd |
| - case 0x8E: return 5; // vpmaskmovd |
| - case 0x90: return 6; // vpgatherdd |
| + switch (instruction.opcode) { |
| + case 0x13: // vcvtph2ps |
| + if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtph2ps")) |
| + operand_size = GetModRMOperandBytesSize(modRMByte, false); |
| + break; |
| + case 0x18: // vbroadcastss |
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcastss")) |
| + operand_size = GetModRMOperandBytesSize(modRMByte, false); |
| + break; |
| + case 0x36: // vpermd |
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpermd")) |
| + operand_size = GetModRMOperandBytesSize(modRMByte, false); |
| + break; |
| + case 0x5A: // vbroadcasti128 |
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcasti128")) { |
|
huangs
2017/04/28 21:58:04
NIT: Inconsistent of {} for these if's.
Sébastien Marchand
2017/05/01 16:04:31
Done.
|
| + operand_size = GetModRMOperandBytesSize(modRMByte, true); |
| + } |
| + break; |
| + case 0x58: // vpbroadcastb |
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpbroadcastb")) |
|
huangs
2017/04/28 21:58:04
0x58 Should be vpbroadcastd (last letter).
Sébastien Marchand
2017/05/01 16:04:32
Done, thanks for spotting this!
|
| + operand_size = GetModRMOperandBytesSize(modRMByte, false); |
| + break; |
| + case 0x78: // vpbroadcastd |
|
huangs
2017/04/28 21:58:04
0x78 Should be vpbroadcastb (last letter).
Also,
Sébastien Marchand
2017/05/01 16:04:31
Done.
|
| + if (instruction.MatchExpectations(0b111, 0, 0, 1, "vpbroadcastd")) |
| + operand_size = GetModRMOperandBytesSize(modRMByte, false); |
| + break; |
| + case 0x8C: // vpmaskmovd |
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpmaskmovd")) { |
| + operand_size = GetModRMOperandBytesSize(modRMByte, true); |
| + } |
| + break; |
| + case 0x90: // vpgatherdd |
|
huangs
2017/04/28 21:58:04
case 0x8E disappeared? This is another vpmaskmovd
Sébastien Marchand
2017/05/01 16:04:31
Yeah, it wasn't properly unittested and I don't se
|
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpgatherdd")) { |
| + operand_size = GetModRMOperandBytesSize(modRMByte, true); |
| + } |
| + break; |
| default: |
| break; |
| } |
| break; |
| } |
| case 0x03: { |
| - switch (ci->code[3]) { |
| - case 0x00: return 6; // vpermq |
| - case 0x1D: return 6; // vcvtps2ph |
| - case 0x38: return 7; // vinserti128 |
| - case 0x39: return 6; // vextracti128 |
| + switch (instruction.opcode) { |
| + case 0x38: // vinserti128 |
|
huangs
2017/04/28 21:58:04
Sort?
Sébastien Marchand
2017/05/01 16:04:31
Done.
|
| + if (instruction.MatchExpectations(0b111, 0, 1, 1, "vinserti128")) { |
| + operand_size = GetModRMOperandBytesSize(modRMByte, false); |
| + constants_size = 1; |
| + } |
| + break; |
| + case 0x00: // vpermq |
| + if (instruction.MatchExpectations(0b111, 1, 1, 1, "vpermq")) { |
| + operand_size = GetModRMOperandBytesSize(modRMByte, false); |
| + constants_size = 1; |
| + } |
| + break; |
| + case 0x1D: // vcvtps2ph |
| + if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtps2ph")) { |
| + operand_size = GetModRMOperandBytesSize(modRMByte, false); |
| + constants_size = 1; |
| + } |
| + break; |
| + case 0x39: // vextracti128 |
| + if (instruction.MatchExpectations(0b111, 0, 0, 1, "vextracti128")) |
| + operand_size = GetModRMOperandBytesSize(modRMByte, false); |
| default: break; |
| } |
| break; |
| @@ -81,6 +327,9 @@ size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) { |
| break; |
| } |
| + if (operand_size != 0) |
| + return kBaseSize + operand_size + constants_size; |
| + |
| // Print the instructions that we haven't been able to decompose in a format |
| // that can easily be pasted into ODA (https://onlinedisassembler.com/). |
| const int kMaxBytes = 10; |
| @@ -150,10 +399,9 @@ bool HandleBadDecode(_CodeInfo* ci, |
| return true; |
| } |
| - } |
| - |
| - if (ci->code[0] == 0xC4) |
| + } else if (ci->code[0] == kThreeByteVexOpcode) { |
| size = Get3ByteVexEncodedInstructionSize(ci); |
| + } |
| if (size == 0) |
| return false; |