Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(274)

Unified Diff: syzygy/core/disassembler_util.cc

Issue 2841863003: Improve the decoding of the VEX encoded instructions. (Closed)
Patch Set: Add support for the Mod R/M byte. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « syzygy/core/core.gyp ('k') | syzygy/core/disassembler_util_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: syzygy/core/disassembler_util.cc
diff --git a/syzygy/core/disassembler_util.cc b/syzygy/core/disassembler_util.cc
index 7be5c8781853644f4d74abb1c39bed05a1411cca..d9e06e0f09979633bd074b21b022a0e43122fad0 100644
--- a/syzygy/core/disassembler_util.cc
+++ b/syzygy/core/disassembler_util.cc
@@ -24,55 +24,301 @@ namespace core {
namespace {
-// Return the size of a 3-byte VEX encoded instruction.
+// Opcode of the 3-byte VEX instructions.
+const uint8_t kThreeByteVexOpcode = 0xC4;
+
+// Structure representing a Mod R/M byte, it has the following format:
+// +---+---+---+---+---+---+---+---+
+// | mod |reg/opcode | r/m |
+// +---+---+---+---+---+---+---+---+
+//
+// Here's a description of the different fields (from
+// https://en.wikipedia.org/wiki/VEX_prefix):
+// - mod: combined with the r/m field, encodes either 8 registers or 2
+// addressing modes. Also encodes opcode information for some
+// instructions.
+// - reg/opcode: specifies either a register or three more bits of
+// opcode information, as specified in the primary opcode byte.
+// - r/m: can specify a register as an operand, or combine with the mod
+// field to encode an addressing mode.
+//
+// The |mod| field can have the following values:
+// - 0b00: Register indirect addressing mode or SIB with no displacement
+// (if R/M = 0b100) or displacement only addressing mode (if R/M = 0b101).
+// - 0b01: One-byte signed displacement follows addressing mode byte(s).
+// - 0b10: Four-byte signed displacement follows addressing mode byte(s).
+// - 0b11: Register addressing mode.
+struct ModRMByte {
+ // Constructor.
+ // @param value The Value used to initialize this Mod R/M byte.
+ explicit ModRMByte(uint8_t value) : raw_value(value) {}
+
+ union {
+ uint8_t raw_value;
+ struct {
+ uint8_t r_m : 3;
+ uint8_t reg_or_opcode : 3;
+ uint8_t mod : 2;
+ };
+ };
+};
+
+// Calculates the number of bytes used to encode a Mod R/M operand.
+// @param modRMByte The Mod R/M byte.
+// @param no_register_addressing_mode Indicates if the instruction supports
+// the register addressing mode (value of |mod| of 0b11).
+// @returns the total size of this Mod R/M operand (in bytes), 0 on failure.
+size_t GetModRMOperandBytesSize(const ModRMByte& modRMByte,
+ bool no_register_addressing_mode) {
+ // If the SIB (Scale*Index+Base) bit is set then the operand uses an
huangs 2017/04/28 21:58:04 NIT: SIB is not a bit, but a special value. Maybe
Sébastien Marchand 2017/05/01 16:04:32 Done.
+ // additional SIB byte.
+ const uint8_t kSIBMask = 0b100;
huangs 2017/04/28 21:58:04 NIT: SIB is not a mask; 0b101, 0b110, 0b111 are un
Sébastien Marchand 2017/05/01 16:04:31 Ha, good point :)
+
+ switch (modRMByte.mod) {
+ case 0b00: {
+ if (modRMByte.r_m == kSIBMask) {
+ // SIB with no displacement, e.g.:
+ // vpbroadcastb xmm2, BYTE PTR [edx+eax*2]
huangs 2017/04/28 21:58:04 NIT: Inconsistent tabbing for disassembly examples
Sébastien Marchand 2017/05/01 16:04:31 Done.
+ return 2;
huangs 2017/04/28 21:58:04 Depending how far you want to go down the rabbit h
Sébastien Marchand 2017/05/01 16:04:31 Thanks! This is a simple special case and it doesn
+ } else if (modRMByte.r_m == 0b101) {
huangs 2017/04/28 21:58:04 Don't need "else" if returning? Same below.
Sébastien Marchand 2017/05/01 16:04:32 Done.
+ // Displacement only addressing mode, e.g.:
+ // vpbroadcastb xmm2, BYTE PTR ds:0x12345678
+ return 5;
+ } else {
+ // Register indirect addressing mode, e.g.:
+ // vpbroadcastb xmm2, BYTE PTR [eax]
+ return 1;
+ }
+ }
+ case 0b01: {
+ // One-byte displacement.
+ if (modRMByte.r_m == kSIBMask) {
+ // Additional SIB byte, e.g.:
+ // vpbroadcastb xmm2, BYTE PTR [eax+edx*1+0x42]
+ return 3;
+ } else {
+ // No SIB byte, e.g.:
+ // vpbroadcastb xmm2, BYTE PTR [eax+0x42]
+ return 2;
+ }
+ }
+ case 0b10: {
+ // One-byte displacement.
+ if (modRMByte.r_m == kSIBMask) {
+ // Additional SIB byte, e.g.:
+ // vpbroadcastb xmm0, BYTE PTR [edx+edx*1+0x12345678]
+ return 6;
+ } else {
+ // No SIB byte, e.g.:
+ // vpbroadcastb xmm0, BYTE PTR [eax+0x34567812]
+ return 5;
+ }
+ }
+ case 0b11:
+ // Register addressing mode, e.g.:
+ // vpbroadcastb xmm2, BYTE PTR [eax]
+ if (no_register_addressing_mode) {
+ LOG(ERROR) << "Unexpected |mod| value of 0b11 for an instruction that "
+ << "doesn't support it.";
+ return 0;
+ }
+ return 1;
+ default:
+ NOTREACHED();
+ }
+
+ return 0;
+}
+
+// Structure representing a 3-byte VEX encoded instruction.
//
// The layout of these instructions is as follows, starting with a byte with
// value 0xC4:
-// - First byte:
+// - Opcode indicating that this is a 3-byte VEX instruction:
// +---+---+---+---+---+---+---+---+
// | 1 1 0 0 0 1 0 0 |
// +---+---+---+---+---+---+---+---+
-// - Second byte:
+// - First byte:
// +---+---+---+---+---+---+---+---+
// |~R |~X |~B | map_select |
// +---+---+---+---+---+---+---+---+
-// - Third byte:
+// - Second byte:
// +---+---+---+---+---+---+---+---+
// |W/E| ~vvvv | L | pp |
// +---+---+---+---+---+---+---+---+
-// - Fourth byte: The opcode for this instruction.
+// - Third byte: The opcode for this instruction.
//
-// |map_select| Indicates the opcode map that should be used for this
-// instruction.
+// If this instructions takes some operands then it's followed by a ModR/M byte
+// and some optional bytes to represent the operand. We don't represent these
+// optional bytes here.
//
-// See http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix
+// See
+// http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix
// for more details.
+struct ThreeBytesVexInstruction {
+ explicit ThreeBytesVexInstruction(const uint8_t* data) {
+ DCHECK_NE(nullptr, data);
+ CHECK_EQ(kThreeByteVexOpcode, data[0]);
+ first_byte = data[1];
+ second_byte = data[2];
+ opcode = data[3];
+ }
+
+ // Check if this instruction match the expectations that we have for it.
+ //
+ // It compares the value of several fields that can have an impact on the
+ // instruction size and make sure that they have the expected value.
+ //
+ // @param expected_rxb The expected value for |rxb|.
+ // @param expected_we The expected value for |we|.
+ // @param expected_l The expected value for |l|.
+ // @param expected_pp The expected value for |pp|.
+ // @returns true if all the expectations are met, false otherwise.
+ bool MatchExpectations(uint8_t expected_rxb,
+ uint8_t expected_we,
+ uint8_t expected_l,
+ uint8_t expected_pp,
+ const char* instruction);
+
+ // First byte, contains the RXB value and map_select.
+ union {
+ uint8_t first_byte;
+ struct {
+ uint8_t map_select : 5;
+ uint8_t rxb : 3;
huangs 2017/04/28 21:58:04 |rxb| is inverted, and should be labelled as such,
Sébastien Marchand 2017/05/01 16:04:31 Done.
+ };
+ };
+ // Second byte, contains the W/E, ~vvvv, L and pp values.
+ union {
+ uint8_t second_byte;
+ struct {
+ uint8_t pp : 2;
+ uint8_t l : 1;
+ uint8_t vvvv : 4;
huangs 2017/04/28 21:58:04 |vvvv| is inverted, and should be labelled as such
Sébastien Marchand 2017/05/01 16:04:32 Done.
+ uint8_t w_e : 1;
+ };
+ };
+
+ // Opcode of this instruction.
+ uint8_t opcode;
+};
+
+// Check if |value| is equal to |expected| value and log verbosely if it's not
+// the case.
+bool CheckField(uint8_t expected_value,
+ uint8_t value,
+ const char* field_name,
+ const char* instruction) {
+ if (expected_value != value) {
+ LOG(ERROR) << "Unexpected " << field_name << " value for the "
+ << instruction << " instruction, expecting 0x" << std::hex
+ << static_cast<size_t>(expected_value) << " but got 0x"
+ << static_cast<size_t>(value) << ".";
huangs 2017/04/28 21:58:04 Add << std::dec at end to restore LOG(ERROR)'s s
Sébastien Marchand 2017/05/01 16:04:31 Done.
+ return false;
+ }
+ return true;
+}
+
+bool ThreeBytesVexInstruction::MatchExpectations(uint8_t expected_rxb,
+ uint8_t expected_we,
+ uint8_t expected_l,
+ uint8_t expected_pp,
+ const char* instruction) {
+ if (!CheckField(expected_rxb, rxb, "rxb", instruction))
+ return false;
+ if (!CheckField(expected_we, w_e, "we", instruction))
+ return false;
+ if (!CheckField(expected_l, l, "l", instruction))
+ return false;
+ if (!CheckField(expected_pp, pp, "pp", instruction))
+ return false;
+ return true;
huangs 2017/04/28 21:58:04 Shorter to do return CheckField(...) && CheckFi
Sébastien Marchand 2017/05/01 16:04:31 Splitting this into multiple checks make it easier
+}
+
+// Return the size of a 3-byte VEX encoded instruction.
+//
+// NOTE: We only support the instructions that have been encountered in Chrome
+// and there's some restrictions on which variants of these instructions are
+// supported.
size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) {
- DCHECK_EQ(0xC4, ci->code[0]);
- // Switch case based on the opcode map used by this instruction.
- switch (ci->code[1] & 0x1F) {
+ // A 3-byte VEX instructions has always a size of 5 bytes or more (the C4
+ // constant, the 3 VEX bytes and the opcode).
+ DCHECK_GE(ci->codeLen, 5);
+
+ ThreeBytesVexInstruction instruction(ci->code);
+ ModRMByte modRMByte(ci->code[4]);
+
+ const size_t kBaseSize = 4;
+ size_t operand_size = 0;
+ size_t constants_size = 0;
+
+ // Switch case based on the opcode mp used by this instruction.
+ switch (instruction.map_select) {
case 0x02: {
- switch (ci->code[3]) {
- case 0x13: return 5; // vcvtps2ps
- case 0x18: return 5; // vbroadcastss
- case 0x36: return 5; // vpermd
- case 0x58: return 6; // vpbroadcastd
- case 0x5A: return 6; // vbroadcasti128
- case 0x78: return 5; // vpbroadcastb
- case 0x8C: return 5; // vpmaskmovd
- case 0x8E: return 5; // vpmaskmovd
- case 0x90: return 6; // vpgatherdd
+ switch (instruction.opcode) {
+ case 0x13: // vcvtph2ps
+ if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtph2ps"))
+ operand_size = GetModRMOperandBytesSize(modRMByte, false);
+ break;
+ case 0x18: // vbroadcastss
+ if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcastss"))
+ operand_size = GetModRMOperandBytesSize(modRMByte, false);
+ break;
+ case 0x36: // vpermd
+ if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpermd"))
+ operand_size = GetModRMOperandBytesSize(modRMByte, false);
+ break;
+ case 0x5A: // vbroadcasti128
+ if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcasti128")) {
huangs 2017/04/28 21:58:04 NIT: Inconsistent of {} for these if's.
Sébastien Marchand 2017/05/01 16:04:31 Done.
+ operand_size = GetModRMOperandBytesSize(modRMByte, true);
+ }
+ break;
+ case 0x58: // vpbroadcastb
+ if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpbroadcastb"))
huangs 2017/04/28 21:58:04 0x58 Should be vpbroadcastd (last letter).
Sébastien Marchand 2017/05/01 16:04:32 Done, thanks for spotting this!
+ operand_size = GetModRMOperandBytesSize(modRMByte, false);
+ break;
+ case 0x78: // vpbroadcastd
huangs 2017/04/28 21:58:04 0x78 Should be vpbroadcastb (last letter). Also,
Sébastien Marchand 2017/05/01 16:04:31 Done.
+ if (instruction.MatchExpectations(0b111, 0, 0, 1, "vpbroadcastd"))
+ operand_size = GetModRMOperandBytesSize(modRMByte, false);
+ break;
+ case 0x8C: // vpmaskmovd
+ if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpmaskmovd")) {
+ operand_size = GetModRMOperandBytesSize(modRMByte, true);
+ }
+ break;
+ case 0x90: // vpgatherdd
huangs 2017/04/28 21:58:04 case 0x8E disappeared? This is another vpmaskmovd
Sébastien Marchand 2017/05/01 16:04:31 Yeah, it wasn't properly unittested and I don't se
+ if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpgatherdd")) {
+ operand_size = GetModRMOperandBytesSize(modRMByte, true);
+ }
+ break;
default:
break;
}
break;
}
case 0x03: {
- switch (ci->code[3]) {
- case 0x00: return 6; // vpermq
- case 0x1D: return 6; // vcvtps2ph
- case 0x38: return 7; // vinserti128
- case 0x39: return 6; // vextracti128
+ switch (instruction.opcode) {
+ case 0x38: // vinserti128
huangs 2017/04/28 21:58:04 Sort?
Sébastien Marchand 2017/05/01 16:04:31 Done.
+ if (instruction.MatchExpectations(0b111, 0, 1, 1, "vinserti128")) {
+ operand_size = GetModRMOperandBytesSize(modRMByte, false);
+ constants_size = 1;
+ }
+ break;
+ case 0x00: // vpermq
+ if (instruction.MatchExpectations(0b111, 1, 1, 1, "vpermq")) {
+ operand_size = GetModRMOperandBytesSize(modRMByte, false);
+ constants_size = 1;
+ }
+ break;
+ case 0x1D: // vcvtps2ph
+ if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtps2ph")) {
+ operand_size = GetModRMOperandBytesSize(modRMByte, false);
+ constants_size = 1;
+ }
+ break;
+ case 0x39: // vextracti128
+ if (instruction.MatchExpectations(0b111, 0, 0, 1, "vextracti128"))
+ operand_size = GetModRMOperandBytesSize(modRMByte, false);
default: break;
}
break;
@@ -81,6 +327,9 @@ size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) {
break;
}
+ if (operand_size != 0)
+ return kBaseSize + operand_size + constants_size;
+
// Print the instructions that we haven't been able to decompose in a format
// that can easily be pasted into ODA (https://onlinedisassembler.com/).
const int kMaxBytes = 10;
@@ -150,10 +399,9 @@ bool HandleBadDecode(_CodeInfo* ci,
return true;
}
- }
-
- if (ci->code[0] == 0xC4)
+ } else if (ci->code[0] == kThreeByteVexOpcode) {
size = Get3ByteVexEncodedInstructionSize(ci);
+ }
if (size == 0)
return false;
« no previous file with comments | « syzygy/core/core.gyp ('k') | syzygy/core/disassembler_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698