Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: syzygy/core/disassembler_util.cc

Issue 2841863003: Improve the decoding of the VEX encoded instructions. (Closed)
Patch Set: Add support for the Mod R/M byte. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « syzygy/core/core.gyp ('k') | syzygy/core/disassembler_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 Google Inc. All Rights Reserved. 1 // Copyright 2012 Google Inc. All Rights Reserved.
2 // 2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); 3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License. 4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at 5 // You may obtain a copy of the License at
6 // 6 //
7 // http://www.apache.org/licenses/LICENSE-2.0 7 // http://www.apache.org/licenses/LICENSE-2.0
8 // 8 //
9 // Unless required by applicable law or agreed to in writing, software 9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, 10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and 12 // See the License for the specific language governing permissions and
13 // limitations under the License. 13 // limitations under the License.
14 14
15 #include "syzygy/core/disassembler_util.h" 15 #include "syzygy/core/disassembler_util.h"
16 16
17 #include <algorithm> 17 #include <algorithm>
18 18
19 #include "base/logging.h" 19 #include "base/logging.h"
20 #include "base/strings/stringprintf.h" 20 #include "base/strings/stringprintf.h"
21 #include "mnemonics.h" // NOLINT 21 #include "mnemonics.h" // NOLINT
22 22
23 namespace core { 23 namespace core {
24 24
25 namespace { 25 namespace {
26 26
27 // Return the size of a 3-byte VEX encoded instruction. 27 // Opcode of the 3-byte VEX instructions.
28 const uint8_t kThreeByteVexOpcode = 0xC4;
29
30 // Structure representing a Mod R/M byte, it has the following format:
31 // +---+---+---+---+---+---+---+---+
32 // | mod |reg/opcode | r/m |
33 // +---+---+---+---+---+---+---+---+
34 //
35 // Here's a description of the different fields (from
36 // https://en.wikipedia.org/wiki/VEX_prefix):
37 // - mod: combined with the r/m field, encodes either 8 registers or 24
38 // addressing modes. Also encodes opcode information for some
39 // instructions.
40 // - reg/opcode: specifies either a register or three more bits of
41 // opcode information, as specified in the primary opcode byte.
42 // - r/m: can specify a register as an operand, or combine with the mod
43 // field to encode an addressing mode.
44 //
45 // The |mod| field can have the following values:
46 // - 0b00: Register indirect addressing mode or SIB with no displacement
47 // (if R/M = 0b100) or displacement only addressing mode (if R/M = 0b101).
48 // - 0b01: One-byte signed displacement follows addressing mode byte(s).
49 // - 0b10: Four-byte signed displacement follows addressing mode byte(s).
50 // - 0b11: Register addressing mode.
51 struct ModRMByte {
52 // Constructor.
53 // @param value The Value used to initialize this Mod R/M byte.
54 explicit ModRMByte(uint8_t value) : raw_value(value) {}
55
56 union {
57 uint8_t raw_value;
58 struct {
59 uint8_t r_m : 3;
60 uint8_t reg_or_opcode : 3;
61 uint8_t mod : 2;
62 };
63 };
64 };
65
66 // Calculates the number of bytes used to encode a Mod R/M operand.
67 // @param modRMByte The Mod R/M byte.
68 // @param no_register_addressing_mode Indicates that the instruction doesn't
69 // support the register addressing mode (value of |mod| of 0b11).
70 // @returns the total size of this Mod R/M operand (in bytes), 0 on failure.
71 size_t GetModRMOperandBytesSize(const ModRMByte& modRMByte,
72 bool no_register_addressing_mode) {
73 // If the R/M field holds the SIB (Scale*Index+Base) value then the operand uses an
huangs 2017/04/28 21:58:04 NIT: SIB is not a bit, but a special value. Maybe
Sébastien Marchand 2017/05/01 16:04:32 Done.
74 // additional SIB byte.
75 const uint8_t kSIBMask = 0b100;
huangs 2017/04/28 21:58:04 NIT: SIB is not a mask; 0b101, 0b110, 0b111 are un
Sébastien Marchand 2017/05/01 16:04:31 Ha, good point :)
76
77 switch (modRMByte.mod) {
78 case 0b00: {
79 if (modRMByte.r_m == kSIBMask) {
80 // SIB with no displacement, e.g.:
81 // vpbroadcastb xmm2, BYTE PTR [edx+eax*2]
huangs 2017/04/28 21:58:04 NIT: Inconsistent tabbing for disassembly examples
Sébastien Marchand 2017/05/01 16:04:31 Done.
82 return 2;
huangs 2017/04/28 21:58:04 Depending how far you want to go down the rabbit h
Sébastien Marchand 2017/05/01 16:04:31 Thanks! This is a simple special case and it doesn
83 } else if (modRMByte.r_m == 0b101) {
huangs 2017/04/28 21:58:04 Don't need "else" if returning? Same below.
Sébastien Marchand 2017/05/01 16:04:32 Done.
84 // Displacement only addressing mode, e.g.:
85 // vpbroadcastb xmm2, BYTE PTR ds:0x12345678
86 return 5;
87 } else {
88 // Register indirect addressing mode, e.g.:
89 // vpbroadcastb xmm2, BYTE PTR [eax]
90 return 1;
91 }
92 }
93 case 0b01: {
94 // One-byte displacement.
95 if (modRMByte.r_m == kSIBMask) {
96 // Additional SIB byte, e.g.:
97 // vpbroadcastb xmm2, BYTE PTR [eax+edx*1+0x42]
98 return 3;
99 } else {
100 // No SIB byte, e.g.:
101 // vpbroadcastb xmm2, BYTE PTR [eax+0x42]
102 return 2;
103 }
104 }
105 case 0b10: {
106 // Four-byte displacement.
107 if (modRMByte.r_m == kSIBMask) {
108 // Additional SIB byte, e.g.:
109 // vpbroadcastb xmm0, BYTE PTR [edx+edx*1+0x12345678]
110 return 6;
111 } else {
112 // No SIB byte, e.g.:
113 // vpbroadcastb xmm0, BYTE PTR [eax+0x34567812]
114 return 5;
115 }
116 }
117 case 0b11:
118 // Register addressing mode, e.g.:
119 // vpbroadcastb xmm2, xmm0
120 if (no_register_addressing_mode) {
121 LOG(ERROR) << "Unexpected |mod| value of 0b11 for an instruction that "
122 << "doesn't support it.";
123 return 0;
124 }
125 return 1;
126 default:
127 NOTREACHED();
128 }
129
130 return 0;
131 }
132
133 // Structure representing a 3-byte VEX encoded instruction.
28 // 134 //
29 // The layout of these instructions is as follows, starting with a byte with 135 // The layout of these instructions is as follows, starting with a byte with
30 // value 0xC4: 136 // value 0xC4:
137 // - Opcode indicating that this is a 3-byte VEX instruction:
138 // +---+---+---+---+---+---+---+---+
139 // | 1 1 0 0 0 1 0 0 |
140 // +---+---+---+---+---+---+---+---+
31 // - First byte: 141 // - First byte:
32 // +---+---+---+---+---+---+---+---+ 142 // +---+---+---+---+---+---+---+---+
33 // | 1 1 0 0 0 1 0 0 | 143 // |~R |~X |~B | map_select |
34 // +---+---+---+---+---+---+---+---+ 144 // +---+---+---+---+---+---+---+---+
35 // - Second byte: 145 // - Second byte:
36 // +---+---+---+---+---+---+---+---+ 146 // +---+---+---+---+---+---+---+---+
37 // |~R |~X |~B | map_select |
38 // +---+---+---+---+---+---+---+---+
39 // - Third byte:
40 // +---+---+---+---+---+---+---+---+
41 // |W/E| ~vvvv | L | pp | 147 // |W/E| ~vvvv | L | pp |
42 // +---+---+---+---+---+---+---+---+ 148 // +---+---+---+---+---+---+---+---+
43 // - Fourth byte: The opcode for this instruction. 149 // - Third byte: The opcode for this instruction.
44 // 150 //
45 // |map_select| Indicates the opcode map that should be used for this 151 // If this instruction takes some operands then it's followed by a ModR/M byte
46 // instruction. 152 // and some optional bytes to represent the operand. We don't represent these
47 // 153 // optional bytes here.
48 // See http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix 154 //
155 // See
156 // http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix
49 // for more details. 157 // for more details.
158 struct ThreeBytesVexInstruction {
159 explicit ThreeBytesVexInstruction(const uint8_t* data) {
160 DCHECK_NE(nullptr, data);
161 CHECK_EQ(kThreeByteVexOpcode, data[0]);
162 first_byte = data[1];
163 second_byte = data[2];
164 opcode = data[3];
165 }
166
167 // Checks if this instruction matches the expectations that we have for it.
168 //
169 // It compares the value of several fields that can have an impact on the
170 // instruction size and makes sure that they have the expected value.
171 //
172 // @param expected_rxb The expected value for |rxb|.
173 // @param expected_we The expected value for |we|.
174 // @param expected_l The expected value for |l|.
175 // @param expected_pp The expected value for |pp|.
176 // @returns true if all the expectations are met, false otherwise.
177 bool MatchExpectations(uint8_t expected_rxb,
178 uint8_t expected_we,
179 uint8_t expected_l,
180 uint8_t expected_pp,
181 const char* instruction);
182
183 // First byte, contains the RXB value and map_select.
184 union {
185 uint8_t first_byte;
186 struct {
187 uint8_t map_select : 5;
188 uint8_t rxb : 3;
huangs 2017/04/28 21:58:04 |rxb| is inverted, and should be labelled as such,
Sébastien Marchand 2017/05/01 16:04:31 Done.
189 };
190 };
191 // Second byte, contains the W/E, ~vvvv, L and pp values.
192 union {
193 uint8_t second_byte;
194 struct {
195 uint8_t pp : 2;
196 uint8_t l : 1;
197 uint8_t vvvv : 4;
huangs 2017/04/28 21:58:04 |vvvv| is inverted, and should be labelled as such
Sébastien Marchand 2017/05/01 16:04:32 Done.
198 uint8_t w_e : 1;
199 };
200 };
201
202 // Opcode of this instruction.
203 uint8_t opcode;
204 };
205
206 // Checks if |value| is equal to |expected_value| and logs verbosely if it's
207 // not the case.
208 bool CheckField(uint8_t expected_value,
209 uint8_t value,
210 const char* field_name,
211 const char* instruction) {
212 if (expected_value != value) {
213 LOG(ERROR) << "Unexpected " << field_name << " value for the "
214 << instruction << " instruction, expecting 0x" << std::hex
215 << static_cast<size_t>(expected_value) << " but got 0x"
216 << static_cast<size_t>(value) << ".";
huangs 2017/04/28 21:58:04 Add << std::dec at end to restore LOG(ERROR)'s s
Sébastien Marchand 2017/05/01 16:04:31 Done.
217 return false;
218 }
219 return true;
220 }
221
222 bool ThreeBytesVexInstruction::MatchExpectations(uint8_t expected_rxb,
223 uint8_t expected_we,
224 uint8_t expected_l,
225 uint8_t expected_pp,
226 const char* instruction) {
227 if (!CheckField(expected_rxb, rxb, "rxb", instruction))
228 return false;
229 if (!CheckField(expected_we, w_e, "we", instruction))
230 return false;
231 if (!CheckField(expected_l, l, "l", instruction))
232 return false;
233 if (!CheckField(expected_pp, pp, "pp", instruction))
234 return false;
235 return true;
huangs 2017/04/28 21:58:04 Shorter to do return CheckField(...) && CheckFi
Sébastien Marchand 2017/05/01 16:04:31 Splitting this into multiple checks make it easier
236 }
237
238 // Return the size of a 3-byte VEX encoded instruction.
239 //
240 // NOTE: We only support the instructions that have been encountered in Chrome
241 // and there's some restrictions on which variants of these instructions are
242 // supported.
50 size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) { 243 size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) {
51 DCHECK_EQ(0xC4, ci->code[0]); 244 // A 3-byte VEX instruction always has a size of 5 bytes or more (the C4
52 // Switch case based on the opcode map used by this instruction. 245 // constant, the 2 VEX payload bytes, the opcode and the Mod R/M byte).
53 switch (ci->code[1] & 0x1F) { 246 DCHECK_GE(ci->codeLen, 5);
247
248 ThreeBytesVexInstruction instruction(ci->code);
249 ModRMByte modRMByte(ci->code[4]);
250
251 const size_t kBaseSize = 4;
252 size_t operand_size = 0;
253 size_t constants_size = 0;
254
255 // Switch case based on the opcode map used by this instruction.
256 switch (instruction.map_select) {
54 case 0x02: { 257 case 0x02: {
55 switch (ci->code[3]) { 258 switch (instruction.opcode) {
56 case 0x13: return 5; // vcvtps2ps 259 case 0x13: // vcvtph2ps
57 case 0x18: return 5; // vbroadcastss 260 if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtph2ps"))
58 case 0x36: return 5; // vpermd 261 operand_size = GetModRMOperandBytesSize(modRMByte, false);
59 case 0x58: return 6; // vpbroadcastd 262 break;
60 case 0x5A: return 6; // vbroadcasti128 263 case 0x18: // vbroadcastss
61 case 0x78: return 5; // vpbroadcastb 264 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcastss"))
62 case 0x8C: return 5; // vpmaskmovd 265 operand_size = GetModRMOperandBytesSize(modRMByte, false);
63 case 0x8E: return 5; // vpmaskmovd 266 break;
64 case 0x90: return 6; // vpgatherdd 267 case 0x36: // vpermd
268 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpermd"))
269 operand_size = GetModRMOperandBytesSize(modRMByte, false);
270 break;
271 case 0x5A: // vbroadcasti128
272 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcasti128")) {
huangs 2017/04/28 21:58:04 NIT: Inconsistent of {} for these if's.
Sébastien Marchand 2017/05/01 16:04:31 Done.
273 operand_size = GetModRMOperandBytesSize(modRMByte, true);
274 }
275 break;
276 case 0x58: // vpbroadcastb
277 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpbroadcastb"))
huangs 2017/04/28 21:58:04 0x58 Should be vpbroadcastd (last letter).
Sébastien Marchand 2017/05/01 16:04:32 Done, thanks for spotting this!
278 operand_size = GetModRMOperandBytesSize(modRMByte, false);
279 break;
280 case 0x78: // vpbroadcastd
huangs 2017/04/28 21:58:04 0x78 Should be vpbroadcastb (last letter). Also,
Sébastien Marchand 2017/05/01 16:04:31 Done.
281 if (instruction.MatchExpectations(0b111, 0, 0, 1, "vpbroadcastd"))
282 operand_size = GetModRMOperandBytesSize(modRMByte, false);
283 break;
284 case 0x8C: // vpmaskmovd
285 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpmaskmovd")) {
286 operand_size = GetModRMOperandBytesSize(modRMByte, true);
287 }
288 break;
289 case 0x90: // vpgatherdd
huangs 2017/04/28 21:58:04 case 0x8E disappeared? This is another vpmaskmovd
Sébastien Marchand 2017/05/01 16:04:31 Yeah, it wasn't properly unittested and I don't se
290 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpgatherdd")) {
291 operand_size = GetModRMOperandBytesSize(modRMByte, true);
292 }
293 break;
65 default: 294 default:
66 break; 295 break;
67 } 296 }
68 break; 297 break;
69 } 298 }
70 case 0x03: { 299 case 0x03: {
71 switch (ci->code[3]) { 300 switch (instruction.opcode) {
72 case 0x00: return 6; // vpermq 301 case 0x38: // vinserti128
huangs 2017/04/28 21:58:04 Sort?
Sébastien Marchand 2017/05/01 16:04:31 Done.
73 case 0x1D: return 6; // vcvtps2ph 302 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vinserti128")) {
74 case 0x38: return 7; // vinserti128 303 operand_size = GetModRMOperandBytesSize(modRMByte, false);
75 case 0x39: return 6; // vextracti128 304 constants_size = 1;
305 }
306 break;
307 case 0x00: // vpermq
308 if (instruction.MatchExpectations(0b111, 1, 1, 1, "vpermq")) {
309 operand_size = GetModRMOperandBytesSize(modRMByte, false);
310 constants_size = 1;
311 }
312 break;
313 case 0x1D: // vcvtps2ph
314 if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtps2ph")) {
315 operand_size = GetModRMOperandBytesSize(modRMByte, false);
316 constants_size = 1;
317 }
318 break;
319 case 0x39: // vextracti128
320 if (instruction.MatchExpectations(0b111, 0, 0, 1, "vextracti128"))
321 operand_size = GetModRMOperandBytesSize(modRMByte, false);
76 default: break; 322 default: break;
77 } 323 }
78 break; 324 break;
79 } 325 }
80 default: 326 default:
81 break; 327 break;
82 } 328 }
83 329
330 if (operand_size != 0)
331 return kBaseSize + operand_size + constants_size;
332
84 // Print the instructions that we haven't been able to decompose in a format 333 // Print the instructions that we haven't been able to decompose in a format
85 // that can easily be pasted into ODA (https://onlinedisassembler.com/). 334 // that can easily be pasted into ODA (https://onlinedisassembler.com/).
86 const int kMaxBytes = 10; 335 const int kMaxBytes = 10;
87 size_t byte_count = std::min(ci->codeLen, kMaxBytes); 336 size_t byte_count = std::min(ci->codeLen, kMaxBytes);
88 std::string instruction_bytes; 337 std::string instruction_bytes;
89 for (size_t i = 0; i < byte_count; ++i) { 338 for (size_t i = 0; i < byte_count; ++i) {
90 base::StringAppendF(&instruction_bytes, "%02X", ci->code[i]); 339 base::StringAppendF(&instruction_bytes, "%02X", ci->code[i]);
91 if (i != byte_count - 1) 340 if (i != byte_count - 1)
92 instruction_bytes += " "; 341 instruction_bytes += " ";
93 } 342 }
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
143 CHECK_EQ(O_NONE, result->ops[3].type); 392 CHECK_EQ(O_NONE, result->ops[3].type);
144 393
145 --result->addr; 394 --result->addr;
146 ++result->size; 395 ++result->size;
147 396
148 *used_instructions_count = 1; 397 *used_instructions_count = 1;
149 *ret = DECRES_SUCCESS; 398 *ret = DECRES_SUCCESS;
150 399
151 return true; 400 return true;
152 } 401 }
402 } else if (ci->code[0] == kThreeByteVexOpcode) {
403 size = Get3ByteVexEncodedInstructionSize(ci);
153 } 404 }
154 405
155 if (ci->code[0] == 0xC4)
156 size = Get3ByteVexEncodedInstructionSize(ci);
157
158 if (size == 0) 406 if (size == 0)
159 return false; 407 return false;
160 408
161 // We set the bare minimum properties that are required for any 409 // We set the bare minimum properties that are required for any
162 // subsequent processing that we perform. 410 // subsequent processing that we perform.
163 411
164 *used_instructions_count = 1; 412 *used_instructions_count = 1;
165 413
166 ::memset(result, 0, sizeof(result[0])); 414 ::memset(result, 0, sizeof(result[0]));
167 result[0].addr = ci->codeOffset; 415 result[0].addr = ci->codeOffset;
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after
444 692
445 default: return assm::kRegisterNone; 693 default: return assm::kRegisterNone;
446 } 694 }
447 } 695 }
448 696
449 const Register& GetRegister(uint32_t distorm_reg_type) { 697 const Register& GetRegister(uint32_t distorm_reg_type) {
450 return Register::Get(GetRegisterId(distorm_reg_type)); 698 return Register::Get(GetRegisterId(distorm_reg_type));
451 } 699 }
452 700
453 } // namespace core 701 } // namespace core
OLDNEW
« no previous file with comments | « syzygy/core/core.gyp ('k') | syzygy/core/disassembler_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698