OLD | NEW |
---|---|
1 // Copyright 2012 Google Inc. All Rights Reserved. | 1 // Copyright 2012 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Licensed under the Apache License, Version 2.0 (the "License"); | 3 // Licensed under the Apache License, Version 2.0 (the "License"); |
4 // you may not use this file except in compliance with the License. | 4 // you may not use this file except in compliance with the License. |
5 // You may obtain a copy of the License at | 5 // You may obtain a copy of the License at |
6 // | 6 // |
7 // http://www.apache.org/licenses/LICENSE-2.0 | 7 // http://www.apache.org/licenses/LICENSE-2.0 |
8 // | 8 // |
9 // Unless required by applicable law or agreed to in writing, software | 9 // Unless required by applicable law or agreed to in writing, software |
10 // distributed under the License is distributed on an "AS IS" BASIS, | 10 // distributed under the License is distributed on an "AS IS" BASIS, |
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 // See the License for the specific language governing permissions and | 12 // See the License for the specific language governing permissions and |
13 // limitations under the License. | 13 // limitations under the License. |
14 | 14 |
15 #include "syzygy/core/disassembler_util.h" | 15 #include "syzygy/core/disassembler_util.h" |
16 | 16 |
17 #include <algorithm> | 17 #include <algorithm> |
18 | 18 |
19 #include "base/logging.h" | 19 #include "base/logging.h" |
20 #include "base/strings/stringprintf.h" | 20 #include "base/strings/stringprintf.h" |
21 #include "mnemonics.h" // NOLINT | 21 #include "mnemonics.h" // NOLINT |
22 | 22 |
23 namespace core { | 23 namespace core { |
24 | 24 |
25 namespace { | 25 namespace { |
26 | 26 |
27 // Return the size of a 3-byte VEX encoded instruction. | 27 // Opcode of the 3-byte VEX instructions. |
28 const uint8_t kThreeByteVexOpcode = 0xC4; | |
29 | |
30 // Structure representing a Mod R/M byte, it has the following format: | |
31 // +---+---+---+---+---+---+---+---+ | |
32 // | mod |reg/opcode | r/m | | |
33 // +---+---+---+---+---+---+---+---+ | |
34 // | |
35 // Here's a description of the different fields (from | |
36 // https://en.wikipedia.org/wiki/VEX_prefix): | |
37 // - mod: combined with the r/m field, encodes either 8 registers or 2 | |
38 // addressing modes. Also encodes opcode information for some | |
39 // instructions. | |
40 // - reg/opcode: specifies either a register or three more bits of | |
41 // opcode information, as specified in the primary opcode byte. | |
42 // - r/m: can specify a register as an operand, or combine with the mod | |
43 // field to encode an addressing mode. | |
44 // | |
45 // The |mod| field can have the following values: | |
46 // - 0b00: Register indirect addressing mode or SIB with no displacement | |
47 // (if R/M = 0b100) or displacement only addressing mode (if R/M = 0b101). | |
48 // - 0b01: One-byte signed displacement follows addressing mode byte(s). | |
49 // - 0b10: Four-byte signed displacement follows addressing mode byte(s). | |
50 // - 0b11: Register addressing mode. | |
51 struct ModRMByte { | |
52 // Constructor. | |
53 // @param value The value used to initialize this Mod R/M byte. | |
54 explicit ModRMByte(uint8_t value) : raw_value(value) {} | |
55 | |
56 union { | |
57 uint8_t raw_value; | |
58 struct { | |
59 uint8_t r_m : 3; | |
60 uint8_t reg_or_opcode : 3; | |
61 uint8_t mod : 2; | |
62 }; | |
63 }; | |
64 }; | |
65 | |
66 // Calculates the number of bytes used to encode a Mod R/M operand. | |
67 // @param modRMByte The Mod R/M byte. | |
68 // @param no_register_addressing_mode True if the instruction doesn't support | |
69 // the register addressing mode (value of |mod| of 0b11). | |
70 // @returns the total size of this Mod R/M operand (in bytes), 0 on failure. | |
71 size_t GetModRMOperandBytesSize(const ModRMByte& modRMByte, | |
72 bool no_register_addressing_mode) { | |
73 // If |r_m| holds the SIB (Scale*Index+Base) escape value (0b100) then the operand uses an
huangs
2017/04/28 21:58:04
NIT: SIB is not a bit, but a special value. Maybe
Sébastien Marchand
2017/05/01 16:04:32
Done.
| |
74 // additional SIB byte. | |
75 const uint8_t kSIBMask = 0b100; | |
huangs
2017/04/28 21:58:04
NIT: SIB is not a mask; 0b101, 0b110, 0b111 are un
Sébastien Marchand
2017/05/01 16:04:31
Ha, good point :)
| |
76 | |
77 switch (modRMByte.mod) { | |
78 case 0b00: { | |
79 if (modRMByte.r_m == kSIBMask) { | |
80 // SIB with no displacement, e.g.: | |
81 // vpbroadcastb xmm2, BYTE PTR [edx+eax*2] | |
huangs
2017/04/28 21:58:04
NIT: Inconsistent tabbing for disassembly examples
Sébastien Marchand
2017/05/01 16:04:31
Done.
| |
82 return 2; | |
huangs
2017/04/28 21:58:04
Depending how far you want to go down the rabbit h
Sébastien Marchand
2017/05/01 16:04:31
Thanks! This is a simple special case and it doesn
| |
83 } else if (modRMByte.r_m == 0b101) { | |
huangs
2017/04/28 21:58:04
Don't need "else" if returning? Same below.
Sébastien Marchand
2017/05/01 16:04:32
Done.
| |
84 // Displacement only addressing mode, e.g.: | |
85 // vpbroadcastb xmm2, BYTE PTR ds:0x12345678 | |
86 return 5; | |
87 } else { | |
88 // Register indirect addressing mode, e.g.: | |
89 // vpbroadcastb xmm2, BYTE PTR [eax] | |
90 return 1; | |
91 } | |
92 } | |
93 case 0b01: { | |
94 // One-byte displacement. | |
95 if (modRMByte.r_m == kSIBMask) { | |
96 // Additional SIB byte, e.g.: | |
97 // vpbroadcastb xmm2, BYTE PTR [eax+edx*1+0x42] | |
98 return 3; | |
99 } else { | |
100 // No SIB byte, e.g.: | |
101 // vpbroadcastb xmm2, BYTE PTR [eax+0x42] | |
102 return 2; | |
103 } | |
104 } | |
105 case 0b10: { | |
106 // Four-byte displacement. | |
107 if (modRMByte.r_m == kSIBMask) { | |
108 // Additional SIB byte, e.g.: | |
109 // vpbroadcastb xmm0, BYTE PTR [edx+edx*1+0x12345678] | |
110 return 6; | |
111 } else { | |
112 // No SIB byte, e.g.: | |
113 // vpbroadcastb xmm0, BYTE PTR [eax+0x34567812] | |
114 return 5; | |
115 } | |
116 } | |
117 case 0b11: | |
118 // Register addressing mode, e.g.: | |
119 // vpbroadcastb xmm2, xmm0 | |
120 if (no_register_addressing_mode) { | |
121 LOG(ERROR) << "Unexpected |mod| value of 0b11 for an instruction that " | |
122 << "doesn't support it."; | |
123 return 0; | |
124 } | |
125 return 1; | |
126 default: | |
127 NOTREACHED(); | |
128 } | |
129 | |
130 return 0; | |
131 } | |
132 | |
133 // Structure representing a 3-byte VEX encoded instruction. | |
28 // | 134 // |
29 // The layout of these instructions is as follows, starting with a byte with | 135 // The layout of these instructions is as follows, starting with a byte with |
30 // value 0xC4: | 136 // value 0xC4: |
137 // - Opcode indicating that this is a 3-byte VEX instruction: | |
138 // +---+---+---+---+---+---+---+---+ | |
139 // | 1 1 0 0 0 1 0 0 | | |
140 // +---+---+---+---+---+---+---+---+ | |
31 // - First byte: | 141 // - First byte: |
32 // +---+---+---+---+---+---+---+---+ | 142 // +---+---+---+---+---+---+---+---+ |
33 // | 1 1 0 0 0 1 0 0 | | 143 // |~R |~X |~B | map_select | |
34 // +---+---+---+---+---+---+---+---+ | 144 // +---+---+---+---+---+---+---+---+ |
35 // - Second byte: | 145 // - Second byte: |
36 // +---+---+---+---+---+---+---+---+ | 146 // +---+---+---+---+---+---+---+---+ |
37 // |~R |~X |~B | map_select | | |
38 // +---+---+---+---+---+---+---+---+ | |
39 // - Third byte: | |
40 // +---+---+---+---+---+---+---+---+ | |
41 // |W/E| ~vvvv | L | pp | | 147 // |W/E| ~vvvv | L | pp | |
42 // +---+---+---+---+---+---+---+---+ | 148 // +---+---+---+---+---+---+---+---+ |
43 // - Fourth byte: The opcode for this instruction. | 149 // - Third byte: The opcode for this instruction. |
44 // | 150 // |
45 // |map_select| Indicates the opcode map that should be used for this | 151 // If this instruction takes some operands then it's followed by a ModR/M byte |
46 // instruction. | 152 // and some optional bytes to represent the operand. We don't represent these |
47 // | 153 // optional bytes here. |
48 // See http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix | 154 // |
155 // See | |
156 // http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix | |
49 // for more details. | 157 // for more details. |
158 struct ThreeBytesVexInstruction { | |
159 explicit ThreeBytesVexInstruction(const uint8_t* data) { | |
160 DCHECK_NE(nullptr, data); | |
161 CHECK_EQ(kThreeByteVexOpcode, data[0]); | |
162 first_byte = data[1]; | |
163 second_byte = data[2]; | |
164 opcode = data[3]; | |
165 } | |
166 | |
167 // Checks if this instruction matches the expectations that we have for it. | |
168 // | |
169 // It compares the values of several fields that can have an impact on the | |
170 // instruction size and makes sure that they have the expected values. | |
171 // | |
172 // @param expected_rxb The expected value for |rxb|. | |
173 // @param expected_we The expected value for |we|. | |
174 // @param expected_l The expected value for |l|. | |
175 // @param expected_pp The expected value for |pp|. | |
176 // @returns true if all the expectations are met, false otherwise. | |
177 bool MatchExpectations(uint8_t expected_rxb, | |
178 uint8_t expected_we, | |
179 uint8_t expected_l, | |
180 uint8_t expected_pp, | |
181 const char* instruction); | |
182 | |
183 // First byte, contains the RXB value and map_select. | |
184 union { | |
185 uint8_t first_byte; | |
186 struct { | |
187 uint8_t map_select : 5; | |
188 uint8_t rxb : 3; | |
huangs
2017/04/28 21:58:04
|rxb| is inverted, and should be labelled as such,
Sébastien Marchand
2017/05/01 16:04:31
Done.
| |
189 }; | |
190 }; | |
191 // Second byte, contains the W/E, ~vvvv, L and pp values. | |
192 union { | |
193 uint8_t second_byte; | |
194 struct { | |
195 uint8_t pp : 2; | |
196 uint8_t l : 1; | |
197 uint8_t vvvv : 4; | |
huangs
2017/04/28 21:58:04
|vvvv| is inverted, and should be labelled as such
Sébastien Marchand
2017/05/01 16:04:32
Done.
| |
198 uint8_t w_e : 1; | |
199 }; | |
200 }; | |
201 | |
202 // Opcode of this instruction. | |
203 uint8_t opcode; | |
204 }; | |
205 | |
206 // Checks if |value| is equal to |expected_value| and logs verbosely if it's | |
207 // not the case. | |
208 bool CheckField(uint8_t expected_value, | |
209 uint8_t value, | |
210 const char* field_name, | |
211 const char* instruction) { | |
212 if (expected_value != value) { | |
213 LOG(ERROR) << "Unexpected " << field_name << " value for the " | |
214 << instruction << " instruction, expecting 0x" << std::hex | |
215 << static_cast<size_t>(expected_value) << " but got 0x" | |
216 << static_cast<size_t>(value) << "."; | |
huangs
2017/04/28 21:58:04
Add
<< std::dec
at end to restore LOG(ERROR)'s s
Sébastien Marchand
2017/05/01 16:04:31
Done.
| |
217 return false; | |
218 } | |
219 return true; | |
220 } | |
221 | |
222 bool ThreeBytesVexInstruction::MatchExpectations(uint8_t expected_rxb, | |
223 uint8_t expected_we, | |
224 uint8_t expected_l, | |
225 uint8_t expected_pp, | |
226 const char* instruction) { | |
227 if (!CheckField(expected_rxb, rxb, "rxb", instruction)) | |
228 return false; | |
229 if (!CheckField(expected_we, w_e, "we", instruction)) | |
230 return false; | |
231 if (!CheckField(expected_l, l, "l", instruction)) | |
232 return false; | |
233 if (!CheckField(expected_pp, pp, "pp", instruction)) | |
234 return false; | |
235 return true; | |
huangs
2017/04/28 21:58:04
Shorter to do
return CheckField(...) &&
CheckFi
Sébastien Marchand
2017/05/01 16:04:31
Splitting this into multiple checks make it easier
| |
236 } | |
237 | |
238 // Return the size of a 3-byte VEX encoded instruction. | |
239 // | |
240 // NOTE: We only support the instructions that have been encountered in Chrome | |
241 // and there are some restrictions on which variants of these instructions are | |
242 // supported. | |
50 size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) { | 243 size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) { |
51 DCHECK_EQ(0xC4, ci->code[0]); | 244 // A 3-byte VEX instruction always has a size of 5 bytes or more (the C4 |
52 // Switch case based on the opcode map used by this instruction. | 245 // byte, the 2 VEX bytes, the opcode and the ModR/M byte). |
53 switch (ci->code[1] & 0x1F) { | 246 DCHECK_GE(ci->codeLen, 5); |
247 | |
248 ThreeBytesVexInstruction instruction(ci->code); | |
249 ModRMByte modRMByte(ci->code[4]); | |
250 | |
251 const size_t kBaseSize = 4; | |
252 size_t operand_size = 0; | |
253 size_t constants_size = 0; | |
254 | |
255 // Switch case based on the opcode map used by this instruction. | |
256 switch (instruction.map_select) { | |
54 case 0x02: { | 257 case 0x02: { |
55 switch (ci->code[3]) { | 258 switch (instruction.opcode) { |
56 case 0x13: return 5; // vcvtps2ps | 259 case 0x13: // vcvtph2ps |
57 case 0x18: return 5; // vbroadcastss | 260 if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtph2ps")) |
58 case 0x36: return 5; // vpermd | 261 operand_size = GetModRMOperandBytesSize(modRMByte, false); |
59 case 0x58: return 6; // vpbroadcastd | 262 break; |
60 case 0x5A: return 6; // vbroadcasti128 | 263 case 0x18: // vbroadcastss |
61 case 0x78: return 5; // vpbroadcastb | 264 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcastss")) |
62 case 0x8C: return 5; // vpmaskmovd | 265 operand_size = GetModRMOperandBytesSize(modRMByte, false); |
63 case 0x8E: return 5; // vpmaskmovd | 266 break; |
64 case 0x90: return 6; // vpgatherdd | 267 case 0x36: // vpermd |
268 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpermd")) | |
269 operand_size = GetModRMOperandBytesSize(modRMByte, false); | |
270 break; | |
271 case 0x5A: // vbroadcasti128 | |
272 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcasti128")) { | |
huangs
2017/04/28 21:58:04
NIT: Inconsistent of {} for these if's.
Sébastien Marchand
2017/05/01 16:04:31
Done.
| |
273 operand_size = GetModRMOperandBytesSize(modRMByte, true); | |
274 } | |
275 break; | |
276 case 0x58: // vpbroadcastb | |
277 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpbroadcastb")) | |
huangs
2017/04/28 21:58:04
0x58 Should be vpbroadcastd (last letter).
Sébastien Marchand
2017/05/01 16:04:32
Done, thanks for spotting this!
| |
278 operand_size = GetModRMOperandBytesSize(modRMByte, false); | |
279 break; | |
280 case 0x78: // vpbroadcastd | |
huangs
2017/04/28 21:58:04
0x78 Should be vpbroadcastb (last letter).
Also,
Sébastien Marchand
2017/05/01 16:04:31
Done.
| |
281 if (instruction.MatchExpectations(0b111, 0, 0, 1, "vpbroadcastd")) | |
282 operand_size = GetModRMOperandBytesSize(modRMByte, false); | |
283 break; | |
284 case 0x8C: // vpmaskmovd | |
285 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpmaskmovd")) { | |
286 operand_size = GetModRMOperandBytesSize(modRMByte, true); | |
287 } | |
288 break; | |
289 case 0x90: // vpgatherdd | |
huangs
2017/04/28 21:58:04
case 0x8E disappeared? This is another vpmaskmovd
Sébastien Marchand
2017/05/01 16:04:31
Yeah, it wasn't properly unittested and I don't se
| |
290 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpgatherdd")) { | |
291 operand_size = GetModRMOperandBytesSize(modRMByte, true); | |
292 } | |
293 break; | |
65 default: | 294 default: |
66 break; | 295 break; |
67 } | 296 } |
68 break; | 297 break; |
69 } | 298 } |
70 case 0x03: { | 299 case 0x03: { |
71 switch (ci->code[3]) { | 300 switch (instruction.opcode) { |
72 case 0x00: return 6; // vpermq | 301 case 0x38: // vinserti128 |
huangs
2017/04/28 21:58:04
Sort?
Sébastien Marchand
2017/05/01 16:04:31
Done.
| |
73 case 0x1D: return 6; // vcvtps2ph | 302 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vinserti128")) { |
74 case 0x38: return 7; // vinserti128 | 303 operand_size = GetModRMOperandBytesSize(modRMByte, false); |
75 case 0x39: return 6; // vextracti128 | 304 constants_size = 1; |
305 } | |
306 break; | |
307 case 0x00: // vpermq | |
308 if (instruction.MatchExpectations(0b111, 1, 1, 1, "vpermq")) { | |
309 operand_size = GetModRMOperandBytesSize(modRMByte, false); | |
310 constants_size = 1; | |
311 } | |
312 break; | |
313 case 0x1D: // vcvtps2ph | |
314 if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtps2ph")) { | |
315 operand_size = GetModRMOperandBytesSize(modRMByte, false); | |
316 constants_size = 1; | |
317 } | |
318 break; | |
319 case 0x39: // vextracti128 | |
320 if (instruction.MatchExpectations(0b111, 0, 0, 1, "vextracti128")) | |
321 operand_size = GetModRMOperandBytesSize(modRMByte, false); | |
76 default: break; | 322 default: break; |
77 } | 323 } |
78 break; | 324 break; |
79 } | 325 } |
80 default: | 326 default: |
81 break; | 327 break; |
82 } | 328 } |
83 | 329 |
330 if (operand_size != 0) | |
331 return kBaseSize + operand_size + constants_size; | |
332 | |
84 // Print the instructions that we haven't been able to decompose in a format | 333 // Print the instructions that we haven't been able to decompose in a format |
85 // that can easily be pasted into ODA (https://onlinedisassembler.com/). | 334 // that can easily be pasted into ODA (https://onlinedisassembler.com/). |
86 const int kMaxBytes = 10; | 335 const int kMaxBytes = 10; |
87 size_t byte_count = std::min(ci->codeLen, kMaxBytes); | 336 size_t byte_count = std::min(ci->codeLen, kMaxBytes); |
88 std::string instruction_bytes; | 337 std::string instruction_bytes; |
89 for (size_t i = 0; i < byte_count; ++i) { | 338 for (size_t i = 0; i < byte_count; ++i) { |
90 base::StringAppendF(&instruction_bytes, "%02X", ci->code[i]); | 339 base::StringAppendF(&instruction_bytes, "%02X", ci->code[i]); |
91 if (i != byte_count - 1) | 340 if (i != byte_count - 1) |
92 instruction_bytes += " "; | 341 instruction_bytes += " "; |
93 } | 342 } |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
143 CHECK_EQ(O_NONE, result->ops[3].type); | 392 CHECK_EQ(O_NONE, result->ops[3].type); |
144 | 393 |
145 --result->addr; | 394 --result->addr; |
146 ++result->size; | 395 ++result->size; |
147 | 396 |
148 *used_instructions_count = 1; | 397 *used_instructions_count = 1; |
149 *ret = DECRES_SUCCESS; | 398 *ret = DECRES_SUCCESS; |
150 | 399 |
151 return true; | 400 return true; |
152 } | 401 } |
402 } else if (ci->code[0] == kThreeByteVexOpcode) { | |
403 size = Get3ByteVexEncodedInstructionSize(ci); | |
153 } | 404 } |
154 | 405 |
155 if (ci->code[0] == 0xC4) | |
156 size = Get3ByteVexEncodedInstructionSize(ci); | |
157 | |
158 if (size == 0) | 406 if (size == 0) |
159 return false; | 407 return false; |
160 | 408 |
161 // We set the bare minimum properties that are required for any | 409 // We set the bare minimum properties that are required for any |
162 // subsequent processing that we perform. | 410 // subsequent processing that we perform. |
163 | 411 |
164 *used_instructions_count = 1; | 412 *used_instructions_count = 1; |
165 | 413 |
166 ::memset(result, 0, sizeof(result[0])); | 414 ::memset(result, 0, sizeof(result[0])); |
167 result[0].addr = ci->codeOffset; | 415 result[0].addr = ci->codeOffset; |
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
444 | 692 |
445 default: return assm::kRegisterNone; | 693 default: return assm::kRegisterNone; |
446 } | 694 } |
447 } | 695 } |
448 | 696 |
449 const Register& GetRegister(uint32_t distorm_reg_type) { | 697 const Register& GetRegister(uint32_t distorm_reg_type) { |
450 return Register::Get(GetRegisterId(distorm_reg_type)); | 698 return Register::Get(GetRegisterId(distorm_reg_type)); |
451 } | 699 } |
452 | 700 |
453 } // namespace core | 701 } // namespace core |
OLD | NEW |