Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2012 Google Inc. All Rights Reserved. | 1 // Copyright 2012 Google Inc. All Rights Reserved. |
| 2 // | 2 // |
| 3 // Licensed under the Apache License, Version 2.0 (the "License"); | 3 // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 // you may not use this file except in compliance with the License. | 4 // you may not use this file except in compliance with the License. |
| 5 // You may obtain a copy of the License at | 5 // You may obtain a copy of the License at |
| 6 // | 6 // |
| 7 // http://www.apache.org/licenses/LICENSE-2.0 | 7 // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 // | 8 // |
| 9 // Unless required by applicable law or agreed to in writing, software | 9 // Unless required by applicable law or agreed to in writing, software |
| 10 // distributed under the License is distributed on an "AS IS" BASIS, | 10 // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 // See the License for the specific language governing permissions and | 12 // See the License for the specific language governing permissions and |
| 13 // limitations under the License. | 13 // limitations under the License. |
| 14 | 14 |
| 15 #include "syzygy/core/disassembler_util.h" | 15 #include "syzygy/core/disassembler_util.h" |
| 16 | 16 |
| 17 #include <algorithm> | 17 #include <algorithm> |
| 18 | 18 |
| 19 #include "base/logging.h" | 19 #include "base/logging.h" |
| 20 #include "base/strings/stringprintf.h" | 20 #include "base/strings/stringprintf.h" |
| 21 #include "mnemonics.h" // NOLINT | 21 #include "mnemonics.h" // NOLINT |
| 22 | 22 |
| 23 namespace core { | 23 namespace core { |
| 24 | 24 |
| 25 namespace { | 25 namespace { |
| 26 | 26 |
| 27 // Return the size of a 3-byte VEX encoded instruction. | 27 // Opcode of the 3-byte VEX instructions. |
| 28 const uint8_t kThreeByteVexOpcode = 0xC4; | |
| 29 | |
| 30 // Structure representing a Mod R/M byte, it has the following format: | |
| 31 // +---+---+---+---+---+---+---+---+ | |
| 32 // | mod |reg/opcode | r/m | | |
| 33 // +---+---+---+---+---+---+---+---+ | |
| 34 // | |
| 35 // Here's a description of the different fields (from | |
| 36 // https://en.wikipedia.org/wiki/VEX_prefix): | |
| 37 // - mod: combined with the r/m field, encodes either 8 registers or 24 | |
| 38 // addressing modes. Also encodes opcode information for some | |
| 39 // instructions. | |
| 40 // - reg/opcode: specifies either a register or three more bits of | |
| 41 // opcode information, as specified in the primary opcode byte. | |
| 42 // - r/m: can specify a register as an operand, or combine with the mod | |
| 43 // field to encode an addressing mode. | |
| 44 // | |
| 45 // The |mod| field can have the following values: | |
| 46 // - 0b00: Register indirect addressing mode or SIB with no displacement | |
| 47 // (if r/m = 0b100) or displacement only addressing mode (if r/m = 0b101). | |
| 48 // - 0b01: One-byte signed displacement follows addressing mode byte(s). | |
| 49 // - 0b10: Four-byte signed displacement follows addressing mode byte(s). | |
| 50 // - 0b11: Register addressing mode. | |
| 51 struct ModRMByte { | |
| 52 // Constructor. | |
| 53 // @param value The value used to initialize this Mod R/M byte. | |
| 54 explicit ModRMByte(uint8_t value) : raw_value(value) {} | |
| 55 | |
| 56 union { | |
| 57 uint8_t raw_value; | |
| 58 struct { | |
| 59 uint8_t r_m : 3; | |
| 60 uint8_t reg_or_opcode : 3; | |
| 61 uint8_t mod : 2; | |
| 62 }; | |
| 63 }; | |
| 64 }; | |
| 65 | |
| 66 // Calculates the number of bytes used to encode a Mod R/M operand. | |
| 67 // @param ci The code information for this instruction. | |
| 68 // @param has_register_addressing_mode Indicates if the instruction supports | |
| 69 // the register addressing mode (value of |mod| of 0b11). | |
| 70 // @returns the total size of this Mod R/M operand (in bytes), 0 on failure. | |
| 71 size_t GetModRMOperandBytesSize(const _CodeInfo* ci, | |
| 72 bool has_register_addressing_mode) { | |
| 73 DCHECK_GE(ci->codeLen, 5); | |
| 74 | |
| 75 // If SIB (Scale*Index+Base) is specified then the operand uses an | |
| 76 // additional SIB byte. | |
| 77 const uint8_t kSIBValue = 0b100; | |
| 78 ModRMByte modRM_byte(ci->code[4]); | |
| 79 | |
| 80 switch (modRM_byte.mod) { | |
| 81 case 0b00: { | |
| 82 if (modRM_byte.r_m == kSIBValue) { | |
| 83 CHECK_GE(ci->codeLen, 6); | |
| 84 // The SIB byte has the following layout: | |
| 85 // +---+---+---+---+---+---+---+---+ | |
| 86 // | scale | index | base | | |
| 87 // +---+---+---+---+---+---+---+---+ | |
| 88 // | |
| 89 // If |base| = 5 then there are 4 additional bytes used to encode the
|
chrisha
2017/05/02 15:02:51
4 bytes*
Sébastien Marchand
2017/05/02 15:20:48
Done.
| |
| 90 // displacement, e.g.: | |
| 91 // vpbroadcastd ymm0, DWORD PTR [ebp+eax*8+0x76543210] | |
| 92 const uint8_t kSIBBaseMask = 0b111; | |
| 93 if ((ci->code[5] & kSIBBaseMask) == 5) | |
| 94 return 6; | |
| 95 // If |base| != 5 then there's just the SIB byte, e.g.: | |
| 96 // vpbroadcastd ymm0, DWORD PTR [ecx+edx*1] | |
| 97 return 2; | |
| 98 } | |
| 99 if (modRM_byte.r_m == 0b101) { | |
| 100 // Displacement only addressing mode, e.g.: | |
| 101 // vpbroadcastb xmm2, BYTE PTR ds:0x12345678 | |
| 102 return 5; | |
| 103 } | |
| 104 // Register indirect addressing mode, e.g.: | |
| 105 // vpbroadcastb xmm2, BYTE PTR [eax] | |
| 106 return 1; | |
| 107 } | |
| 108 case 0b01: { | |
| 109 // One-byte displacement. | |
| 110 if (modRM_byte.r_m == kSIBValue) { | |
| 111 // Additional SIB byte, e.g.: | |
| 112 // vpbroadcastb xmm2, BYTE PTR [eax+edx*1+0x42] | |
| 113 return 3; | |
| 114 } | |
| 115 // No SIB byte, e.g.: | |
| 116 // vpbroadcastb xmm2, BYTE PTR [eax+0x42] | |
| 117 return 2; | |
| 118 } | |
| 119 case 0b10: { | |
| 120 // Four-byte displacement. | |
| 121 if (modRM_byte.r_m == kSIBValue) { | |
| 122 // Additional SIB byte, e.g.: | |
| 123 // vpbroadcastb xmm0, BYTE PTR [edx+edx*1+0x12345678] | |
| 124 return 6; | |
| 125 } | |
| 126 // No SIB byte, e.g.: | |
| 127 // vpbroadcastb xmm0, BYTE PTR [eax+0x34567812] | |
| 128 return 5; | |
| 129 } | |
| 130 case 0b11: | |
| 131 // Register addressing mode, e.g.: | |
| 132 // vpbroadcastb xmm2, xmm0 | |
| 133 if (has_register_addressing_mode) | |
| 134 return 1; | |
| 135 LOG(ERROR) << "Unexpected |mod| value of 0b11 for an instruction that " | |
| 136 << "doesn't support it."; | |
| 137 return 0; | |
| 138 default: | |
| 139 NOTREACHED(); | |
| 140 } | |
| 141 | |
| 142 return 0; | |
| 143 } | |
| 144 | |
| 145 // Structure representing a 3-byte VEX encoded instruction. | |
| 28 // | 146 // |
| 29 // The layout of these instructions is as follows, starting with a byte with | 147 // The layout of these instructions is as follows, starting with a byte with |
| 30 // value 0xC4: | 148 // value 0xC4: |
| 149 // - Opcode indicating that this is a 3-byte VEX instruction: | |
| 150 // +---+---+---+---+---+---+---+---+ | |
| 151 // | 1 1 0 0 0 1 0 0 | | |
| 152 // +---+---+---+---+---+---+---+---+ | |
| 31 // - First byte: | 153 // - First byte: |
| 32 // +---+---+---+---+---+---+---+---+ | 154 // +---+---+---+---+---+---+---+---+ |
| 33 // | 1 1 0 0 0 1 0 0 | | 155 // |~R |~X |~B | map_select | |
| 34 // +---+---+---+---+---+---+---+---+ | 156 // +---+---+---+---+---+---+---+---+ |
| 35 // - Second byte: | 157 // - Second byte: |
| 36 // +---+---+---+---+---+---+---+---+ | 158 // +---+---+---+---+---+---+---+---+ |
| 37 // |~R |~X |~B | map_select | | |
| 38 // +---+---+---+---+---+---+---+---+ | |
| 39 // - Third byte: | |
| 40 // +---+---+---+---+---+---+---+---+ | |
| 41 // |W/E| ~vvvv | L | pp | | 159 // |W/E| ~vvvv | L | pp | |
| 42 // +---+---+---+---+---+---+---+---+ | 160 // +---+---+---+---+---+---+---+---+ |
| 43 // - Fourth byte: The opcode for this instruction. | 161 // - Third byte: The opcode for this instruction. |
| 44 // | 162 // |
| 45 // |map_select| Indicates the opcode map that should be used for this | 163 // If this instruction takes some operands then it's followed by a ModR/M byte |
| 46 // instruction. | 164 // and some optional bytes to represent the operand. We don't represent these |
| 47 // | 165 // optional bytes here. |
| 48 // See http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix | 166 // |
| 167 // See | |
| 168 // http://wiki.osdev.org/X86-64_Instruction_Encoding#Three_byte_VEX_escape_prefix | |
| 49 // for more details. | 169 // for more details. |
| 170 struct ThreeBytesVexInstruction { | |
| 171 explicit ThreeBytesVexInstruction(const uint8_t* data) { | |
| 172 DCHECK_NE(nullptr, data); | |
| 173 CHECK_EQ(kThreeByteVexOpcode, data[0]); | |
| 174 first_byte = data[1]; | |
| 175 second_byte = data[2]; | |
| 176 opcode = data[3]; | |
| 177 } | |
| 178 | |
| 179 // Checks if this instruction matches the expectations that we have for it. | |
| 180 // | |
| 181 // It compares the value of several fields that can have an impact on the | |
| 182 // instruction size and makes sure that they have the expected value. | |
| 183 // | |
| 184 // @param expected_inv_rxb The expected value for |inv_rxb|. | |
| 185 // @param expected_we The expected value for |w_e|. | |
| 186 // @param expected_l The expected value for |l|. | |
| 187 // @param expected_pp The expected value for |pp|. | |
| 188 // @returns true if all the expectations are met, false otherwise. | |
| 189 bool MatchExpectations(uint8_t expected_inv_rxb, | |
| 190 uint8_t expected_we, | |
| 191 uint8_t expected_l, | |
| 192 uint8_t expected_pp, | |
| 193 const char* instruction); | |
| 194 | |
| 195 // First byte, contains the RXB value and map_select. | |
| 196 union { | |
| 197 uint8_t first_byte; | |
| 198 struct { | |
| 199 uint8_t map_select : 5; | |
| 200 uint8_t inv_rxb : 3; | |
| 201 }; | |
| 202 }; | |
| 203 // Second byte, contains the W/E, ~vvvv, L and pp values. | |
| 204 union { | |
| 205 uint8_t second_byte; | |
| 206 struct { | |
| 207 uint8_t pp : 2; | |
| 208 uint8_t l : 1; | |
| 209 uint8_t inv_vvvv : 4; | |
| 210 uint8_t w_e : 1; | |
| 211 }; | |
| 212 }; | |
| 213 | |
| 214 // Opcode of this instruction. | |
| 215 uint8_t opcode; | |
| 216 }; | |
| 217 | |
| 218 // Checks if |value| is equal to |expected_value| and logs verbosely if it's | |
| 219 // not the case. | |
| 220 bool CheckField(uint8_t expected_value, | |
| 221 uint8_t value, | |
| 222 const char* field_name, | |
| 223 const char* instruction) { | |
| 224 if (expected_value != value) { | |
| 225 LOG(ERROR) << "Unexpected " << field_name << " value for the " | |
| 226 << instruction << " instruction, expecting 0x" << std::hex | |
| 227 << static_cast<size_t>(expected_value) << " but got 0x" | |
| 228 << static_cast<size_t>(value) << "." << std::dec; | |
| 229 return false; | |
| 230 } | |
| 231 return true; | |
| 232 } | |
| 233 | |
| 234 bool ThreeBytesVexInstruction::MatchExpectations(uint8_t expected_inv_rxb, | |
| 235 uint8_t expected_we, | |
| 236 uint8_t expected_l, | |
| 237 uint8_t expected_pp, | |
| 238 const char* instruction) { | |
| 239 if (!CheckField(expected_inv_rxb, inv_rxb, "inv_rxb", instruction)) | |
| 240 return false; | |
| 241 if (!CheckField(expected_we, w_e, "we", instruction)) | |
| 242 return false; | |
| 243 if (!CheckField(expected_l, l, "l", instruction)) | |
| 244 return false; | |
| 245 if (!CheckField(expected_pp, pp, "pp", instruction)) | |
| 246 return false; | |
| 247 return true; | |
| 248 } | |
| 249 | |
| 250 // Returns the size of a 3-byte VEX encoded instruction. | |
| 251 // | |
| 252 // NOTE: We only support the instructions that have been encountered in Chrome | |
| 253 // and there are some restrictions on which variants of these instructions are | |
| 254 // supported. | |
| 50 size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) { | 255 size_t Get3ByteVexEncodedInstructionSize(_CodeInfo* ci) { |
| 51 DCHECK_EQ(0xC4, ci->code[0]); | 256 // A 3-byte VEX instruction always has a size of 5 bytes or more (the C4 |
| 52 // Switch case based on the opcode map used by this instruction. | 257 // constant, the 3 VEX bytes and the mod R/M byte). |
| 53 switch (ci->code[1] & 0x1F) { | 258 DCHECK_GE(ci->codeLen, 5); |
| 259 | |
| 260 ThreeBytesVexInstruction instruction(ci->code); | |
| 261 | |
| 262 const size_t kBaseSize = 4; | |
| 263 size_t operand_size = 0; | |
| 264 size_t constants_size = 0; | |
| 265 | |
| 266 // Switch case based on the opcode map used by this instruction. | |
| 267 switch (instruction.map_select) { | |
| 54 case 0x02: { | 268 case 0x02: { |
| 55 switch (ci->code[3]) { | 269 switch (instruction.opcode) { |
| 56 case 0x13: return 5; // vcvtps2ps | 270 case 0x13: // vcvtph2ps |
| 57 case 0x18: return 5; // vbroadcastss | 271 if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtph2ps")) |
| 58 case 0x36: return 5; // vpermd | 272 operand_size = GetModRMOperandBytesSize(ci, true); |
| 59 case 0x58: return 6; // vpbroadcastd | 273 break; |
| 60 case 0x5A: return 6; // vbroadcasti128 | 274 case 0x18: // vbroadcastss |
| 61 case 0x78: return 5; // vpbroadcastb | 275 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcastss")) |
| 62 case 0x8C: return 5; // vpmaskmovd | 276 operand_size = GetModRMOperandBytesSize(ci, true); |
| 63 case 0x8E: return 5; // vpmaskmovd | 277 break; |
| 64 case 0x90: return 6; // vpgatherdd | 278 case 0x36: // vpermd |
| 279 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpermd")) | |
| 280 operand_size = GetModRMOperandBytesSize(ci, true); | |
| 281 break; | |
| 282 case 0x58: // vpbroadcastd | |
| 283 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpbroadcastd")) | |
| 284 operand_size = GetModRMOperandBytesSize(ci, true); | |
| 285 break; | |
| 286 case 0x5A: // vbroadcasti128 | |
| 287 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vbroadcasti128")) | |
| 288 operand_size = GetModRMOperandBytesSize(ci, false); | |
| 289 break; | |
| 290 case 0x78: // vpbroadcastb | |
| 291 if (instruction.MatchExpectations(0b111, 0, 0, 1, "vpbroadcastb")) | |
| 292 operand_size = GetModRMOperandBytesSize(ci, true); | |
| 293 break; | |
| 294 case 0x8C: // vpmaskmovd | |
| 295 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpmaskmovd")) | |
| 296 operand_size = GetModRMOperandBytesSize(ci, false); | |
| 297 break; | |
| 298 case 0x90: // vpgatherdd | |
| 299 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vpgatherdd")) | |
| 300 operand_size = GetModRMOperandBytesSize(ci, false); | |
| 301 break; | |
| 65 default: | 302 default: |
| 66 break; | 303 break; |
| 67 } | 304 } |
| 68 break; | 305 break; |
| 69 } | 306 } |
| 70 case 0x03: { | 307 case 0x03: { |
| 71 switch (ci->code[3]) { | 308 switch (instruction.opcode) { |
| 72 case 0x00: return 6; // vpermq | 309 case 0x00: // vpermq |
| 73 case 0x1D: return 6; // vcvtps2ph | 310 if (instruction.MatchExpectations(0b111, 1, 1, 1, "vpermq")) { |
| 74 case 0x38: return 7; // vinserti128 | 311 operand_size = GetModRMOperandBytesSize(ci, true); |
| 75 case 0x39: return 6; // vextracti128 | 312 constants_size = 1; |
| 313 } | |
| 314 break; | |
| 315 case 0x1D: // vcvtps2ph | |
| 316 if (instruction.MatchExpectations(0b111, 0, 0, 1, "vcvtps2ph")) { | |
| 317 operand_size = GetModRMOperandBytesSize(ci, true); | |
| 318 constants_size = 1; | |
| 319 } | |
| 320 break; | |
| 321 case 0x38: // vinserti128 | |
| 322 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vinserti128")) { | |
| 323 operand_size = GetModRMOperandBytesSize(ci, true); | |
| 324 constants_size = 1; | |
| 325 } | |
| 326 break; | |
| 327 case 0x39: // vextracti128 | |
| 328 if (instruction.MatchExpectations(0b111, 0, 1, 1, "vextracti128")) { | |
| 329 operand_size = GetModRMOperandBytesSize(ci, true); | |
| 330 constants_size = 1; | |
| 331 } | |
| 76 default: break; | 332 default: break; |
| 77 } | 333 } |
| 78 break; | 334 break; |
| 79 } | 335 } |
| 80 default: | 336 default: |
| 81 break; | 337 break; |
| 82 } | 338 } |
| 83 | 339 |
| 340 if (operand_size != 0) | |
| 341 return kBaseSize + operand_size + constants_size; | |
| 342 | |
| 84 // Print the instructions that we haven't been able to decompose in a format | 343 // Print the instructions that we haven't been able to decompose in a format |
| 85 // that can easily be pasted into ODA (https://onlinedisassembler.com/). | 344 // that can easily be pasted into ODA (https://onlinedisassembler.com/). |
| 86 const int kMaxBytes = 10; | 345 const int kMaxBytes = 10; |
| 87 size_t byte_count = std::min(ci->codeLen, kMaxBytes); | 346 size_t byte_count = std::min(ci->codeLen, kMaxBytes); |
| 88 std::string instruction_bytes; | 347 std::string instruction_bytes; |
| 89 for (size_t i = 0; i < byte_count; ++i) { | 348 for (size_t i = 0; i < byte_count; ++i) { |
| 90 base::StringAppendF(&instruction_bytes, "%02X", ci->code[i]); | 349 base::StringAppendF(&instruction_bytes, "%02X", ci->code[i]); |
| 91 if (i != byte_count - 1) | 350 if (i != byte_count - 1) |
| 92 instruction_bytes += " "; | 351 instruction_bytes += " "; |
| 93 } | 352 } |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 143 CHECK_EQ(O_NONE, result->ops[3].type); | 402 CHECK_EQ(O_NONE, result->ops[3].type); |
| 144 | 403 |
| 145 --result->addr; | 404 --result->addr; |
| 146 ++result->size; | 405 ++result->size; |
| 147 | 406 |
| 148 *used_instructions_count = 1; | 407 *used_instructions_count = 1; |
| 149 *ret = DECRES_SUCCESS; | 408 *ret = DECRES_SUCCESS; |
| 150 | 409 |
| 151 return true; | 410 return true; |
| 152 } | 411 } |
| 412 } else if (ci->code[0] == kThreeByteVexOpcode) { | |
| 413 size = Get3ByteVexEncodedInstructionSize(ci); | |
| 153 } | 414 } |
| 154 | 415 |
| 155 if (ci->code[0] == 0xC4) | |
| 156 size = Get3ByteVexEncodedInstructionSize(ci); | |
| 157 | |
| 158 if (size == 0) | 416 if (size == 0) |
| 159 return false; | 417 return false; |
| 160 | 418 |
| 161 // We set the bare minimum properties that are required for any | 419 // We set the bare minimum properties that are required for any |
| 162 // subsequent processing that we perform. | 420 // subsequent processing that we perform. |
| 163 | 421 |
| 164 *used_instructions_count = 1; | 422 *used_instructions_count = 1; |
| 165 | 423 |
| 166 ::memset(result, 0, sizeof(result[0])); | 424 ::memset(result, 0, sizeof(result[0])); |
| 167 result[0].addr = ci->codeOffset; | 425 result[0].addr = ci->codeOffset; |
| (...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 444 | 702 |
| 445 default: return assm::kRegisterNone; | 703 default: return assm::kRegisterNone; |
| 446 } | 704 } |
| 447 } | 705 } |
| 448 | 706 |
| 449 const Register& GetRegister(uint32_t distorm_reg_type) { | 707 const Register& GetRegister(uint32_t distorm_reg_type) { |
| 450 return Register::Get(GetRegisterId(distorm_reg_type)); | 708 return Register::Get(GetRegisterId(distorm_reg_type)); |
| 451 } | 709 } |
| 452 | 710 |
| 453 } // namespace core | 711 } // namespace core |
| OLD | NEW |