| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "x86_decode.h" | |
| 6 | |
| 7 namespace playground { | |
| 8 | |
| 9 #if defined(__x86_64__) || defined(__i386__) | |
| 10 unsigned short next_inst(const char **ip, bool is64bit, bool *has_prefix, | |
| 11 char **rex_ptr, char **mod_rm_ptr, char **sib_ptr, | |
| 12 bool *is_group) { | |
| 13 enum { | |
| 14 BYTE_OP = (1<<1), // 0x02 | |
| 15 IMM = (1<<2), // 0x04 | |
| 16 IMM_BYTE = (2<<2), // 0x08 | |
| 17 MEM_ABS = (3<<2), // 0x0C | |
| 18 MODE_MASK = (7<<2), // 0x1C | |
| 19 MOD_RM = (1<<5), // 0x20 | |
| 20 STACK = (1<<6), // 0x40 | |
| 21 GROUP = (1<<7), // 0x80 | |
| 22 GROUP_MASK = 0x7F, | |
| 23 }; | |
| 24 | |
| 25 static unsigned char opcode_types[512] = { | |
| 26 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x00 - 0x07 | |
| 27 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x00, // 0x08 - 0x0F | |
| 28 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x10 - 0x17 | |
| 29 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x18 - 0x1F | |
| 30 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x20 - 0x27 | |
| 31 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x28 - 0x2F | |
| 32 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x30 - 0x37 | |
| 33 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x38 - 0x3F | |
| 34 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40 - 0x47 | |
| 35 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x48 - 0x4F | |
| 36 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x50 - 0x57 | |
| 37 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x58 - 0x5F | |
| 38 0x01, 0x01, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0x60 - 0x67 | |
| 39 0x45, 0x25, 0x49, 0x29, 0x03, 0x01, 0x03, 0x01, // 0x68 - 0x6F | |
| 40 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x70 - 0x77 | |
| 41 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x78 - 0x7F | |
| 42 0x27, 0x25, 0x27, 0x29, 0x23, 0x21, 0x23, 0x21, // 0x80 - 0x87 | |
| 43 0x23, 0x21, 0x23, 0x21, 0x21, 0x21, 0x21, 0x80, // 0x88 - 0x8F | |
| 44 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x90 - 0x97 | |
| 45 0x01, 0x01, 0x05, 0x01, 0x41, 0x41, 0x01, 0x01, // 0x98 - 0x9F | |
| 46 0x0F, 0x0D, 0x0F, 0x0D, 0x03, 0x01, 0x03, 0x01, // 0xA0 - 0xA7 | |
| 47 0x09, 0x05, 0x03, 0x01, 0x03, 0x01, 0x03, 0x01, // 0xA8 - 0xAF | |
| 48 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // 0xB0 - 0xB7 | |
| 49 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0xB8 - 0xBF | |
| 50 0x27, 0x29, 0x01, 0x01, 0x21, 0x21, 0x27, 0x25, // 0xC0 - 0xC7 | |
| 51 0x01, 0x01, 0x01, 0x01, 0x01, 0x09, 0x01, 0x01, // 0xC8 - 0xCF | |
| 52 0x23, 0x21, 0x23, 0x21, 0x09, 0x09, 0x01, 0x01, // 0xD0 - 0xD7 | |
| 53 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xD8 - 0xDF | |
| 54 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0xE0 - 0xE7 | |
| 55 0x05, 0x05, 0x05, 0x09, 0x03, 0x01, 0x03, 0x01, // 0xE8 - 0xEF | |
| 56 0x00, 0x01, 0x00, 0x00, 0x01, 0x01, 0x88, 0x90, // 0xF0 - 0xF7 | |
| 57 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x98, 0xA0, // 0xF8 - 0xFF | |
| 58 0x00, 0xA8, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, // 0xF00 - 0xF07 | |
| 59 0x01, 0x01, 0x00, 0x01, 0x00, 0x21, 0x01, 0x00, // 0xF08 - 0xF0F | |
| 60 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF10 - 0xF17 | |
| 61 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF18 - 0xF1F | |
| 62 0x21, 0x21, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0xF20 - 0xF27 | |
| 63 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF28 - 0xF2F | |
| 64 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, // 0xF30 - 0xF37 | |
| 65 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF38 - 0xF3F | |
| 66 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF40 - 0xF47 | |
| 67 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF48 - 0xF4F | |
| 68 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF50 - 0xF57 | |
| 69 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF58 - 0xF5F | |
| 70 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF60 - 0xF67 | |
| 71 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF68 - 0xF6F | |
| 72 0x21, 0x00, 0x00, 0x00, 0x21, 0x21, 0x21, 0x00, // 0xF70 - 0xF77 | |
| 73 0x21, 0x21, 0x00, 0x00, 0x21, 0x21, 0x21, 0x21, // 0xF78 - 0xF7F | |
| 74 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF80 - 0xF87 | |
| 75 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF88 - 0xF8F | |
| 76 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF90 - 0xF97 | |
| 77 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF98 - 0xF9F | |
| 78 0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x00, 0x00, // 0xFA0 - 0xFA7 | |
| 79 0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x21, 0x21, // 0xFA8 - 0xFAF | |
| 80 0x23, 0x21, 0x00, 0x21, 0x00, 0x00, 0x23, 0x21, // 0xFB0 - 0xFB7 | |
| 81 0x21, 0x00, 0x29, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFB8 - 0xFBF | |
| 82 0x21, 0x21, 0x00, 0x21, 0x00, 0x00, 0x00, 0x21, // 0xFC0 - 0xFC7 | |
| 83 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xFC8 - 0xFCF | |
| 84 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD0 - 0xFD7 | |
| 85 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD8 - 0xFDF | |
| 86 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE0 - 0xFE7 | |
| 87 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE8 - 0xFEF | |
| 88 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF0 - 0xFF7 | |
| 89 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF8 - 0xFFF | |
| 90 }; | |
| 91 | |
| 92 static unsigned char group_table[56] = { | |
| 93 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 1A | |
| 94 0x27, 0x27, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, // Group 3 (Byte) | |
| 95 0x25, 0x25, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // Group 3 | |
| 96 0x23, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 4 | |
| 97 0x21, 0x21, 0x61, 0x21, 0x61, 0x21, 0x61, 0x00, // Group 5 | |
| 98 0x00, 0x00, 0x21, 0x21, 0x21, 0x00, 0x21, 0x23, // Group 7 | |
| 99 0x21, 0x00, 0x00, 0x21, 0x21, 0x00, 0x21, 0x00, // Group 7 (Alternate) | |
| 100 }; | |
| 101 | |
| 102 const unsigned char *insn_ptr = reinterpret_cast<const unsigned char *>(*ip); | |
| 103 int operand_width = 4; | |
| 104 int address_width = 4; | |
| 105 if (is64bit) { | |
| 106 address_width = 8; | |
| 107 } | |
| 108 unsigned char byte, rex = 0; | |
| 109 bool found_prefix = false; | |
| 110 if (rex_ptr) { | |
| 111 *rex_ptr = 0; | |
| 112 } | |
| 113 if (mod_rm_ptr) { | |
| 114 *mod_rm_ptr = 0; | |
| 115 } | |
| 116 if (sib_ptr) { | |
| 117 *sib_ptr = 0; | |
| 118 } | |
| 119 for (;; ++insn_ptr) { | |
| 120 switch (byte = *insn_ptr) { | |
| 121 case 0x66: // Operand width prefix | |
| 122 operand_width ^= 6; | |
| 123 break; | |
| 124 case 0x67: // Address width prefix | |
| 125 address_width ^= is64bit ? 12 : 6; | |
| 126 break; | |
| 127 case 0x26: // Segment selector prefixes | |
| 128 case 0x2e: | |
| 129 case 0x36: | |
| 130 case 0x3e: | |
| 131 case 0x64: | |
| 132 case 0x65: | |
| 133 case 0xF0: | |
| 134 case 0xF2: | |
| 135 case 0xF3: | |
| 136 break; | |
| 137 case 0x40: case 0x41: case 0x42: case 0x43: // 64 bit REX prefixes | |
| 138 case 0x44: case 0x45: case 0x46: case 0x47: | |
| 139 case 0x48: case 0x49: case 0x4A: case 0x4B: | |
| 140 case 0x4C: case 0x4D: case 0x4E: case 0x4F: | |
| 141 if (is64bit) { | |
| 142 if (rex_ptr) { | |
| 143 *rex_ptr = (char *)insn_ptr; | |
| 144 } | |
| 145 rex = byte; | |
| 146 found_prefix = true; | |
| 147 continue; | |
| 148 } | |
| 149 // fall through | |
| 150 default: | |
| 151 ++insn_ptr; | |
| 152 goto no_more_prefixes; | |
| 153 } | |
| 154 rex = 0; | |
| 155 found_prefix = true; | |
| 156 } | |
| 157 no_more_prefixes: | |
| 158 if (has_prefix) { | |
| 159 *has_prefix = found_prefix; | |
| 160 } | |
| 161 if (rex & REX_W) { | |
| 162 operand_width = 8; | |
| 163 } | |
| 164 unsigned char type; | |
| 165 unsigned short insn = byte; | |
| 166 unsigned int idx = 0; | |
| 167 if (byte == 0x0F) { | |
| 168 byte = *insn_ptr++; | |
| 169 insn = (insn << 8) | byte; | |
| 170 idx = 256; | |
| 171 } | |
| 172 type = opcode_types[idx + byte]; | |
| 173 bool found_mod_rm = false; | |
| 174 bool found_group = false; | |
| 175 bool found_sib = false; | |
| 176 unsigned char mod_rm = 0; | |
| 177 unsigned char sib = 0; | |
| 178 if (type & GROUP) { | |
| 179 found_mod_rm = true; | |
| 180 found_group = true; | |
| 181 mod_rm = *insn_ptr; | |
| 182 if (mod_rm_ptr) { | |
| 183 *mod_rm_ptr = (char *)insn_ptr; | |
| 184 } | |
| 185 unsigned char group = (type & GROUP_MASK) + ((mod_rm >> 3) & 0x7); | |
| 186 if ((type & GROUP_MASK) == 40 && (mod_rm >> 6) == 3) { | |
| 187 group += 8; | |
| 188 } | |
| 189 type = group_table[group]; | |
| 190 } | |
| 191 if (!type) { | |
| 192 // We know that we still don't decode some of the more obscure | |
| 193 // instructions, but for all practical purposes that doesn't matter. | |
| 194 // Compilers are unlikely to output them, and even if we encounter | |
| 195 // hand-coded assembly, we will soon synchronize to the instruction | |
| 196 // stream again. | |
| 197 // | |
| 198 // std::cerr << "Unsupported instruction at 0x" << std::hex << | |
| 199 // std::uppercase << reinterpret_cast<long>(*ip) << " [ "; | |
| 200 // for (const unsigned char *ptr = | |
| 201 // reinterpret_cast<const unsigned char *>(*ip); | |
| 202 // ptr < insn_ptr; ) { | |
| 203 // std::cerr << std::hex << std::uppercase << std::setw(2) << | |
| 204 // std::setfill('0') << (unsigned int)*ptr++ << ' '; | |
| 205 // } | |
| 206 // std::cerr << "]" << std::endl; | |
| 207 } else { | |
| 208 if (is64bit && (type & STACK)) { | |
| 209 operand_width = 8; | |
| 210 } | |
| 211 if (type & MOD_RM) { | |
| 212 found_mod_rm = true; | |
| 213 if (mod_rm_ptr) { | |
| 214 *mod_rm_ptr = (char *)insn_ptr; | |
| 215 } | |
| 216 mod_rm = *insn_ptr++; | |
| 217 int mod = (mod_rm >> 6) & 0x3; | |
| 218 int rm = 8*(rex & REX_B) + (mod_rm & 0x7); | |
| 219 if (mod != 3) { | |
| 220 if (address_width == 2) { | |
| 221 switch (mod) { | |
| 222 case 0: | |
| 223 if (rm != 6 /* SI */) { | |
| 224 break; | |
| 225 } | |
| 226 // fall through | |
| 227 case 2: | |
| 228 insn_ptr++; | |
| 229 // fall through | |
| 230 case 1: | |
| 231 insn_ptr++; | |
| 232 break; | |
| 233 } | |
| 234 } else { | |
| 235 if ((rm & 0x7) == 4) { | |
| 236 found_sib = true; | |
| 237 if (sib_ptr) { | |
| 238 *sib_ptr = (char *)insn_ptr; | |
| 239 } | |
| 240 sib = *insn_ptr++; | |
| 241 if (!mod && (sib & 0x7) == 5 /* BP */) { | |
| 242 insn_ptr += 4; | |
| 243 } | |
| 244 } | |
| 245 switch (mod) { | |
| 246 case 0: | |
| 247 if (rm != 5 /* BP */) { | |
| 248 break; | |
| 249 } | |
| 250 // fall through | |
| 251 case 2: | |
| 252 insn_ptr += 3; | |
| 253 // fall through | |
| 254 case 1: | |
| 255 insn_ptr++; | |
| 256 break; | |
| 257 } | |
| 258 } | |
| 259 } | |
| 260 } | |
| 261 switch (insn) { | |
| 262 case 0xC8: // ENTER | |
| 263 insn_ptr++; | |
| 264 // fall through | |
| 265 case 0x9A: // CALL (far) | |
| 266 case 0xC2: // RET (near) | |
| 267 case 0xCA: // LRET | |
| 268 case 0xEA: // JMP (far) | |
| 269 insn_ptr += 2; | |
| 270 break; | |
| 271 case 0xF80: case 0xF81: case 0xF82: case 0xF83: // Jcc (rel) | |
| 272 case 0xF84: case 0xF85: case 0xF86: case 0xF87: | |
| 273 case 0xF88: case 0xF89: case 0xF8A: case 0xF8B: | |
| 274 case 0xF8C: case 0xF8D: case 0xF8E: case 0xF8F: | |
| 275 insn_ptr += operand_width; | |
| 276 break; | |
| 277 } | |
| 278 switch (type & MODE_MASK) { | |
| 279 case IMM: | |
| 280 if (!(type & BYTE_OP)) { | |
| 281 switch (insn) { | |
| 282 case 0xB8: case 0xB9: case 0xBA: case 0xBB: | |
| 283 case 0xBC: case 0xBD: case 0xBE: case 0xBF: | |
| 284 // Allow MOV to/from 64bit addresses | |
| 285 insn_ptr += operand_width; | |
| 286 break; | |
| 287 default: | |
| 288 insn_ptr += (operand_width == 8) ? 4 : operand_width; | |
| 289 break; | |
| 290 } | |
| 291 break; | |
| 292 } | |
| 293 // fall through | |
| 294 case IMM_BYTE: | |
| 295 insn_ptr++; | |
| 296 break; | |
| 297 case MEM_ABS: | |
| 298 insn_ptr += address_width; | |
| 299 break; | |
| 300 } | |
| 301 } | |
| 302 if (is_group) { | |
| 303 *is_group = found_group; | |
| 304 } | |
| 305 *ip = reinterpret_cast<const char *>(insn_ptr); | |
| 306 return insn; | |
| 307 } | |
| 308 #endif | |
| 309 | |
| 310 } // namespace | |
| OLD | NEW |