OLD | NEW |
(Empty) | |
| 1 /* udis86 - libudis86/decode.c |
| 2 * |
| 3 * Copyright (c) 2002-2009 Vivek Thampi |
| 4 * All rights reserved. |
| 5 * |
| 6 * Redistribution and use in source and binary forms, with or without modificati
on, |
| 7 * are permitted provided that the following conditions are met: |
| 8 * |
| 9 * * Redistributions of source code must retain the above copyright notice, |
| 10 * this list of conditions and the following disclaimer. |
| 11 * * Redistributions in binary form must reproduce the above copyright notic
e, |
| 12 * this list of conditions and the following disclaimer in the documentati
on |
| 13 * and/or other materials provided with the distribution. |
| 14 * |
| 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" A
ND |
| 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
| 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE F
OR |
| 19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGE
S |
| 20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND O
N |
| 22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
| 24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 25 */ |
| 26 #include "udint.h" |
| 27 #include "types.h" |
| 28 #include "input.h" |
| 29 #include "decode.h" |
| 30 |
| 31 #ifndef __UD_STANDALONE__ |
| 32 # include <string.h> |
| 33 #endif /* __UD_STANDALONE__ */ |
| 34 |
| 35 /* The max number of prefixes to an instruction */ |
| 36 #define MAX_PREFIXES 15 |
| 37 |
| 38 /* rex prefix bits */ |
| 39 #define REX_W(r) ( ( 0xF & ( r ) ) >> 3 ) |
| 40 #define REX_R(r) ( ( 0x7 & ( r ) ) >> 2 ) |
| 41 #define REX_X(r) ( ( 0x3 & ( r ) ) >> 1 ) |
| 42 #define REX_B(r) ( ( 0x1 & ( r ) ) >> 0 ) |
| 43 #define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \ |
| 44 ( P_REXR(n) << 2 ) | \ |
| 45 ( P_REXX(n) << 1 ) | \ |
| 46 ( P_REXB(n) << 0 ) ) |
| 47 |
 |      48 /* scale-index-base bits */                                             |
| 49 #define SIB_S(b) ( ( b ) >> 6 ) |
| 50 #define SIB_I(b) ( ( ( b ) >> 3 ) & 7 ) |
| 51 #define SIB_B(b) ( ( b ) & 7 ) |
| 52 |
| 53 /* modrm bits */ |
| 54 #define MODRM_REG(b) ( ( ( b ) >> 3 ) & 7 ) |
| 55 #define MODRM_NNN(b) ( ( ( b ) >> 3 ) & 7 ) |
| 56 #define MODRM_MOD(b) ( ( ( b ) >> 6 ) & 3 ) |
| 57 #define MODRM_RM(b) ( ( b ) & 7 ) |
| 58 |
| 59 static int decode_ext(struct ud *u, uint16_t ptr); |
| 60 |
| 61 enum reg_class { /* register classes */ |
| 62 REGCLASS_NONE, |
| 63 REGCLASS_GPR, |
| 64 REGCLASS_MMX, |
| 65 REGCLASS_CR, |
| 66 REGCLASS_DB, |
| 67 REGCLASS_SEG, |
| 68 REGCLASS_XMM |
| 69 }; |
| 70 |
| 71 |
| 72 /* |
| 73 * inp_uint8 |
 |      74  *    inp_uint16                                                        |
 |      75  *    inp_uint32                                                        |
 |      76  *    inp_uint64                                                        |
| 77 * Load little-endian values from input |
| 78 */ |
/* Fetch one byte from the input stream. */
static uint8_t
inp_uint8(struct ud* u)
{
  uint8_t byte = ud_inp_next(u);
  return byte;
}
| 84 |
/* Load a little-endian 16-bit value from the input stream. */
static uint16_t
inp_uint16(struct ud* u)
{
  uint16_t lo = ud_inp_next(u);
  uint16_t hi = ud_inp_next(u);
  return (uint16_t) (lo | (hi << 8));
}
| 94 |
/* Load a little-endian 32-bit value from the input stream. */
static uint32_t
inp_uint32(struct ud* u)
{
  uint32_t value = 0;
  int shift;
  /* bytes arrive least-significant first */
  for (shift = 0; shift < 32; shift += 8) {
    value |= ((uint32_t) ud_inp_next(u)) << shift;
  }
  return value;
}
| 108 |
/* Load a little-endian 64-bit value from the input stream. */
static uint64_t
inp_uint64(struct ud* u)
{
  uint64_t value = 0;
  int shift;
  /* bytes arrive least-significant first */
  for (shift = 0; shift < 64; shift += 8) {
    value |= ((uint64_t) ud_inp_next(u)) << shift;
  }
  return value;
}
| 130 |
| 131 |
/* Compute the effective operand size for a disassembly mode,
 * given the rex.w bit and the presence of a 66h prefix.
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  switch (dis_mode) {
  case 64:
    /* rex.w wins; otherwise 66h shrinks 32 -> 16 */
    if (rex_w) {
      return 64;
    }
    return pfx_opr ? 16 : 32;
  case 32:
    return pfx_opr ? 16 : 32;
  default:
    UD_ASSERT(dis_mode == 16);
    return pfx_opr ? 32 : 16;
  }
}
| 144 |
| 145 |
/* Compute the effective address size for a disassembly mode,
 * given the presence of a 67h prefix.
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  switch (dis_mode) {
  case 64:
    return pfx_adr ? 32 : 64;
  case 32:
    return pfx_adr ? 16 : 32;
  default:
    UD_ASSERT(dis_mode == 16);
    return pfx_adr ? 32 : 16;
  }
}
| 158 |
| 159 |
| 160 /* Looks up mnemonic code in the mnemonic string table |
| 161 * Returns NULL if the mnemonic code is invalid |
| 162 */ |
| 163 const char* |
| 164 ud_lookup_mnemonic(enum ud_mnemonic_code c) |
| 165 { |
| 166 if (c < UD_MAX_MNEMONIC_CODE) { |
| 167 return ud_mnemonics_str[c]; |
| 168 } else { |
| 169 return NULL; |
| 170 } |
| 171 } |
| 172 |
| 173 |
| 174 /* |
| 175 * decode_prefixes |
| 176 * |
| 177 * Extracts instruction prefixes. |
| 178 */ |
| 179 static int |
| 180 decode_prefixes(struct ud *u) |
| 181 { |
| 182 int done = 0; |
| 183 uint8_t curr; |
| 184 UD_RETURN_ON_ERROR(u); |
| 185 |
| 186 do { |
| 187 ud_inp_next(u); |
| 188 UD_RETURN_ON_ERROR(u); |
| 189 if (inp_len(u) == MAX_INSN_LENGTH) { |
| 190 UD_RETURN_WITH_ERROR(u, "max instruction length"); |
| 191 } |
| 192 curr = inp_curr(u); |
| 193 |
| 194 switch (curr) |
| 195 { |
| 196 case 0x2E : |
| 197 u->pfx_seg = UD_R_CS; |
| 198 break; |
| 199 case 0x36 : |
| 200 u->pfx_seg = UD_R_SS; |
| 201 break; |
| 202 case 0x3E : |
| 203 u->pfx_seg = UD_R_DS; |
| 204 break; |
| 205 case 0x26 : |
| 206 u->pfx_seg = UD_R_ES; |
| 207 break; |
| 208 case 0x64 : |
| 209 u->pfx_seg = UD_R_FS; |
| 210 break; |
| 211 case 0x65 : |
| 212 u->pfx_seg = UD_R_GS; |
| 213 break; |
| 214 case 0x67 : /* adress-size override prefix */ |
| 215 u->pfx_adr = 0x67; |
| 216 break; |
| 217 case 0xF0 : |
| 218 u->pfx_lock = 0xF0; |
| 219 break; |
| 220 case 0x66: |
| 221 u->pfx_opr = 0x66; |
| 222 break; |
| 223 case 0xF2: |
| 224 u->pfx_str = 0xf2; |
| 225 break; |
| 226 case 0xF3: |
| 227 u->pfx_str = 0xf3; |
| 228 break; |
| 229 default: |
| 230 done = 1; |
| 231 break; |
| 232 } |
| 233 } while (!done); |
| 234 |
| 235 if (u->dis_mode == 64 && (curr & 0xF0) == 0x40) { |
| 236 /* rex prefixes in 64bit mode, must be the last prefix |
| 237 */ |
| 238 u->pfx_rex = curr; |
| 239 } else { |
| 240 /* rewind back one byte in stream, since the above loop |
| 241 * stops with a non-prefix byte. |
| 242 */ |
| 243 inp_back(u); |
| 244 } |
| 245 return 0; |
| 246 } |
| 247 |
| 248 |
| 249 static inline unsigned int modrm( struct ud * u ) |
| 250 { |
| 251 if ( !u->have_modrm ) { |
| 252 u->modrm = ud_inp_next( u ); |
| 253 u->have_modrm = 1; |
| 254 } |
| 255 return u->modrm; |
| 256 } |
| 257 |
| 258 |
| 259 static unsigned int |
| 260 resolve_operand_size( const struct ud * u, unsigned int s ) |
| 261 { |
| 262 switch ( s ) |
| 263 { |
| 264 case SZ_V: |
| 265 return ( u->opr_mode ); |
| 266 case SZ_Z: |
| 267 return ( u->opr_mode == 16 ) ? 16 : 32; |
| 268 case SZ_Y: |
| 269 return ( u->opr_mode == 16 ) ? 32 : u->opr_mode; |
| 270 case SZ_RDQ: |
| 271 return ( u->dis_mode == 64 ) ? 64 : 32; |
| 272 default: |
| 273 return s; |
| 274 } |
| 275 } |
| 276 |
| 277 |
| 278 static int resolve_mnemonic( struct ud* u ) |
| 279 { |
| 280 /* resolve 3dnow weirdness. */ |
| 281 if ( u->mnemonic == UD_I3dnow ) { |
| 282 u->mnemonic = ud_itab[ u->le->table[ inp_curr( u ) ] ].mnemonic; |
| 283 } |
| 284 /* SWAPGS is only valid in 64bits mode */ |
| 285 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) { |
| 286 UDERR(u, "swapgs invalid in 64bits mode"); |
| 287 return -1; |
| 288 } |
| 289 |
| 290 if (u->mnemonic == UD_Ixchg) { |
| 291 if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX && |
| 292 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) || |
| 293 (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX && |
| 294 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) { |
| 295 u->operand[0].type = UD_NONE; |
| 296 u->operand[1].type = UD_NONE; |
| 297 u->mnemonic = UD_Inop; |
| 298 } |
| 299 } |
| 300 |
| 301 if (u->mnemonic == UD_Inop && u->pfx_repe) { |
| 302 u->pfx_repe = 0; |
| 303 u->mnemonic = UD_Ipause; |
| 304 } |
| 305 return 0; |
| 306 } |
| 307 |
| 308 |
| 309 /* ----------------------------------------------------------------------------- |
| 310 * decode_a()- Decodes operands of the type seg:offset |
| 311 * ----------------------------------------------------------------------------- |
| 312 */ |
| 313 static void |
| 314 decode_a(struct ud* u, struct ud_operand *op) |
| 315 { |
| 316 if (u->opr_mode == 16) { |
| 317 /* seg16:off16 */ |
| 318 op->type = UD_OP_PTR; |
| 319 op->size = 32; |
| 320 op->lval.ptr.off = inp_uint16(u); |
| 321 op->lval.ptr.seg = inp_uint16(u); |
| 322 } else { |
| 323 /* seg16:off32 */ |
| 324 op->type = UD_OP_PTR; |
| 325 op->size = 48; |
| 326 op->lval.ptr.off = inp_uint32(u); |
| 327 op->lval.ptr.seg = inp_uint16(u); |
| 328 } |
| 329 } |
| 330 |
| 331 /* ----------------------------------------------------------------------------- |
| 332 * decode_gpr() - Returns decoded General Purpose Register |
| 333 * ----------------------------------------------------------------------------- |
| 334 */ |
| 335 static enum ud_type |
| 336 decode_gpr(register struct ud* u, unsigned int s, unsigned char rm) |
| 337 { |
| 338 switch (s) { |
| 339 case 64: |
| 340 return UD_R_RAX + rm; |
| 341 case 32: |
| 342 return UD_R_EAX + rm; |
| 343 case 16: |
| 344 return UD_R_AX + rm; |
| 345 case 8: |
| 346 if (u->dis_mode == 64 && u->pfx_rex) { |
| 347 if (rm >= 4) |
| 348 return UD_R_SPL + (rm-4); |
| 349 return UD_R_AL + rm; |
| 350 } else return UD_R_AL + rm; |
| 351 default: |
| 352 UD_ASSERT(!"invalid operand size"); |
| 353 return 0; |
| 354 } |
| 355 } |
| 356 |
| 357 static void |
| 358 decode_reg(struct ud *u, |
| 359 struct ud_operand *opr, |
| 360 int type, |
| 361 int num, |
| 362 int size) |
| 363 { |
| 364 int reg; |
| 365 size = resolve_operand_size(u, size); |
| 366 switch (type) { |
| 367 case REGCLASS_GPR : reg = decode_gpr(u, size, num); break; |
| 368 case REGCLASS_MMX : reg = UD_R_MM0 + (num & 7); break; |
| 369 case REGCLASS_XMM : reg = UD_R_XMM0 + num; break; |
| 370 case REGCLASS_CR : reg = UD_R_CR0 + num; break; |
| 371 case REGCLASS_DB : reg = UD_R_DR0 + num; break; |
| 372 case REGCLASS_SEG : { |
| 373 /* |
| 374 * Only 6 segment registers, anything else is an error. |
| 375 */ |
| 376 if ((num & 7) > 5) { |
| 377 UDERR(u, "invalid segment register value"); |
| 378 return; |
| 379 } else { |
| 380 reg = UD_R_ES + (num & 7); |
| 381 } |
| 382 break; |
| 383 } |
| 384 default: |
| 385 UD_ASSERT(!"invalid register type"); |
| 386 break; |
| 387 } |
| 388 opr->type = UD_OP_REG; |
| 389 opr->base = reg; |
| 390 opr->size = size; |
| 391 } |
| 392 |
| 393 |
| 394 /* |
| 395 * decode_imm |
| 396 * |
| 397 * Decode Immediate values. |
| 398 */ |
| 399 static void |
| 400 decode_imm(struct ud* u, unsigned int size, struct ud_operand *op) |
| 401 { |
| 402 op->size = resolve_operand_size(u, size); |
| 403 op->type = UD_OP_IMM; |
| 404 |
| 405 switch (op->size) { |
| 406 case 8: op->lval.sbyte = inp_uint8(u); break; |
| 407 case 16: op->lval.uword = inp_uint16(u); break; |
| 408 case 32: op->lval.udword = inp_uint32(u); break; |
| 409 case 64: op->lval.uqword = inp_uint64(u); break; |
| 410 default: return; |
| 411 } |
| 412 } |
| 413 |
| 414 |
| 415 /* |
| 416 * decode_mem_disp |
| 417 * |
| 418 * Decode mem address displacement. |
| 419 */ |
| 420 static void |
| 421 decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op) |
| 422 { |
| 423 switch (size) { |
| 424 case 8: |
| 425 op->offset = 8; |
| 426 op->lval.ubyte = inp_uint8(u); |
| 427 break; |
| 428 case 16: |
| 429 op->offset = 16; |
| 430 op->lval.uword = inp_uint16(u); |
| 431 break; |
| 432 case 32: |
| 433 op->offset = 32; |
| 434 op->lval.udword = inp_uint32(u); |
| 435 break; |
| 436 case 64: |
| 437 op->offset = 64; |
| 438 op->lval.uqword = inp_uint64(u); |
| 439 break; |
| 440 default: |
| 441 return; |
| 442 } |
| 443 } |
| 444 |
| 445 |
| 446 /* |
| 447 * decode_modrm_reg |
| 448 * |
| 449 * Decodes reg field of mod/rm byte |
| 450 * |
| 451 */ |
| 452 static inline void |
| 453 decode_modrm_reg(struct ud *u, |
| 454 struct ud_operand *operand, |
| 455 unsigned int type, |
| 456 unsigned int size) |
| 457 { |
| 458 uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); |
| 459 decode_reg(u, operand, type, reg, size); |
| 460 } |
| 461 |
| 462 |
/*
 * decode_modrm_rm
 *
 * Decodes rm field of mod/rm byte. With mod == 11b the rm field names
 * a register; otherwise it describes a memory address (base, optional
 * SIB byte, optional displacement) in the current address mode.
 */
static void
decode_modrm_rm(struct ud *u,
                struct ud_operand *op,
                unsigned char type,    /* register type */
                unsigned int size)     /* operand size */

{
  size_t offset = 0;              /* displacement width in bits; 0 = none */
  unsigned char mod, rm;

  /* get mod, r/m and reg fields */
  mod = MODRM_MOD(modrm(u));
  rm  = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));

  /*
   * If mod is 11b, then the modrm.rm specifies a register.
   *
   */
  if (mod == 3) {
    decode_reg(u, op, type, rm, size);
    return;
  }

  /*
   * !11b => Memory Address
   */
  op->type = UD_OP_MEM;
  op->size = resolve_operand_size(u, size);

  if (u->adr_mode == 64) {
    op->base = UD_R_RAX + rm;
    if (mod == 1) {
      offset = 8;                 /* disp8 */
    } else if (mod == 2) {
      offset = 32;                /* disp32 */
    } else if (mod == 0 && (rm & 7) == 5) {
      /* mod=00, rm=101 in 64-bit mode is rip-relative with disp32 */
      op->base = UD_R_RIP;
      offset = 32;
    } else {
      offset = 0;
    }
    /*
     * Scale-Index-Base (SIB): rm=100 means a SIB byte follows.
     */
    if ((rm & 7) == 4) {
      ud_inp_next(u);

      /* scale 1 is reported as 0 (the "& ~1" clears 1<<0) */
      op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
      op->base  = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));

      /* special conditions for base reference */
      if (op->index == UD_R_RSP) {
        /* index=100 encodes "no index" */
        op->index = UD_NONE;
        op->scale = UD_NONE;
      }

      if (op->base == UD_R_RBP || op->base == UD_R_R13) {
        /* base=101 with mod=00 means no base, disp32 only */
        if (mod == 0) {
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    }
  } else if (u->adr_mode == 32) {
    op->base = UD_R_EAX + rm;
    if (mod == 1) {
      offset = 8;                 /* disp8 */
    } else if (mod == 2) {
      offset = 32;                /* disp32 */
    } else if (mod == 0 && rm == 5) {
      /* mod=00, rm=101 is absolute disp32, no base */
      op->base = UD_NONE;
      offset = 32;
    } else {
      offset = 0;
    }

    /* Scale-Index-Base (SIB) */
    if ((rm & 7) == 4) {
      ud_inp_next(u);

      op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
      op->base  = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));

      if (op->index == UD_R_ESP) {
        /* index=100 encodes "no index" */
        op->index = UD_NONE;
        op->scale = UD_NONE;
      }

      /* special condition for base reference */
      if (op->base == UD_R_EBP) {
        if (mod == 0) {
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    }
  } else {
    /* 16-bit addressing: fixed base/index pairs selected by rm */
    const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
                                     UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
    const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
                                     UD_NONE, UD_NONE, UD_NONE, UD_NONE };
    op->base  = bases[rm & 7];
    op->index = indices[rm & 7];
    if (mod == 0 && rm == 6) {
      /* mod=00, rm=110 is absolute disp16, no base */
      offset = 16;
      op->base = UD_NONE;
    } else if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 16;
    }
  }

  if (offset) {
    decode_mem_disp(u, offset, op);
  }
}
| 596 |
| 597 |
| 598 /* |
| 599 * decode_moffset |
| 600 * Decode offset-only memory operand |
| 601 */ |
| 602 static void |
| 603 decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr) |
| 604 { |
| 605 opr->type = UD_OP_MEM; |
| 606 opr->size = resolve_operand_size(u, size); |
| 607 decode_mem_disp(u, u->adr_mode, opr); |
| 608 } |
| 609 |
| 610 |
/* -----------------------------------------------------------------------------
 * decode_operand() - Disassembles a single operand, dispatching on its
 * symbolic operand code from the instruction table.
 * -----------------------------------------------------------------------------
 */
static int
decode_operand(struct ud *u,
               struct ud_operand *operand,
               enum ud_operand_code type,
               unsigned int size)
{
  operand->_oprcode = type;

  switch (type) {
    case OP_A :
      /* direct far pointer seg:offset */
      decode_a(u, operand);
      break;
    case OP_MR:
      /* modrm.rm; operand size differs between register and memory forms */
      decode_modrm_rm(u, operand, REGCLASS_GPR,
                      MODRM_MOD(modrm(u)) == 3 ?
                        Mx_reg_size(size) : Mx_mem_size(size));
      break;
    case OP_F:
      u->br_far = 1;
      /* intended fall through */
    case OP_M:
      /* memory-only form: modrm.mod must not select a register */
      if (MODRM_MOD(modrm(u)) == 3) {
        UDERR(u, "expected modrm.mod != 3");
      }
      /* intended fall through */
    case OP_E:
      /* gpr or memory via modrm.rm */
      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
      break;
    case OP_G:
      /* gpr via modrm.reg */
      decode_modrm_reg(u, operand, REGCLASS_GPR, size);
      break;
    case OP_sI:
    case OP_I:
      decode_imm(u, size, operand);
      break;
    case OP_I1:
      /* implicit constant 1 (shift/rotate forms) */
      operand->type = UD_OP_CONST;
      operand->lval.udword = 1;
      break;
    case OP_N:
      /* mmx register-only form */
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3");
      }
      /* intended fall through */
    case OP_Q:
      decode_modrm_rm(u, operand, REGCLASS_MMX, size);
      break;
    case OP_P:
      decode_modrm_reg(u, operand, REGCLASS_MMX, size);
      break;
    case OP_U:
      /* xmm register-only form */
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3");
      }
      /* intended fall through */
    case OP_W:
      decode_modrm_rm(u, operand, REGCLASS_XMM, size);
      break;
    case OP_V:
      decode_modrm_reg(u, operand, REGCLASS_XMM, size);
      break;
    case OP_MU:
      /* xmm/memory; size differs between register and memory forms */
      decode_modrm_rm(u, operand, REGCLASS_XMM,
                      MODRM_MOD(modrm(u)) == 3 ?
                        Mx_reg_size(size) : Mx_mem_size(size));
      break;
    case OP_S:
      /* segment register via modrm.reg */
      decode_modrm_reg(u, operand, REGCLASS_SEG, size);
      break;
    case OP_O:
      /* offset-only memory operand (mov moffs forms) */
      decode_moffset(u, size, operand);
      break;
    case OP_R0:
    case OP_R1:
    case OP_R2:
    case OP_R3:
    case OP_R4:
    case OP_R5:
    case OP_R6:
    case OP_R7:
      /* register encoded in the opcode itself, extended by rex.b */
      decode_reg(u, operand, REGCLASS_GPR,
                 (REX_B(u->pfx_rex) << 3) | (type - OP_R0), size);
      break;
    case OP_AL:
    case OP_AX:
    case OP_eAX:
    case OP_rAX:
      /* implicit accumulator */
      decode_reg(u, operand, REGCLASS_GPR, 0, size);
      break;
    case OP_CL:
    case OP_CX:
    case OP_eCX:
      decode_reg(u, operand, REGCLASS_GPR, 1, size);
      break;
    case OP_DL:
    case OP_DX:
    case OP_eDX:
      decode_reg(u, operand, REGCLASS_GPR, 2, size);
      break;
    case OP_ES:
    case OP_CS:
    case OP_DS:
    case OP_SS:
    case OP_FS:
    case OP_GS:
      /* in 64bits mode, only fs and gs are allowed */
      if (u->dis_mode == 64) {
        if (type != OP_FS && type != OP_GS) {
          UDERR(u, "invalid segment register in 64bits");
        }
      }
      operand->type = UD_OP_REG;
      operand->base = (type - OP_ES) + UD_R_ES;
      operand->size = 16;
      break;
    case OP_J :
      /* relative branch target: immediate reinterpreted as jump offset */
      decode_imm(u, size, operand);
      operand->type = UD_OP_JIMM;
      break ;
    case OP_R :
      /* gpr register-only form */
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3");
      }
      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
      break;
    case OP_C:
      /* control register via modrm.reg */
      decode_modrm_reg(u, operand, REGCLASS_CR, size);
      break;
    case OP_D:
      /* debug register via modrm.reg */
      decode_modrm_reg(u, operand, REGCLASS_DB, size);
      break;
    case OP_I3 :
      /* implicit constant 3 (int3) */
      operand->type = UD_OP_CONST;
      operand->lval.sbyte = 3;
      break;
    case OP_ST0:
    case OP_ST1:
    case OP_ST2:
    case OP_ST3:
    case OP_ST4:
    case OP_ST5:
    case OP_ST6:
    case OP_ST7:
      /* x87 stack register */
      operand->type = UD_OP_REG;
      operand->base = (type - OP_ST0) + UD_R_ST0;
      operand->size = 80;
      break;
    default :
      break;
  }
  return 0;
}
| 767 |
| 768 |
| 769 /* |
| 770 * decode_operands |
| 771 * |
| 772 * Disassemble upto 3 operands of the current instruction being |
| 773 * disassembled. By the end of the function, the operand fields |
| 774 * of the ud structure will have been filled. |
| 775 */ |
| 776 static int |
| 777 decode_operands(struct ud* u) |
| 778 { |
| 779 decode_operand(u, &u->operand[0], |
| 780 u->itab_entry->operand1.type, |
| 781 u->itab_entry->operand1.size); |
| 782 decode_operand(u, &u->operand[1], |
| 783 u->itab_entry->operand2.type, |
| 784 u->itab_entry->operand2.size); |
| 785 decode_operand(u, &u->operand[2], |
| 786 u->itab_entry->operand3.type, |
| 787 u->itab_entry->operand3.size); |
| 788 return 0; |
| 789 } |
| 790 |
| 791 /* ----------------------------------------------------------------------------- |
| 792 * clear_insn() - clear instruction structure |
| 793 * ----------------------------------------------------------------------------- |
| 794 */ |
| 795 static void |
| 796 clear_insn(register struct ud* u) |
| 797 { |
| 798 u->error = 0; |
| 799 u->pfx_seg = 0; |
| 800 u->pfx_opr = 0; |
| 801 u->pfx_adr = 0; |
| 802 u->pfx_lock = 0; |
| 803 u->pfx_repne = 0; |
| 804 u->pfx_rep = 0; |
| 805 u->pfx_repe = 0; |
| 806 u->pfx_rex = 0; |
| 807 u->pfx_str = 0; |
| 808 u->mnemonic = UD_Inone; |
| 809 u->itab_entry = NULL; |
| 810 u->have_modrm = 0; |
| 811 u->br_far = 0; |
| 812 |
| 813 memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) ); |
| 814 memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) ); |
| 815 memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) ); |
| 816 } |
| 817 |
| 818 |
| 819 static inline int |
| 820 resolve_pfx_str(struct ud* u) |
| 821 { |
| 822 if (u->pfx_str == 0xf3) { |
| 823 if (P_STR(u->itab_entry->prefix)) { |
| 824 u->pfx_rep = 0xf3; |
| 825 } else { |
| 826 u->pfx_repe = 0xf3; |
| 827 } |
| 828 } else if (u->pfx_str == 0xf2) { |
| 829 u->pfx_repne = 0xf3; |
| 830 } |
| 831 return 0; |
| 832 } |
| 833 |
| 834 |
/*
 * resolve_mode
 *
 * Propagate prefix effects for the decoded instruction: validate it
 * against the disassembly mode and compute the effective operand and
 * address sizes. Returns 0 on success, -1 on error.
 */
static int
resolve_mode( struct ud* u )
{
  /* if in error state, bail out */
  if ( u->error ) return -1;

  /* propagate prefix effects */
  if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */

    /* Check validity of instruction m64 */
    if ( P_INV64( u->itab_entry->prefix ) ) {
      UDERR(u, "instruction invalid in 64bits");
      return -1;
    }

    /* effective rex prefix is the effective mask for the
     * instruction hard-coded in the opcode map.
     */
    u->pfx_rex = ( u->pfx_rex & 0x40 ) |
                 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );

    /* whether this instruction has a default operand size of
     * 64bit, also hardcoded into the opcode map.
     */
    u->default64 = P_DEF64( u->itab_entry->prefix );
    /* calculate effective operand size */
    if ( REX_W( u->pfx_rex ) ) {
      u->opr_mode = 64;
    } else if ( u->pfx_opr ) {
      u->opr_mode = 16;
    } else {
      /* unless the default opr size of instruction is 64,
       * the effective operand size in the absence of rex.w
       * prefix is 32.
       */
      u->opr_mode = ( u->default64 ) ? 64 : 32;
    }

    /* calculate effective address size */
    u->adr_mode = (u->pfx_adr) ? 32 : 64;
  } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
    u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
  } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
    u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
  }

  /* set flags for implicit addressing */
  u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );

  return 0;
}
| 888 |
| 889 |
| 890 static inline int |
| 891 decode_insn(struct ud *u, uint16_t ptr) |
| 892 { |
| 893 UD_ASSERT((ptr & 0x8000) == 0); |
| 894 u->itab_entry = &ud_itab[ ptr ]; |
| 895 u->mnemonic = u->itab_entry->mnemonic; |
| 896 return (resolve_pfx_str(u) == 0 && |
| 897 resolve_mode(u) == 0 && |
| 898 decode_operands(u) == 0 && |
| 899 resolve_mnemonic(u) == 0) ? 0 : -1; |
| 900 } |
| 901 |
| 902 |
| 903 /* |
| 904 * decode_3dnow() |
| 905 * |
| 906 * Decoding 3dnow is a little tricky because of its strange opcode |
| 907 * structure. The final opcode disambiguation depends on the last |
| 908 * byte that comes after the operands have been decoded. Fortunately, |
| 909 * all 3dnow instructions have the same set of operand types. So we |
| 910 * go ahead and decode the instruction by picking an arbitrarily chosen |
| 911 * valid entry in the table, decode the operands, and read the final |
 |     912  * byte to resolve the mnemonic.                                        |
| 913 */ |
| 914 static inline int |
| 915 decode_3dnow(struct ud* u) |
| 916 { |
| 917 uint16_t ptr; |
| 918 UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW); |
| 919 UD_ASSERT(u->le->table[0xc] != 0); |
| 920 decode_insn(u, u->le->table[0xc]); |
| 921 ud_inp_next(u); |
| 922 if (u->error) { |
| 923 return -1; |
| 924 } |
| 925 ptr = u->le->table[inp_curr(u)]; |
| 926 UD_ASSERT((ptr & 0x8000) == 0); |
| 927 u->mnemonic = ud_itab[ptr].mnemonic; |
| 928 return 0; |
| 929 } |
| 930 |
| 931 |
| 932 static int |
| 933 decode_ssepfx(struct ud *u) |
| 934 { |
| 935 uint8_t idx; |
| 936 uint8_t pfx; |
| 937 |
| 938 /* |
| 939 * String prefixes (f2, f3) take precedence over operand |
| 940 * size prefix (66). |
| 941 */ |
| 942 pfx = u->pfx_str; |
| 943 if (pfx == 0) { |
| 944 pfx = u->pfx_opr; |
| 945 } |
| 946 idx = ((pfx & 0xf) + 1) / 2; |
| 947 if (u->le->table[idx] == 0) { |
| 948 idx = 0; |
| 949 } |
| 950 if (idx && u->le->table[idx] != 0) { |
| 951 /* |
| 952 * "Consume" the prefix as a part of the opcode, so it is no |
| 953 * longer exported as an instruction prefix. |
| 954 */ |
| 955 u->pfx_str = 0; |
| 956 if (pfx == 0x66) { |
| 957 /* |
| 958 * consume "66" only if it was used for decoding, leaving |
| 959 * it to be used as an operands size override for some |
| 960 * simd instructions. |
| 961 */ |
| 962 u->pfx_opr = 0; |
| 963 } |
| 964 } |
| 965 return decode_ext(u, u->le->table[idx]); |
| 966 } |
| 967 |
| 968 |
/*
 * decode_ext()
 *
 * Decode opcode extensions (if any). A table pointer (high bit set)
 * is resolved recursively by indexing with mod/rm fields, prefixes,
 * or mode; a plain entry terminates at decode_insn().
 */
static int
decode_ext(struct ud *u, uint16_t ptr)
{
  uint8_t idx = 0;
  if ((ptr & 0x8000) == 0) {
    /* leaf entry: a concrete instruction */
    return decode_insn(u, ptr);
  }
  u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
  if (u->le->type == UD_TAB__OPC_3DNOW) {
    return decode_3dnow(u);
  }

  switch (u->le->type) {
    case UD_TAB__OPC_MOD:
      /* !11 = 0, 11 = 1 */
      idx = (MODRM_MOD(modrm(u)) + 1) / 4;
      break;
      /* disassembly mode/operand size/address size based tables.
       * 16 = 0, 32 = 1, 64 = 2
       */
    case UD_TAB__OPC_MODE:
      idx = u->dis_mode != 64 ? 0 : 1;
      break;
    case UD_TAB__OPC_OSIZE:
      idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
      break;
    case UD_TAB__OPC_ASIZE:
      idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
      break;
    case UD_TAB__OPC_X87:
      /* x87 tables are indexed by the modrm byte itself (c0..ff) */
      idx = modrm(u) - 0xC0;
      break;
    case UD_TAB__OPC_VENDOR:
      if (u->vendor == UD_VENDOR_ANY) {
        /* choose a valid entry */
        idx = (u->le->table[idx] != 0) ? 0 : 1;
      } else if (u->vendor == UD_VENDOR_AMD) {
        idx = 0;
      } else {
        idx = 1;
      }
      break;
    case UD_TAB__OPC_RM:
      idx = MODRM_RM(modrm(u));
      break;
    case UD_TAB__OPC_REG:
      idx = MODRM_REG(modrm(u));
      break;
    case UD_TAB__OPC_SSE:
      return decode_ssepfx(u);
    default:
      UD_ASSERT(!"not reached");
      break;
  }

  return decode_ext(u, u->le->table[idx]);
}
| 1031 |
| 1032 |
| 1033 static int |
| 1034 decode_opcode(struct ud *u) |
| 1035 { |
| 1036 uint16_t ptr; |
| 1037 UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE); |
| 1038 ud_inp_next(u); |
| 1039 if (u->error) { |
| 1040 return -1; |
| 1041 } |
| 1042 u->primary_opcode = inp_curr(u); |
| 1043 ptr = u->le->table[inp_curr(u)]; |
| 1044 if (ptr & 0x8000) { |
| 1045 u->le = &ud_lookup_table_list[ptr & ~0x8000]; |
| 1046 if (u->le->type == UD_TAB__OPC_TABLE) { |
| 1047 return decode_opcode(u); |
| 1048 } |
| 1049 } |
| 1050 return decode_ext(u, ptr); |
| 1051 } |
| 1052 |
| 1053 |
/* =============================================================================
 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
 * On failure the instruction is reported as invalid (itab entry 0).
 * =============================================================================
 */
unsigned int
ud_decode(struct ud *u)
{
  inp_start(u);
  clear_insn(u);
  u->le = &ud_lookup_table_list[0];
  /* note: u->error may already be set by the decode steps themselves;
   * the trailing "|| u->error" preserves that state */
  u->error = decode_prefixes(u) == -1 ||
             decode_opcode(u)   == -1 ||
             u->error;
  /* Handle decode error. */
  if (u->error) {
    /* clear out the decode data. */
    clear_insn(u);
    /* mark the sequence of bytes as invalid. */
    u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
    u->mnemonic = u->itab_entry->mnemonic;
  }

  /* maybe this stray segment override byte
   * should be spewed out?
   */
  if ( !P_SEG( u->itab_entry->prefix ) &&
       u->operand[0].type != UD_OP_MEM &&
       u->operand[1].type != UD_OP_MEM )
    u->pfx_seg = 0;

  u->insn_offset = u->pc; /* set offset of instruction */
  u->asm_buf_fill = 0;    /* set translation buffer index to 0 */
  u->pc += u->inp_ctr;    /* move program counter by bytes decoded */

  /* return number of bytes disassembled. */
  return u->inp_ctr;
}
| 1091 |
| 1092 /* |
| 1093 vim: set ts=2 sw=2 expandtab |
| 1094 */ |
OLD | NEW |