| OLD | NEW |
| (Empty) |
| 1 #ifndef LIBDISASM_H | |
| 2 #define LIBDISASM_H | |
| 3 | |
| 4 #include <stdint.h> | |
| 5 | |
| 6 /* "NEW" types | |
| 7 * __________________________________________________________________________*/ | |
| 8 #ifndef LIBDISASM_QWORD_H /* do not interfere with qword.h */ | |
| 9 #define LIBDISASM_QWORD_H | |
| 10 #ifdef _MSC_VER | |
| 11 typedef __int64 qword_t; | |
| 12 #else | |
| 13 typedef int64_t qword_t; | |
| 14 #endif | |
| 15 #endif | |
| 16 | |
| 17 #include <sys/types.h> | |
| 18 | |
| 19 #ifdef __cplusplus | |
| 20 extern "C" { | |
| 21 #endif | |
| 22 | |
| 23 /* "NEW" x86 API | |
| 24 * __________________________________________________________________________*/ | |
| 25 | |
| 26 | |
| 27 /* ========================================= Error Reporting */ | |
| 28 /* REPORT CODES | |
| 29 * These are passed to a reporter function passed at initialization. | |
| 30 * Each code determines the type of the argument passed to the reporter; | |
| 31 * this allows the reporter to recover from errors, or just log them. | |
| 32 */ | |
| 33 enum x86_report_codes { | |
| 34 report_disasm_bounds, /* RVA OUT OF BOUNDS : The disassembler could | |
| 35 not disassemble the supplied RVA as it is | |
| 36 out of the range of the buffer. The | |
| 37 application should store the address and | |
| 38 attempt to determine what section of the | |
| 39 binary it is in, then disassemble the | |
| 40 address from the bytes in that section. | |
| 41 data: uint32_t rva */ | |
| 42 report_insn_bounds, /* INSTRUCTION OUT OF BOUNDS: The disassembler | |
| 43 could not disassemble the instruction as | |
| 44 the instruction would require bytes beyond | |
| 45 the end of the current buffer. This usually | |
| 46 indicates garbage bytes at the end of a | |
| 47 buffer, or an incorrectly-sized buffer. | |
| 48 data: uint32_t rva */ | |
| 49 report_invalid_insn, /* INVALID INSTRUCTION: The disassembler could | |
| 50 not disassemble the instruction as it has an | |
| 51 invalid combination of opcodes and operands. | |
| 52 This will stop automated disassembly; the | |
| 53 application can restart the disassembly | |
| 54 after the invalid instruction. | |
| 55 data: uint32_t rva */ | |
| 56 report_unknown | |
| 57 }; | |
| 58 | |
| 59 /* 'arg' is optional arbitrary data provided by the code passing the | |
| 60 * callback -- for example, it could be 'this' or 'self' in OOP code. | |
| 61 * 'code' is provided by libdisasm, it is one of the above | |
| 62 * 'data' is provided by libdisasm and is context-specific, per the enums */ | |
| 63 typedef void (*DISASM_REPORTER)( enum x86_report_codes code, | |
| 64 void *data, void *arg ); | |
| 65 | |
| 66 | |
| 67 /* x86_report_error : Call the registered reporter to report an error */ | |
| 68 void x86_report_error( enum x86_report_codes code, void *data ); | |
| 69 | |
| 70 /* ========================================= Libdisasm Management Routines */ | |
| 71 enum x86_options { /* these can be ORed together */ | |
| 72 opt_none= 0, /* no options set */ | |
| 73 opt_ignore_nulls=1, /* ignore sequences of > 4 NULL bytes */ | |
| 74 opt_16_bit=2, /* 16-bit/DOS disassembly */ | |
| 75 opt_att_mnemonics=4, /* use AT&T syntax names for alternate opcode mnemonics */ | |
| 76 }; | |
| 77 | |
| 78 /* management routines */ | |
| 79 /* 'arg' is caller-specific data which is passed as the last argument | |
| 80 * to the reporter callback routine */ | |
| 81 int x86_init( enum x86_options options, DISASM_REPORTER reporter, void *arg); | |
| 82 void x86_set_reporter( DISASM_REPORTER reporter, void *arg); | |
| 83 void x86_set_options( enum x86_options options ); | |
| 84 enum x86_options x86_get_options( void ); | |
| 85 int x86_cleanup(void); | |
| 86 | |
| 87 | |
| 88 /* ========================================= Instruction Representation */ | |
| 89 /* these defines are only intended for use in the array decl's */ | |
| 90 #define MAX_REGNAME 8 | |
| 91 | |
| 92 #define MAX_PREFIX_STR 32 | |
| 93 #define MAX_MNEM_STR 16 | |
| 94 #define MAX_INSN_SIZE 20 /* same as in i386.h */ | |
| 95 #define MAX_OP_STRING 32 /* max possible operand size in string form */ | |
| 96 #define MAX_OP_RAW_STRING 64 /* max possible operand size in raw form */ | |
| 97 #define MAX_OP_XML_STRING 256 /* max possible operand size in xml form */ | |
| 98 #define MAX_NUM_OPERANDS 8 /* max # implicit and explicit operands */ | |
| 99 /* in these, the '2 *' is arbitrary: the max # of operands should require | |
| 100 * more space than the rest of the insn */ | |
| 101 #define MAX_INSN_STRING 512 /* 2 * 8 * MAX_OP_STRING */ | |
| 102 #define MAX_INSN_RAW_STRING 1024 /* 2 * 8 * MAX_OP_RAW_STRING */ | |
| 103 #define MAX_INSN_XML_STRING 4096 /* 2 * 8 * MAX_OP_XML_STRING */ | |
| 104 | |
| 105 enum x86_reg_type { /* NOTE: these may be ORed together */ | |
| 106 reg_gen = 0x00001, /* general purpose */ | |
| 107 reg_in = 0x00002, /* incoming args, ala RISC */ | |
| 108 reg_out = 0x00004, /* args to calls, ala RISC */ | |
| 109 reg_local = 0x00008, /* local vars, ala RISC */ | |
| 110 reg_fpu = 0x00010, /* FPU data register */ | |
| 111 reg_seg = 0x00020, /* segment register */ | |
| 112 reg_simd = 0x00040, /* SIMD/MMX reg */ | |
| 113 reg_sys = 0x00080, /* restricted/system register */ | |
| 114 reg_sp = 0x00100, /* stack pointer */ | |
| 115 reg_fp = 0x00200, /* frame pointer */ | |
| 116 reg_pc = 0x00400, /* program counter */ | |
| 117 reg_retaddr = 0x00800, /* return addr for func */ | |
| 118 reg_cond = 0x01000, /* condition code / flags */ | |
| 119 reg_zero = 0x02000, /* zero register, ala RISC */ | |
| 120 reg_ret = 0x04000, /* return value */ | |
| 121 reg_src = 0x10000, /* array/rep source */ | |
| 122 reg_dest = 0x20000, /* array/rep destination */ | |
| 123 reg_count = 0x40000 /* array/rep/loop counter */ | |
| 124 }; | |
| 125 | |
| 126 /* x86_reg_t : an X86 CPU register */ | |
| 127 typedef struct { | |
| 128 char name[MAX_REGNAME]; | |
| 129 enum x86_reg_type type; /* what register is used for */ | |
| 130 unsigned int size; /* size of register in bytes */ | |
| 131 unsigned int id; /* register ID #, for quick compares */ | |
| 132 unsigned int alias; /* ID of reg this is an alias for */ | |
| 133 unsigned int shift; /* amount to shift aliased reg by */ | |
| 134 } x86_reg_t; | |
| 135 | |
| 136 /* x86_ea_t : an X86 effective address (address expression) */ | |
| 137 typedef struct { | |
| 138 unsigned int scale; /* scale factor */ | |
| 139 x86_reg_t index, base; /* index, base registers */ | |
| 140 int32_t disp; /* displacement */ | |
| 141 char disp_sign; /* is negative? 1/0 */ | |
| 142 char disp_size; /* 0, 1, 2, 4 */ | |
| 143 } x86_ea_t; | |
| 144 | |
| 145 /* x86_absolute_t : an X86 segment:offset address (descriptor) */ | |
| 146 typedef struct { | |
| 147 unsigned short segment; /* loaded directly into CS */ | |
| 148 union { | |
| 149 unsigned short off16; /* loaded directly into IP */ | |
| 150 uint32_t off32; /* loaded directly into EIP */ | |
| 151 } offset; | |
| 152 } x86_absolute_t; | |
| 153 | |
| 154 enum x86_op_type { /* mutually exclusive */ | |
| 155 op_unused = 0, /* empty/unused operand: should never occur */ | |
| 156 op_register = 1, /* CPU register */ | |
| 157 op_immediate = 2, /* Immediate Value */ | |
| 158 op_relative_near = 3, /* Relative offset from IP */ | |
| 159 op_relative_far = 4, /* Relative offset from IP */ | |
| 160 op_absolute = 5, /* Absolute address (ptr16:32) */ | |
| 161 op_expression = 6, /* Address expression (scale/index/base/disp) */ | |
| 162 op_offset = 7, /* Offset from start of segment (m32) */ | |
| 163 op_unknown | |
| 164 }; | |
| 165 | |
| 166 #define x86_optype_is_address( optype ) \ | |
| 167 ( (optype) == op_absolute || (optype) == op_offset ) /* true for absolute or offset operands; 'optype' is evaluated twice */ | |
| 168 #define x86_optype_is_relative( optype ) \ | |
| 169 ( (optype) == op_relative_near || (optype) == op_relative_far ) /* true for near/far relative operands; 'optype' is evaluated twice */ | |
| 170 #define x86_optype_is_memory( optype ) \ | |
| 171 ( (optype) > op_immediate && (optype) < op_unknown ) /* true for every type strictly between op_immediate and op_unknown; 'optype' is evaluated twice */ | |
| 172 | |
| 173 enum x86_op_datatype { /* these use Intel's lame terminology */ | |
| 174 op_byte = 1, /* 1 byte integer */ | |
| 175 op_word = 2, /* 2 byte integer */ | |
| 176 op_dword = 3, /* 4 byte integer */ | |
| 177 op_qword = 4, /* 8 byte integer */ | |
| 178 op_dqword = 5, /* 16 byte integer */ | |
| 179 op_sreal = 6, /* 4 byte real (single real) */ | |
| 180 op_dreal = 7, /* 8 byte real (double real) */ | |
| 181 op_extreal = 8, /* 10 byte real (extended real) */ | |
| 182 op_bcd = 9, /* 10 byte binary-coded decimal */ | |
| 183 op_ssimd = 10, /* 16 byte : 4 packed single FP (SIMD, MMX) */ | |
| 184 op_dsimd = 11, /* 16 byte : 2 packed double FP (SIMD, MMX) */ | |
| 185 op_sssimd = 12, /* 4 byte : scalar single FP (SIMD, MMX) */ | |
| 186 op_sdsimd = 13, /* 8 byte : scalar double FP (SIMD, MMX) */ | |
| 187 op_descr32 = 14, /* 6 byte Intel descriptor 2:4 */ | |
| 188 op_descr16 = 15, /* 4 byte Intel descriptor 2:2 */ | |
| 189 op_pdescr32 = 16, /* 6 byte Intel pseudo-descriptor 32:16 */ | |
| 190 op_pdescr16 = 17, /* 6 byte Intel pseudo-descriptor 8:24:16 */ | |
| 191 op_bounds16 = 18, /* signed 16:16 lower:upper bounds */ | |
| 192 op_bounds32 = 19, /* signed 32:32 lower:upper bounds */ | |
| 193 op_fpuenv16 = 20, /* 14 byte FPU control/environment data */ | |
| 194 op_fpuenv32 = 21, /* 28 byte FPU control/environment data */ | |
| 195 op_fpustate16 = 22, /* 94 byte FPU state (env & reg stack) */ | |
| 196 op_fpustate32 = 23, /* 108 byte FPU state (env & reg stack) */ | |
| 197 op_fpregset = 24, /* 512 bytes: register set */ | |
| 198 op_fpreg = 25, /* FPU register */ | |
| 199 op_none = 0xFF, /* operand without a datatype (INVLPG) */ | |
| 200 }; | |
| 201 | |
| 202 enum x86_op_access { /* ORed together */ | |
| 203 op_read = 1, | |
| 204 op_write = 2, | |
| 205 op_execute = 4 | |
| 206 }; | |
| 207 | |
| 208 enum x86_op_flags { /* ORed together, but segs are mutually exclusive */ | |
| 209 op_signed = 1, /* signed integer */ | |
| 210 op_string = 2, /* possible string or array */ | |
| 211 op_constant = 4, /* symbolic constant */ | |
| 212 op_pointer = 8, /* operand points to a memory address */ | |
| 213 op_sysref = 0x010, /* operand is a syscall number */ | |
| 214 op_implied = 0x020, /* operand is implicit in the insn */ | |
| 215 op_hardcode = 0x40, /* operand is hardcoded in insn definition */ | |
| 216 /* NOTE: an 'implied' operand is one which can be considered a side | |
| 217 * effect of the insn, e.g. %esp being modified by PUSH or POP. A | |
| 218 * 'hard-coded' operand is one which is specified in the instruction | |
| 219 * definition, e.g. %es:%edi in MOVSB or 1 in ROL Eb, 1. The difference | |
| 220 * is that hard-coded operands are printed by disassemblers and are | |
| 221 * required to re-assemble, while implicit operands are invisible. */ | |
| 222 op_es_seg = 0x100, /* ES segment override */ | |
| 223 op_cs_seg = 0x200, /* CS segment override */ | |
| 224 op_ss_seg = 0x300, /* SS segment override */ | |
| 225 op_ds_seg = 0x400, /* DS segment override */ | |
| 226 op_fs_seg = 0x500, /* FS segment override */ | |
| 227 op_gs_seg = 0x600 /* GS segment override */ | |
| 228 }; | |
| 229 | |
| 230 /* x86_op_t : an X86 instruction operand */ | |
| 231 typedef struct { | |
| 232 enum x86_op_type type; /* operand type */ | |
| 233 enum x86_op_datatype datatype; /* operand size */ | |
| 234 enum x86_op_access access; /* operand access [RWX] */ | |
| 235 enum x86_op_flags flags; /* misc flags */ | |
| 236 union { | |
| 237 /* sizeof will have to work on these union members! */ | |
| 238 /* immediate values */ | |
| 239 char sbyte; | |
| 240 short sword; | |
| 241 int32_t sdword; | |
| 242 qword_t sqword; | |
| 243 unsigned char byte; | |
| 244 unsigned short word; | |
| 245 uint32_t dword; | |
| 246 qword_t qword; | |
| 247 float sreal; | |
| 248 double dreal; | |
| 249 /* misc large/non-native types */ | |
| 250 unsigned char extreal[10]; | |
| 251 unsigned char bcd[10]; | |
| 252 qword_t dqword[2]; | |
| 253 unsigned char simd[16]; | |
| 254 unsigned char fpuenv[28]; | |
| 255 /* offset from segment */ | |
| 256 uint32_t offset; | |
| 257 /* ID of CPU register */ | |
| 258 x86_reg_t reg; | |
| 259 /* offsets from current insn */ | |
| 260 char relative_near; | |
| 261 int32_t relative_far; | |
| 262 /* segment:offset */ | |
| 263 x86_absolute_t absolute; | |
| 264 /* effective address [expression] */ | |
| 265 x86_ea_t expression; | |
| 266 } data; | |
| 267 /* this is needed to make formatting operands more sane */ | |
| 268 void * insn; /* pointer to x86_insn_t owning operand */ | |
| 269 } x86_op_t; | |
| 270 | |
| 271 /* Linked list of x86_op_t; provided for manual traversal of the operand | |
| 272 * list in an insn. Users wishing to add operands to this list, e.g. to add | |
| 273 * implicit operands, should use x86_operand_new in x86_operand_list.h */ | |
| 274 typedef struct x86_operand_list { | |
| 275 x86_op_t op; | |
| 276 struct x86_operand_list *next; | |
| 277 } x86_oplist_t; | |
| 278 | |
| 279 enum x86_insn_group { | |
| 280 insn_none = 0, /* invalid instruction */ | |
| 281 insn_controlflow = 1, | |
| 282 insn_arithmetic = 2, | |
| 283 insn_logic = 3, | |
| 284 insn_stack = 4, | |
| 285 insn_comparison = 5, | |
| 286 insn_move = 6, | |
| 287 insn_string = 7, | |
| 288 insn_bit_manip = 8, | |
| 289 insn_flag_manip = 9, | |
| 290 insn_fpu = 10, | |
| 291 insn_interrupt = 13, | |
| 292 insn_system = 14, | |
| 293 insn_other = 15 | |
| 294 }; | |
| 295 | |
| 296 enum x86_insn_type { | |
| 297 insn_invalid = 0, /* invalid instruction */ | |
| 298 /* insn_controlflow */ | |
| 299 insn_jmp = 0x1001, | |
| 300 insn_jcc = 0x1002, | |
| 301 insn_call = 0x1003, | |
| 302 insn_callcc = 0x1004, | |
| 303 insn_return = 0x1005, | |
| 304 /* insn_arithmetic */ | |
| 305 insn_add = 0x2001, | |
| 306 insn_sub = 0x2002, | |
| 307 insn_mul = 0x2003, | |
| 308 insn_div = 0x2004, | |
| 309 insn_inc = 0x2005, | |
| 310 insn_dec = 0x2006, | |
| 311 insn_shl = 0x2007, | |
| 312 insn_shr = 0x2008, | |
| 313 insn_rol = 0x2009, | |
| 314 insn_ror = 0x200A, | |
| 315 /* insn_logic */ | |
| 316 insn_and = 0x3001, | |
| 317 insn_or = 0x3002, | |
| 318 insn_xor = 0x3003, | |
| 319 insn_not = 0x3004, | |
| 320 insn_neg = 0x3005, | |
| 321 /* insn_stack */ | |
| 322 insn_push = 0x4001, | |
| 323 insn_pop = 0x4002, | |
| 324 insn_pushregs = 0x4003, | |
| 325 insn_popregs = 0x4004, | |
| 326 insn_pushflags = 0x4005, | |
| 327 insn_popflags = 0x4006, | |
| 328 insn_enter = 0x4007, | |
| 329 insn_leave = 0x4008, | |
| 330 /* insn_comparison */ | |
| 331 insn_test = 0x5001, | |
| 332 insn_cmp = 0x5002, | |
| 333 /* insn_move */ | |
| 334 insn_mov = 0x6001, /* move */ | |
| 335 insn_movcc = 0x6002, /* conditional move */ | |
| 336 insn_xchg = 0x6003, /* exchange */ | |
| 337 insn_xchgcc = 0x6004, /* conditional exchange */ | |
| 338 /* insn_string */ | |
| 339 insn_strcmp = 0x7001, | |
| 340 insn_strload = 0x7002, | |
| 341 insn_strmov = 0x7003, | |
| 342 insn_strstore = 0x7004, | |
| 343 insn_translate = 0x7005, /* xlat */ | |
| 344 /* insn_bit_manip */ | |
| 345 insn_bittest = 0x8001, | |
| 346 insn_bitset = 0x8002, | |
| 347 insn_bitclear = 0x8003, | |
| 348 /* insn_flag_manip */ | |
| 349 insn_clear_carry = 0x9001, | |
| 350 insn_clear_zero = 0x9002, | |
| 351 insn_clear_oflow = 0x9003, | |
| 352 insn_clear_dir = 0x9004, | |
| 353 insn_clear_sign = 0x9005, | |
| 354 insn_clear_parity = 0x9006, | |
| 355 insn_set_carry = 0x9007, | |
| 356 insn_set_zero = 0x9008, | |
| 357 insn_set_oflow = 0x9009, | |
| 358 insn_set_dir = 0x900A, | |
| 359 insn_set_sign = 0x900B, | |
| 360 insn_set_parity = 0x900C, | |
| 361 insn_tog_carry = 0x9010, | |
| 362 insn_tog_zero = 0x9020, | |
| 363 insn_tog_oflow = 0x9030, | |
| 364 insn_tog_dir = 0x9040, | |
| 365 insn_tog_sign = 0x9050, | |
| 366 insn_tog_parity = 0x9060, | |
| 367 /* insn_fpu */ | |
| 368 insn_fmov = 0xA001, | |
| 369 insn_fmovcc = 0xA002, | |
| 370 insn_fneg = 0xA003, | |
| 371 insn_fabs = 0xA004, | |
| 372 insn_fadd = 0xA005, | |
| 373 insn_fsub = 0xA006, | |
| 374 insn_fmul = 0xA007, | |
| 375 insn_fdiv = 0xA008, | |
| 376 insn_fsqrt = 0xA009, | |
| 377 insn_fcmp = 0xA00A, | |
| 378 insn_fcos = 0xA00C, | |
| 379 insn_fldpi = 0xA00D, | |
| 380 insn_fldz = 0xA00E, | |
| 381 insn_ftan = 0xA00F, | |
| 382 insn_fsine = 0xA010, | |
| 383 insn_fsys = 0xA020, | |
| 384 /* insn_interrupt */ | |
| 385 insn_int = 0xD001, | |
| 386 insn_intcc = 0xD002, /* not present in x86 ISA */ | |
| 387 insn_iret = 0xD003, | |
| 388 insn_bound = 0xD004, | |
| 389 insn_debug = 0xD005, | |
| 390 insn_trace = 0xD006, | |
| 391 insn_invalid_op = 0xD007, | |
| 392 insn_oflow = 0xD008, | |
| 393 /* insn_system */ | |
| 394 insn_halt = 0xE001, | |
| 395 insn_in = 0xE002, /* input from port/bus */ | |
| 396 insn_out = 0xE003, /* output to port/bus */ | |
| 397 insn_cpuid = 0xE004, | |
| 398 /* insn_other */ | |
| 399 insn_nop = 0xF001, | |
| 400 insn_bcdconv = 0xF002, /* convert to or from BCD */ | |
| 401 insn_szconv = 0xF003 /* change size of operand */ | |
| 402 }; | |
| 403 | |
| 404 /* These flags specify special characteristics of the instruction, such as | |
| 405 * whether the instruction is privileged or whether it serializes the | |
| 406 * pipeline. | |
| 407 * NOTE : These may not be accurate for all instructions; updates to the | |
| 408 * opcode tables have not been completed. */ | |
| 409 enum x86_insn_note { | |
| 410 insn_note_ring0 = 1, /* Only available in ring 0 */ | |
| 411 insn_note_smm = 2, /* Only available in System Management Mode */ | |
| 412 insn_note_serial = 4, /* Serializing instruction */ | |
| 413 insn_note_nonswap = 8, /* Does not swap arguments in att-style formatting */ | |
| 414 insn_note_nosuffix = 16, /* Does not have size suffix in att-style formatting */ | |
| 415 }; | |
| 416 | |
| 417 /* This specifies what effects the instruction has on the %eflags register */ | |
| 418 enum x86_flag_status { | |
| 419 insn_carry_set = 0x1, /* CF */ | |
| 420 insn_zero_set = 0x2, /* ZF */ | |
| 421 insn_oflow_set = 0x4, /* OF */ | |
| 422 insn_dir_set = 0x8, /* DF */ | |
| 423 insn_sign_set = 0x10, /* SF */ | |
| 424 insn_parity_set = 0x20, /* PF */ | |
| 425 insn_carry_or_zero_set = 0x40, | |
| 426 insn_zero_set_or_sign_ne_oflow = 0x80, | |
| 427 insn_carry_clear = 0x100, | |
| 428 insn_zero_clear = 0x200, | |
| 429 insn_oflow_clear = 0x400, | |
| 430 insn_dir_clear = 0x800, | |
| 431 insn_sign_clear = 0x1000, | |
| 432 insn_parity_clear = 0x2000, | |
| 433 insn_sign_eq_oflow = 0x4000, | |
| 434 insn_sign_ne_oflow = 0x8000 | |
| 435 }; | |
| 436 | |
| 437 /* The CPU model in which the instruction first appeared; this can be used | |
| 438 * to mask out instructions appearing in earlier or later models or to | |
| 439 * check the portability of a binary. | |
| 440 * NOTE : These may not be accurate for all instructions; updates to the | |
| 441 * opcode tables have not been completed. */ | |
| 442 enum x86_insn_cpu { | |
| 443 cpu_8086 = 1, /* Intel */ | |
| 444 cpu_80286 = 2, | |
| 445 cpu_80386 = 3, | |
| 446 cpu_80387 = 4, | |
| 447 cpu_80486 = 5, | |
| 448 cpu_pentium = 6, | |
| 449 cpu_pentiumpro = 7, | |
| 450 cpu_pentium2 = 8, | |
| 451 cpu_pentium3 = 9, | |
| 452 cpu_pentium4 = 10, | |
| 453 cpu_k6 = 16, /* AMD */ | |
| 454 cpu_k7 = 32, | |
| 455 cpu_athlon = 48 | |
| 456 }; | |
| 457 | |
| 458 /* CPU ISA subsets: These are derived from the Instruction Groups in | |
| 459 * Intel Vol 1 Chapter 5; they represent subsets of the IA32 ISA but | |
| 460 * do not reflect the 'type' of the instruction in the same way that | |
| 461 * x86_insn_group does. In short, these are AMD/Intel's somewhat useless | |
| 462 * designations. | |
| 463 * NOTE : These may not be accurate for all instructions; updates to the | |
| 464 * opcode tables have not been completed. */ | |
| 465 enum x86_insn_isa { | |
| 466 isa_gp = 1, /* general purpose */ | |
| 467 isa_fp = 2, /* floating point */ | |
| 468 isa_fpumgt = 3, /* FPU/SIMD management */ | |
| 469 isa_mmx = 4, /* Intel MMX */ | |
| 470 isa_sse1 = 5, /* Intel SSE SIMD */ | |
| 471 isa_sse2 = 6, /* Intel SSE2 SIMD */ | |
| 472 isa_sse3 = 7, /* Intel SSE3 SIMD */ | |
| 473 isa_3dnow = 8, /* AMD 3DNow! SIMD */ | |
| 474 isa_sys = 9 /* system instructions */ | |
| 475 }; | |
| 476 | |
| 477 enum x86_insn_prefix { | |
| 478 insn_no_prefix = 0, | |
| 479 insn_rep_zero = 1, /* REPZ and REPE */ | |
| 480 insn_rep_notzero = 2, /* REPNZ and REPNE */ | |
| 481 insn_lock = 4 /* LOCK: */ | |
| 482 }; | |
| 483 | |
| 484 /* TODO: maybe provide insn_new/free(), and have disasm return new insn_t */ | |
| 485 /* x86_insn_t : an X86 instruction */ | |
| 486 typedef struct { | |
| 487 /* information about the instruction */ | |
| 488 uint32_t addr; /* load address */ | |
| 489 uint32_t offset; /* offset into file/buffer */ | |
| 490 enum x86_insn_group group; /* meta-type, e.g. insn_controlflow */ | |
| 491 enum x86_insn_type type; /* type, e.g. insn_jmp */ | |
| 492 enum x86_insn_note note; /* note, e.g. insn_note_ring0 */ | |
| 493 unsigned char bytes[MAX_INSN_SIZE]; | |
| 494 unsigned char size; /* size of insn in bytes */ | |
| 495 /* 16/32-bit mode settings */ | |
| 496 unsigned char addr_size; /* default address size : 2 or 4 */ | |
| 497 unsigned char op_size; /* default operand size : 2 or 4 */ | |
| 498 /* CPU/instruction set */ | |
| 499 enum x86_insn_cpu cpu; /* CPU model in which the insn first appeared */ | |
| 500 enum x86_insn_isa isa; /* ISA subset the insn belongs to */ | |
| 501 /* flags */ | |
| 502 enum x86_flag_status flags_set; /* flags set or tested by insn */ | |
| 503 enum x86_flag_status flags_tested; | |
| 504 /* stack */ | |
| 505 unsigned char stack_mod; /* 0 or 1 : is the stack modified? */ | |
| 506 int32_t stack_mod_val; /* val stack is modified by if known */ | |
| 507 | |
| 508 /* the instruction proper */ | |
| 509 enum x86_insn_prefix prefix; /* prefixes ORed together */ | |
| 510 char prefix_string[MAX_PREFIX_STR]; /* prefixes [might be truncated] */ | |
| 511 char mnemonic[MAX_MNEM_STR]; | |
| 512 x86_oplist_t *operands; /* list of explicit/implicit operands */ | |
| 513 size_t operand_count; /* total number of operands */ | |
| 514 size_t explicit_count; /* number of explicit operands */ | |
| 515 /* convenience fields for user */ | |
| 516 void *block; /* code block containing this insn */ | |
| 517 void *function; /* function containing this insn */ | |
| 518 int tag; /* tag the insn as seen/processed */ | |
| 519 } x86_insn_t; | |
| 520 | |
| 521 | |
| 522 /* returns 0 if an instruction is invalid, 1 if valid */ | |
| 523 int x86_insn_is_valid( x86_insn_t *insn ); | |
| 524 | |
| 525 /* DISASSEMBLY ROUTINES | |
| 526 * Canonical order of arguments is | |
| 527 * (buf, buf_len, buf_rva, offset, len, insn, func, arg, resolve_func) | |
| 528 * ...but of course all of these are not used at the same time. | |
| 529 */ | |
| 530 | |
| 531 | |
| 532 /* Function prototype for caller-supplied callback routine | |
| 533 * These callbacks are intended to process 'insn' further, e.g. by | |
| 534 * adding it to a linked list, database, etc */ | |
| 535 typedef void (*DISASM_CALLBACK)( x86_insn_t *insn, void * arg ); | |
| 536 | |
| 537 /* Function prototype for caller-supplied address resolver. | |
| 538 * This routine is used to determine the rva to disassemble next, given | |
| 539 * the 'dest' operand of a jump/call. This allows the caller to resolve | |
| 540 * jump/call targets stored in a register or on the stack, and also allows | |
| 541 * the caller to prevent endless loops by checking if an address has | |
| 542 * already been disassembled. If an address cannot be resolved from the | |
| 543 * operand, or if the address has already been disassembled, this routine | |
| 544 * should return -1; in all other cases the RVA to be disassembled next | |
| 545 * should be returned. */ | |
| 546 typedef int32_t (*DISASM_RESOLVER)( x86_op_t *op, x86_insn_t * current_insn, | |
| 547 void *arg ); | |
| 548 | |
| 549 | |
| 550 /* x86_disasm: Disassemble a single instruction from a buffer of bytes. | |
| 551 * Returns size of instruction in bytes. | |
| 552 * Caller is responsible for calling x86_oplist_free() on | |
| 553 * a reused "insn" to avoid leaking memory when calling this | |
| 554 * function repeatedly. | |
| 555 * buf : Buffer of bytes to disassemble | |
| 556 * buf_len : Length of the buffer | |
| 557 * buf_rva : Load address of the start of the buffer | |
| 558 * offset : Offset in buffer to disassemble | |
| 559 * insn : Structure to fill with disassembled instruction | |
| 560 */ | |
| 561 unsigned int x86_disasm( unsigned char *buf, unsigned int buf_len, | |
| 562 uint32_t buf_rva, unsigned int offset, | |
| 563 x86_insn_t * insn ); | |
| 564 | |
| 565 /* x86_disasm_range: Sequential disassembly of a range of bytes in a buffer, | |
| 566 * invoking a callback function each time an instruction | |
| 567 * is successfully disassembled. The 'range' refers to the | |
| 568 * bytes between 'offset' and 'offset + len' in the buffer; | |
| 569 * 'len' is assumed to be less than the length of the buffer. | |
| 570 * Returns number of instructions processed. | |
| 571 * buf : Buffer of bytes to disassemble (e.g. .text section) | |
| 572 * buf_rva : Load address of buffer (e.g. ELF Virtual Address) | |
| 573 * offset : Offset in buffer to start disassembly at | |
| 574 * len : Number of bytes to disassemble | |
| 575 * func : Callback function to invoke (may be NULL) | |
| 576 * arg : Arbitrary data to pass to callback (may be NULL) | |
| 577 */ | |
| 578 unsigned int x86_disasm_range( unsigned char *buf, uint32_t buf_rva, | |
| 579 unsigned int offset, unsigned int len, | |
| 580 DISASM_CALLBACK func, void *arg ); | |
| 581 | |
| 582 /* x86_disasm_forward: Flow-of-execution disassembly of the bytes in a buffer, | |
| 583 * invoking a callback function each time an instruction | |
| 584 * is successfully disassembled. | |
| 585 * buf : Buffer to disassemble (e.g. .text section) | |
| 586 * buf_len : Number of bytes in buffer | |
| 587 * buf_rva : Load address of buffer (e.g. ELF Virtual Address) | |
| 588 * offset : Offset in buffer to start disassembly at (e.g. entry point) | |
| 589 * func : Callback function to invoke (may be NULL) | |
| 590 * arg : Arbitrary data to pass to callback (may be NULL) | |
| 591 * resolver: Caller-supplied address resolver. If no resolver is | |
| 592 * supplied, a default internal one is used -- however the | |
| 593 * internal resolver does NOT catch loops and could end up | |
| 594 * disassembling forever. | |
| 595 * r_arg : Arbitrary data to pass to resolver (may be NULL) | |
| 596 */ | |
| 597 unsigned int x86_disasm_forward( unsigned char *buf, unsigned int buf_len, | |
| 598 uint32_t buf_rva, unsigned int offset, | |
| 599 DISASM_CALLBACK func, void *arg, | |
| 600 DISASM_RESOLVER resolver, void *r_arg ); | |
| 601 | |
| 602 /* Instruction operands: these are stored as a list of explicit and | |
| 603 * implicit operands. It is recommended that the 'foreach' routines | |
| 604 * be used when examining operands for purposes of data flow analysis */ | |
| 605 | |
| 606 /* Operand FOREACH callback: 'arg' is an arbitrary parameter passed to the | |
| 607 * foreach routine, 'insn' is the x86_insn_t whose operands are being | |
| 608 * iterated over, and 'op' is the current x86_op_t */ | |
| 609 typedef void (*x86_operand_fn)(x86_op_t *op, x86_insn_t *insn, void *arg); | |
| 610 | |
| 611 /* FOREACH types: these are used to limit the foreach results to | |
| 612 * operands which match a certain "type" (implicit or explicit) | |
| 613 * or which are accessed in certain ways (e.g. read or write). Note | |
| 614 * that this operates on the operand list of a single instruction, so | |
| 615 * specifying the 'real' operand type (register, memory, etc) is not | |
| 616 * useful. Note also that by definition Execute Access implies Read | |
| 617 * Access and implies Not Write Access. | |
| 618 * The "type" (implicit or explicit) and the access method can | |
| 619 * be ORed together, e.g. op_wo | op_explicit */ | |
| 620 enum x86_op_foreach_type { | |
| 621 op_any = 0, /* ALL operands (explicit, implicit, rwx) */ | |
| 622 op_dest = 1, /* operands with Write access */ | |
| 623 op_src = 2, /* operands with Read access */ | |
| 624 op_ro = 3, /* operands with Read but not Write access */ | |
| 625 op_wo = 4, /* operands with Write but not Read access */ | |
| 626 op_xo = 5, /* operands with Execute access */ | |
| 627 op_rw = 6, /* operands with Read AND Write access */ | |
| 628 op_implicit = 0x10, /* operands that are implied by the opcode */ | |
| 629 op_explicit = 0x20 /* operands that are not side-effects */ | |
| 630 }; | |
| 631 | |
| 632 | |
| 633 /* free the operand list associated with an instruction -- useful for | |
| 634 * preventing memory leaks when free()ing an x86_insn_t */ | |
| 635 void x86_oplist_free( x86_insn_t *insn ); | |
| 636 | |
| 637 /* Operand foreach: invokes 'func' with 'insn' and 'arg' as arguments. The | |
| 638 * 'type' parameter is used to select only operands matching specific | |
| 639 * criteria. */ | |
| 640 int x86_operand_foreach( x86_insn_t *insn, x86_operand_fn func, void *arg, | |
| 641 enum x86_op_foreach_type type); | |
| 642 | |
| 643 /* convenience routine: returns count of operands matching 'type' */ | |
| 644 size_t x86_operand_count( x86_insn_t *insn, enum x86_op_foreach_type type ); | |
| 645 | |
| 646 /* accessor functions for the operands */ | |
| 647 x86_op_t * x86_operand_1st( x86_insn_t *insn ); | |
| 648 x86_op_t * x86_operand_2nd( x86_insn_t *insn ); | |
| 649 x86_op_t * x86_operand_3rd( x86_insn_t *insn ); | |
| 650 | |
| 651 /* these allow libdisasm 2.0 accessor functions to still be used */ | |
| 652 #define x86_get_dest_operand( insn ) x86_operand_1st( insn ) | |
| 653 #define x86_get_src_operand( insn ) x86_operand_2nd( insn ) | |
| 654 #define x86_get_imm_operand( insn ) x86_operand_3rd( insn ) | |
| 655 | |
| 656 /* get size of operand data in bytes */ | |
| 657 unsigned int x86_operand_size( x86_op_t *op ); | |
| 658 | |
| 659 /* Operand Convenience Routines: the following three routines are common | |
| 660 * operations on operands, intended to ease the burden of the programmer. */ | |
| 661 | |
| 662 /* Get Address: return the value of an offset operand, or the offset of | |
| 663 * a segment:offset absolute address */ | |
| 664 uint32_t x86_get_address( x86_insn_t *insn ); | |
| 665 | |
| 666 /* Get Relative Offset: return as a sign-extended int32_t the near or far | |
| 667 * relative offset operand, or 0 if there is none. There can be only one | |
| 668 * relative offset operand in an instruction. */ | |
| 669 int32_t x86_get_rel_offset( x86_insn_t *insn ); | |
| 670 | |
| 671 /* Get Branch Target: return the x86_op_t containing the target of | |
| 672 * a jump or call operand, or NULL if there is no branch target. | |
| 673 * Internally, a 'branch target' is defined as any operand with | |
| 674 * Execute Access set. There can be only one branch target per instruction. */ | |
| 675 x86_op_t * x86_get_branch_target( x86_insn_t *insn ); | |
| 676 | |
| 677 /* Get Immediate: return the x86_op_t containing the immediate operand | |
| 678 * for this instruction, or NULL if there is no immediate operand. There | |
| 679 * can be only one immediate operand per instruction */ | |
| 680 x86_op_t * x86_get_imm( x86_insn_t *insn ); | |
| 681 | |
| 682 /* Get Raw Immediate Data: returns a pointer to the immediate data encoded | |
| 683 * in the instruction. This is useful for large data types [>32 bits] currently | |
| 684 * not supported by libdisasm, or for determining if the disassembler | |
| 685 * screwed up the conversion of the immediate data. Note that 'imm' in this | |
| 686 * context refers to immediate data encoded at the end of an instruction as | |
| 687 * detailed in the Intel Manual Vol II Chapter 2; it does not refer to the | |
| 688 * 'op_imm' operand (the third operand in instructions like 'mul') */ | |
| 689 unsigned char * x86_get_raw_imm( x86_insn_t *insn ); | |
| 690 | |
| 691 | |
| 692 /* More accessor functions, this time for user-defined info... */ | |
| 693 /* set the address (usually RVA) of the insn */ | |
| 694 void x86_set_insn_addr( x86_insn_t *insn, uint32_t addr ); | |
| 695 | |
| 696 /* set the offset (usually offset into file) of the insn */ | |
| 697 void x86_set_insn_offset( x86_insn_t *insn, unsigned int offset ); | |
| 698 | |
| 699 /* set a pointer to the function owning the instruction. The | |
| 700 * type of 'func' is user-defined; libdisasm does not use the func field. */ | |
| 701 void x86_set_insn_function( x86_insn_t *insn, void * func ); | |
| 702 | |
| 703 /* set a pointer to the block of code owning the instruction. The | |
| 704 * type of 'block' is user-defined; libdisasm does not use the block field. */ | |
| 705 void x86_set_insn_block( x86_insn_t *insn, void * block ); | |
| 706 | |
| 707 /* instruction tagging: these routines allow the programmer to mark | |
| 708 * instructions as "seen" in a DFS, for example. libdisasm does not use | |
| 709 * the tag field.*/ | |
| 710 /* set insn->tag to 1 */ | |
| 711 void x86_tag_insn( x86_insn_t *insn ); | |
| 712 /* set insn->tag to 0 */ | |
| 713 void x86_untag_insn( x86_insn_t *insn ); | |
| 714 /* return insn->tag */ | |
| 715 int x86_insn_is_tagged( x86_insn_t *insn ); | |
| 716 | |
| 717 | |
| 718 /* Disassembly formats: | |
| 719 * AT&T is standard AS/GAS-style: "mnemonic\tsrc, dest, imm" | |
| 720 * Intel is standard MASM/NASM/TASM: "mnemonic\tdest,src, imm" | |
| 721 * Native is tab-delimited: "RVA\tbytes\tmnemonic\tdest\tsrc\timm" | |
| 722 * XML is your typical <insn> ... </insn> | |
| 723 * Raw is addr|offset|size|bytes|prefix... see libdisasm_formats.7 | |
| 724 */ | |
| 725 enum x86_asm_format { | |
| 726 unknown_syntax = 0, /* never use! */ | |
| 727 native_syntax, /* header: 35 bytes */ | |
| 728 intel_syntax, /* header: 23 bytes */ | |
| 729 att_syntax, /* header: 23 bytes */ | |
| 730 xml_syntax, /* header: 679 bytes */ | |
| 731 raw_syntax /* header: 172 bytes */ | |
| 732 }; | |
| 733 | |
| 734 /* format (sprintf) an operand into 'buf' using specified syntax */ | |
| 735 int x86_format_operand(x86_op_t *op, char *buf, int len, | |
| 736 enum x86_asm_format format); | |
| 737 | |
| 738 /* format (sprintf) an instruction mnemonic into 'buf' using specified syntax */ | |
| 739 int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len, | |
| 740 enum x86_asm_format format); | |
| 741 | |
| 742 /* format (sprintf) an instruction into 'buf' using specified syntax; | |
| 743 * this includes formatting all operands */ | |
| 744 int x86_format_insn(x86_insn_t *insn, char *buf, int len, enum x86_asm_format); | |
| 745 | |
| 746 /* fill 'buf' with a description of the format's syntax */ | |
| 747 int x86_format_header( char *buf, int len, enum x86_asm_format format); | |
| 748 | |
| 749 /* Endianness of an x86 CPU : 0 is big, 1 is little; always returns 1 */ | |
| 750 unsigned int x86_endian(void); | |
| 751 | |
| 752 /* Default address and operand size in bytes */ | |
| 753 unsigned int x86_addr_size(void); | |
| 754 unsigned int x86_op_size(void); | |
| 755 | |
| 756 /* Size of a machine word in bytes */ | |
| 757 unsigned int x86_word_size(void); | |
| 758 | |
| 759 /* maximum size of a code instruction */ | |
| 760 #define x86_max_inst_size(x) x86_max_insn_size(x) | |
| 761 unsigned int x86_max_insn_size(void); | |
| 762 | |
| 763 /* register IDs of Stack, Frame, Instruction pointer and Flags register */ | |
| 764 unsigned int x86_sp_reg(void); | |
| 765 unsigned int x86_fp_reg(void); | |
| 766 unsigned int x86_ip_reg(void); | |
| 767 unsigned int x86_flag_reg(void); | |
| 768 | |
| 769 /* fill 'reg' struct with details of register 'id' */ | |
| 770 void x86_reg_from_id( unsigned int id, x86_reg_t * reg ); | |
| 771 | |
| 772 /* convenience macro demonstrating how to get an aliased register; proto is | |
| 773 * void x86_get_aliased_reg( x86_reg_t *alias_reg, x86_reg_t *output_reg ) | |
| 774 * where 'alias_reg' is a reg operand and 'output_reg' is filled with the | |
| 775 * register that the operand is an alias for; the arguments are parenthesized in the expansion so any pointer expression (e.g. &reg) may be passed */ | |
| 776 #define x86_get_aliased_reg( alias_reg, output_reg ) \ | |
| 777 x86_reg_from_id( (alias_reg)->alias, (output_reg) ) | |
| 778 | |
| 779 | |
| 780 /* ================================== Invariant Instruction Representation */ | |
| 781 /* Invariant instructions are used for generating binary signatures; | |
| 782 * the instruction is modified so that all variant bytes in an instruction | |
| 783 * are replaced with a wildcard byte. | |
| 784 * | |
| 785 * A 'variant byte' is one that is expected to be modified by either the | |
| 786 * static or the dynamic linker: for example, an address encoded in an | |
| 787 * instruction. | |
| 788 * | |
| 789 * By comparing the invariant representation of one instruction [or of a | |
| 790 * sequence of instructions] with the invariant representation of another, | |
| 791 * one can determine whether the two invariant representations are from the same | |
| 792 * relocatable object [.o] file. Thus one can use binary signatures [which | |
| 793 * are just sequences of invariant instruction representations] to look for | |
| 794 * library routines which have been statically-linked into a binary. | |
| 795 * | |
| 796 * The invariant routines are faster and smaller than the disassembly | |
| 797 * routines; they can be used to determine the size of an instruction | |
| 798 * without all of the overhead of a full instruction disassembly. | |
| 799 */ | |
| 800 | |
| 801 /* This byte is used to replace variant bytes */ | |
| 802 #define X86_WILDCARD_BYTE 0xF4 | |
| 803 | |
| 804 typedef struct { | |
| 805 enum x86_op_type type; /* operand type */ | |
| 806 enum x86_op_datatype datatype; /* operand size */ | |
| 807 enum x86_op_access access; /* operand access [RWX] */ | |
| 808 enum x86_op_flags flags; /* misc flags */ | |
| 809 } x86_invariant_op_t; | |
| 810 | |
| 811 typedef struct { | |
| 812 unsigned char bytes[64]; /* invariant representation */ | |
| 813 unsigned int size; /* number of bytes in insn */ | |
| 814 enum x86_insn_group group; /* meta-type, e.g. INS_EXEC */ | |
| 815 enum x86_insn_type type; /* type, e.g. INS_BRANCH */ | |
| 816 x86_invariant_op_t operands[3]; /* operands: dest, src, imm */ | |
| 817 } x86_invariant_t; | |
| 818 | |
| 819 | |
| 820 /* return a version of the instruction with the variant bytes masked out */ | |
| 821 size_t x86_invariant_disasm( unsigned char *buf, int buf_len, | |
| 822 x86_invariant_t *inv ); | |
| 823 /* return the size in bytes of the instruction pointed to by 'buf'; | |
| 824 * this uses x86_invariant_disasm since it is faster than x86_disasm */ | |
| 825 size_t x86_size_disasm( unsigned char *buf, unsigned int buf_len ); | |
| 826 | |
| 827 #ifdef __cplusplus | |
| 828 } | |
| 829 #endif | |
| 830 | |
| 831 | |
| 832 #endif | |
| OLD | NEW |