OLD | NEW |
| (Empty) |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "x86_decode.h" | |
6 | |
7 namespace playground { | |
8 | |
9 #if defined(__x86_64__) || defined(__i386__) | |
10 unsigned short next_inst(const char **ip, bool is64bit, bool *has_prefix, | |
11 char **rex_ptr, char **mod_rm_ptr, char **sib_ptr, | |
12 bool *is_group) { | |
13 enum { | |
14 BYTE_OP = (1<<1), // 0x02 | |
15 IMM = (1<<2), // 0x04 | |
16 IMM_BYTE = (2<<2), // 0x08 | |
17 MEM_ABS = (3<<2), // 0x0C | |
18 MODE_MASK = (7<<2), // 0x1C | |
19 MOD_RM = (1<<5), // 0x20 | |
20 STACK = (1<<6), // 0x40 | |
21 GROUP = (1<<7), // 0x80 | |
22 GROUP_MASK = 0x7F, | |
23 }; | |
24 | |
25 static unsigned char opcode_types[512] = { | |
26 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x00 - 0x07 | |
27 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x00, // 0x08 - 0x0F | |
28 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x10 - 0x17 | |
29 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x01, 0x01, // 0x18 - 0x1F | |
30 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x20 - 0x27 | |
31 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x28 - 0x2F | |
32 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x30 - 0x37 | |
33 0x23, 0x21, 0x23, 0x21, 0x09, 0x05, 0x00, 0x01, // 0x38 - 0x3F | |
34 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x40 - 0x47 | |
35 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x48 - 0x4F | |
36 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x50 - 0x57 | |
37 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41, // 0x58 - 0x5F | |
38 0x01, 0x01, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0x60 - 0x67 | |
39 0x45, 0x25, 0x49, 0x29, 0x03, 0x01, 0x03, 0x01, // 0x68 - 0x6F | |
40 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x70 - 0x77 | |
41 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0x78 - 0x7F | |
42 0x27, 0x25, 0x27, 0x29, 0x23, 0x21, 0x23, 0x21, // 0x80 - 0x87 | |
43 0x23, 0x21, 0x23, 0x21, 0x21, 0x21, 0x21, 0x80, // 0x88 - 0x8F | |
44 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0x90 - 0x97 | |
45 0x01, 0x01, 0x05, 0x01, 0x41, 0x41, 0x01, 0x01, // 0x98 - 0x9F | |
46 0x0F, 0x0D, 0x0F, 0x0D, 0x03, 0x01, 0x03, 0x01, // 0xA0 - 0xA7 | |
47 0x09, 0x05, 0x03, 0x01, 0x03, 0x01, 0x03, 0x01, // 0xA8 - 0xAF | |
48 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, // 0xB0 - 0xB7 | |
49 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0xB8 - 0xBF | |
50 0x27, 0x29, 0x01, 0x01, 0x21, 0x21, 0x27, 0x25, // 0xC0 - 0xC7 | |
51 0x01, 0x01, 0x01, 0x01, 0x01, 0x09, 0x01, 0x01, // 0xC8 - 0xCF | |
52 0x23, 0x21, 0x23, 0x21, 0x09, 0x09, 0x01, 0x01, // 0xD0 - 0xD7 | |
53 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xD8 - 0xDF | |
54 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, // 0xE0 - 0xE7 | |
55 0x05, 0x05, 0x05, 0x09, 0x03, 0x01, 0x03, 0x01, // 0xE8 - 0xEF | |
56 0x00, 0x01, 0x00, 0x00, 0x01, 0x01, 0x88, 0x90, // 0xF0 - 0xF7 | |
57 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x98, 0xA0, // 0xF8 - 0xFF | |
58 0x00, 0xA8, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, // 0xF00 - 0xF07 | |
59 0x01, 0x01, 0x00, 0x01, 0x00, 0x21, 0x01, 0x00, // 0xF08 - 0xF0F | |
60 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF10 - 0xF17 | |
61 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF18 - 0xF1F | |
62 0x21, 0x21, 0x21, 0x21, 0x00, 0x00, 0x00, 0x00, // 0xF20 - 0xF27 | |
63 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF28 - 0xF2F | |
64 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, // 0xF30 - 0xF37 | |
65 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF38 - 0xF3F | |
66 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF40 - 0xF47 | |
67 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF48 - 0xF4F | |
68 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF50 - 0xF57 | |
69 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF58 - 0xF5F | |
70 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF60 - 0xF67 | |
71 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xF68 - 0xF6F | |
72 0x21, 0x00, 0x00, 0x00, 0x21, 0x21, 0x21, 0x00, // 0xF70 - 0xF77 | |
73 0x21, 0x21, 0x00, 0x00, 0x21, 0x21, 0x21, 0x21, // 0xF78 - 0xF7F | |
74 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF80 - 0xF87 | |
75 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xF88 - 0xF8F | |
76 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF90 - 0xF97 | |
77 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xF98 - 0xF9F | |
78 0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x00, 0x00, // 0xFA0 - 0xFA7 | |
79 0x01, 0x01, 0x01, 0x21, 0x29, 0x21, 0x21, 0x21, // 0xFA8 - 0xFAF | |
80 0x23, 0x21, 0x00, 0x21, 0x00, 0x00, 0x23, 0x21, // 0xFB0 - 0xFB7 | |
81 0x21, 0x00, 0x29, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFB8 - 0xFBF | |
82 0x21, 0x21, 0x00, 0x21, 0x00, 0x00, 0x00, 0x21, // 0xFC0 - 0xFC7 | |
83 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, // 0xFC8 - 0xFCF | |
84 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD0 - 0xFD7 | |
85 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFD8 - 0xFDF | |
86 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE0 - 0xFE7 | |
87 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // 0xFE8 - 0xFEF | |
88 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF0 - 0xFF7 | |
89 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0xFF8 - 0xFFF | |
90 }; | |
91 | |
92 static unsigned char group_table[56] = { | |
93 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 1A | |
94 0x27, 0x27, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, // Group 3 (Byte) | |
95 0x25, 0x25, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // Group 3 | |
96 0x23, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Group 4 | |
97 0x21, 0x21, 0x61, 0x21, 0x61, 0x21, 0x61, 0x00, // Group 5 | |
98 0x00, 0x00, 0x21, 0x21, 0x21, 0x00, 0x21, 0x23, // Group 7 | |
99 0x21, 0x00, 0x00, 0x21, 0x21, 0x00, 0x21, 0x00, // Group 7 (Alternate) | |
100 }; | |
101 | |
102 const unsigned char *insn_ptr = reinterpret_cast<const unsigned char *>(*ip); | |
103 int operand_width = 4; | |
104 int address_width = 4; | |
105 if (is64bit) { | |
106 address_width = 8; | |
107 } | |
108 unsigned char byte, rex = 0; | |
109 bool found_prefix = false; | |
110 if (rex_ptr) { | |
111 *rex_ptr = 0; | |
112 } | |
113 if (mod_rm_ptr) { | |
114 *mod_rm_ptr = 0; | |
115 } | |
116 if (sib_ptr) { | |
117 *sib_ptr = 0; | |
118 } | |
119 for (;; ++insn_ptr) { | |
120 switch (byte = *insn_ptr) { | |
121 case 0x66: // Operand width prefix | |
122 operand_width ^= 6; | |
123 break; | |
124 case 0x67: // Address width prefix | |
125 address_width ^= is64bit ? 12 : 6; | |
126 break; | |
127 case 0x26: // Segment selector prefixes | |
128 case 0x2e: | |
129 case 0x36: | |
130 case 0x3e: | |
131 case 0x64: | |
132 case 0x65: | |
133 case 0xF0: | |
134 case 0xF2: | |
135 case 0xF3: | |
136 break; | |
137 case 0x40: case 0x41: case 0x42: case 0x43: // 64 bit REX prefixes | |
138 case 0x44: case 0x45: case 0x46: case 0x47: | |
139 case 0x48: case 0x49: case 0x4A: case 0x4B: | |
140 case 0x4C: case 0x4D: case 0x4E: case 0x4F: | |
141 if (is64bit) { | |
142 if (rex_ptr) { | |
143 *rex_ptr = (char *)insn_ptr; | |
144 } | |
145 rex = byte; | |
146 found_prefix = true; | |
147 continue; | |
148 } | |
149 // fall through | |
150 default: | |
151 ++insn_ptr; | |
152 goto no_more_prefixes; | |
153 } | |
154 rex = 0; | |
155 found_prefix = true; | |
156 } | |
157 no_more_prefixes: | |
158 if (has_prefix) { | |
159 *has_prefix = found_prefix; | |
160 } | |
161 if (rex & REX_W) { | |
162 operand_width = 8; | |
163 } | |
164 unsigned char type; | |
165 unsigned short insn = byte; | |
166 unsigned int idx = 0; | |
167 if (byte == 0x0F) { | |
168 byte = *insn_ptr++; | |
169 insn = (insn << 8) | byte; | |
170 idx = 256; | |
171 } | |
172 type = opcode_types[idx + byte]; | |
173 bool found_mod_rm = false; | |
174 bool found_group = false; | |
175 bool found_sib = false; | |
176 unsigned char mod_rm = 0; | |
177 unsigned char sib = 0; | |
178 if (type & GROUP) { | |
179 found_mod_rm = true; | |
180 found_group = true; | |
181 mod_rm = *insn_ptr; | |
182 if (mod_rm_ptr) { | |
183 *mod_rm_ptr = (char *)insn_ptr; | |
184 } | |
185 unsigned char group = (type & GROUP_MASK) + ((mod_rm >> 3) & 0x7); | |
186 if ((type & GROUP_MASK) == 40 && (mod_rm >> 6) == 3) { | |
187 group += 8; | |
188 } | |
189 type = group_table[group]; | |
190 } | |
191 if (!type) { | |
192 // We know that we still don't decode some of the more obscure | |
193 // instructions, but for all practical purposes that doesn't matter. | |
194 // Compilers are unlikely to output them, and even if we encounter | |
195 // hand-coded assembly, we will soon synchronize to the instruction | |
196 // stream again. | |
197 // | |
198 // std::cerr << "Unsupported instruction at 0x" << std::hex << | |
199 // std::uppercase << reinterpret_cast<long>(*ip) << " [ "; | |
200 // for (const unsigned char *ptr = | |
201 // reinterpret_cast<const unsigned char *>(*ip); | |
202 // ptr < insn_ptr; ) { | |
203 // std::cerr << std::hex << std::uppercase << std::setw(2) << | |
204 // std::setfill('0') << (unsigned int)*ptr++ << ' '; | |
205 // } | |
206 // std::cerr << "]" << std::endl; | |
207 } else { | |
208 if (is64bit && (type & STACK)) { | |
209 operand_width = 8; | |
210 } | |
211 if (type & MOD_RM) { | |
212 found_mod_rm = true; | |
213 if (mod_rm_ptr) { | |
214 *mod_rm_ptr = (char *)insn_ptr; | |
215 } | |
216 mod_rm = *insn_ptr++; | |
217 int mod = (mod_rm >> 6) & 0x3; | |
218 int rm = 8*(rex & REX_B) + (mod_rm & 0x7); | |
219 if (mod != 3) { | |
220 if (address_width == 2) { | |
221 switch (mod) { | |
222 case 0: | |
223 if (rm != 6 /* SI */) { | |
224 break; | |
225 } | |
226 // fall through | |
227 case 2: | |
228 insn_ptr++; | |
229 // fall through | |
230 case 1: | |
231 insn_ptr++; | |
232 break; | |
233 } | |
234 } else { | |
235 if ((rm & 0x7) == 4) { | |
236 found_sib = true; | |
237 if (sib_ptr) { | |
238 *sib_ptr = (char *)insn_ptr; | |
239 } | |
240 sib = *insn_ptr++; | |
241 if (!mod && (sib & 0x7) == 5 /* BP */) { | |
242 insn_ptr += 4; | |
243 } | |
244 } | |
245 switch (mod) { | |
246 case 0: | |
247 if (rm != 5 /* BP */) { | |
248 break; | |
249 } | |
250 // fall through | |
251 case 2: | |
252 insn_ptr += 3; | |
253 // fall through | |
254 case 1: | |
255 insn_ptr++; | |
256 break; | |
257 } | |
258 } | |
259 } | |
260 } | |
261 switch (insn) { | |
262 case 0xC8: // ENTER | |
263 insn_ptr++; | |
264 // fall through | |
265 case 0x9A: // CALL (far) | |
266 case 0xC2: // RET (near) | |
267 case 0xCA: // LRET | |
268 case 0xEA: // JMP (far) | |
269 insn_ptr += 2; | |
270 break; | |
271 case 0xF80: case 0xF81: case 0xF82: case 0xF83: // Jcc (rel) | |
272 case 0xF84: case 0xF85: case 0xF86: case 0xF87: | |
273 case 0xF88: case 0xF89: case 0xF8A: case 0xF8B: | |
274 case 0xF8C: case 0xF8D: case 0xF8E: case 0xF8F: | |
275 insn_ptr += operand_width; | |
276 break; | |
277 } | |
278 switch (type & MODE_MASK) { | |
279 case IMM: | |
280 if (!(type & BYTE_OP)) { | |
281 switch (insn) { | |
282 case 0xB8: case 0xB9: case 0xBA: case 0xBB: | |
283 case 0xBC: case 0xBD: case 0xBE: case 0xBF: | |
284 // Allow MOV to/from 64bit addresses | |
285 insn_ptr += operand_width; | |
286 break; | |
287 default: | |
288 insn_ptr += (operand_width == 8) ? 4 : operand_width; | |
289 break; | |
290 } | |
291 break; | |
292 } | |
293 // fall through | |
294 case IMM_BYTE: | |
295 insn_ptr++; | |
296 break; | |
297 case MEM_ABS: | |
298 insn_ptr += address_width; | |
299 break; | |
300 } | |
301 } | |
302 if (is_group) { | |
303 *is_group = found_group; | |
304 } | |
305 *ip = reinterpret_cast<const char *>(insn_ptr); | |
306 return insn; | |
307 } | |
308 #endif | |
309 | |
310 } // namespace | |
OLD | NEW |