OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Implementation of MiniDisassembler. | |
6 | |
7 #ifdef _WIN64 | |
8 #error The code in this file should not be used on 64-bit Windows. | |
9 #endif | |
10 | |
11 #include "sandbox/win/src/sidestep/mini_disassembler.h" | |
12 | |
13 namespace sidestep { | |
14 | |
15 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, | |
16 bool address_default_is_32_bits) | |
17 : operand_default_is_32_bits_(operand_default_is_32_bits), | |
18 address_default_is_32_bits_(address_default_is_32_bits) { | |
19 Initialize(); | |
20 } | |
21 | |
22 MiniDisassembler::MiniDisassembler() | |
23 : operand_default_is_32_bits_(true), | |
24 address_default_is_32_bits_(true) { | |
25 Initialize(); | |
26 } | |
27 | |
28 InstructionType MiniDisassembler::Disassemble( | |
29 unsigned char* start_byte, | |
30 unsigned int* instruction_bytes) { | |
31 // Clean up any state from previous invocations. | |
32 Initialize(); | |
33 | |
34 // Start by processing any prefixes. | |
35 unsigned char* current_byte = start_byte; | |
36 unsigned int size = 0; | |
37 InstructionType instruction_type = ProcessPrefixes(current_byte, &size); | |
38 | |
39 if (IT_UNKNOWN == instruction_type) | |
40 return instruction_type; | |
41 | |
42 current_byte += size; | |
43 size = 0; | |
44 | |
45 // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ | |
46 // and address_is_32_bits_ flags are correctly set. | |
47 | |
48 instruction_type = ProcessOpcode(current_byte, 0, &size); | |
49 | |
50 // Check for error processing instruction | |
51 if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { | |
52 return IT_UNKNOWN; | |
53 } | |
54 | |
55 current_byte += size; | |
56 | |
57 // Invariant: operand_bytes_ indicates the total size of operands | |
58 // specified by the opcode and/or ModR/M byte and/or SIB byte. | |
59 // pCurrentByte points to the first byte after the ModR/M byte, or after | |
60 // the SIB byte if it is present (i.e. the first byte of any operands | |
61 // encoded in the instruction). | |
62 | |
63 // We get the total length of any prefixes, the opcode, and the ModR/M and | |
64 // SIB bytes if present, by taking the difference of the original starting | |
65 // address and the current byte (which points to the first byte of the | |
66 // operands if present, or to the first byte of the next instruction if | |
67 // they are not). Adding the count of bytes in the operands encoded in | |
68 // the instruction gives us the full length of the instruction in bytes. | |
69 *instruction_bytes += operand_bytes_ + (current_byte - start_byte); | |
70 | |
71 // Return the instruction type, which was set by ProcessOpcode(). | |
72 return instruction_type_; | |
73 } | |
74 | |
75 void MiniDisassembler::Initialize() { | |
76 operand_is_32_bits_ = operand_default_is_32_bits_; | |
77 address_is_32_bits_ = address_default_is_32_bits_; | |
78 operand_bytes_ = 0; | |
79 have_modrm_ = false; | |
80 should_decode_modrm_ = false; | |
81 instruction_type_ = IT_UNKNOWN; | |
82 got_f2_prefix_ = false; | |
83 got_f3_prefix_ = false; | |
84 got_66_prefix_ = false; | |
85 } | |
86 | |
87 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, | |
88 unsigned int* size) { | |
89 InstructionType instruction_type = IT_GENERIC; | |
90 const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; | |
91 | |
92 switch (opcode.type_) { | |
93 case IT_PREFIX_ADDRESS: | |
94 address_is_32_bits_ = !address_default_is_32_bits_; | |
95 goto nochangeoperand; | |
96 case IT_PREFIX_OPERAND: | |
97 operand_is_32_bits_ = !operand_default_is_32_bits_; | |
98 nochangeoperand: | |
99 case IT_PREFIX: | |
100 | |
101 if (0xF2 == (*start_byte)) | |
102 got_f2_prefix_ = true; | |
103 else if (0xF3 == (*start_byte)) | |
104 got_f3_prefix_ = true; | |
105 else if (0x66 == (*start_byte)) | |
106 got_66_prefix_ = true; | |
107 | |
108 instruction_type = opcode.type_; | |
109 (*size)++; | |
110 // we got a prefix, so add one and check next byte | |
111 ProcessPrefixes(start_byte + 1, size); | |
112 default: | |
113 break; // not a prefix byte | |
114 } | |
115 | |
116 return instruction_type; | |
117 } | |
118 | |
119 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, | |
120 unsigned int table_index, | |
121 unsigned int* size) { | |
122 const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table | |
123 unsigned char current_byte = (*start_byte) >> table.shift_; | |
124 current_byte = current_byte & table.mask_; // Mask out the bits we will use | |
125 | |
126 // Check whether the byte we have is inside the table we have. | |
127 if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { | |
128 instruction_type_ = IT_UNKNOWN; | |
129 return instruction_type_; | |
130 } | |
131 | |
132 const Opcode& opcode = table.table_[current_byte]; | |
133 if (IT_UNUSED == opcode.type_) { | |
134 // This instruction is not used by the IA-32 ISA, so we indicate | |
135 // this to the user. Probably means that we were pointed to | |
136 // a byte in memory that was not the start of an instruction. | |
137 instruction_type_ = IT_UNUSED; | |
138 return instruction_type_; | |
139 } else if (IT_REFERENCE == opcode.type_) { | |
140 // We are looking at an opcode that has more bytes (or is continued | |
141 // in the ModR/M byte). Recursively find the opcode definition in | |
142 // the table for the opcode's next byte. | |
143 (*size)++; | |
144 ProcessOpcode(start_byte + 1, opcode.table_index_, size); | |
145 return instruction_type_; | |
146 } | |
147 | |
148 const SpecificOpcode* specific_opcode = reinterpret_cast< | |
149 const SpecificOpcode*>(&opcode); | |
150 if (opcode.is_prefix_dependent_) { | |
151 if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { | |
152 specific_opcode = &opcode.opcode_if_f2_prefix_; | |
153 } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { | |
154 specific_opcode = &opcode.opcode_if_f3_prefix_; | |
155 } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { | |
156 specific_opcode = &opcode.opcode_if_66_prefix_; | |
157 } | |
158 } | |
159 | |
160 // Inv: The opcode type is known. | |
161 instruction_type_ = specific_opcode->type_; | |
162 | |
163 // Let's process the operand types to see if we have any immediate | |
164 // operands, and/or a ModR/M byte. | |
165 | |
166 ProcessOperand(specific_opcode->flag_dest_); | |
167 ProcessOperand(specific_opcode->flag_source_); | |
168 ProcessOperand(specific_opcode->flag_aux_); | |
169 | |
170 // Inv: We have processed the opcode and incremented operand_bytes_ | |
171 // by the number of bytes of any operands specified by the opcode | |
172 // that are stored in the instruction (not registers etc.). Now | |
173 // we need to return the total number of bytes for the opcode and | |
174 // for the ModR/M or SIB bytes if they are present. | |
175 | |
176 if (table.mask_ != 0xff) { | |
177 if (have_modrm_) { | |
178 // we're looking at a ModR/M byte so we're not going to | |
179 // count that into the opcode size | |
180 ProcessModrm(start_byte, size); | |
181 return IT_GENERIC; | |
182 } else { | |
183 // need to count the ModR/M byte even if it's just being | |
184 // used for opcode extension | |
185 (*size)++; | |
186 return IT_GENERIC; | |
187 } | |
188 } else { | |
189 if (have_modrm_) { | |
190 // The ModR/M byte is the next byte. | |
191 (*size)++; | |
192 ProcessModrm(start_byte + 1, size); | |
193 return IT_GENERIC; | |
194 } else { | |
195 (*size)++; | |
196 return IT_GENERIC; | |
197 } | |
198 } | |
199 } | |
200 | |
201 bool MiniDisassembler::ProcessOperand(int flag_operand) { | |
202 bool succeeded = true; | |
203 if (AM_NOT_USED == flag_operand) | |
204 return succeeded; | |
205 | |
206 // Decide what to do based on the addressing mode. | |
207 switch (flag_operand & AM_MASK) { | |
208 // No ModR/M byte indicated by these addressing modes, and no | |
209 // additional (e.g. immediate) parameters. | |
210 case AM_A: // Direct address | |
211 case AM_F: // EFLAGS register | |
212 case AM_X: // Memory addressed by the DS:SI register pair | |
213 case AM_Y: // Memory addressed by the ES:DI register pair | |
214 case AM_IMPLICIT: // Parameter is implicit, occupies no space in | |
215 // instruction | |
216 break; | |
217 | |
218 // There is a ModR/M byte but it does not necessarily need | |
219 // to be decoded. | |
220 case AM_C: // reg field of ModR/M selects a control register | |
221 case AM_D: // reg field of ModR/M selects a debug register | |
222 case AM_G: // reg field of ModR/M selects a general register | |
223 case AM_P: // reg field of ModR/M selects an MMX register | |
224 case AM_R: // mod field of ModR/M may refer only to a general register | |
225 case AM_S: // reg field of ModR/M selects a segment register | |
226 case AM_T: // reg field of ModR/M selects a test register | |
227 case AM_V: // reg field of ModR/M selects a 128-bit XMM register | |
228 have_modrm_ = true; | |
229 break; | |
230 | |
231 // In these addressing modes, there is a ModR/M byte and it needs to be | |
232 // decoded. No other (e.g. immediate) params than indicated in ModR/M. | |
233 case AM_E: // Operand is either a general-purpose register or memory, | |
234 // specified by ModR/M byte | |
235 case AM_M: // ModR/M byte will refer only to memory | |
236 case AM_Q: // Operand is either an MMX register or memory (complex | |
237 // evaluation), specified by ModR/M byte | |
238 case AM_W: // Operand is either a 128-bit XMM register or memory (complex | |
239 // eval), specified by ModR/M byte | |
240 have_modrm_ = true; | |
241 should_decode_modrm_ = true; | |
242 break; | |
243 | |
244 // These addressing modes specify an immediate or an offset value | |
245 // directly, so we need to look at the operand type to see how many | |
246 // bytes. | |
247 case AM_I: // Immediate data. | |
248 case AM_J: // Jump to offset. | |
249 case AM_O: // Operand is at offset. | |
250 switch (flag_operand & OT_MASK) { | |
251 case OT_B: // Byte regardless of operand-size attribute. | |
252 operand_bytes_ += OS_BYTE; | |
253 break; | |
254 case OT_C: // Byte or word, depending on operand-size attribute. | |
255 if (operand_is_32_bits_) | |
256 operand_bytes_ += OS_WORD; | |
257 else | |
258 operand_bytes_ += OS_BYTE; | |
259 break; | |
260 case OT_D: // Doubleword, regardless of operand-size attribute. | |
261 operand_bytes_ += OS_DOUBLE_WORD; | |
262 break; | |
263 case OT_DQ: // Double-quadword, regardless of operand-size attribute. | |
264 operand_bytes_ += OS_DOUBLE_QUAD_WORD; | |
265 break; | |
266 case OT_P: // 32-bit or 48-bit pointer, depending on operand-size | |
267 // attribute. | |
268 if (operand_is_32_bits_) | |
269 operand_bytes_ += OS_48_BIT_POINTER; | |
270 else | |
271 operand_bytes_ += OS_32_BIT_POINTER; | |
272 break; | |
273 case OT_PS: // 128-bit packed single-precision floating-point data. | |
274 operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; | |
275 break; | |
276 case OT_Q: // Quadword, regardless of operand-size attribute. | |
277 operand_bytes_ += OS_QUAD_WORD; | |
278 break; | |
279 case OT_S: // 6-byte pseudo-descriptor. | |
280 operand_bytes_ += OS_PSEUDO_DESCRIPTOR; | |
281 break; | |
282 case OT_SD: // Scalar Double-Precision Floating-Point Value | |
283 case OT_PD: // Unaligned packed double-precision floating point value | |
284 operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; | |
285 break; | |
286 case OT_SS: | |
287 // Scalar element of a 128-bit packed single-precision | |
288 // floating data. | |
289 // We simply return enItUnknown since we don't have to support | |
290 // floating point | |
291 succeeded = false; | |
292 break; | |
293 case OT_V: // Word or doubleword, depending on operand-size attribute. | |
294 if (operand_is_32_bits_) | |
295 operand_bytes_ += OS_DOUBLE_WORD; | |
296 else | |
297 operand_bytes_ += OS_WORD; | |
298 break; | |
299 case OT_W: // Word, regardless of operand-size attribute. | |
300 operand_bytes_ += OS_WORD; | |
301 break; | |
302 | |
303 // Can safely ignore these. | |
304 case OT_A: // Two one-word operands in memory or two double-word | |
305 // operands in memory | |
306 case OT_PI: // Quadword MMX technology register (e.g. mm0) | |
307 case OT_SI: // Doubleword integer register (e.g., eax) | |
308 break; | |
309 | |
310 default: | |
311 break; | |
312 } | |
313 break; | |
314 | |
315 default: | |
316 break; | |
317 } | |
318 | |
319 return succeeded; | |
320 } | |
321 | |
322 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, | |
323 unsigned int* size) { | |
324 // If we don't need to decode, we just return the size of the ModR/M | |
325 // byte (there is never a SIB byte in this case). | |
326 if (!should_decode_modrm_) { | |
327 (*size)++; | |
328 return true; | |
329 } | |
330 | |
331 // We never care about the reg field, only the combination of the mod | |
332 // and r/m fields, so let's start by packing those fields together into | |
333 // 5 bits. | |
334 unsigned char modrm = (*start_byte); | |
335 unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field | |
336 modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field | |
337 mod = mod >> 3; // shift the mod field to the right place | |
338 modrm = mod | modrm; // combine the r/m and mod fields as discussed | |
339 mod = mod >> 3; // shift the mod field to bits 2..0 | |
340 | |
341 // Invariant: modrm contains the mod field in bits 4..3 and the r/m field | |
342 // in bits 2..0, and mod contains the mod field in bits 2..0 | |
343 | |
344 const ModrmEntry* modrm_entry = 0; | |
345 if (address_is_32_bits_) | |
346 modrm_entry = &s_ia32_modrm_map_[modrm]; | |
347 else | |
348 modrm_entry = &s_ia16_modrm_map_[modrm]; | |
349 | |
350 // Invariant: modrm_entry points to information that we need to decode | |
351 // the ModR/M byte. | |
352 | |
353 // Add to the count of operand bytes, if the ModR/M byte indicates | |
354 // that some operands are encoded in the instruction. | |
355 if (modrm_entry->is_encoded_in_instruction_) | |
356 operand_bytes_ += modrm_entry->operand_size_; | |
357 | |
358 // Process the SIB byte if necessary, and return the count | |
359 // of ModR/M and SIB bytes. | |
360 if (modrm_entry->use_sib_byte_) { | |
361 (*size)++; | |
362 return ProcessSib(start_byte + 1, mod, size); | |
363 } else { | |
364 (*size)++; | |
365 return true; | |
366 } | |
367 } | |
368 | |
369 bool MiniDisassembler::ProcessSib(unsigned char* start_byte, | |
370 unsigned char mod, | |
371 unsigned int* size) { | |
372 // get the mod field from the 2..0 bits of the SIB byte | |
373 unsigned char sib_base = (*start_byte) & 0x07; | |
374 if (0x05 == sib_base) { | |
375 switch (mod) { | |
376 case 0x00: // mod == 00 | |
377 case 0x02: // mod == 10 | |
378 operand_bytes_ += OS_DOUBLE_WORD; | |
379 break; | |
380 case 0x01: // mod == 01 | |
381 operand_bytes_ += OS_BYTE; | |
382 break; | |
383 case 0x03: // mod == 11 | |
384 // According to the IA-32 docs, there does not seem to be a disp | |
385 // value for this value of mod | |
386 default: | |
387 break; | |
388 } | |
389 } | |
390 | |
391 (*size)++; | |
392 return true; | |
393 } | |
394 | |
395 }; // namespace sidestep | |
OLD | NEW |