Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(60)

Side by Side Diff: src/trusted/validator_ragel/compress_regular_instructions.py

Issue 49183002: Regular instructions golden file test. Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/
Patch Set: Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/trusted/validator_ragel/testdata/32bit_regular.golden » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright (c) 2013 The Native Client Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """
6 Traverse the validator's DFA, collect all "normal" instructions and then
7 compress output. Note: "anybyte fields" (immediates and displacements)
8 are always filled with zeros. Otherwise processing of sextillions (sic!)
9 of possibilities will take too long.
10
11 Each rule is applied only when all variants are accepted by validator.
12 The following compression rules are present:
13
14 1. Compress ModR/M (+SIB & displacement).
15 Instruction: 00 00 add %al,(%rax)
16 ...
17 Instruction: 00 ff add %bh,%bh
18 becomes
19 Instruction: 00 XX add [%al..%bh],[%al..%bh or memory]
20
21 1a. Compress ModR/M (+SIB & displacement) memory-only.
22 Instruction: f0 01 00 lock add %eax,(%eax)
23 ...
24 Instruction: f0 01 bf 00 00 00 00 lock add %edi,0x0(%edi)
25 becomes
26 Instruction: f0 01 XX lock add [%eax..edi],[memory]
27
28 1b. Compress ModR/M register only.
29 Instruction: 66 0f 50 c0 movmskpd %xmm0,%eax
30 ...
31 Instruction: 66 0f 50 ff movmskpd %xmm7,%edi
32 becomes
33 Instruction: 66 0f 50 XX movmskpd [%xmm0..%xmm7],[%eax..edi]
34
35 2. Compress ModR/M (+SIB & displacement) with opcode extension.
36 Instruction: 0f 90 00 seto (%eax)
37 ...
38 Instruction: 0f 90 c7 seto %bh
39 becomes
40 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]
41
42 2a. Compress ModR/M (+SIB & displacement) memory-only with opcode extension.
43 Instruction: f0 ff 00 lock incl (%eax)
44 ...
45 Instruction: f0 ff 84 ff 00 00 00 00 lock incl 0x0(%edi,%edi,8)
46 becomes
47 Instruction: f0 ff XX/1 lock decl [memory]
48
49 2b. Compress ModR/M register-only with opcode extension.
50 Instruction: 0f 71 d0 00 psrlw $0x0,%mm0
51 ...
52 Instruction: 0f 71 d7 00 psrlw $0x0,%mm7
53 becomes
54 Instruction: 66 0f 71 XX/2 00 psrlw $0x0,[%mm0..%mm7]
55
56 3. Compress register-in-opcode.
57 Instruction: d9 c0 fld %st(0)
58 ...
59 Instruction: d9 c7 fld %st(7)
60 becomes
61 Instruction: Instruction: d9 c[0..7] fld [%st(0)..%st(7)]
62
63 Only applies if all possible register accesses are accepted by validator.
64
65 4. Special compressor for "set" instruction.
66 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]
67 ...
68 Instruction: 0f 90 XX/7 seto [%al..%bh or memory]
69 becomes
70 Instruction: 0f 90 XX seto [%al..%bh or memory]
71 """
72
73 import itertools
74 import multiprocessing
75 import optparse
76 import os
77 import re
78 import subprocess
79 import sys
80 import tempfile
81 import traceback
82
83 import dfa_parser
84 import dfa_traversal
85 import validator
86
87
88 # Register names in 'natual' order (as defined by IA32/x86-64 ABI)
89 #
90 # X86-64 ABI splits all registers in groups of 8 because it uses 3-bit field
91 # in opcode, ModR/M, and/or SIB bytes to encode them.
92 #
93 # In most cases there are 16 registers of a given kind and two such groups,
94 # but there are couple of exceptions:
95 # 1. There are 20 8-bit registers and three groups (two of them overlap)
96 # 2. There are eight X87 and MMX registers thus two groups are identical
97 #
98 # We use typical register from a group to name the whole group. Most groups
99 # are named by the first register but 'spl' group is named by the fifth register
100 # because it's first four registers are the same as 'al' group.
101 # We use mnemonic name 'mmalt' to represent the "evil mirror" of the 'mm0'
102 # group: it the same as 'mm0', but implies use of the appropriate REX bit.
103 REGISTERS = {
104 'al': [ 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh' ],
105 'spl': [ 'al', 'cl', 'dl', 'bl', 'spl', 'bpl', 'sil', 'dil' ],
106 'ax': [ 'ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di' ],
107 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi' ],
108 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi' ],
109 'r8b': [ 'r{}b'.format(N) for N in range(8,16) ],
110 'r8w': [ 'r{}w'.format(N) for N in range(8,16) ],
111 'r8d': [ 'r{}d'.format(N) for N in range(8,16) ],
112 'r8': [ 'r{}'.format(N) for N in range(8,16) ],
113 'mm0': [ 'mm{}'.format(N) for N in range(8) ],
114 'mmalt': [ 'mm{}'.format(N) for N in range(8) ],
115 'st(0)': [ 'st({})'.format(N) for N in range(8) ],
116 'xmm0': [ 'xmm{}'.format(N) for N in range(8) ],
117 'xmm8': [ 'xmm{}'.format(N) for N in range(8,16) ],
118 'ymm0': [ 'ymm{}'.format(N) for N in range(8) ],
119 'ymm8': [ 'ymm{}'.format(N) for N in range(8,16) ]
120 }
121
122
123 NOP = 0x90
124
125
126 def PadToBundleSize(bytes):
127 assert len(bytes) <= validator.BUNDLE_SIZE
128 return bytes + [NOP] * (validator.BUNDLE_SIZE - len(bytes))
129
130
131 # In x86-64 mode we have so-called 'restricted register' which is used to
132 # tie two groups together. Some instructions require particular value to
133 # be stored in this variable, while some accept any non-special restricted
134 # register (%ebp and %esp are special because they can only be accepted by
135 # a few 'special' instructions).
136 #
137 # You can find more details in the "NaCl SFI model on x86-64 systems" manual.
138 #
139 # We try to feed all possible 'restricted registers' into validator and then
140 # classify the instruction using this map. If set of acceptable 'restricted
141 # registers' is not here, then it's an error in validator.
142 ACCEPTABLE_X86_64_INPUTS = {
143 0x00001: 'input_rr=%eax',
144 0x00002: 'input_rr=%ecx',
145 0x00004: 'input_rr=%edx',
146 0x00008: 'input_rr=%ebx',
147 0x00010: 'input_rr=%esp',
148 0x00020: 'input_rr=%ebp',
149 0x00040: 'input_rr=%esi',
150 0x00080: 'input_rr=%edi',
151 0x00100: 'input_rr=%r8d',
152 0x00200: 'input_rr=%r9d',
153 0x00400: 'input_rr=%r10d',
154 0x00800: 'input_rr=%r11d',
155 0x01000: 'input_rr=%r12d',
156 0x02000: 'input_rr=%r13d',
157 0x04000: 'input_rr=%r14d',
158 0x08000: 'input_rr=%r15d',
159 0x1ffcf: 'input_rr=any_nonspecial'
160 }
161
162 # Any instruction must produce either None or one of fifteen registers as an
163 # output 'restricted register' value. 'r15d' is NOT acceptable as an output.
164 ACCEPTABLE_X86_64_OUTPUT_REGISTERS = tuple(
165 '%' + reg for reg in (REGISTERS['eax'] + REGISTERS['r8d'])[0:-1])
166
167
168 def ValidateInstruction(instruction, validator_inst):
169 bundle = ''.join(map(chr, PadToBundleSize(instruction)))
170 if options.bitness == 32:
171 result = validator_inst.ValidateChunk(bundle, bitness=32)
172 return result, []
173 else:
174 valid_inputs = 0
175 known_final_rr = None
176 output_rr = None
177 # Note that iteration order is aligned with ACCEPTABLE_X86_64_INPUTS array
178 # above.
179 for bit, initial_rr in enumerate(validator.ALL_REGISTERS + [None]):
180 valid, final_rr = validator_inst.ValidateAndGetFinalRestrictedRegister(
181 bundle, len(instruction), initial_rr)
182 if valid:
183 # final_rr should not depend on input_rr
184 assert valid_inputs == 0 or known_final_rr == final_rr
185 valid_inputs |= 1 << bit
186 known_final_rr = final_rr
187 # If nothing is accepted then instruction is not valid. Easy and simple.
188 if valid_inputs == 0: return False, []
189 # If returned value in unacceptable we'll get IndexError here and this
190 # test will fail
191 if known_final_rr is not None:
192 output_rr = ACCEPTABLE_X86_64_OUTPUT_REGISTERS[known_final_rr]
193 # If collected valid_inputs are unacceptable we'll get KeyError here and
194 # this test will fail
195 return True, [ACCEPTABLE_X86_64_INPUTS[valid_inputs],
196 'output_rr={}'.format(output_rr)]
197
198
199 class WorkerState(object):
200 def __init__(self, prefix, validator):
201 self.total_instructions = 0
202 self.num_valid = 0
203 self.validator = validator
204 self.output = set()
205 self.trace = []
206
207
208 def ReceiveInstruction(self, bytes):
209 self.total_instructions += 1
210 result, notes = ValidateInstruction(bytes, self.validator)
211 if result:
212 self.num_valid += 1
213 dis = self.validator.DisassembleChunk(
214 ''.join(map(chr, bytes)),
215 bitness=options.bitness)
216 for line_nr in xrange(len(dis)):
217 dis[line_nr] = str(dis[line_nr])
218 assert dis[line_nr][0:17] == 'Instruction(0x' + str(line_nr) + ': '
219 assert dis[line_nr][-1:] == ')'
220 dis[line_nr] = dis[line_nr][17:-1]
221 # If %rip is involved then comment will be different depending on the
222 # instruction length. Eliminate it.
223 if '(%rip)' in dis[0]:
224 dis[0] = re.sub(' # 0x[ ]*[0-9a-fA-F]*', '', dis[0])
225 # Zero displacements are represented as 0x0 for all instructions except
226 # jumps where they disassembled as non-zero due to %eip/%rip-relative
227 # addressing. We replace this displacement with %eip/%rip to simplify
228 # compression.
229 if ' 0x' in dis[0] and ' 0x0' not in dis[0]:
230 for bytes in xrange(1, 16):
231 dis[0] = re.sub(
232 '(' + '(?:[0-9a-fA-F][0-9a-fA-F] ){' + str(bytes) + '} .* )' +
233 hex(bytes) + '(.*)',
234 r'\1%eip\2' if options.bitness == 32 else r'\1%rip\2',
235 dis[0]);
236 dis[0] = 'Instruction: ' + dis[0]
237 dis += notes
238 self.output.add('; '.join(dis))
239
240
241 def RecordTrace(self, compressor_nr, instruction):
242 self.trace.append((compressor_nr, instruction))
243
244
245 # Compressor has three slots: regex (which picks apart given instruction),
246 # subst (which is used to create the compressed version) and replacements (which
247 # are used to generate set of instructions from a given code).
248 #
249 # Example compressor:
250 # regex = r'.*?[0-9a-fA-F]([0-7]) \w* (%e(?:[abcd]x|[sb]p|[sd]i)).*()'
251 # subst = ('[0-7]', '[%eax..%edi]', ' # register in opcode')
252 # replacements = ((0, '%eax', ''), (1, '%ecx', ''),
253 # (2, '%edx', ''), (3, '%ebx', '')
254 # (4, '%esp', ''), (5, '%ebp', ''),
255 # (6, '%esi', ''), (7, '%edi', ''))
256 #
257 # When faced with instruction '40 inc %eax' it will capture the following
258 # pieces of said instruction: '4{0} inc {%eax}{}'.
259 #
260 # Then it will replace groups to produce the following 8 instructions:
261 # '40 inc %eax'
262 # '41 inc %ecx'
263 # '42 inc %edx'
264 # '43 inc %ebx'
265 # '44 inc %esp'
266 # '45 inc %ebp'
267 # '46 inc %esi'
268 # '47 inc %edi'
269 #
270 # If all these instructions can be found in a set of instructions then
271 # compressor will remove them from said set and will substitute them with
272 # one "compressed instruction" '4[0-7] inc [%eax..%edi] # register in opcode'.
273 class Compressor(object):
274 __slots__ = [
275 'regex',
276 'subst',
277 'replacements'
278 ]
279
280 def __init__(self, regex, subst, replacements):
281 self.regex = re.compile(regex)
282 self.subst = subst
283 self.replacements = replacements
284
285
286 def CompressionTemplate(instruction, match, mark):
287 """ Replace all match groups with the mark. """
288 pos = 0
289 format_str = ''
290 for group in range(0, len(match.groups())):
291 format_str += instruction[pos:match.start(group + 1)] + mark
292 pos = match.end(group + 1)
293 return format_str + instruction[pos:]
294
295
296 def CompressOneMatch(instructions, instruction, match, compressor):
297 format_str = CompressionTemplate(instruction, match, '{}')
298 subset = set()
299 for replacement in compressor.replacements:
300 replacement_str = format_str.format(*replacement)
301 if not replacement_str in instructions:
302 return (False, instructions)
303 subset.add(replacement_str)
304 assert instruction in subset
305 instructions -= subset
306 instructions.add(format_str.format(*compressor.subst))
307 return (True, instructions)
308
309
310 def CompressOneInstruction(instructions, compressors, split, cache):
311 sorted_instructions = (sorted(i for i in instructions if i > split) +
312 sorted(i for i in instructions if i < split))
313 for instruction in sorted_instructions:
314 if instruction in cache:
315 compressors_list = cache[instruction]
316 for compressor_nr, match, compressor in compressors_list:
317 result, instructions = CompressOneMatch(
318 instructions, instruction, match, compressor)
319 if result:
320 return (instructions, compressor_nr, instruction)
321 else:
322 compressors_list = []
323 for compressor_nr, compressor in enumerate(compressors):
324 match = compressor.regex.match(instruction)
325 if match:
326 compressors_list.append((compressor_nr, match, compressor))
327 result, instructions = CompressOneMatch(
328 instructions, instruction, match, compressor)
329 if result:
330 return (instructions, compressor_nr, instruction)
331 cache[instruction] = compressors_list
332 return (instructions, False, False)
333
334
335 def Compressed(instructions, compressors, show_progress):
336 split = ''
337 cache = {}
338 while True:
339 instructions, rule, split = CompressOneInstruction(
340 instructions, compressors, split, cache)
341 if rule is False: break
342 show_progress(rule, split)
343 return instructions
344
345
346 def Worker((prefix, state_index)):
347 worker_state = WorkerState(prefix, worker_validator)
348
349 try:
350 dfa_traversal.TraverseTree(
351 dfa.states[state_index],
352 final_callback=worker_state.ReceiveInstruction,
353 prefix=prefix,
354 anyfield=0)
355 if (prefix[0] != 0x0f or prefix[1] != 0x0f): # Skip 3DNow! instructions
356 worker_state.output = Compressed(set(worker_state.output),
357 compressors,
358 worker_state.RecordTrace)
359 except Exception as e:
360 traceback.print_exc() # because multiprocessing imap swallows traceback
361 raise
362
363 return (
364 prefix,
365 worker_state.total_instructions,
366 worker_state.num_valid,
367 worker_state.output,
368 worker_state.trace)
369
370
371 def ParseOptions():
372 parser = optparse.OptionParser(usage='%prog [options] xmlfile')
373
374 parser.add_option('--bitness',
375 choices=['32', '64'],
376 help='The subarchitecture: 32 or 64')
377 parser.add_option('--validator_dll',
378 help='Path to librdfa_validator_dll')
379 parser.add_option('--decoder_dll',
380 help='Path to librdfa_decoder_dll')
381
382 options, args = parser.parse_args()
383 options.bitness = int(options.bitness)
384
385 if len(args) != 1:
386 parser.error('specify one xml file')
387
388 (xml_file, ) = args
389
390 return options, xml_file
391
392
393 # Version suitable for use in regular expressions
394 REGISTERS_RE = REGISTERS.copy()
395 REGISTERS_RE['st(0)'] = [ r'st\({}\)'.format(N) for N in range(8) ]
396
397 # Index names in 'natual' order (as defined by IA32/x86-64 ABI)
398 INDEXES = {
399 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'eiz', 'ebp', 'esi', 'edi' ],
400 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'riz', 'rbp', 'rsi', 'rdi' ],
401 'r8': [ 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15' ]
402 }
403 # Register which can be used as base in 64-bit mode in all their incarnations.
404 X86_64_BASE_REGISTERS = set([
405 '%spl', '%bpl', '%r15b',
406 '%sp', '%bp', '%r15w',
407 '%esp', '%ebp', '%r15d',
408 '%rsp', '%rbp', '%r15',
409 '%rip'
410 ])
411
412
413 def InstructionIsDangerous(input, output, register_write,
414 writes_to, memory_accessed=False,
415 base_text='%riz', index_text='%riz'):
416 """ Check if instruction with given replacements will be dangerous
417
418 Args:
419 input: input argument
420 output: output argument
421 register_write: three-state selector
422 'sandbox' - instruction can be used to produce "restricted register"
423 'protect' - instruction can damage output, protect "special registers"
424 'ignore' - instruction does not affect it's operands (e.g. test) or
425 is used with non-GP registers (X87, MMX, XMM, etc)
426 memory_accessed: True if instruction accesses memory
427 base: base register (if memory is accessed)
428 index: index register (if memory is accessed)
429
430 Returns:
431 True if instruction should be rejected by validator
432 """
433 if memory_accessed:
434 if base_text not in X86_64_BASE_REGISTERS:
435 return True
436 # Surprisingly enough %r15 is considered "valid index register" by our
437 # validator: it can not be ever produced as "restricted register" which
438 # means that such instruction can not ever occur in real program (it's
439 # guaranteed because ACCEPTABLE_X86_64_OUTPUT_REGISTERS excludes it), but
440 # when instruction is tested in isolation it's allowed.
441 if index_text in X86_64_BASE_REGISTERS - set(['%r15']):
442 return True
443 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:
444 return True
445 if register_write == 'sandbox' and output == '%r15d':
446 return True
447 if writes_to == 'both' and input in X86_64_BASE_REGISTERS:
448 return True
449 return False
450
451
452 def AppendOperandsReplacement(replacement, rm_text, reg, modrm, writes_to):
453 """ Appends replacement text to replacement list
454
455 Args:
456 replacement: replacement list
457 rm_text: replacement for rm field
458 reg: register kind (or None if reg field is used as opcode extension)
459 modrm: modrm byte
460 writes_to: three-state selector
461 'reg' - instruction uses rm as source, reg as destination
462 'rm' - instruction uses reg as source, rm as destination
463 'both' - instruction writes to both reg and rm
464
465 Returns:
466 input: textual representation of input argument
467 output: textual representation of output argument
468
469 Side-effect:
470 output (if reg is None) or (input, output) tuple (if reg is not None)
471 are added to replacement list.
472 """
473 if reg is None:
474 assert writes_to == 'rm'
475 input, output = None, rm_text
476 replacement.append(output)
477 else:
478 reg_field = (modrm >> 3) & 0x07
479 reg_text = '%' + REGISTERS[reg][reg_field]
480 if writes_to == 'reg':
481 input, output = rm_text, reg_text
482 else: # rm, both
483 input, output = reg_text, rm_text
484 replacement.extend([input, output])
485 return input, output
486
487
488 def ModRMRegisterReplacements(rm, reg=None, writes_to='rm', opcode_bits=0,
489 register_write='ignore', note=''):
490 """Creates replacement tuples list for register-to-register instructions
491
492 Args:
493 rm: rm operand kind (see REGISTERS array)
494 reg: reg operand kind (see REGISTERS array) or None if reg is not used
495 writes_to: three-state selector
496 'reg' - instruction uses rm as source, reg as destination
497 'rm' - instruction uses reg as source, rm as destination
498 'both' - instruction writes to both reg and rm
499 opcode_bits: opcode extensions code (used when reg is None)
500 register_write: three-state selector
501 'sandbox' - instruction can be used to produce "restricted register"
502 'protect' - instruction can damage output, protect "special registers"
503 'ignore' - instruction does not affect it's operands (e.g. test) or
504 is used with non-GP registers (X87, MMX, XMM, etc)
505 note: note to include at the end of input instruction
506 Returns:
507 List of replacement tuples
508 """
509 # Reg field can be used either as reg or as opcode extension, but not both
510 assert reg is None or opcode_bits == 0
511
512 output_key = (options.bitness, reg, rm, writes_to, opcode_bits,
513 register_write)
514 if output_key in ModRMRegisterReplacements.replacements:
515 return ModRMRegisterReplacements.replacements[output_key]
516
517 replacements = []
518
519 # Two upper bits of ModR/M byte (mod field) must be equal to 11
520 # This gives us range from 0xc0 to 0xff but we are going from the
521 # end to make rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).
522 if reg is None:
523 # reg field is used as opcode extension
524 byte_range = [byte
525 for byte in range(0xff, 0xbf, -1)
526 if (byte >> 3) & 0x7 == opcode_bits]
527 else:
528 byte_range = range(0xff, 0xbf, -1)
529
530 for modrm in byte_range:
531 rm_field = (modrm & 0x07)
532 rm_text = '%' + REGISTERS[rm][rm_field]
533 byte_text = '{:02x}'.format(modrm)
534 replacement = [byte_text]
535 input, output = AppendOperandsReplacement(
536 replacement, rm_text, reg, modrm, writes_to)
537 if options.bitness == 64:
538 replacement.append('any_nonspecial') # input_rr
539 replacement.append(output if register_write == 'sandbox' else None)
540 if InstructionIsDangerous(input, output, register_write, writes_to):
541 continue
542 replacement.append(note)
543 replacements.append(tuple(replacement))
544 ModRMRegisterReplacements.replacements[output_key] = tuple(replacements)
545 return ModRMRegisterReplacements.replacements[output_key]
546 ModRMRegisterReplacements.replacements = {}
547
548
549 def BaseOnlyMemoryOperand(modrm, base):
550 """Creates replacement tuples list for register-to-memory instructions
551 (base only, no SIB)
552
553 Args:
554 modrm: modrm byte
555 base: register kind for base
556 Returns:
557 bytes_text: replacement for "bytes" group
558 rm_text: textual representation of "rm" argument
559 base_text: textual representation of "base" register
560 """
561 mod_field = (modrm >> 6) & 0x03
562 rm_field = (modrm & 0x07)
563 base_text = '%' + REGISTERS[base][rm_field]
564 # If RM field == %rbp and MOD field is zero then it's absolute address
565 # in 32-bit mode and %rip-based address in 64-bit mode
566 if mod_field == 0 and rm_field == validator.REG_RBP:
567 bytes_text = '{:02x} 00 00 00 00'.format(modrm)
568 rm_text = '0x0' if options.bitness == 32 else '0x0(%rip)'
569 base_text = '%eiz' if options.bitness == 32 else '%rip'
570 # Memory access with just a base register
571 elif mod_field == 0:
572 bytes_text = '{:02x}'.format(modrm)
573 rm_text = '({})'.format(base_text)
574 # Memory access with base and 8bit offset
575 elif mod_field == 1:
576 bytes_text = '{:02x} 00'.format(modrm)
577 rm_text = '0x0({})'.format(base_text)
578 # Memory access with base and 32bit offset
579 else: # mod_field == 2
580 bytes_text = '{:02x} 00 00 00 00'.format(modrm)
581 rm_text = '0x0({})'.format(base_text)
582 return bytes_text, rm_text, base_text
583
584
585 def SIBMemoryOperand(modrm, sib, base, index):
586 """Creates replacement tuples list for register-to-memory instructions
587 (base only, no SIB)
588
589 Args:
590 modrm: modrm byte
591 base: register kind for base
592 Returns:
593 bytes_text: replacement for "bytes" group
594 rm_text: textual representation of "rm" argument
595 base_text: textual representation of "base" register
596 index_text: textual representation of "index" register
597 """
598 mod_field = (modrm >> 6) & 0x03
599 scale_field = (sib >> 6) & 0x03
600 index_field = (sib >> 3) & 0x07
601 base_field = (sib & 0x07)
602 index_text = '%' + INDEXES[index][index_field]
603 base_text = '%' + REGISTERS[base][base_field]
604 scale_text = str(1 << scale_field)
605 # If BASE is %rbp and MOD == 0 then index with 32bit offset is used
606 if mod_field == 0 and base_field == validator.REG_RBP:
607 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)
608 if (options.bitness == 64 and
609 index_text == '%riz' and
610 scale_text == '1'):
611 rm_text = '0x0'
612 else:
613 rm_text = '0x0(,{},{})'.format(index_text, scale_text)
614 # There are no base in this case
615 base_text = '%eiz' if options.bitness == 32 else '%riz'
616 # Memory access with base and index (no offset)
617 elif mod_field == 0:
618 bytes_text = '{:02x} {:02x}'.format(modrm, sib)
619 rm_text = '({},{},{})'.format(base_text, index_text, scale_text)
620 # Memory access with base, index and 8bit offset
621 elif mod_field == 1:
622 bytes_text = '{:02x} {:02x} 00'.format(modrm, sib)
623 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)
624 # Memory access with base, index and 32bit offset
625 elif mod_field == 2:
626 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)
627 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)
628 # Pretty-printing of access via %rsp (or %r12)
629 if (base_field == validator.REG_RSP and
630 index_text in ('%eiz', '%riz') and
631 scale_text == '1'):
632 if mod_field == 0: # no offset
633 rm_text = '({})'.format(base_text)
634 else: # 8-bit or 32-bit offset
635 rm_text = '0x0({})'.format(base_text)
636 return bytes_text, rm_text, base_text, index_text
637
638
639 def ModRMMemoryReplacements(reg=None, writes_to='rm', opcode_bits=0,
640 memory_accessed=True, register_write='ignore',
641 base_r8=False, index_r8=False, note=''):
642 """Creates replacement tuples list for register-to-memory instructions
643
644 Args:
645 reg: reg operand kind (see REGISTERS array) or None if reg is not used
646 writes_to: three-state selector
647 'reg' - instruction uses rm as source, reg as destination
648 'rm' - instruction uses reg as source, rm as destination
649 'both' - instruction writes to both reg and rm
650 opcode_bits: opcode extensions code (used when reg is None)
651 memory_accessed: True if instruction accesses memory
652 register_write: three-state selector
653 'sandbox' - instruction can be used to produce "restricted register"
654 'protect' - instruction can damage output, protect "special registers"
655 'ignore' - instruction does not affect it's operands (e.g. test) or
656 is used with non-GP registers (X87, MMX, XMM, etc)
657 index_r8: True if REX.X bit in the instruction set to 1
658 note: note to include at the end of input instruction
659
660 Returns:
661 List of replacement tuples
662 """
663 # Reg field can be used either as reg or as opcode extension, but not both
664 assert reg is None or opcode_bits == 0
665
666 output_key = (options.bitness, reg, writes_to, opcode_bits,
667 base_r8, index_r8, memory_accessed, register_write)
668 if output_key in ModRMMemoryReplacements.replacements:
669 return ModRMMemoryReplacements.replacements[output_key]
670
671 if options.bitness == 32:
672 base = 'eax'
673 index = 'eax'
674 else:
675 base = 'r8' if base_r8 else 'rax'
676 index = 'r8' if index_r8 else 'rax'
677
678 replacements = []
679
680 # Two upper bits of ModR/M byte (mod field) must be equal to 00, 01, or 10
681 # This gives us range from 0x00 to 0xbf but we are going from the end to make
682 # rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).
683 if reg is None:
684 # reg field is used as opcode extension
685 byte_range = [byte
686 for byte in range(0xbf, -1, -1)
687 if (byte >> 3) & 0x7 == opcode_bits]
688 else:
689 byte_range = range(0xbf, -1, -1)
690
691 for modrm in byte_range:
692 # If RM field != %rsp then there are no SIB byte
693 if (modrm & 0x07) != validator.REG_RSP:
694 bytes_text, rm_text, base_text = BaseOnlyMemoryOperand(modrm, base)
695 replacement = [bytes_text]
696 input, output = AppendOperandsReplacement(
697 replacement, rm_text, reg, modrm, writes_to)
698 if options.bitness == 64:
699 replacement.append('any_nonspecial')
700 # If writes_to is equal to 'reg' then output is register
701 if writes_to == 'reg' and register_write == 'sandbox':
702 replacement.append(output)
703 else:
704 # Note that instruction like xchg could write to another register:
705 # it's "input"! But validator currently does not support this case
706 # thus we are ignoring it here and writing "None" in this case, too.
707 replacement.append(None)
708 if InstructionIsDangerous(input, output, register_write, writes_to,
709 memory_accessed, base_text):
710 continue
711 replacement.append(note)
712 replacements.append(tuple(replacement))
713 else:
714 # If RM field == %rsp then we have SIB byte
715 for sib in xrange(0x100):
716 bytes_text, rm_text, base_text, index_text = SIBMemoryOperand(
717 modrm, sib, base, index)
718 replacement = [bytes_text]
719 input, output = AppendOperandsReplacement(
720 replacement, rm_text, reg, modrm, writes_to)
721 if options.bitness == 64:
722 if not memory_accessed or index_text == '%riz':
723 replacement.append('any_nonspecial')
724 else:
725 if index_r8:
726 # Convert %r8 to %r8d, %r9 to %r9d, etc
727 replacement.append(index_text + 'd')
728 else:
729 # Convert %rax to %eax, %rsp to %esp, etc
730 replacement.append('%e' + index_text[2:])
731 # If writes_to is equal to 'reg' then output is register
732 if writes_to == 'reg' and register_write == 'sandbox':
733 replacement.append(output)
734 else:
735 # Note that instruction like xchg could write to another register:
736 # it's "input"! But validator currently does not support this case
737 # thus we are ignoring it here and writing "None" in this case, too.
738 replacement.append(None)
739 if InstructionIsDangerous(input, output, register_write, writes_to,
740 memory_accessed, base_text, index_text):
741 continue
742 replacement.append(note)
743 replacements.append(tuple(replacement))
744 ModRMMemoryReplacements.replacements[output_key] = tuple(replacements)
745 return ModRMMemoryReplacements.replacements[output_key]
746 ModRMMemoryReplacements.replacements = {}
747
748
749 # Map from "REX bit off" group of registers to "REX bit on" group of registers
750 r8 = {
751 'al': 'r8b',
752 'ax': 'r8w',
753 'eax': 'r8d',
754 'rax': 'r8',
755 'mm0': 'mmalt',
756 'xmm0': 'xmm8',
757 'ymm0': 'ymm8'
758 }
759
760
761 def RegisterKinds():
762 """ Return list of register kinds we process with register compressors """
763
764 if options.bitness == 32:
765 return ('al', 'ax', 'eax', 'mm0', 'xmm0', 'ymm0')
766 else:
767 return ('al', 'spl', 'ax', 'eax', 'rax', 'mm0', 'xmm0', 'ymm0',
768 'r8b', 'r8w', 'r8d', 'r8', 'mmalt', 'xmm8', 'ymm8')
769
770
771 def RegisterKindPairs():
772 """ Return hand-picked pairs which we must consider in compressors """
773
774 if options.bitness == 32:
775 return (
776 ( 'al', 'al'),
777 ( 'ax', 'al'),
778 ( 'al', 'ax'),
779 ( 'ax', 'ax'),
780 ( 'eax', 'al'),
781 ( 'al', 'eax'),
782 ( 'eax', 'ax'),
783 ( 'ax', 'eax'),
784 ( 'eax', 'eax'),
785 ( 'eax', 'mm0'),
786 ( 'mm0', 'eax'),
787 ( 'eax', 'xmm0'),
788 ('xmm0', 'eax'),
789 ( 'mm0', 'mm0'),
790 ( 'mm0', 'xmm0'),
791 ('xmm0', 'mm0'),
792 ('xmm0', 'xmm0'),
793 ('xmm0', 'ymm0'),
794 ('ymm0', 'xmm0'),
795 ('ymm0', 'ymm0')
796 )
797 else:
798 return (
799 ( 'al', 'al'),
800 ( 'spl', 'spl'), ( 'spl', 'r8b'), ( 'r8b', 'spl'), ( 'r8b', 'r8b'),
801 ( 'ax', 'al'),
802 ( 'ax', 'spl'), ( 'ax', 'r8b'), ( 'r8w', 'spl'), ( 'r8w', 'r8b'),
803 ( 'al', 'ax'),
804 ( 'spl', 'ax'), ( 'spl', 'r8w'), ( 'r8b', 'ax'), ( 'r8b', 'r8w'),
805 ( 'ax', 'ax'), ( 'ax', 'r8w'), ( 'r8w', 'ax'), ( 'r8w', 'r8w'),
806 ( 'eax', 'al'),
807 ( 'eax', 'spl'), ( 'eax', 'r8b'), ( 'r8d', 'spl'), ( 'r8d', 'r8b'),
808 ( 'al', 'eax'),
809 ( 'spl', 'eax'), ( 'spl', 'r8d'), ( 'r8b', 'eax'), ( 'r8b', 'r8d'),
810 ( 'eax', 'ax'), ( 'eax', 'r8w'), ( 'r8d', 'ax'), ( 'r8d', 'r8w'),
811 ( 'ax', 'eax'), ( 'ax', 'r8d'), ( 'r8w', 'eax'), ( 'r8w', 'r8d'),
812 ( 'eax', 'eax'), ( 'eax', 'r8d'), ( 'r8d', 'eax'), ( 'r8d', 'r8d'),
813 ( 'rax', 'al'),
814 ( 'rax', 'spl'), ( 'rax', 'r8b'), ( 'r8', 'spl'), ( 'r8', 'r8b'),
815 ( 'al', 'rax'),
816 ( 'spl', 'rax'), ( 'spl', 'r8'), ( 'r8b', 'rax'), ( 'r8b', 'r8'),
817 ( 'rax', 'ax'), ( 'rax', 'r8w'), ( 'r8', 'ax'), ( 'r8', 'r8w'),
818 ( 'ax', 'rax'), ( 'ax', 'r8'), ( 'r8w', 'rax'), ( 'r8w', 'r8'),
819 ( 'rax', 'eax'), ( 'rax', 'r8d'), ( 'r8', 'eax'), ( 'r8', 'r8d'),
820 ( 'eax', 'rax'), ( 'eax', 'r8'), ( 'r8d', 'rax'), ( 'r8d', 'r8'),
821 ( 'rax', 'rax'), ( 'rax', 'r8'), ( 'r8', 'rax'), ( 'r8', 'r8'),
822 ( 'eax', 'mm0'), ( 'eax','mmalt'), ( 'r8d', 'mm0'), ( 'eax', 'mmalt'),
823 ( 'rax', 'mm0'), ( 'rax','mmalt'), ( 'r8', 'mm0'), ( 'r8', 'mmalt'),
824 ( 'mm0', 'eax'), ('mmalt', 'eax'), ( 'mm0', 'r8d'), ('mmalt', 'r8d'),
825 ( 'mm0', 'rax'), ('mmalt', 'rax'), ( 'mm0', 'r8'), ('mmalt', 'r8'),
826 ( 'eax', 'xmm0'), ( 'eax', 'xmm8'), ( 'r8d', 'xmm0'), ( 'r8d', 'xmm8'),
827 ( 'rax', 'xmm0'), ( 'rax', 'xmm8'), ( 'r8', 'xmm0'), ( 'r8', 'xmm8'),
828 ('xmm0', 'eax'), ('xmm0', 'r8d'), ('xmm8', 'eax'), ('xmm8', 'r8d'),
829 ('xmm0', 'rax'), ('xmm0', 'r8'), ('xmm8', 'rax'), ('xmm8', 'r8'),
830 ( 'mm0', 'mm0'), ('mmalt', 'mm0'), ( 'mm0','mmalt'), ('mmalt','mmalt'),
831 ( 'mm0', 'xmm0'), ('mmalt','xmm0'), ( 'mm0', 'xmm8'), ('mmalt', 'xmm8'),
832 ('xmm0', 'mm0'), ('xmm8', 'mm0'), ('xmm0','mmalt'), ('xmm8', 'mmalt'),
833 ('xmm0', 'xmm0'), ('xmm0', 'xmm8'), ('xmm8', 'xmm0'), ('xmm8', 'xmm8'),
834 ('xmm0', 'ymm0'), ('xmm0', 'ymm8'), ('xmm8', 'ymm0'), ('xmm8', 'ymm8'),
835 ('ymm0', 'xmm0'), ('ymm0', 'xmm8'), ('ymm8', 'xmm0'), ('ymm8', 'xmm8'),
836 ('ymm0', 'ymm0'), ('ymm0', 'ymm8'), ('ymm8', 'ymm0'), ('ymm8', 'ymm8')
837 )
838
839
840 def RegisterRegex(register_kind, opcode_bits = 0):
841 """ Returns modrm regex for the first register """
842
843 regex_mod = '^.*?({:02x})'.format(0xc0 + opcode_bits * 8)
844 regex_rm = '(%' + REGISTERS[register_kind][0] + ')'
845 return regex_mod, regex_rm
846
847
848 def MemoryRegex(base_r8, index_r8, opcode_bits = 0):
849 """ Returns one regex for bytes and one for the textual representation. """
850
851 # We only need to process ModR/M+SIB '04 04' or '04 07' here. We only match
852 # memory operands without offset, so we don't need to add zeros to the end of
853 # regex_modrm and regex_modrm regular expressions.
854 #
855 # We pick version which is "always there" (in both 32bit and 64bit mode) and
856 # which is not 0x00 (to avoid confusion with offsets and immediates).
857 if base_r8:
858 regex_modrm_sib = '^.*?({:02x} 07)'.format(0x04 + opcode_bits * 8)
859 else:
860 regex_modrm_sib = '^.*?({:02x} 04)'.format(0x04 + opcode_bits * 8)
861 if options.bitness == 32:
862 regex_mem = r'(\(%esp,%eax,1\))'
863 elif base_r8:
864 regex_mem = r'(\(%r15,%r8,1\))' if index_r8 else r'(\(%r15,%rax,1\))'
865 else:
866 regex_mem = r'(\(%rsp,%r8,1\))' if index_r8 else r'(\(%rsp,%rax,1\))'
867 return regex_modrm_sib, regex_mem
868
869
870 def RegToRmCompressors(rm, reg, writes_to, memory_accessed=True,
871 register_write='ignore', index_r8=False,
872 rm_compression='register', is_3Dnow=False, notes=''):
873 """Returns a list of reg <-> rm compressors for a given set of parameters.
874
875 Args:
876 rm: rm operand kind (see REGISTERS array)
877 reg: reg operand kind (see REGISTERS array) or None if reg is not used
878 writes_to: three-state selector
879 'reg' - instruction uses rm as source, reg as destination
880 'rm' - instruction uses reg as source, rm as destination
881 'both' - instruction writes to both reg and rm
882 memory_accessed: True if instruction accesses memory
883 register_write: three-state selector
884 'sandbox' - instruction can be used to produce "restricted register"
885 'protect' - instruction can damage output, protect "special registers"
886 'ignore' - instruction does not affect it's operands (e.g. test) or
887 is used with non-GP registers (X87, MMX, XMM, etc)
888 index_r8: True if REX.X bit in the instruction set to 1
889 rm_compression: three-state selector
890 'register' - instruction supports register-only rm parameter
891 'memory' - instruction supports memory-only rm parameter
892 'register_or_memory' - instruction supports all kinds of rm parameters
893 is_3DNow - True if instruction is 3DNow! style (opcode in immidiate)
894 notes - Notes to add to the description
895
896 Returns:
897 List of compressors
898 """
899
900 start_reg = REGISTERS[reg][0]
901 end_reg = REGISTERS[reg][-1]
902 # Exclude r15 from the interval, if instruction can write to reg.
903 if (end_reg.startswith('r15') and register_write != 'ignore' and
904 writes_to in ('reg', 'both')):
905 end_reg = REGISTERS[reg][-2]
906 start_rm = REGISTERS[rm][0]
907 end_rm = REGISTERS[rm][-1]
908 # Exclude r15 from the interval, if instruction can write to rm.
909 if (end_rm.startswith('r15') and register_write != 'ignore' and
910 writes_to in ('rm', 'both')):
911 end_rm = REGISTERS[rm][-2]
912 # Prepare substitution texts
913 subst_reg = '[%{}..%{}]'.format(start_reg, end_reg)
914 if rm_compression == 'register':
915 subst_rm = '[%{}..%{}]'.format(start_rm, end_rm)
916 elif rm_compression == 'register_or_memory':
917 subst_rm = '[%{}..%{} or memory]'.format(start_rm, end_rm)
918 else:
919 assert rm_compression == 'memory'
920 subst_rm = '[memory]'
921 base_r8 = rm in r8.values()
922 regex, regex_reg = RegisterRegex(reg)
923 if rm_compression == 'register':
924 regex, regex_rm = RegisterRegex(rm)
925 else: # memory and register_or_memory
926 regex, regex_rm = MemoryRegex(base_r8, index_r8)
927 if is_3Dnow:
928 # Immediate byte is opcode extension with 3DNow! instructions.
929 regex += ' [0-9a-fA-F][0-9a-fA-F]'
930 else:
931 # Accept immediate (which is always zeros in our case).
932 regex += '(?: 00)*'
933 # 2 spaces separate byte code from instruction name.
934 regex += ' '
935 # Optional "lock" prefix.
936 regex += '(?:lock )?'
937 # Instruction name.
938 regex += r'\w* '
939 # Immediate or implicit operand that can precede reg/rm pair.
940 regex += r'(?:\$0x0,|\$0x0,\$0x0,|%cl,|%xmm0,)?'
941 regex_output_rr = None
942 subst_output_rr = None
943 if writes_to == 'reg':
944 regex += regex_rm + ',' + regex_reg
945 if register_write == 'sandbox':
946 assert reg in ('eax', 'r8d')
947 regex_output_rr = '%' + reg
948 subst_output_rr = '[%eax..%edi]' if reg == 'eax' else '[%r8d..%r14d]'
949 subst = ('XX', subst_rm, '[%{}..%{}]'.format(start_reg, end_reg))
halyavin 2013/12/10 08:03:34 You created subst_reg which you can use here.
khim 2013/12/10 21:06:39 Done.
950 else:
951 regex += regex_reg + ',' + regex_rm
952 if register_write == 'sandbox':
953 assert rm in ('eax', 'r8d')
954 # Instruction can be sandboxing one and yet produce output_rr == None
955 # This looks strange, but it's perfectly logical because we can't ever
956 # sandbox memory!
957 if rm_compression == 'register':
958 regex_output_rr = '%' + rm
959 if rm_compression != 'memory':
960 subst_output_rr = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'
961 subst = ('XX', '[%{}..%{}]'.format(start_reg, end_reg), subst_rm)
halyavin 2013/12/10 08:03:34 The same.
khim 2013/12/10 21:06:39 Done.
962 # Skip implicit arguments (if any)
963 regex += '.*'
964 if options.bitness == 64:
965 if memory_accessed and rm_compression != 'register':
966 regex += '; input_rr=(%r8d)' if index_r8 else '; input_rr=(%eax)'
967 subst_input_rr = '[%r8d..%r15d]' if index_r8 else '[%eax..%edi]'
968 else:
969 regex += '; input_rr=(any_nonspecial)'
970 subst_input_rr = 'any_nonspecial'
971 regex += '; output_rr=({})'.format(regex_output_rr)
972 subst = subst + (subst_input_rr, subst_output_rr)
973 # With "or memory" or "memory" compressors we always can see where is
974 # reg and where is rm, but with reg to rm or rm to reg it's impossible.
975 # Add the appropriate comment
976 notes = () if notes == '' else (notes, )
977 if rm_compression == 'register':
978 notes += ('rm to reg' if writes_to == 'reg' else 'reg to rm', )
979 notes = '; '.join(notes)
980 if notes != '':
981 notes = ' # ' + notes
982 subst += (notes, )
983 regex += '()$'
984 replacement = ()
985 if rm_compression != 'memory':
986 replacement += ModRMRegisterReplacements(
987 reg=reg, rm=rm, writes_to=writes_to, register_write=register_write)
988 if rm_compression != 'register':
989 replacement += ModRMMemoryReplacements(
990 reg=reg, base_r8=base_r8, index_r8=index_r8, writes_to=writes_to,
991 memory_accessed=memory_accessed, register_write=register_write)
992 return Compressor(regex, subst, replacement)
993
994
995 def AllRegToRmCompressors():
996 """ Return list of all Reg to RM (and RM to Reg) compressors. """
997
998 compressors = []
999
1000 for reg, rm in RegisterKindPairs():
1001 instruction_kinds = [
1002 # Normal instructions with two operands (rm to reg)
1003 {'writes_to': 'reg'},
1004 # Normal instructions with two operands (reg to rm)
1005 {'writes_to': 'rm'}
1006 ]
1007 # In 64 bit mode we have many different types of instructions depending
1008 # on whether we are accessing memory or whether we are writing to registers.
1009 if options.bitness == 64:
1010 # Lea in 64 bit mode is truly unique instruction for now
1011 if reg in ('ax', 'r8w', 'eax', 'r8d', 'rax', 'r8'):
1012 register_write = 'sandbox' if reg in ('eax', 'r8d') else 'protect'
1013 instruction_kinds.append(
1014 {'writes_to': 'reg',
1015 'memory_accessed': False,
1016 'register_write': register_write,
1017 'notes': 'lea'})
1018 # There are few more forms in 64 bit case (rm to reg)
1019 if reg in ('eax', 'r8d'):
1020 # Zero-extending version.
1021 instruction_kinds.append(
1022 {'writes_to': 'reg',
1023 'register_write': 'sandbox'})
1024 # More forms in 64 bit case (reg to rm)
1025 if rm in ('eax', 'r8d'):
1026 # Zero-extending version.
1027 instruction_kinds.append(
1028 {'writes_to': 'rm',
1029 'register_write': 'sandbox'})
1030 # Zero-extending xchg/xadd
1031 instruction_kinds.append(
1032 {'writes_to': 'both',
1033 'register_write': 'sandbox',
1034 'notes': 'write to both'})
1035 # Still more forms for 64 bit case (rm to reg).
1036 if reg in ('al', 'spl', 'ax', 'eax', 'rax', 'r8b', 'r8w', 'r8d', 'r8'):
1037 # Dangerous instructions (rm to reg)
1038 instruction_kinds.append(
1039 {'writes_to': 'reg',
1040 'register_write': 'protect'})
1041 # Still more forms for 64 bit case (reg to rm)
1042 if rm in ('al', 'spl', 'ax', 'eax', 'rax', 'r8b', 'r8w', 'r8d', 'r8'):
1043 # Dangerous instructions (reg to rm)
1044 instruction_kinds.append(
1045 {'writes_to': 'rm',
1046 'register_write': 'protect'})
1047 # Dangerous xchg/xadd
1048 instruction_kinds.append(
1049 {'writes_to': 'both',
1050 'register_write': 'protect',
1051 'notes': 'write to both'})
1052 # 3DNow! instructions
1053 if reg in ('mm0', 'mmalt') or rm in ('mm0', 'mmalt'):
1054 instruction_kinds.append(
1055 {'writes_to': 'reg',
1056 'is_3Dnow': True})
1057 for instruction_kind in instruction_kinds:
1058 for rm_compression in ('register', 'memory', 'register_or_memory'):
1059 if rm_compression == 'register':
1060 # Register-to-register instructions can not access memory anyway
1061 # which means special compressors are not needed.
1062 if 'memory_accessed' in instruction_kind:
1063 continue
1064 elif options.bitness == 64:
1065 # We generate 2 variants of compressors with memory operands to
1066 # enumerate 2 possible values of index_r8 property. Register only
1067 # instructions have only one version since they don't use index.
1068 compressors.append(RegToRmCompressors(
1069 reg=reg, rm=rm, index_r8=True,
1070 rm_compression=rm_compression, **instruction_kind))
1071 # Create "normal" compressor now
1072 compressors.append(RegToRmCompressors(
1073 reg=reg, rm=rm,
1074 rm_compression=rm_compression, **instruction_kind))
1075 return compressors
1076
1077
1078 def RmCompressors(rm, opcode_bits, memory_accessed=True, index_r8=False,
1079 rm_compression='register', register_write='ignore'):
1080 """Returns a list of rm compressors for a given set of parameters.
1081
1082 Args:
1083 rm: rm operand kind (see REGISTERS array)
1084 memory_accessed: True if instruction accesses memory
1085 index_r8: True if REX.X bit in the instruction set to 1
1086 rm_compression: three-state selector
1087 'register' - instruction supports register-only rm parameter
1088 'memory' - instruction supports memory-only rm parameter
1089 'register_or_memory' - instruction supports all kinds of rm parameters
1090 register_write: three-state selector
1091 'sandbox' - instruction can be used to produce "restricted register"
1092 'protect' - instruction can damage output, protect "special registers"
1093 'ignore' - instruction does not affect it's operands (e.g. test) or
1094 is used with non-GP registers (X87, MMX, XMM, etc)
1095
1096 Returns:
1097 List of compressors
1098 """
1099 start_rm = REGISTERS[rm][0]
1100 end_rm = REGISTERS[rm][-1]
1101 # Exclude r15 from the interval, if instruction can write to rm.
1102 if end_rm.startswith('r15') and register_write != 'ignore':
1103 end_rm = REGISTERS[rm][-2]
1104 byte_mark = 'XX/' + str(opcode_bits)
1105 # Prepare substitution texts
1106 if rm_compression == 'register':
1107 subst_rm = '[%{}..%{}]'.format(start_rm, end_rm)
1108 elif rm_compression == 'register_or_memory':
1109 subst_rm = '[%{}..%{} or memory]'.format(start_rm, end_rm)
1110 else:
1111 assert rm_compression == 'memory'
1112 subst_rm = '[memory]'
1113 base_r8 = rm in r8.values()
1114 if rm_compression == 'register':
1115 regex, regex_rm = RegisterRegex(rm, opcode_bits)
1116 else: # memory and register_or_memory
1117 regex, regex_rm = MemoryRegex(base_r8, index_r8, opcode_bits)
1118 # Regex here matches everything AFTER the ModR/M (+ SIB) bytes.
1119 regex += '(?: 00)*'
1120 # 2 spaces separate byte code from instruction name.
1121 regex += ' '
1122 # Optional "lock" prefix.
1123 regex += '(?:lock )?'
1124 # Instruction name.
1125 regex += r'\w* '
1126 # Immediate or implicit operand that can precede rm argument.
1127 regex += r'(?:\$0x0,|%cl,)?'
1128 # Register or memory access
1129 regex += regex_rm
1130 # Skip everything after that
1131 regex += '.*'
1132 regex_output_rr = None
1133 subst_output_rr = None
1134 subst = (byte_mark, subst_rm)
1135 if options.bitness == 32:
1136 subst += ('', )
1137 else:
1138 if register_write == 'sandbox':
1139 assert rm in ('eax', 'r8d')
1140 # Instruction can be sandboxing one and yet produce output_rr == None
1141 # This looks strange, but it's perfectly logical because we can't ever
1142 # sandbox memory!
1143 if rm_compression == 'register':
1144 regex_output_rr = '%' + rm
1145 if rm_compression != 'memory':
1146 subst_output_rr = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'
1147 if memory_accessed and rm_compression != 'register':
1148 regex += '; input_rr=(%r8d)' if index_r8 else '; input_rr=(%eax)'
1149 subst_input_rr = '[%r8d..%r15d]' if index_r8 else '[%eax..%edi]'
1150 else:
1151 regex += '; input_rr=(any_nonspecial)'
1152 subst_input_rr = 'any_nonspecial'
1153 regex += '; output_rr=({})'.format(regex_output_rr)
1154 subst = subst + (subst_input_rr, subst_output_rr, '')
1155 regex += '()'
1156 replacement = ()
1157 if rm_compression != 'memory':
1158 replacement += ModRMRegisterReplacements(
1159 rm=rm, opcode_bits=opcode_bits, register_write=register_write)
1160 if rm_compression != 'register':
1161 replacement += ModRMMemoryReplacements(
1162 base_r8=base_r8, index_r8=index_r8, opcode_bits=opcode_bits,
1163 memory_accessed=memory_accessed, register_write=register_write)
1164 return Compressor(regex, subst, replacement)
1165
1166
1167 def AllRmCompressors():
1168 """ Return list of all RM (with reg as opcode extension) compressors. """
1169 compressors = []
1170
1171 for rm in RegisterKinds():
1172 for opcode_bits in xrange(8):
1173 instruction_kinds = [
1174 # The most basic form
1175 {}
1176 ]
1177 # In 64 bit mode we have many different types of instructions depending
1178 # on whether we are accessing memory or whether we are writing to
1179 # registers.
1180 if options.bitness == 64:
1181 # No memory access (e.g. prefetch)
1182 instruction_kinds.append(
1183 {'memory_accessed':False})
1184 # More forms in 64 bit case.
1185 if rm in ('eax', 'r8d'):
1186 # Zero-extending version.
1187 instruction_kinds.append(
1188 {'register_write':'sandbox'})
1189 # Still more forms for 64 bit case (reg to rm).
1190 if rm in ('al', 'spl', 'ax', 'eax', 'rax',
1191 'r8b', 'r8w', 'r8d', 'r8'):
1192 # Dangerous instructions.
1193 instruction_kinds.append(
1194 {'register_write':'protect'})
1195 for instruction_kind in instruction_kinds:
1196 for rm_compression in ('register', 'memory', 'register_or_memory'):
1197 if rm_compression == 'register':
1198 # Register-to-register instructions can not access memory anyway
1199 # which means special compressors are not needed.
1200 if 'memory_accessed' in instruction_kind:
1201 continue
1202 elif options.bitness == 64:
1203 # We generate 2 variants of compressors with memory operands to
1204 # enumerate 2 possible values of index_r8 property. Register only
1205 # instructions have only one version since they don't use index.
1206 compressors.append(RmCompressors(
1207 rm=rm, opcode_bits=opcode_bits, index_r8=True,
1208 rm_compression=rm_compression, **instruction_kind))
1209 # Create "normal" compressor now
1210 compressors.append(RmCompressors(
1211 rm=rm, opcode_bits=opcode_bits,
1212 rm_compression=rm_compression, **instruction_kind))
1213 return compressors
1214
1215
1216 def AllOpcodeCompressors():
1217 """ Return "register in opcode" compressors. """
1218 compressors = []
1219
1220 for reg in RegisterKinds() + ('st(0)',):
1221 for opcode in xrange(8):
1222 for text1, text2, nibble in (
1223 ('[0..7]', '[8..f]', xrange(8)),
1224 ('[012367]', '[89abef]', (0, 1, 2, 3, 6, 7)),
1225 ('[0..6]', '[8..e]', xrange(7))
1226 ):
1227 start_reg = REGISTERS[reg][0]
1228 end_reg = REGISTERS[reg][-1 if 7 in nibble else -2]
1229 subst_regs = '[%{}..%{}]'.format(start_reg, end_reg)
1230 # Note that we use instruction with 1st register, not 0th register
1231 # here to avoid ambiguity when opcode is 0x00
1232 compressors.append(Compressor(
1233 r'.*?[0-9a-fA-F](1)(?: 00)*'
1234 r' \w* (?:\$0x0,|%ax,|%st,)?'
1235 r'(%' + REGISTERS_RE[reg][1] + ').*',
1236 (text1, subst_regs),
1237 [('{:x}'.format(n), '%' + REGISTERS[reg][n])
1238 for n in nibble]))
1239 compressors.append(Compressor(
1240 r'.*?[0-9a-fA-F](8)(?: 00)*'
1241 r' \w* (?:\$0x0,|%ax,|%st,)?'
1242 r'(%' + REGISTERS_RE[reg][0] + ').*',
1243 (text2, subst_regs),
1244 [('{:x}'.format(n + 8), '%' + REGISTERS[reg][n])
1245 for n in nibble]))
1246 # Another version for 64 bit case
1247 if options.bitness == 64 and reg in ('eax', 'r8d'):
1248 compressors.append(Compressor(
1249 r'.*?[0-9a-fA-F](1)(?: 00)*'
1250 r' \w* (?:\$0x0,|%ax,|%st,)?'
1251 r'(%' + REGISTERS_RE[reg][1] + ').*'
1252 r'output_rr=(%'+ REGISTERS_RE[reg][1] + ').*',
1253 (text1, subst_regs, subst_regs),
1254 [['{:x}'.format(n)] + ['%' + REGISTERS[reg][n]] * 2
1255 for n in nibble]))
1256 compressors.append(Compressor(
1257 r'.*?[0-9a-fA-F](8)(?: 00)*'
1258 r' \w* (?:\$0x0,|%ax,|%st,)?'
1259 r'(%' + REGISTERS_RE[reg][0] + ').*'
1260 r'output_rr=(%'+ REGISTERS_RE[reg][0] + ').*',
1261 (text2, subst_regs, subst_regs),
1262 [['{:x}'.format(n + 8)] + ['%' + REGISTERS[reg][n]] * 2
1263 for n in nibble]))
1264 return compressors
1265
1266
1267 def AllMemoryNonMemoryCompressors():
1268 """ Return memory/nonmemory compressors. """
1269
1270 compressors = []
1271
1272 if options.bitness == 32:
1273 letters_and_registers = (('b', 'al', ''), ('w', 'ax', ''), ('l', 'eax', ''))
1274 else:
1275 letters_and_registers = (
1276 ('b', 'al', 'eax'), ('b', 'spl', 'eax'), ('b', 'r8b', 'r8d'),
1277 ('w', 'ax', 'eax'), ('w', 'r8w', 'r8d'),
1278 ('l', 'eax', 'eax'), ('l', 'r8d', 'r8d'),
1279 ('q', 'rax', 'eax'), ('q', 'r8', 'r8d')
1280 )
1281 for letter, reg, out_reg in letters_and_registers:
1282 start_reg = REGISTERS[reg][0]
1283 if reg[0:2] == 'r8':
halyavin 2013/12/10 08:03:34 REGISTERS[reg][-1].startswith('r15')
khim 2013/12/10 21:06:39 This code is removed compretely.
1284 end_regs = (REGISTERS[reg][-2], REGISTERS[reg][-1])
1285 else:
1286 end_regs = (REGISTERS[reg][-1], )
1287 for end_reg in end_regs:
1288 all_regs = '[%{}..%{}]'.format(start_reg, end_reg)
1289 regs_mark = '[%{}..%{} or memory]'.format(start_reg, end_reg)
1290 if options.bitness == 64:
1291 start_out = REGISTERS[out_reg][0]
1292 end_out = REGISTERS[out_reg][-1 if out_reg[0:2] != 'r8' else -2]
halyavin 2013/12/10 08:03:34 Use end_out.startswith('r15') instead.
khim 2013/12/10 21:06:39 This code is removed completely.
1293 out_regs = '[%{}..%{}]'.format(start_out, end_out)
1294 for notes in ('', ' # rm to reg', ' # reg to rm'):
1295 compressors.append(Compressor(
1296 r'.* \w*(' + letter + r') .*(\[memory]).*()',
1297 ('[{}]?'.format(letter), regs_mark, ''),
1298 ((letter, '[memory]', ''), ('', all_regs, notes))))
1299 if options.bitness == 64:
1300 for index_reg in ('eax', 'r8d'):
1301 start_index = REGISTERS[index_reg][0]
1302 end_index = REGISTERS[index_reg][-1]
1303 index_regs = '[%{}..%{}]'.format(start_index, end_index)
1304 for output_rrs in ((None, out_regs),
1305 (out_regs, None),
1306 (None, None)):
1307 compressors.append(Compressor(
1308 r'.* \w*(' + letter + r') .*(\[memory]).*; '
1309 r'input_rr=(\[%[a-z0-9]*..%[a-z0-9]*\]); '
1310 r'output_rr=(\[%[a-z0-9]*..%[a-z0-9]*\]|None)()',
1311 ('[{}]?'.format(letter), regs_mark, index_regs,
1312 output_rrs[1] if output_rrs[0] is None else output_rrs[0],
1313 ''),
1314 ((letter, '[memory]', index_regs, output_rrs[0], ''),
1315 ('', all_regs, 'any_nonspecial', output_rrs[1], notes))))
1316 return compressors
1317
1318
1319 def RexCompressor(rm, rmR15, reg, regR15, rexw, input_rr, output_rr, rm_to_reg):
1320 """ Return REX compressor (or nothing) for a given set of paramenters.
1321
1322 Args:
1323 rm: rm operand kind (see REGISTERS array) or None if rm is not used
1324 reg: reg operand kind (see REGISTERS array) or None if reg is not used
1325 rmR15: True if R15 register is included
1326 regR15: True if R15 register is included
1327 rexw: True if REX.W should be set
1328 input_rr: True if input_rr is used
1329 output_rr: true if output_rr is used
1330 rm_to_reg: True if direction is rm to reg
1331 """
1332
1333 # reg and rm can be of three different possible intervals
1334 # start_reg/rm to end_reg/rm (e.g. from %al to %bh)
1335 # start_reg/rm to end_reg0/rm0 (e.g from %al to %dil)
1336 # start_reg8/rm8 to end_reg8/rm8 (e.g. from %r8b to %r15b)
1337 # First form can be observed if there are not REX, second form
1338 # if REX.R/REX.B is not set and third form is where it's set.
1339 if reg:
1340 start_reg = REGISTERS[reg][0]
1341 start_reg8 = REGISTERS[r8[reg]][0]
1342 end_reg = REGISTERS[reg][-1]
1343 end_reg0 = 'dil' if reg == 'al' else end_reg
1344 end_reg8 = REGISTERS[r8[reg]][-1 if regR15 else -2]
1345 if rexw:
1346 regex_reg = r'\[(%' + start_reg + r'\.\.%' + end_reg0 + ')]'
1347 else:
1348 regex_reg = r'\[(%' + start_reg + r'\.\.%' + end_reg + ')]'
1349 if rm:
1350 start_rm = REGISTERS[rm][0]
1351 start_rm8 = REGISTERS[r8[rm]][0]
1352 end_rm = REGISTERS[rm][-1]
1353 end_rm0 = 'dil' if rm == 'al' else end_rm
1354 end_rm8 = REGISTERS[r8[rm]][-1 if rmR15 else -2]
1355 if rexw:
1356 regex_rm = (r'\[(%' + start_rm + r'\.\.%' + end_rm0 + ')'
1357 r'(?: or memory)?]')
1358 else:
1359 regex_rm = (r'\[(%' + start_rm + r'\.\.%' + end_rm + ')'
1360 r'(?: or memory)?]')
1361
1362 # Legacy prefixes
1363 regex = '.*:(?: 26| 2e| 36| 3e| 64| 65| 66| 67| f0| f2| f3)*'
1364 # REX
1365 regex += '( 48).*' if rexw else '( 40|).*'
1366 # Replacement text
1367 replacement_tuple = (' [REX:48..4f]' if rexw else ' [REX:40..47]?', )
1368 if reg: replacement_regs = '%{}..%{}'.format(start_reg, end_reg8)
1369 if rm: replacement_rms = '%{}..%{}'.format(start_rm, end_rm8)
1370 # Instruction arguments
1371 if not reg and not rm:
1372 pass
1373 elif not reg and rm:
1374 regex += regex_rm + '.*'
1375 replacement_tuple += (replacement_rms, )
1376 elif reg and not rm:
1377 regex += regex_reg + '.*'
1378 replacement_tuple += (replacement_regs, )
1379 elif rm_to_reg:
1380 regex += regex_rm + ',' + regex_reg + '.*'
1381 replacement_tuple += (replacement_rms, replacement_regs)
1382 else:
1383 regex += regex_reg + ',' + regex_rm + '.*'
1384 replacement_tuple += (replacement_regs, replacement_rms)
1385 # Input and output restricted registers
1386 if input_rr:
1387 regex += r'input_rr=\[(%eax\.\.%edi)].*'
1388 replacement_tuple += ('%eax..%r15d', )
1389 if output_rr:
1390 regex += r'output_rr=\[(%eax\.\.%edi)].*'
1391 replacement_tuple += ('%eax..%r14d', )
1392 # Replacement cases
1393 replacement_tuples = ()
1394 for byte in (range(0x48, 0x50) if rexw else range(0x40, 0x48) + [0]):
1395 replacement_case = (' {:02x}'.format(byte) if byte else '', )
1396 if rm:
1397 if byte & 0x1:
1398 replacement_rms = '%{}..%{}'.format(start_rm8, end_rm8)
1399 elif byte:
1400 replacement_rms = '%{}..%{}'.format(start_rm, end_rm0)
1401 else:
1402 replacement_rms = '%{}..%{}'.format(start_rm, end_rm)
1403 if byte & 0x2:
1404 replacement_index = '%r8d..%r15d'
1405 else:
1406 replacement_index = '%eax..%edi'
1407 if reg:
1408 if byte & 0x4:
1409 replacement_regs = '%{}..%{}'.format(start_reg8, end_reg8)
1410 elif byte:
1411 replacement_regs = '%{}..%{}'.format(start_reg, end_reg0)
1412 else:
1413 replacement_regs = '%{}..%{}'.format(start_reg, end_reg)
1414 if not reg and not rm:
1415 pass
1416 elif not reg and rm:
1417 replacement_case += (replacement_rms, )
1418 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'
1419 elif reg and not rm:
1420 replacement_case += (replacement_regs, )
1421 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'
1422 elif rm_to_reg:
1423 replacement_case += (replacement_rms, replacement_regs)
1424 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'
1425 else:
1426 replacement_case += (replacement_regs, replacement_rms)
1427 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'
1428 if input_rr: replacement_case += (replacement_index, )
1429 if output_rr: replacement_case += (final_rr, )
1430 replacement_tuples += (replacement_case, )
1431 return Compressor(regex, replacement_tuple, replacement_tuples)
1432
1433
1434 def AllRexCompressors():
1435 """ Return "REX" compressors which combine different REX prefixes. """
1436
1437 if options.bitness != 64:
1438 return []
1439
1440 compressors = []
1441
1442 # First pretty complex set of compressors to combine versions of REX with
1443 # three lowest bits in different states.
1444 register_kind_pairs = (
1445 ( None, None),
1446 ( 'al', 'al'), ( 'al', None), (None, 'al'),
1447 ( 'ax', 'al'), ( 'al', 'ax'),
1448 ( 'ax', 'ax'), ( 'ax', None), (None, 'ax'),
1449 ( 'eax', 'al'), ( 'al', 'eax'),
1450 ( 'eax', 'ax'), ( 'ax', 'eax'),
1451 ( 'eax', 'eax'), ( 'eax', None), (None, 'eax'),
1452 ( 'rax', 'al'), ( 'al', 'rax'),
1453 ( 'rax', 'ax'), ( 'ax', 'rax'),
1454 ( 'rax', 'eax'), ( 'eax', 'rax'),
1455 ( 'rax', 'rax'), ( 'rax', None), (None, 'rax'),
1456 ( 'eax', 'mm0'), ( 'mm0', 'eax'),
1457 ( 'rax', 'mm0'), ( 'mm0', 'rax'),
1458 ( 'mm0', 'eax'), ( 'eax', 'mm0'),
1459 ( 'mm0', 'rax'), ( 'rax', 'mm0'),
1460 ( 'eax', 'xmm0'),
1461 ( 'rax', 'xmm0'),
1462 ('xmm0', 'eax'),
1463 ('xmm0', 'rax'),
1464 ( 'mm0', 'mm0'), ( 'mm0', None), (None, 'mm0'),
1465 ( 'mm0', 'xmm0'),
1466 ('xmm0', 'mm0'),
1467 ('xmm0', 'xmm0'),
1468 ('xmm0', 'ymm0'), ('xmm0', None), (None, 'xmm0'),
1469 ('ymm0', 'xmm0'),
1470 ('ymm0', 'ymm0'), ('ymm0', None), (None, 'ymm0'),
1471 )
1472 for reg, rm in register_kind_pairs:
1473 for regR15 in (True, False):
1474 for rmR15 in (True, False):
1475 for rexw in (True, False):
1476 for input_rr in (True, False):
1477 for output_rr in (True, False):
1478 for rm_to_reg in (True, False):
1479 # These combinations will just produce useless duplicates
1480 if not reg and not regR15: continue
1481 if not rm and not rmR15: continue
1482 if not reg and not rm and (output_rr or rm_to_reg): continue
1483 compressors.append(RexCompressor(
1484 rm=rm, rmR15=rmR15, reg=reg, regR15=regR15,
1485 rexw=rexw, input_rr=input_rr, output_rr=output_rr,
1486 rm_to_reg=rm_to_reg))
1487
1488 # This is pretty simple compressor to combine two lines with different REX.W
1489 # bits (only if they are otherwise identical).
1490 compressors.append(Compressor(
1491 r'.*(\[REX:40\.\.47]\?).*', ('[REX:40..4f]?', ),
1492 (('[REX:40..47]?', ), ('[REX:48..4f]', ))))
1493 return compressors
1494
1495
1496 def AllSpecialCompressors():
1497 """ Return all "special" compressors. """
1498
1499 compressors = []
1500
1501 # Special compressors: will handle some cosmetic issues.
1502 #
1503 # SETxx ignores reg field and thus are described as many separate instructions
1504 compressors.append(Compressor(
1505 '.*0f 9[0-9a-fA-F] XX(/[0-7]) set.*', ('', ),
1506 [('/' + str(i), ) for i in range(8)]))
1507 # BSWAP is described with opcode "0f c8+r", not "0f /1" in manual
1508 if options.bitness == 32:
1509 compressors.append(Compressor(
1510 '.*(XX/1) bswap.*ax.*', ('c[8..f]', ), (('XX/1', ), )))
1511 else:
1512 compressors.append(Compressor(
1513 '.*(XX/1) bswap.*ax.*', ('c[89abef]', ), (('XX/1', ), )))
1514 compressors.append(Compressor(
1515 '.*(XX/1) bswap.*r8.*', ('c[8..e]', ), (('XX/1', ), )))
1516 # Add mark '# write to both' to certain versions of CMPXCHG, XADD, and XCHG
1517 if options.bitness == 64:
1518 compressors.append(Compressor(
1519 r'.* (?:cmpxchg|xadd|xchg).*%al\.\.%bh[^#]*()$',
1520 (' # write to both', ),
1521 (('', ), )))
1522 # "and $0xe0,[%eax..%edi]" is treated specially which means that we list all
1523 # versions of and "[$0x1..$0xff],[%eax..%edi]" separately here.
1524 # Without this rule these ands comprise 2/3 of the whole output!
1525 if options.bitness == 32:
1526 compressors.append(Compressor(
1527 r'.*83 (e0 01 and \$0x1,%eax)()',
1528 ('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', ' # special and'),
1529 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS['eax'][r]),
1530 '')
1531 for i in range(0x01, 0x100) for r in range(8)] +
1532 [('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', '')]))
1533 else:
1534 for reg in ('eax', 'r8d'):
1535 start_reg = REGISTERS[reg][0]
1536 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]
1537 for index_reg in ('eax', 'r8d'):
1538 start_index = REGISTERS[index_reg][0]
1539 end_index = REGISTERS[index_reg][-1]
1540 compressors.append(Compressor(
1541 r'.*83 (e0 01 and \$0x1,%' + reg + ').*'
1542 'input_rr=(any_nonspecial); output_rr=(%' + reg + ')()',
1543 ('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,
1544 end_reg),
1545 '[%{}..%{}]'.format(start_index, end_index),
1546 '[%{}..%{}]'.format(start_reg, end_reg),
1547 ' # special and'),
1548 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS[reg][r]),
1549 'any_nonspecial', '%' + REGISTERS[reg][r], '')
1550 for i in range(0x01, 0x100) for r in range(7 + (reg == 'eax'))] +
1551 [('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,
1552 end_reg),
1553 '[%{}..%{}]'.format(start_index, end_index),
1554 '[%{}..%{}]'.format(start_reg, end_reg),
1555 '')]))
1556
1557 # "and $e0" and similar are used to align %rsp. All negative values are
1558 # accepted by validator and there are 127 of these.
1559 # Consolidate them into one line.
1560 if options.bitness == 64:
1561 compressors.append(Compressor(
1562 r'.*(?:81|83) (?:e4|e5) (80) (?:00 00 00 |) and \$0x(80),%r[bs]p.*()',
1563 ('[80..ff]', '[80..ff]', ' # alignment and'),
1564 [('{:02x}'.format(i), '{:02x}'.format(i), '')
1565 for i in range(0x80, 0x100)]))
1566 return compressors
1567
1568
1569 def PrepareCompressors():
1570 """ Return list of all compressors sorted from bigger ones to smaller ones """
1571
1572 return tuple(sorted(
1573 AllRegToRmCompressors() +
1574 AllRmCompressors() +
1575 AllOpcodeCompressors() +
1576 AllMemoryNonMemoryCompressors() +
1577 AllRexCompressors() +
1578 AllSpecialCompressors(),
1579 key=lambda compressor: -len(compressor.replacements)))
1580
1581
1582 def ShowProgress(rule, instruction):
1583 if rule not in ShowProgress.rules_shown:
1584 first_print = True
1585 ShowProgress.rules_shown[rule]=len(ShowProgress.rules_shown)
1586 else:
1587 first_print = False
1588 print >> sys.stderr, '-------- Compressed --------'
1589 print >> sys.stderr, 'Rule:', ShowProgress.rules_shown[rule]
1590 print >> sys.stderr, '--------'
1591 compressor = compressors[rule]
1592 match = compressor.regex.match(instruction)
1593 assert match
1594 format_str = CompressionTemplate(instruction, match, '{{{}}}')
1595 replacements = sorted(format_str.format(*replacement)
1596 for replacement in compressor.replacements)
1597 if len(compressor.replacements) <= 4 or first_print:
1598 for replacement in replacements:
1599 print >> sys.stderr, replacement
1600 else:
1601 print >> sys.stderr, replacements[0]
1602 print >> sys.stderr, '...'
1603 print >> sys.stderr, replacements[-1]
1604 print >> sys.stderr, '--------'
1605 print >> sys.stderr, 'Compressed', format_str.format(*compressor.subst)
1606 ShowProgress.rules_shown = {}
1607
1608
1609 def main():
1610 # We are keeping these global to share state graph and compressors
1611 # between workers spawned by multiprocess. Passing them every time is slow.
1612 global options, xml_file
1613 global dfa
1614 global worker_validator
1615 options, xml_file = ParseOptions()
1616 dfa = dfa_parser.ParseXml(xml_file)
1617 worker_validator = validator.Validator(
1618 validator_dll=options.validator_dll,
1619 decoder_dll=options.decoder_dll)
1620 global compressors
1621 compressors = PrepareCompressors()
1622
1623 assert dfa.initial_state.is_accepting
1624 assert not dfa.initial_state.any_byte
1625
1626 print >> sys.stderr, len(dfa.states), 'states'
1627
1628 num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)
1629
1630 # We can't just write 'num_suffixes[dfa.initial_state]' because
1631 # initial state is accepting.
1632 total_instructions = sum(
1633 num_suffixes[t.to_state]
1634 for t in dfa.initial_state.forward_transitions.values())
1635 print >> sys.stderr, total_instructions, 'regular instructions total'
1636
1637 tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)
1638 print >> sys.stderr, len(tasks), 'tasks'
1639
1640 pool = multiprocessing.Pool()
1641
1642 results = pool.imap(Worker, tasks)
1643
1644 total = 0
1645 num_valid = 0
1646 full_output = set()
1647 for prefix, count, valid_count, output, trace in results:
1648 print >> sys.stderr, 'Prefix:', ', '.join(map(hex, prefix))
1649 total += count
1650 num_valid += valid_count
1651 full_output |= output
1652 for rule, instruction in trace:
1653 ShowProgress(rule, instruction)
1654 for instruction in sorted(Compressed(full_output,
1655 compressors,
1656 ShowProgress)):
1657 print instruction
1658
1659 print >> sys.stderr, total, 'instructions were processed'
1660 print >> sys.stderr, num_valid, 'valid instructions'
1661
1662
1663 if __name__ == '__main__':
1664 main()
OLDNEW
« no previous file with comments | « no previous file | src/trusted/validator_ragel/testdata/32bit_regular.golden » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698