Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(653)

Side by Side Diff: src/trusted/validator_ragel/compress_regular_instructions.py

Issue 49183002: Regular instructions golden file test. Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/trusted/validator_ragel/testdata/32bit_regular.golden » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright (c) 2013 The Native Client Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """
6 Traverse the validator's DFA, collect all "normal" instructions and then
7 compress output. Note: "anybyte fields" (immediates and displacements)
8 are always filled with zeros. Otherwise processing of sextillions (sic!)
9 of possibilities will take too long.
10
11 Each rule is applied only when all variants are accepted by validator.
12 The following compression rules are present:
13
14 1. Compress ModR/M (+SIB & displacement).
15 Instruction: 00 00 add %al,(%rax)
16 ...
17 Instruction: 00 ff add %bh,%bh
18 becomes
19 Instruction: 00 XX add [%al..%bh],[%al..%bh or memory]
20
21 1a. Compress ModR/M (+SIB & displacement) memory-only.
22 Instruction: f0 01 00 lock add %eax,(%eax)
23 ...
24 Instruction: f0 01 bf 00 00 00 00 lock add %edi,0x0(%edi)
25 becomes
26 Instruction: f0 01 XX lock add [%eax..%edi],[memory]
27
28 1b. Compress ModR/M register only.
29 Instruction: 66 0f 50 c0 movmskpd %xmm0,%eax
30 ...
31 Instruction: 66 0f 50 ff movmskpd %xmm7,%edi
32 becomes
33 Instruction: 66 0f 50 XX movmskpd [%xmm0..%xmm7],[%eax..%edi]
34
35 2. Compress ModR/M (+SIB & displacement) with opcode extension.
36 Instruction: 0f 90 00 seto (%eax)
37 ...
38 Instruction: 0f 90 c7 seto %bh
39 becomes
40 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]
41
42 2a. Compress ModR/M (+SIB & displacement) memory-only with opcode extension.
43 Instruction: f0 ff 00 lock incl (%eax)
44 ...
45 Instruction: f0 ff 84 ff 00 00 00 00 lock incl 0x0(%edi,%edi,8)
46 becomes
47 Instruction: f0 ff XX/0 lock incl [memory]
48
49 2b. Compress ModR/M register-only with opcode extension.
50 Instruction: 0f 71 d0 00 psrlw $0x0,%mm0
51 ...
52 Instruction: 0f 71 d7 00 psrlw $0x0,%mm7
53 becomes
54 Instruction: 0f 71 XX/2 00 psrlw $0x0,[%mm0..%mm7]
55
56 3. Compress register-in-opcode.
57 Instruction: d9 c0 fld %st(0)
58 ...
59 Instruction: d9 c7 fld %st(7)
60 becomes
61 Instruction: d9 c[0..7] fld [%st(0)..%st(7)]
62
63 Only applies if all possible register accesses are accepted by validator.
64
65 4. Special compressor for "set" instruction.
66 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]
67 ...
68 Instruction: 0f 90 XX/7 seto [%al..%bh or memory]
69 becomes
70 Instruction: 0f 90 XX seto [%al..%bh or memory]
71 """
72
73 import itertools
74 import multiprocessing
75 import optparse
76 import os
77 import re
78 import subprocess
79 import sys
80 import tempfile
81 import traceback
82
83 import dfa_parser
84 import dfa_traversal
85 import validator
86
87
88 # Register names in 'natural' order (as defined by IA32/x86-64 ABI)
89 #
90 # X86-64 ABI splits all registers in groups of 8 because it uses 3-bit field
91 # in opcode, ModR/M, and/or SIB bytes to encode them.
92 #
93 # In most cases there are 16 registers of a given kind and two such groups,
94 # but there are couple of exceptions:
95 # 1. There are 20 8-bit registers and three groups (two of them overlap)
96 # 2. There are eight X87 and MMX registers thus two groups are identical
97 #
98 # We use typical register from a group to name the whole group. Most groups
99 # use first register, but 'spl' group uses fifth register because its first
100 # four registers are the same as 'al' group. We use mnemonic name 'mmalt'
101 # to represent the "evil mirror" of the 'mm0' group.
102 REGISTERS = {
103 'al': [ 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh' ],
104 'spl': [ 'al', 'cl', 'dl', 'bl', 'spl', 'bpl', 'sil', 'dil' ],
105 'ax': [ 'ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di' ],
106 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi' ],
107 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi' ],
108 'r8b': [ 'r{}b'.format(N) for N in range(8,16) ],
109 'r8w': [ 'r{}w'.format(N) for N in range(8,16) ],
110 'r8d': [ 'r{}d'.format(N) for N in range(8,16) ],
111 'r8': [ 'r{}'.format(N) for N in range(8,16) ],
112 'mm0': [ 'mm{}'.format(N) for N in range(8) ],
113 'mmalt': [ 'mm{}'.format(N) for N in range(8) ],
114 'st(0)': [ 'st({})'.format(N) for N in range(8) ],
115 'xmm0': [ 'xmm{}'.format(N) for N in range(8) ],
116 'xmm8': [ 'xmm{}'.format(N) for N in range(8,16) ],
117 'ymm0': [ 'ymm{}'.format(N) for N in range(8) ],
118 'ymm8': [ 'ymm{}'.format(N) for N in range(8,16) ]
119 }
120
121
122 NOP = 0x90
123
124
def PadToBundleSize(bytes):
  """Return bytes extended with NOP entries up to validator.BUNDLE_SIZE.

  Args:
    bytes: list of byte values; must not be longer than one bundle.
  Returns:
    New list of exactly validator.BUNDLE_SIZE entries.
  """
  padding = validator.BUNDLE_SIZE - len(bytes)
  assert padding >= 0
  return bytes + [NOP] * padding
128
129
130 # In x86-64 mode we have so-called 'restricted register' which is used to
131 # tie two groups together. Some instructions require particular value to
132 # be stored in this variable, while some accept any non-special restricted
133 # register (%ebp and %esp are special because they can only be accepted by
134 # a few 'special' instructions).
135 #
136 # You can find more details in the "NaCl SFI model on x86-64 systems" manual.
137 #
138 # We try to feed all possible 'restricted registers' into validator and then
139 # classify the instruction using this map. If set of acceptable 'restricted
140 # registers' is not here, then it's an error in validator.
141 ACCEPTABLE_X86_64_INPUTS = {
142 0x00001: 'input_rr=%eax',
143 0x00002: 'input_rr=%ecx',
144 0x00004: 'input_rr=%edx',
145 0x00008: 'input_rr=%ebx',
146 0x00010: 'input_rr=%esp',
147 0x00020: 'input_rr=%ebp',
148 0x00040: 'input_rr=%esi',
149 0x00080: 'input_rr=%edi',
150 0x00100: 'input_rr=%r8d',
151 0x00200: 'input_rr=%r9d',
152 0x00400: 'input_rr=%r10d',
153 0x00800: 'input_rr=%r11d',
154 0x01000: 'input_rr=%r12d',
155 0x02000: 'input_rr=%r13d',
156 0x04000: 'input_rr=%r14d',
157 0x08000: 'input_rr=%r15d',
158 0x1ffcf: 'input_rr=any_nonspecial'
159 }
160
161 # Any instruction must produce either None or one of fifteen registers as an
162 # output 'restricted register' value. 'r15d' is NOT acceptable as an output.
163 ACCEPTABLE_X86_64_OUTPUT_REGISTERS = tuple(
164 '%' + reg for reg in (REGISTERS['eax'] + REGISTERS['r8d'])[0:-1])
165
166
def ValidateInstruction(instruction, validator_inst):
  """Validate one instruction padded with NOPs to a full bundle.

  Args:
    instruction: list of byte values of the instruction.
    validator_inst: object providing ValidateChunk and
        ValidateAndGetFinalRestrictedRegister.
  Returns:
    In 32-bit mode, whatever validator_inst.ValidateChunk returns.
    In 64-bit mode, False if no initial restricted register is accepted,
    otherwise the list ['input_rr=...', 'output_rr=...'].
  """
  bundle = ''.join(map(chr, PadToBundleSize(instruction)))
  if options.bitness == 32:
    return validator_inst.ValidateChunk(bundle, bitness=32)

  valid_inputs = 0
  known_final_rr = None
  # Bit N of valid_inputs corresponds to the N-th candidate below; this order
  # is aligned with the keys of ACCEPTABLE_X86_64_INPUTS.
  for index, initial_rr in enumerate(validator.ALL_REGISTERS + [None]):
    valid, final_rr = validator_inst.ValidateAndGetFinalRestrictedRegister(
        bundle, len(instruction), initial_rr)
    if not valid:
      continue
    # final_rr should not depend on input_rr
    assert valid_inputs == 0 or known_final_rr == final_rr
    valid_inputs |= 1 << index
    known_final_rr = final_rr

  # If nothing is accepted then instruction is not valid. Easy and simple.
  if valid_inputs == 0:
    return False

  output_rr = None
  if known_final_rr is not None:
    # An unacceptable value raises IndexError here and fails the test.
    output_rr = ACCEPTABLE_X86_64_OUTPUT_REGISTERS[known_final_rr]
  # An unacceptable set of valid inputs raises KeyError here and fails the
  # test.
  return [ACCEPTABLE_X86_64_INPUTS[valid_inputs],
          'output_rr={}'.format(output_rr)]
198
199
class WorkerState(object):
  """Collects validated, disassembled instructions for one traversal worker.

  Attributes:
    total_instructions: number of byte sequences given to ReceiveInstruction.
    num_valid: number of those that ValidateInstruction did not reject.
    validator: object used to validate and disassemble instructions.
    output: set of textual descriptions of the accepted instructions.
    trace: list of (compressor_nr, instruction) pairs added by RecordTrace.
  """

  def __init__(self, prefix, validator):
    # prefix is part of the constructor signature but is not stored.
    self.total_instructions = 0
    self.num_valid = 0
    self.validator = validator
    self.output = set()
    self.trace = []

  @staticmethod
  def _ReplaceJumpTarget(text, bitness):
    """Replace a jump target equal to the instruction length with %eip/%rip.

    Args:
      text: disassembly line which starts with the instruction bytes written
          as space-separated two-digit hex numbers.
      bitness: 32 selects '%eip', any other value selects '%rip'.
    Returns:
      text where, for every length from 1 to 15, a hex literal equal to that
      length following that many leading bytes is replaced with the register.
    """
    register = '%eip' if bitness == 32 else '%rip'
    for length in range(1, 16):
      # hex() is required here: lengths 10..15 are printed as 0xa..0xf, which
      # '0x' + str(length) would never match.
      text = re.sub(
          '(' + '[0-9a-fA-F][0-9a-fA-F] ' * length + ' .* )' +
          hex(length) + '(.*)',
          '\\1' + register + '\\2',
          text)
    return text

  def ReceiveInstruction(self, bytes):
    """Validate one instruction and record its description if accepted.

    Args:
      bytes: list of byte values of the instruction.
    """
    self.total_instructions += 1
    result = ValidateInstruction(bytes, self.validator)
    if result is False:
      return
    self.num_valid += 1
    dis = self.validator.DisassembleChunk(
        ''.join(map(chr, bytes)),
        bitness=options.bitness)
    for line_nr, line in enumerate(dis):
      text = str(line)
      # Strip the 'Instruction(0xN: ' wrapper and the closing parenthesis.
      assert text[0:17] == 'Instruction(0x' + str(line_nr) + ': '
      assert text[-1:] == ')'
      dis[line_nr] = text[17:-1]
    # If %rip is involved then comment will be different depending on the
    # instruction length. Eliminate it.
    if '(%rip)' in dis[0]:
      dis[0] = re.sub(' # 0x[ ]*[0-9a-fA-F]*', '', dis[0])
    # Zero displacements are represented as 0x0 for all instructions except
    # jumps where they are disassembled as non-zero due to %eip/%rip-relative
    # addressing. We replace this displacement with %eip/%rip to simplify
    # compression.
    if ' 0x' in dis[0] and ' 0x0' not in dis[0]:
      dis[0] = self._ReplaceJumpTarget(dis[0], options.bitness)
    dis[0] = 'Instruction: ' + dis[0]
    # In 64-bit mode the result carries the input_rr/output_rr notes.
    if result is not True:
      dis += result
    self.output.add('; '.join(dis))

  def RecordTrace(self, compressor_nr, instruction):
    """Remember which compressor was applied to which instruction."""
    self.trace.append((compressor_nr, instruction))
245
246
247 # Compressor has three slots: regex (which picks apart given instruction),
248 # subst (which is used to denote compressed version) and replacements (which
249 # are used to generate set of instructions from a given code).
250 #
251 # Example compressor:
252 # regex = '.*?[0-9a-fA-F]([0-7]) \\w* (%e(?:[abcd]x|[sb]p|[sd]i)).*()'
253 # subst = ('[0-7]', '[%eax..%edi]', ' # register in opcode')
254 # replacements = ((0, '%eax'), (1, '%ecx'), (2, '%edx'), (3, '%ebx'),
255 # (4, '%esp'), (5, '%ebp'), (6, '%esi'), (7, '%edi'))
256 #
257 # When faced with instruction '40 inc %eax' it will capture the following
258 # pieces of said instruction: '4[0] inc [%eax]'.
259 #
260 # Then it will produce the following eight instructions:
261 # '40 inc %eax'
262 # '41 inc %ecx'
263 # '42 inc %edx'
264 # '43 inc %ebx'
265 # '44 inc %esp'
266 # '45 inc %ebp'
267 # '46 inc %esi'
268 # '47 inc %edi'
269 #
270 # If all these instructions can be found in a set of instructions then
271 # compressor will remove them from said set and will insert one replacement
272 # "compressed instruction" '4[0-7] inc [%eax..%edi] # register in opcode'.
273 #
274 # Note that last group is only used in the replacement. It's used to grab marks
275 # added by previous compressors and to replace them with a new mark.
class Compressor(object):
  """One compression rule: a compiled regex, a substitution, replacements.

  Attributes:
    regex: compiled regular expression which picks apart an instruction.
    subst: values used to denote the compressed version.
    replacements: values used to generate the set of instructions covered by
        the rule; an empty list when none are given.
  """

  __slots__ = ['regex', 'subst', 'replacements']

  def __init__(self, regex, subst, replacements=None):
    self.regex = re.compile(regex)
    self.subst = subst
    if replacements is None:
      replacements = []
    self.replacements = replacements
287
288
def CompressionTemplate(instruction, match, mark):
  """Replace all match groups except the last one with the mark.

  Args:
    instruction: the string the regex was matched against.
    match: match object whose groups delimit the pieces to replace.
    mark: text inserted in place of every group but the last.
  Returns:
    instruction up to the start of the last group, with each earlier group
    replaced by mark.
  """
  last_group = len(match.groups())
  pieces = []
  cursor = 0
  for group in range(1, last_group):
    pieces.append(instruction[cursor:match.start(group)])
    pieces.append(mark)
    cursor = match.end(group)
  # The last group only marks where the template ends.
  pieces.append(instruction[cursor:match.start(last_group)])
  return ''.join(pieces)
296
297
def CompressOneMatch(instructions, compressors,
                     instruction, match, compressor):
  """Try to apply one compressor to one matched instruction.

  Args:
    instructions: set of instruction strings; modified in place on success.
    compressors: not used; kept so that existing callers keep working.
    instruction: the instruction string that produced match.
    match: match object of compressor.regex on instruction.
    compressor: the Compressor to apply.
  Returns:
    (True, instructions) if every instruction generated from
    compressor.replacements was present and has been replaced by the single
    compressed form, (False, instructions) otherwise.
  """
  template = CompressionTemplate(instruction, match, '{}')
  expanded = set()
  for replacement in compressor.replacements:
    candidate = template.format(*replacement)
    if candidate not in instructions:
      # At least one variant is missing: the rule does not apply.
      return (False, instructions)
    expanded.add(candidate)
  instructions -= expanded
  instructions.add((template + '{}').format(*compressor.subst))
  return (True, instructions)
311
312
def CompressOneInstruction(instructions, compressors, split, cache):
  """Apply the first compressor that succeeds on any instruction.

  Instructions greater than split are tried first (in sorted order), then
  the ones smaller than split.

  Args:
    instructions: set of instruction strings.
    compressors: sequence of Compressor objects, tried in order.
    split: string at which the scan over the sorted instructions starts.
    cache: dict mapping an instruction to the list of
        (compressor_nr, match, compressor) triples whose regex matches it;
        filled in for instructions that no compressor could compress.
  Returns:
    (instructions, compressor_nr, instruction) after a successful
    compression, or (instructions, False, False) if nothing can be compressed.
  """
  ordered = (sorted(i for i in instructions if i > split) +
             sorted(i for i in instructions if i < split))
  for instruction in ordered:
    # Explicit membership test: a KeyError raised while compressing must not
    # be mistaken for a cache miss.
    if instruction in cache:
      for compressor_nr, match, compressor in cache[instruction]:
        result, instructions = CompressOneMatch(
            instructions, compressors, instruction, match, compressor)
        if result:
          return (instructions, compressor_nr, instruction)
      continue
    compressors_list = []
    for compressor_nr, compressor in enumerate(compressors):
      match = compressor.regex.match(instruction)
      if match:
        compressors_list.append((compressor_nr, match, compressor))
        result, instructions = CompressOneMatch(
            instructions, compressors, instruction, match, compressor)
        if result:
          return (instructions, compressor_nr, instruction)
    # Only complete lists are cached; a successful compression returns above.
    cache[instruction] = compressors_list
  return (instructions, False, False)
337
338
def Compressed(instructions, compressors, show_progress):
  """Compress instructions until no compressor applies any more.

  Args:
    instructions: set of instruction strings.
    compressors: sequence of Compressor objects.
    show_progress: callable invoked as show_progress(rule, instruction) after
        every successful compression step.
  Returns:
    The set of instructions after all possible compressions.
  """
  cache = {}
  last_compressed = ''
  while True:
    instructions, rule, last_compressed = CompressOneInstruction(
        instructions, compressors, last_compressed, cache)
    # Rule number 0 is a valid result, so compare by identity with False.
    if rule is False:
      return instructions
    show_progress(rule, last_compressed)
348
349
def Worker(prefix_and_state_index):
  """Traverse the DFA from one state and compress the collected instructions.

  Args:
    prefix_and_state_index: (prefix, state_index) pair; prefix is the byte
        sequence leading to the state, state_index is an index into
        dfa.states.
  Returns:
    Tuple (prefix, total_instructions, num_valid, output, trace) taken from
    the WorkerState used for the traversal.
  """
  # Unpacked here, not in the signature: tuple parameters are a syntax
  # error on Python 3.
  prefix, state_index = prefix_and_state_index
  worker_state = WorkerState(prefix, worker_validator)

  try:
    dfa_traversal.TraverseTree(
        dfa.states[state_index],
        final_callback=worker_state.ReceiveInstruction,
        prefix=prefix,
        anyfield=0)
    if prefix[0] != 0x0f or prefix[1] != 0x0f:  # Skip 3DNow! instructions
      worker_state.output = Compressed(set(worker_state.output),
                                       compressors,
                                       worker_state.RecordTrace)
  except Exception:
    traceback.print_exc()  # because multiprocessing imap swallows traceback
    raise

  return (
      prefix,
      worker_state.total_instructions,
      worker_state.num_valid,
      worker_state.output,
      worker_state.trace)
373
374
def ParseOptions():
  """Parse the command line.

  Returns:
    (options, xml_file): options.bitness is converted to int (32 or 64),
    xml_file is the single positional argument.
  Raises:
    SystemExit: via parser.error when --bitness is missing or when the number
        of positional arguments is not exactly one.
  """
  parser = optparse.OptionParser(usage='%prog [options] xmlfile')

  parser.add_option('--bitness',
                    choices=['32', '64'],
                    help='The subarchitecture: 32 or 64')
  parser.add_option('--validator_dll',
                    help='Path to librdfa_validator_dll')
  parser.add_option('--decoder_dll',
                    help='Path to librdfa_decoder_dll')

  options, args = parser.parse_args()
  # Without this check int(None) would fail with an unhelpful TypeError.
  if options.bitness is None:
    parser.error('specify --bitness (32 or 64)')
  options.bitness = int(options.bitness)

  if len(args) != 1:
    parser.error('specify one xml file')

  (xml_file, ) = args

  return options, xml_file
395
396
397 # Version suitable for use in regular expressions
398 REGISTERS_RE = REGISTERS.copy()
399 REGISTERS_RE['st(0)'] = [ 'st\\({}\\)'.format(N) for N in range(8) ]
400 REGISTERS_RE['st\\(0\\)'] = REGISTERS_RE['st(0)']
401
402 # Index names in 'natural' order (as defined by IA32/x86-64 ABI)
403 INDEXES = {
404 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'eiz', 'ebp', 'esi', 'edi' ],
405 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'riz', 'rbp', 'rsi', 'rdi' ],
406 'r8': [ 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15' ]
407 }
408 # Registers which can be used as base in 64-bit mode, in all incarnations
409 X86_64_BASE_REGISTERS = set([
410 '%spl', '%bpl', '%r15b', '%sp', '%bp', '%r15w',
411 '%esp', '%ebp', '%r15d', '%rsp', '%rbp', '%r15',
412 '%rip'
halyavin 2013/11/07 11:08:49 I don't understand the space-alignment algorithm i
khim 2013/11/07 17:05:31 The idea was to make sure different kinds of the s
413 ])
414
415 def AddModRMCompressor(regex, subst, subst_register, subst_memory,
416 reg=None, rm=None, writes_to='rm', start_byte=0,
halyavin 2013/11/07 11:08:49 Bad indentation.
khim 2013/11/07 17:05:31 Done.
417 index_r8=False, memory_accessed=True,
418 register_write='ignore'):
419 """Adds three compressors to the list of compressors:
420 main_compressors (register <-> register or memory instructions)
421 register_compressors (register <-> register instructions)
422 memory_compressors (register <-> memory instructions)
423
424 Args:
425 regex: regular expressions for the compressor
halyavin 2013/11/07 11:08:49 regular expression
khim 2013/11/07 17:05:31 Done.
426 subst: replacement for register <-> register or memory instructions
427 subst_register: replacement for register <-> register instructions
428 subst_memory: replacement for register <-> memory instructions
429 reg: reg operand kind (see REGISTERS array) or None
halyavin 2013/11/07 12:07:28 What None means? Does it mean that reg operand is
khim 2013/11/07 17:05:31 Done.
430 rm: rm operand kind (see REGISTERS array)
halyavin 2013/11/07 12:07:28 Can rm equal to None too?
khim 2013/11/07 17:05:31 No, it can not be None. Fixed.
431 writes_to: three-state selector
432 'reg' - instruction uses rm as source, reg as destination
433 'rm' - instruction uses reg as source, rm as destination
434 'both' - instruction writes to both reg and rm
435 start_byte: first valid byte ModR/M byte (used when reg is None)
halyavin 2013/11/07 11:08:49 "first valid ModR/M byte". Is there multiple ModR/
khim 2013/11/07 17:05:31 Replaced with opcode_bits
436 memory_accessed: True if instruction accesses memory
437 register_write: three-state selector
438 'sandbox' - instruction can be used to produce "restricted register"
439 'protect' - instruction can damage output, protect "special registers"
440 'ignore' - instruction does not affect its operands (e.g. test) or
441 is used with non-GP registers (X87, MMX, XMM, etc)
442 Internal:
443 index_r8: must be called in False position (used to create two compressors
halyavin 2013/11/07 11:08:49 must not be set by external users
halyavin 2013/11/07 11:08:49 If index_r8 is internal, we should make it the las
khim 2013/11/07 17:05:31 Done.
khim 2013/11/07 17:05:31 Done.
444 in 64-bit mode with index == %rax..%rdi or index == %r8..%r14)
445 Returns:
446 None
447 """
448
halyavin 2013/11/07 11:59:38 # Expand RR_NOTES section in regex.
khim 2013/11/07 17:05:31 Done.
449 if options.bitness == 32:
450 base = 'eax'
451 index = 'eax'
452 expanded_regex = re.sub('{RR_NOTES}', '', regex)
453 else:
454 base = 'r8' if '8' in rm or rm == 'mmalt' else 'rax'
halyavin 2013/11/07 11:59:38 Strange construction. Should base be set to 'r8' i
khim 2013/11/07 17:05:31 Done.
455 index = 'r8' if index_r8 else 'rax'
456 input = 'r8d' if index_r8 else 'eax'
457 if register_write == 'sandbox':
458 output_regs = reg if writes_to == 'reg' else rm
459 assert output_regs in ('eax', 'r8d')
460 expanded_regex = re.sub('{RR_NOTES}', '; input_rr=((?:%{'+ input +
halyavin 2013/11/07 11:59:38 It is hard to read the replacement string with thi
khim 2013/11/07 17:05:31 Done.
461 '}|any_nonspecial)); output_rr=(%{' + output_regs + '}|None)', regex)
462 else:
463 expanded_regex = re.sub('{RR_NOTES}', '; input_rr=((?:%{' + input +
464 '}|any_nonspecial)); output_rr=(None)', regex)
465 if 'RM_BYTE' in regex:
466 address_regex = '(?:0x0|(?:0x0)?\\((?:%{' + base + '})?\\))'
halyavin 2013/11/07 11:59:38 Are "()" and "0x0()" valid addresses? I think we d
khim 2013/11/07 17:05:31 Done.
467 else:
468 address_regex = (
469 '(?:0x0|(?:0x0)?\\((?:%{' + base + '})?(?:,(?:%{' + index + '}))?'
halyavin 2013/11/07 11:59:38 I would split the line into 5 to make it easier to
khim 2013/11/07 17:05:31 Done.
470 '(?:,(?:1|2|4|8))?\\))')
471
472 # We need to process either modrm or reg
473 assert rm is not None or reg is not None
474 # If both modrm and reg are given then ModR/M
halyavin 2013/11/07 12:07:28 then ModR/M what? Is this comment related to asser
khim 2013/11/07 17:05:31 Done.
475 assert reg is None or start_byte == 0
476 # Replace RM_BYTE placeholders.
477 # Handle only cases without displacement.
478 expanded_regex = re.sub('{RM_BYTE}', '[0-9a-fA-F][0-9a-fA-F]', expanded_regex)
479 expanded_regex = re.sub('{RM_BYTE/0}', '[048cC][0-7]', expanded_regex)
480 expanded_regex = re.sub('{RM_BYTE/1}', '[048cC][89a-fA-F]', expanded_regex)
481 expanded_regex = re.sub('{RM_BYTE/2}', '[159dD][0-7]', expanded_regex)
482 expanded_regex = re.sub('{RM_BYTE/3}', '[159dD][89a-fA-F]', expanded_regex)
483 expanded_regex = re.sub('{RM_BYTE/4}', '[26aAeE][0-7]', expanded_regex)
484 expanded_regex = re.sub('{RM_BYTE/5}', '[26aAeE][89a-fA-F]', expanded_regex)
485 expanded_regex = re.sub('{RM_BYTE/6}', '[37bBfF][0-7]', expanded_regex)
486 expanded_regex = re.sub('{RM_BYTE/7}', '[37bBfF][89a-fA-F]', expanded_regex)
487 register_regex = expanded_regex
488 # Replace RM_SIB_BYTES placeholders.
489 # Handle only cases without displacement.
490 expanded_regex = re.sub(
491 '{RM_SIB_BYTES}', '[0-b][4c] [0-9a-fA-F][0-9a-fA-F]', expanded_regex)
492 expanded_regex = re.sub(
493 '{RM_SIB_BYTES/0}', '[048]4 [0-9a-fA-F][0-9a-fA-F]', expanded_regex)
494 expanded_regex = re.sub(
495 '{RM_SIB_BYTES/1}', '[048][cC] [0-9a-fA-F][0-9a-fA-F]', expanded_regex)
496 expanded_regex = re.sub(
497 '{RM_SIB_BYTES/2}', '[159]4 [0-9a-fA-F][0-9a-fA-F]', expanded_regex)
498 expanded_regex = re.sub(
499 '{RM_SIB_BYTES/3}', '[159][cC] [0-9a-fA-F][0-9a-fA-F]', expanded_regex)
500 expanded_regex = re.sub(
501 '{RM_SIB_BYTES/4}', '[26aA]4 [0-9a-fA-F][0-9a-fA-F]', expanded_regex)
502 expanded_regex = re.sub(
503 '{RM_SIB_BYTES/5}', '[26aA][cC] [0-9a-fA-F][0-9a-fA-F]', expanded_regex)
504 expanded_regex = re.sub(
505 '{RM_SIB_BYTES/6}', '[37bB]4 [0-9a-fA-F][0-9a-fA-F]', expanded_regex)
506 expanded_regex = re.sub(
507 '{RM_SIB_BYTES/7}', '[37bB][cC] [0-9a-fA-F][0-9a-fA-F]', expanded_regex)
508 register_regex = re.sub(
509 '{RM_SIB_BYTES}', '[c-fC-F][0-9a-fA-F]', register_regex)
510 register_regex = re.sub('{RM_SIB_BYTES/0}', '[cC][0-7]', register_regex)
511 register_regex = re.sub('{RM_SIB_BYTES/1}', '[cC][8-9a-fA-F]', register_regex)
512 register_regex = re.sub('{RM_SIB_BYTES/2}', '[dD][0-7]', register_regex)
513 register_regex = re.sub('{RM_SIB_BYTES/3}', '[dD][8-9a-fA-F]', register_regex)
514 register_regex = re.sub('{RM_SIB_BYTES/4}', '[eE][0-7]', register_regex)
515 register_regex = re.sub('{RM_SIB_BYTES/5}', '[eE][8-9a-fA-F]', register_regex)
516 register_regex = re.sub('{RM_SIB_BYTES/6}', '[fF][0-7]', register_regex)
517 register_regex = re.sub('{RM_SIB_BYTES/7}', '[fF][8-9a-fA-F]', register_regex)
518 # Replace register placeholders
519 for register, value in REGISTERS_RE.iteritems():
520 expanded_regex = re.sub('{%' + register + '}',
521 '(?:%' + '|%'.join(value) + '|' + address_regex +')', expanded_regex)
522 register_regex = re.sub('{%' + register + '}',
523 '(?:%' + '|%'.join(value) +')', register_regex)
524 for register, value in REGISTERS_RE.iteritems():
525 expanded_regex = re.sub('{' + register + '}',
526 '(?:' + '|'.join(value) + ')', expanded_regex)
527 register_regex = re.sub('{' + register + '}',
528 '(?:' + '|'.join(value) + ')', register_regex)
529 # Add input_rr and output_rr fields if we are dealing with 64-bit case
530 if options.bitness == 32:
531 subst_fixed = subst
532 subst_register_fixed = subst_register
533 subst_memory_fixed = subst_memory
534 else:
535 if memory_accessed:
536 input_note = '[%eax..%edi]' if index == 'rax' else '[%r8d..%r15d]'
537 else:
538 input_note = 'any_nonspecial'
539 if register_write == 'sandbox':
540 output_note = '[%eax..%edi]' if output_regs == 'eax' else '[%r8d..%r14d]'
541 else:
542 output_note = None
543 subst_fixed = subst[0:-1] + (input_note, output_note) + subst[-1:]
544 subst_register_fixed = subst_register[0:-1] + (
545 'any_nonspecial', output_note) + subst_register[-1:]
546 subst_memory_fixed = subst_memory[0:-1] + (input_note,
547 output_note) + subst_memory[-1:]
548 # If we already have replacements in cache then we just reuse them.
549 output_key = (
550 reg, rm, writes_to, start_byte, index_r8, memory_accessed, register_write)
551 if output_key in AddModRMCompressor.replacements:
552 replacements = AddModRMCompressor.replacements[output_key]
553 main_compressors.append(
554 Compressor(expanded_regex, subst_fixed, replacements[0]))
555 register_compressors.append(
556 Compressor(register_regex, subst_register_fixed, replacements[1]))
557 memory_compressors.append(
558 Compressor(expanded_regex, subst_memory_fixed, replacements[2]))
559 if options.bitness == 64 and not index_r8:
560 AddModRMCompressor(
561 regex, subst, subst_register, subst_memory,
562 reg=reg, rm=rm, writes_to=writes_to, start_byte=start_byte,
563 index_r8=True, memory_accessed=memory_accessed,
564 register_write=register_write)
565 return
566 # It can be memory only instruction, register only one or both
567 main_compressor = Compressor(expanded_regex, subst_fixed)
568 register_compressor = Compressor(register_regex, subst_register_fixed)
569 memory_compressor = Compressor(expanded_regex, subst_memory_fixed)
570
571 # Generation time! Use reversed ranges to check unlikely cases first.
572 if reg is None:
573 # reg field is used as opcode extension
574 byte_range = [byte
575 for byte in range(0xff, -1, -1)
576 if byte & 0x38 == start_byte]
577 else:
578 byte_range = range(0xff, -1, -1)
579
580 for modrm in byte_range:
581 # Parse ModRM
582 mod_field = (modrm & 0xc0) >> 6
583 reg_field = (modrm & 0x38) >> 3
584 rm_field = (modrm & 0x07)
585 if reg is not None:
586 reg_text = '%' + REGISTERS[reg][reg_field]
587 # If mod == 3 then it's register-to-register instruction
588 if mod_field == 3:
589 bytes = '{:02x}'.format(modrm)
590 rm_text = '%' + REGISTERS[rm][rm_field]
591 replacement = [bytes]
592 if reg is None:
593 replacement.append(rm_text)
594 else:
595 replacement.append(rm_text if writes_to == 'reg' else reg_text)
596 replacement.append(reg_text if writes_to == 'reg' else rm_text)
597 if options.bitness == 64:
598 replacement.append('any_nonspecial')
599 output = reg_text if writes_to == 'reg' else rm_text
600 replacement.append(output if register_write == 'sandbox' else None)
601 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:
602 continue
603 if register_write == 'sandbox' and output == '%r15d':
604 continue
605 if writes_to == 'both' and reg_text in X86_64_BASE_REGISTERS:
606 continue
607 replacement = tuple(replacement)
608 main_compressor.replacements.append(replacement)
609 register_compressor.replacements.append(replacement)
610 # If mod != 3 then it's register-to-memory instruction
611 else:
612 # If RM field != %rsp then there are no index
613 if rm_field != validator.REG_RSP:
614 base_text = '%' + REGISTERS[base][rm_field]
615 # If RM field == %rbp and MOD field is zero then it's absolute address
616 if mod_field == 0 and rm_field == validator.REG_RBP:
617 bytes = '{:02x} 00 00 00 00'.format(modrm)
618 rm_text = '0x0' if options.bitness == 32 else '0x0(%rip)'
619 base_text = '%rip'
620 # Memory access with just a base register
621 elif mod_field == 0:
622 bytes = '{:02x}'.format(modrm)
623 rm_text = '({})'.format(base_text)
624 # Memory access with base and 8bit offset
625 elif mod_field == 1:
626 bytes = '{:02x} 00'.format(modrm)
627 rm_text = '0x0({})'.format(base_text)
628 # Memory access with base and 32bit offset
629 else: # mod_field == 2
630 bytes = '{:02x} 00 00 00 00'.format(modrm)
631 rm_text = '0x0({})'.format(base_text)
632 replacement = [bytes]
633 if reg is None:
634 replacement.append(rm_text)
635 else:
636 replacement.append(rm_text if writes_to == 'reg' else reg_text)
637 replacement.append(reg_text if writes_to == 'reg' else rm_text)
638 if options.bitness == 64:
639 replacement.append('any_nonspecial')
640 output = reg_text if writes_to == 'reg' else None
641 replacement.append(output if register_write == 'sandbox' else None)
642 if memory_accessed and base_text not in X86_64_BASE_REGISTERS:
643 continue
644 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:
645 continue
646 if register_write == 'sandbox' and output == '%r15d':
647 continue
648 if writes_to == 'both' and reg_text in X86_64_BASE_REGISTERS:
649 continue
650 replacement = tuple(replacement)
651 main_compressor.replacements.append(replacement)
652 memory_compressor.replacements.append(replacement)
653 else:
654 # If RM field == %rsp then we have SIB byte
655 for sib in xrange(0x100):
656 scale_field = (sib & 0xc0) >> 6
657 index_field = (sib & 0x38) >> 3
658 base_field = (sib & 0x07)
659 index_text = '%' + INDEXES[index][index_field]
660 base_text = '%' + REGISTERS[base][base_field]
661 scale_text = pow(2, scale_field)
662 # If BASE is %rbp and MOD == 0 then index with 32bit offset is used
663 if mod_field == 0 and base_field == validator.REG_RBP:
664 bytes = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)
665 if (options.bitness == 32 or
666 index_field != validator.REG_RSP or
667 scale_field != 0 or index[0:2] == 'r8'):
668 rm_text = '0x0(,{},{})'.format(index_text, scale_text)
669 else:
670 rm_text = '0x0'
671 base_text = ''
672 # Memory access with base and index (no offset)
673 elif mod_field == 0:
674 bytes = '{:02x} {:02x}'.format(modrm, sib)
675 rm_text = '({},{},{})'.format(base_text, index_text, scale_text)
676 # Memory access with base, index and 8bit offset
677 elif mod_field == 1:
678 bytes = '{:02x} {:02x} 00'.format(modrm, sib)
679 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)
680 # Memory access with base, index and 32bit offset
681 elif mod_field == 2:
682 bytes = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)
683 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)
684 # Pretty-printing of access via %rsp
685 if (scale_field == 0 and index != 'r8' and
686 base_field == validator.REG_RSP and
687 index_field == validator.REG_RSP):
688 #index_text = 'any_nonspecial'
689 rm_text = ('0x0({})' if mod_field else '({})').format(base_text)
690 if index_text == "%riz":
691 index_text = 'any_nonspecial'
692 replacement = [bytes]
693 if reg is None:
694 replacement.append(rm_text)
695 else:
696 replacement.append(rm_text if writes_to == 'reg' else reg_text)
697 replacement.append(reg_text if writes_to == 'reg' else rm_text)
698 if options.bitness == 64:
699 if not memory_accessed or index_text == 'any_nonspecial':
700 replacement.append('any_nonspecial')
701 else:
702 replacement.append('%' + REGISTERS[input][index_field])
703 # Currently xchg can not be used for sandboxing
704 output = reg_text if writes_to == 'reg' else None
705 replacement.append(output if register_write == 'sandbox' else None)
706 if memory_accessed:
707 if base_text not in X86_64_BASE_REGISTERS: continue
708 if index_text in X86_64_BASE_REGISTERS - set(['%r15']): continue
709 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:
710 continue
711 if register_write == 'sandbox' and output == '%r15d':
712 continue
713 if (writes_to == 'both' and
714 reg_text in X86_64_BASE_REGISTERS): continue
715 replacement = tuple(replacement)
716 main_compressor.replacements.append(replacement)
717 memory_compressor.replacements.append(replacement)
718
719 assert len(main_compressor.replacements) > 1
720 assert len(register_compressor.replacements) > 1
721 assert len(memory_compressor.replacements) > 1
722 main_compressor.replacements = tuple(main_compressor.replacements)
723 register_compressor.replacements = tuple(register_compressor.replacements)
724 memory_compressor.replacements = tuple(memory_compressor.replacements)
725 main_compressors.append(main_compressor)
726 register_compressors.append(register_compressor)
727 memory_compressors.append(memory_compressor)
728 AddModRMCompressor.replacements[output_key] = (
729 main_compressor.replacements,
730 register_compressor.replacements,
731 memory_compressor.replacements
732 )
733 if options.bitness == 64 and not index_r8:
734 AddModRMCompressor(
735 regex, subst, subst_register, subst_memory,
736 reg=reg, rm=rm, writes_to=writes_to, start_byte=start_byte,
737 index_r8=True, memory_accessed=memory_accessed,
738 register_write=register_write)
739 # Replacements cache.
740 AddModRMCompressor.replacements = {}
741
742
def PrepareCompressors():
  """Build every compressor and store the ordered list in `compressors`.

  Reads `options.bitness` (32 or 64) to decide which register kinds and
  which extra 64-bit-only forms are generated.

  Side effects (all on module-level names):
    * resets `main_compressors`, `register_compressors` and
      `memory_compressors`, which are then filled by the
      AddModRMCompressor() calls made here;
    * assigns `compressors` as main + memory + register + "opcode register"
      compressors, followed by the special-case compressors appended below.

  The order of the resulting list is significant: "larger" compressors are
  placed first so they are tried before "smaller" ones.
  """
  global compressors
  global main_compressors
  global register_compressors
  global memory_compressors

  # "Larger" compressors should be tried first, then "smaller" ones.
  main_compressors = []
  register_compressors = []
  memory_compressors = []
  extra_compressors = []

  if options.bitness == 32:
    register_kinds = ('al', 'ax', 'eax', 'mm0', 'xmm0', 'ymm0')
    register_kind_pairs = (
        ('al', 'al'),
        ('ax', 'al'),
        ('ax', 'ax'),
        ('eax', 'al'),
        ('eax', 'ax'),
        ('eax', 'eax'),
        ('eax', 'mm0'),
        ('mm0', 'eax'),
        ('eax', 'xmm0'),
        ('xmm0', 'eax'),
        ('mm0', 'mm0'),
        ('mm0', 'xmm0'),
        ('xmm0', 'mm0'),
        ('xmm0', 'xmm0'),
        ('xmm0', 'ymm0'),
        ('ymm0', 'xmm0'),
        ('ymm0', 'ymm0')
    )
  else:
    register_kinds = ('al', 'spl', 'ax', 'eax', 'rax', 'mm0', 'xmm0', 'ymm0',
                      'r8b', 'r8w', 'r8d', 'r8', 'mmalt', 'xmm8', 'ymm8')
    # Each row lists the (reg, rm) kinds for one operand-size combination in
    # its "low"/"high" register variants.
    register_kind_pairs = (
        ('al', 'al'),
        ('spl', 'spl'), ('spl', 'r8b'), ('r8b', 'spl'), ('r8b', 'r8b'),
        ('ax', 'al'),
        ('ax', 'spl'), ('ax', 'r8b'), ('r8w', 'spl'), ('r8w', 'r8b'),
        ('ax', 'ax'), ('ax', 'r8w'), ('r8w', 'ax'), ('r8w', 'r8w'),
        ('eax', 'al'),
        ('eax', 'spl'), ('eax', 'r8b'), ('r8d', 'spl'), ('r8d', 'r8b'),
        ('eax', 'ax'), ('eax', 'r8w'), ('r8d', 'ax'), ('r8d', 'r8w'),
        ('eax', 'eax'), ('eax', 'r8d'), ('r8d', 'eax'), ('r8d', 'r8d'),
        ('rax', 'al'),
        ('rax', 'spl'), ('rax', 'r8b'), ('r8', 'spl'), ('r8', 'r8b'),
        ('rax', 'ax'), ('rax', 'r8w'), ('r8', 'ax'), ('r8', 'r8w'),
        ('rax', 'eax'), ('rax', 'r8d'), ('r8', 'eax'), ('r8', 'r8d'),
        ('rax', 'rax'), ('rax', 'r8'), ('r8', 'rax'), ('r8', 'r8'),
        # The last pair of this row used to be a second ('eax', 'mmalt'),
        # which left ('r8d', 'mmalt') without any ModR/M compressor.
        ('eax', 'mm0'), ('eax', 'mmalt'), ('r8d', 'mm0'), ('r8d', 'mmalt'),
        ('rax', 'mm0'), ('rax', 'mmalt'), ('r8', 'mm0'), ('r8', 'mmalt'),
        ('mm0', 'eax'), ('mmalt', 'eax'), ('mm0', 'r8d'), ('mmalt', 'r8d'),
        ('mm0', 'rax'), ('mmalt', 'rax'), ('mm0', 'r8'), ('mmalt', 'r8'),
        ('eax', 'xmm0'), ('eax', 'xmm8'), ('r8d', 'xmm0'), ('r8d', 'xmm8'),
        ('rax', 'xmm0'), ('rax', 'xmm8'), ('r8', 'xmm0'), ('r8', 'xmm8'),
        ('xmm0', 'eax'), ('xmm0', 'r8d'), ('xmm8', 'eax'), ('xmm8', 'r8d'),
        ('xmm0', 'rax'), ('xmm0', 'r8'), ('xmm8', 'rax'), ('xmm8', 'r8'),
        ('mm0', 'mm0'), ('mmalt', 'mm0'), ('mm0', 'mmalt'), ('mmalt', 'mmalt'),
        ('mm0', 'xmm0'), ('mmalt', 'xmm0'), ('mm0', 'xmm8'), ('mmalt', 'xmm8'),
        ('xmm0', 'mm0'), ('xmm8', 'mm0'), ('xmm0', 'mmalt'), ('xmm8', 'mmalt'),
        ('xmm0', 'xmm0'), ('xmm0', 'xmm8'), ('xmm8', 'xmm0'), ('xmm8', 'xmm8'),
        ('xmm0', 'ymm0'), ('xmm0', 'ymm8'), ('xmm8', 'ymm0'), ('xmm8', 'ymm8'),
        ('ymm0', 'xmm0'), ('ymm0', 'xmm8'), ('ymm8', 'xmm0'), ('ymm8', 'xmm8'),
        ('ymm0', 'ymm0'), ('ymm0', 'ymm8'), ('ymm8', 'ymm0'), ('ymm8', 'ymm8')
    )

  # Largest compressors: both reg and rm fields are used
  for reg, rm in register_kind_pairs:
    # For the 'r8*' kinds the range shown to the user stops one register
    # short of the end of the table (index -2 instead of -1).
    start_reg = REGISTERS[reg][0]
    end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]
    start_rm = REGISTERS[rm][0]
    end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]
    # First instruction uses just ModR/M byte in 32bit mode but both
    # ModR/M in 64bit mode. Both approaches will work in both cases,
    # this is just an optimization to avoid needless work.
    if options.bitness == 32:
      bytes = '({RM_BYTE})'
    else:
      bytes = '({RM_SIB_BYTES})'
    for extra_bytes in ('', ' 00', ' 00 00', ' 00 00 00 00'):
      # Lea in 64 bit mode is truly unique instruction for now
      if options.bitness == 64 and reg in ('eax', 'r8d', 'rax', 'r8'):
        AddModRMCompressor(
            '.*?' + bytes + extra_bytes +
            ' (?:lock )?\\w* (?:\\$0x0,|\\$0x0,\\$0x0,|%cl,|%xmm0,)?'
            '({%' + rm + '}),(%{' + reg + '}).*{RR_NOTES}()',
            ('XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),
             '[%{}..%{}]'.format(start_reg, end_reg), ' # lea'),
            ('XX', '[%{}..%{}]'.format(start_rm, end_rm),
             '[%{}..%{}]'.format(start_reg, end_reg), ' # rm to reg; lea'),
            ('XX', '[memory]', '[%{}..%{}]'.format(start_reg, end_reg),
             ' # lea'),
            reg=reg, rm=rm, writes_to='reg', memory_accessed=False,
            register_write='sandbox' if reg in ('eax', 'r8d') else 'protect')
      # Normal instructions with two operands (rm to reg).
      AddModRMCompressor(
          '.*?' + bytes + extra_bytes +
          ' (?:lock )?\\w* (?:\\$0x0,|\\$0x0,\\$0x0,|%cl,|%xmm0,)?'
          '({%' + rm + '}),(%{' + reg + '}).*{RR_NOTES}()',
          ('XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),
           '[%{}..%{}]'.format(start_reg, end_reg), ''),
          ('XX', '[%{}..%{}]'.format(start_rm, end_rm),
           '[%{}..%{}]'.format(start_reg, end_reg), ' # rm to reg'),
          ('XX', '[memory]', '[%{}..%{}]'.format(start_reg, end_reg), ''),
          reg=reg, rm=rm, writes_to='reg')
      # Normal instructions with two operands (reg to rm).
      AddModRMCompressor(
          '.*?' + bytes + extra_bytes +
          ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'
          '(%{' + reg + '}),({%' + rm + '}).*{RR_NOTES}()',
          ('XX', '[%{}..%{}]'.format(start_reg, end_reg),
           '[%{}..%{} or memory]'.format(start_rm, end_rm), ''),
          ('XX', '[%{}..%{}]'.format(start_reg, end_reg),
           '[%{}..%{}]'.format(start_rm, end_rm), ' # reg to rm'),
          ('XX', '[%{}..%{}]'.format(start_reg, end_reg), '[memory]', ''),
          reg=reg, rm=rm, writes_to='rm')
      # There are few more forms in 64 bit case (rm to reg).
      if options.bitness == 64 and reg in ('eax', 'r8d'):
        # Zero-extending version.
        AddModRMCompressor(
            '.*?' + bytes + extra_bytes +
            ' (?:lock )?\\w* (?:\\$0x0,|\\$0x0,\\$0x0,|%cl,|%xmm0,)?'
            '({%' + rm + '}),(%{' + reg + '}).*{RR_NOTES}()',
            ('XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),
             '[%{}..%{}]'.format(start_reg, end_reg), ''),
            ('XX', '[%{}..%{}]'.format(start_rm, end_rm),
             '[%{}..%{}]'.format(start_reg, end_reg), ' # rm to reg'),
            ('XX', '[memory]', '[%{}..%{}]'.format(start_reg, end_reg), ''),
            reg=reg, rm=rm, writes_to='reg', register_write='sandbox')
      # More forms in 64 bit case (reg to rm).
      if options.bitness == 64 and rm in ('eax', 'r8d'):
        # Zero-extending version.
        AddModRMCompressor(
            '.*?' + bytes + extra_bytes +
            ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'
            '(%{' + reg + '}),({%' + rm + '}).*{RR_NOTES}()',
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg),
             '[%{}..%{} or memory]'.format(start_rm, end_rm), ''),
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg),
             '[%{}..%{}]'.format(start_rm, end_rm), ' # reg to rm'),
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg), '[memory]', ''),
            reg=reg, rm=rm, writes_to='rm', register_write='sandbox')
        # Zero-extending xchg/xadd.
        AddModRMCompressor(
            '.*?' + bytes + extra_bytes +
            ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'
            '(%{' + reg + '}),({%' + rm + '}).*{RR_NOTES}()',
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg),
             '[%{}..%{} or memory]'.format(start_rm, end_rm),
             ' # write to both'),
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg),
             '[%{}..%{}]'.format(start_rm, end_rm),
             ' # reg to rm; write to both'),
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg), '[memory]',
             ' # write to both'),
            reg=reg, rm=rm, writes_to='both', register_write='sandbox')
      # Still more forms for 64 bit case (rm to reg).
      if options.bitness == 64 and reg in ('al', 'spl', 'ax', 'eax', 'rax',
                                          'r8b', 'r8w', 'r8d', 'r8'):
        # Dangerous instructions (rm to reg).
        AddModRMCompressor(
            '.*?' + bytes + extra_bytes +
            ' (?:lock )?\\w* (?:\\$0x0,|\\$0x0,\\$0x0,|%cl,|%xmm0,)?'
            '({%' + rm + '}),(%{' + reg + '}).*{RR_NOTES}()',
            ('XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),
             '[%{}..%{}]'.format(start_reg, end_reg), ''),
            ('XX', '[%{}..%{}]'.format(start_rm, end_rm),
             '[%{}..%{}]'.format(start_reg, end_reg), ' # rm to reg'),
            ('XX', '[memory]', '[%{}..%{}]'.format(start_reg, end_reg), ''),
            reg=reg, rm=rm, writes_to='reg', register_write='protect')
      # Still more forms for 64 bit case (reg to rm).
      if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',
                                         'r8b', 'r8w', 'r8d', 'r8'):
        # Dangerous instructions (reg to rm).
        AddModRMCompressor(
            '.*?' + bytes + extra_bytes +
            ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'
            '(%{' + reg + '}),({%' + rm + '}).*{RR_NOTES}()',
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg),
             '[%{}..%{} or memory]'.format(start_rm, end_rm), ''),
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg),
             '[%{}..%{}]'.format(start_rm, end_rm), ' # reg to rm'),
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg), '[memory]', ''),
            reg=reg, rm=rm, writes_to='rm', register_write='protect')
        # Dangerous xchg/xadd.
        AddModRMCompressor(
            '.*?' + bytes + extra_bytes +
            ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'
            '(%{' + reg + '}),({%' + rm + '}).*{RR_NOTES}()',
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg),
             '[%{}..%{} or memory]'.format(start_rm, end_rm),
             ' # write to both'),
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg),
             '[%{}..%{}]'.format(start_rm, end_rm),
             ' # reg to rm; write to both'),
            ('XX', '[%{}..%{}]'.format(start_reg, end_reg), '[memory]',
             ' # write to both'),
            reg=reg, rm=rm, writes_to='both', register_write='protect')
    # 3DNow! instructions. Additional byte is opcode extension.
    # NOTE(review): the register-only note below says 'reg to rm' although
    # the operand order matched here is rm,reg and writes_to is 'reg'; kept
    # as is because changing it alters the generated text - please confirm.
    AddModRMCompressor(
        '.*?' + bytes + ' [0-9a-fA-F][0-9a-fA-F] \\w* '
        '({%' + rm + '}),(%{' + reg + '}).*{RR_NOTES}()',
        ('XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),
         '[%{}..%{}]'.format(start_reg, end_reg), ''),
        ('XX', '[%{}..%{}]'.format(start_rm, end_rm),
         '[%{}..%{}]'.format(start_reg, end_reg), ' # reg to rm'),
        ('XX', '[memory]', '[%{}..%{}]'.format(start_reg, end_reg), ''),
        reg=reg, rm=rm, writes_to='reg')

  # Smaller compressors: only rm field is used.
  for rm in register_kinds:
    start_rm = REGISTERS[rm][0]
    end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]
    for opcode in xrange(8):
      XX_byte_mark = 'XX/' + str(opcode)
      # The opcode extension is passed on as the first ModR/M byte value.
      start_byte = opcode * 8
      # First instruction uses just ModR/M byte in 32bit mode but both
      # ModR/M in 64bit mode. Both approaches will work in both cases,
      # this is just an optimization to avoid needless work.
      if options.bitness == 32:
        bytes = '({RM_BYTE/' + str(opcode) + '})'
      else:
        bytes = '({RM_SIB_BYTES/' + str(opcode) + '})'
      if options.bitness == 64:
        # No memory access (e.g. prefetch)
        AddModRMCompressor(
            '.*?' + bytes + ' ?\\w* (?:\\$0x0,|%cl,)?({%' + rm + '}).*'
            '{RR_NOTES}()',
            (XX_byte_mark, '[%{}..%{} or memory]'.format(start_rm, end_rm), ''),
            (XX_byte_mark, '[%{}..%{}]'.format(start_rm, end_rm), ''),
            (XX_byte_mark, '[memory]', ''),
            reg=None, rm=rm, memory_accessed=False, start_byte=start_byte)
      for extra_bytes in ('', ' 00', ' 00 00', ' 00 00 00 00'):
        # Part of opcode is encoded in ModR/M
        AddModRMCompressor(
            '.*?' + bytes + extra_bytes +
            ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'
            '({%' + rm + '}).*{RR_NOTES}()',
            (XX_byte_mark, '[%{}..%{} or memory]'.format(start_rm, end_rm), ''),
            (XX_byte_mark, '[%{}..%{}]'.format(start_rm, end_rm), ''),
            (XX_byte_mark, '[memory]', ''),
            reg=None, rm=rm, start_byte=start_byte)
        # More forms in 64 bit case.
        if options.bitness == 64 and rm in ('eax', 'r8d'):
          # Zero-extending version.
          AddModRMCompressor(
              '.*?' + bytes + extra_bytes +
              ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'
              '({%' + rm + '}).*{RR_NOTES}()',
              (XX_byte_mark, '[%{}..%{} or memory]'.format(start_rm, end_rm),
               ''),
              (XX_byte_mark, '[%{}..%{}]'.format(start_rm, end_rm), ''),
              (XX_byte_mark, '[memory]', ''),
              reg=None, rm=rm, start_byte=start_byte, register_write='sandbox')
        # Still more forms for 64 bit case (reg to rm).
        if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',
                                           'r8b', 'r8w', 'r8d', 'r8'):
          # Dangerous instructions.
          AddModRMCompressor(
              '.*?' + bytes + extra_bytes +
              ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'
              '({%' + rm + '}).*{RR_NOTES}()',
              (XX_byte_mark, '[%{}..%{} or memory]'.format(start_rm, end_rm),
               ''),
              (XX_byte_mark, '[%{}..%{}]'.format(start_rm, end_rm), ''),
              (XX_byte_mark, '[memory]', ''),
              reg=None, rm=rm, start_byte=start_byte, register_write='protect')

  # Even smaller compressors: only low 3 bits of opcode are used.
  for reg in register_kinds + ('st(0)',):
    start_reg = REGISTERS[reg][0]
    end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]
    # NOTE(review): 'opcode' is not referenced inside this loop, so every
    # compressor below is appended eight times; kept to preserve the
    # existing compressor list - confirm whether the loop can be dropped.
    for opcode in xrange(8):
      for extra_bytes in ('', ' 00', ' 00 00', ' 00 00 00 00'):
        # text1/text2 describe the low/high opcode nibble ranges; 'nibble'
        # lists the register numbers that have to be present.
        for text1, text2, nibble in (
            ('[0..7]', '[8..f]', xrange(8)),
            ('[012367]', '[89abef]', (0, 1, 2, 3, 6, 7)),
            ('[0..6]', '[8..e]', xrange(7))
        ):
          # Operand is encoded in opcode
          extra_compressors.append(Compressor(
              '.*?[0-9a-fA-F]([0-7])' + extra_bytes +
              ' \\w* (?:\\$0x0,|%ax,|%st,)?'
              '(%(?:' + '|'.join(REGISTERS_RE[reg]) + ')).*()',
              (text1, '[%{}..%{}]'.format(start_reg, end_reg), ''),
              tuple(('{:x}'.format(n), '%' + REGISTERS[reg][n])
                    for n in nibble)))
          extra_compressors.append(Compressor(
              '.*?[0-9a-fA-F]([89a-fA-F])' + extra_bytes +
              ' \\w* (?:\\$0x0,|%ax,|%st,)?'
              '(%(?:' + '|'.join(REGISTERS_RE[reg]) + ')).*()',
              (text2, '[%{}..%{}]'.format(start_reg, end_reg), ''),
              tuple(('{:x}'.format(n + 8), '%' + REGISTERS[reg][n])
                    for n in nibble)))
          # Another version for 64 bit case
          if options.bitness == 64 and reg in ('eax', 'r8d'):
            # Operand is encoded in opcode and output
            extra_compressors.append(Compressor(
                '.*?[0-9a-fA-F]([0-7])' + extra_bytes +
                ' \\w* (?:\\$0x0,|%ax,|%st,)?'
                '(%(?:' + '|'.join(REGISTERS_RE[reg]) + ')).*'
                'output_rr=(%(?:'+ '|'.join(REGISTERS_RE[reg]) + ')).*()',
                tuple([text1] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +
                      ['']),
                tuple(['{:x}'.format(n)] + ['%' + REGISTERS[reg][n]] * 2
                      for n in nibble)))
            extra_compressors.append(Compressor(
                '.*?[0-9a-fA-F]([89a-fA-F])' + extra_bytes +
                ' \\w* (?:\\$0x0,|%ax,|%st,)?'
                '(%(?:' + '|'.join(REGISTERS_RE[reg]) + ')).*'
                'output_rr=(%(?:'+ '|'.join(REGISTERS_RE[reg]) + ')).*()',
                tuple([text2] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +
                      ['']),
                tuple(['{:x}'.format(n + 8)] + ['%' + REGISTERS[reg][n]] * 2
                      for n in nibble)))
  compressors = (main_compressors + memory_compressors + register_compressors +
                 extra_compressors)

  # Special compressors: will handle some cosmetic issues.
  #
  # SETxx ignores reg field and thus are described as many separate instructions
  compressors.append(Compressor(
      '.*0f 9[0-9a-fA-F] XX(/[0-7]) set.*()', ('', ''),
      [('/' + str(i), ) for i in range(8)]))
  # BSWAP is described with opcode "0f c8+r", not "0f /1" in manual
  if options.bitness == 32:
    compressors.append(Compressor(
        '.*(XX/1) bswap.*ax.*()', ('c[8..f]', ''), [('XX/1', )]))
  else:
    compressors.append(Compressor(
        '.*(XX/1) bswap.*ax.*()', ('c[89abef]', ''), [('XX/1', )]))
    compressors.append(Compressor(
        '.*(XX/1) bswap.*r8.*()', ('c[8..e]', ''), [('XX/1', )]))
  # Add mark '# write to both' to certain versions of CMPXCHG, XADD, and XCHG
  if options.bitness == 64:
    compressors.append(Compressor(
        '.* (?:cmpxchg|xadd|xchg).*%al\\.\\.%bh[^#]*()$',
        (' # write to both', ), ((), )))
  # "and $0xe0,[%eax..%edi]" is treated specially which means that we list all
  # versions of and "[$0x1..$0xff],[%eax..%edi]" separately here.
  # Without this rule these ands comprise 2/3 of the whole output!
  if options.bitness == 32:
    compressors.append(Compressor(
        '.*83 (e0 01 and \\$0x1,%eax)()',
        ('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', ' # special and'),
        [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS['eax'][r]), )
         for i in range(0x01, 0x100) for r in range(8)] +
        [('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', )]))
  else:
    for reg in ('eax', 'r8d'):
      start_reg = REGISTERS[reg][0]
      end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]
      for index_reg in ('eax', 'r8d'):
        start_index = REGISTERS[index_reg][0]
        end_index = REGISTERS[index_reg][-1]
        compressors.append(Compressor(
            '.*83 (e0 01 and \\$0x1,%' + reg + ').*'
            'input_rr=(any_nonspecial); output_rr=(%' + reg + ')()',
            ('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,
                 end_reg), '[%{}..%{}]'.format(start_index, end_index),
             '[%{}..%{}]'.format(start_reg, end_reg),
             ' # special and'),
            # 'r8d' kind covers one register less than 'eax' kind here.
            [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS[reg][r]),
              'any_nonspecial', '%' + REGISTERS[reg][r])
             for i in range(0x01, 0x100) for r in range(7 + (reg == 'eax'))] +
            [('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,
                  end_reg), '[%{}..%{}]'.format(start_index, end_index),
              '[%{}..%{}]'.format(start_reg, end_reg))]))

  # "and $e0" and similar are used to align %rsp. All negative values are
  # accepted by validator and there are 127 of these.
  # Consolidate them into one line.
  if options.bitness == 64:
    compressors.append(Compressor(
        '.*(?:81|83) (?:e4|e5) (80) (?:00 00 00 |) and \\$0x(80),%r[bs]p.*()',
        ('[80..ff]', '[80..ff]', ' # alignment and'),
        [('{:02x}'.format(i), '{:02x}'.format(i)) for i in range(0x80, 0x100)]))

  # Merge memory and non-memory access
  if options.bitness == 32:
    letters_and_registers = (('b', 'al', ''), ('w', 'ax', ''), ('l', 'eax', ''))
  else:
    letters_and_registers = (
        ('b', 'al', 'eax'), ('b', 'spl', 'eax'), ('b', 'r8b', 'r8d'),
        ('w', 'ax', 'eax'), ('w', 'r8w', 'r8d'),
        ('l', 'eax', 'eax'), ('l', 'r8d', 'r8d'),
        ('q', 'rax', 'eax'), ('q', 'r8', 'r8d')
    )
  for letter, reg, out_reg in letters_and_registers:
    start_reg = REGISTERS[reg][0]
    end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]
    all_regs = '[%{}..%{}]'.format(start_reg, end_reg)
    regs_mark = '[%{}..%{} or memory]'.format(start_reg, end_reg)
    if options.bitness == 64:
      start_out = REGISTERS[out_reg][0]
      end_out = REGISTERS[out_reg][-1 if out_reg[0:2] != 'r8' else -2]
      out_regs = '[%{}..%{}]'.format(start_out, end_out)
    for notes in ('', ' # rm to reg', ' # reg to rm'):
      compressors.append(Compressor(
          '.* \\w*(' + letter + ') .*(\\[memory]).*()()',
          ('[{}]?'.format(letter), regs_mark, '', ''),
          ((letter, '[memory]', ''), ('', all_regs, notes))))
      if options.bitness == 64:
        for index_reg in ('eax', 'r8d'):
          start_index = REGISTERS[index_reg][0]
          end_index = REGISTERS[index_reg][-1]
          index_regs = '[%{}..%{}]'.format(start_index, end_index)
          # Which of the two merged lines (memory, register) carries the
          # output restricted register: register one, memory one, or none.
          for output_rrs in ((None, out_regs), (out_regs, None), (None, None)):
            compressors.append(Compressor(
                '.* \\w*(' + letter + ') .*(\\[memory]).*; '
                'input_rr=(\\[%[a-z0-9]*..%[a-z0-9]*\\]); '
                'output_rr=(\\[%[a-z0-9]*..%[a-z0-9]*\\]|None)()()',
                ('[{}]?'.format(letter), regs_mark, index_regs,
                 output_rrs[0] if output_rrs[0] is not None else output_rrs[1],
                 '', ''),
                ((letter, '[memory]', index_regs, output_rrs[0], ''),
                 ('', all_regs, 'any_nonspecial', output_rrs[1], notes))))

  # REX compressors
  if options.bitness == 64:
    # First pretty complex set of compressors to combine versions of REX with
    # three lowest bits in different states.
    # NOTE(review): ('eax', 'mm0'), ('mm0', 'eax'), ('rax', 'mm0') and
    # ('mm0', 'rax') each appear twice below; kept as is so the compressor
    # list is unchanged - confirm whether the repeats are intentional.
    register_kind_pairs = (
        (None, None),
        ('al', 'al'), ('al', None), (None, 'al'),
        ('ax', 'al'), ('al', 'ax'),
        ('ax', 'ax'), ('ax', None), (None, 'ax'),
        ('eax', 'al'), ('al', 'eax'),
        ('eax', 'ax'), ('ax', 'eax'),
        ('eax', 'eax'), ('eax', None), (None, 'eax'),
        ('rax', 'al'), ('al', 'rax'),
        ('rax', 'ax'), ('ax', 'rax'),
        ('rax', 'eax'), ('eax', 'rax'),
        ('rax', 'rax'), ('rax', None), (None, 'rax'),
        ('eax', 'mm0'), ('mm0', 'eax'),
        ('rax', 'mm0'), ('mm0', 'rax'),
        ('mm0', 'eax'), ('eax', 'mm0'),
        ('mm0', 'rax'), ('rax', 'mm0'),
        ('eax', 'xmm0'),
        ('rax', 'xmm0'),
        ('xmm0', 'eax'),
        ('xmm0', 'rax'),
        ('mm0', 'mm0'), ('mm0', None), (None, 'mm0'),
        ('mm0', 'xmm0'),
        ('xmm0', 'mm0'),
        ('xmm0', 'xmm0'),
        ('xmm0', 'ymm0'), ('xmm0', None), (None, 'xmm0'),
        ('ymm0', 'xmm0'),
        ('ymm0', 'ymm0'), ('ymm0', None), (None, 'ymm0'),
    )
    # Maps a "low" register kind to its REX-extended counterpart.
    r8 = {
        'al': 'r8b',
        'ax': 'r8w',
        'eax': 'r8d',
        'rax': 'r8',
        'mm0': 'mmalt',
        'xmm0': 'xmm8',
        'ymm0': 'ymm8'
    }
    for reg, rm in register_kind_pairs:
      # last_reg/last_rm select whether the extended range ends at the last
      # (-1) or the next to last (-2) register of the extended kind.
      for last_reg, last_rm in ((-1, -1), (-1, -2), (-2, -1), (-2, -2)):
        if reg:
          start_reg = REGISTERS[reg][0]
          start_reg8 = REGISTERS[r8[reg]][0]
          end_reg = REGISTERS[reg][-1]
          end_reg0 = 'dil' if reg == 'al' else end_reg
          end_reg8 = REGISTERS[r8[reg]][last_reg]
          reg_regex = '\\[(%' + start_reg + '\\.\\.%' + end_reg + ')]'
          reg_regex0 = '\\[(%' + start_reg + '\\.\\.%' + end_reg0 + ')]'
        elif last_reg == -2:
          # Without reg operand the -2 variant would only repeat the -1 one.
          continue
        if rm:
          start_rm = REGISTERS[rm][0]
          start_rm8 = REGISTERS[r8[rm]][0]
          end_rm = REGISTERS[rm][-1]
          end_rm0 = 'dil' if rm == 'al' else end_rm
          end_rm8 = REGISTERS[r8[rm]][last_rm]
          rm_regex = ('\\[(%' + start_rm + '\\.\\.%' + end_rm + ')'
                      '(?: or memory)?]')
          rm_regex0 = ('\\[(%' + start_rm + '\\.\\.%' + end_rm0 + ')'
                       '(?: or memory)?]')
        elif last_rm == -2:
          # Without rm operand the -2 variant would only repeat the -1 one.
          continue
        for rexw in (True, False):
          for input_rr in (True, False):
            for output_rr in (True, False) if reg or rm else (None, ):
              for rm_to_reg in (True, False) if reg and rm else (None, ):
                # Legacy prefixes
                regex = '.*:(?: 26| 2e| 36| 3e| 64| 65| 66| 67| f0| f2| f3)*'
                # REX
                regex += '( 48).*' if rexw else '( 40|).*'
                # Replacement text
                replacement_tuple = (
                    ' [REX:48..4f]' if rexw else ' [REX:40..47]?', )
                if reg:
                  replacement_regs = '%{}..%{}'.format(start_reg, end_reg8)
                if rm:
                  replacement_rms = '%{}..%{}'.format(start_rm, end_rm8)
                # Instruction arguments
                if not reg and not rm:
                  pass
                elif not reg and rm:
                  if rexw:
                    regex += rm_regex0 + '.*'
                  else:
                    regex += rm_regex + '.*'
                  replacement_tuple += (replacement_rms, )
                elif reg and not rm:
                  if rexw:
                    regex += reg_regex0 + '.*'
                  else:
                    regex += reg_regex + '.*'
                  replacement_tuple += (replacement_regs, )
                elif rm_to_reg:
                  if rexw:
                    regex += rm_regex0 + ',' + reg_regex0 + '.*'
                  else:
                    regex += rm_regex + ',' + reg_regex + '.*'
                  replacement_tuple += (replacement_rms, replacement_regs)
                else:
                  if rexw:
                    regex += reg_regex0 + ',' + rm_regex0 + '.*'
                  else:
                    regex += reg_regex + ',' + rm_regex + '.*'
                  replacement_tuple += (replacement_regs, replacement_rms)
                # Input and output restricted registers
                if input_rr:
                  regex += 'input_rr=\\[(%eax\\.\\.%edi)].*'
                  replacement_tuple += ('%eax..%r15d', )
                if output_rr:
                  regex += 'output_rr=\\[(%eax\\.\\.%edi)].*'
                  replacement_tuple += ('%eax..%r14d', )
                regex += '()'
                replacement_tuple += ('', )
                # Replacement cases
                replacement_tuples = ()
                # The empty string stands for "no REX prefix at all".
                for byte in (range(0x48, 0x50)
                             if rexw
                             else range(0x40, 0x48) + ['']):
                  replacement_case = (
                      ' {:02x}'.format(byte) if byte else byte, )
                  if byte:
                    # Bit 0 of the REX byte extends rm, bit 1 extends index
                    # and bit 2 extends reg.
                    if rm:
                      if byte & 0x1:
                        replacement_rms = '%{}..%{}'.format(start_rm8, end_rm8)
                      else:
                        replacement_rms = '%{}..%{}'.format(start_rm, end_rm0)
                    if byte & 0x2:
                      replacement_index = '%r8d..%r15d'
                    else:
                      replacement_index = '%eax..%edi'
                    if reg:
                      if byte & 0x4:
                        replacement_regs = '%{}..%{}'.format(start_reg8,
                                                             end_reg8)
                      else:
                        replacement_regs = '%{}..%{}'.format(start_reg,
                                                             end_reg0)
                  else:
                    if rm:
                      replacement_rms = '%{}..%{}'.format(start_rm, end_rm)
                    replacement_index = '%eax..%edi'
                    if reg:
                      replacement_regs = '%{}..%{}'.format(start_reg, end_reg)
                  if not reg and not rm:
                    pass
                  elif not reg and rm:
                    replacement_case += (replacement_rms, )
                    if byte:
                      final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'
                    else:
                      final_rr = '%eax..%edi'
                  elif reg and not rm:
                    replacement_case += (replacement_regs, )
                    if byte:
                      final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'
                    else:
                      final_rr = '%eax..%edi'
                  elif rm_to_reg:
                    replacement_case += (replacement_rms, replacement_regs)
                    if byte:
                      final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'
                    else:
                      final_rr = '%eax..%edi'
                  else:
                    replacement_case += (replacement_regs, replacement_rms)
                    if byte:
                      final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'
                    else:
                      final_rr = '%eax..%edi'
                  if input_rr: replacement_case += (replacement_index, )
                  if output_rr: replacement_case += (final_rr, )
                  replacement_tuples += (replacement_case, )
                compressors.append(Compressor(
                    regex, replacement_tuple, replacement_tuples))
    # This is pretty simple compressor to combine two lines with different REX.W
    # bits (only if they are otherwise identical).
    compressors.append(Compressor(
        '.*(\\[REX:40\\.\\.47]\\?).*()', ('[REX:40..4f]?', ''),
        (('[REX:40..47]?', ), ('[REX:48..4f]', ))))
1342
1343
def ShowProgress(rule, instruction):
  """Write a human-readable report about one applied compression to stderr.

  Args:
    rule: index into the module-level `compressors` list.
    instruction: text which must match the regex of that compressor.

  Every rule gets a sequential display number when it is reported for the
  first time. The full list of expanded variants is shown on that first
  report, or whenever the compressor has at most four replacements;
  otherwise only the first and the last variant (in sorted order) are shown.
  """
  shown = ShowProgress.rules_shown
  first_print = rule not in shown
  if first_print:
    # Display numbers follow the order in which rules are first seen.
    shown[rule] = len(shown)

  write = sys.stderr.write
  write('-------- Compressed --------\n')
  write('Rule: {}\n'.format(shown[rule]))
  write('--------\n')

  compressor = compressors[rule]
  match = compressor.regex.match(instruction)
  assert match
  format_str = CompressionTemplate(instruction, match, '{{{}}}')
  variants = [format_str.format(*replacement)
              for replacement in compressor.replacements]
  variants.sort()

  if first_print or len(compressor.replacements) <= 4:
    for variant in variants:
      write(variant + '\n')
  else:
    write(variants[0] + '\n')
    write("..." + '\n')
    write(variants[-1] + '\n')
  write('--------\n')
  compressed = (format_str + '{{{}}}').format(*compressor.subst)
  write('Compressed ' + compressed + '\n')
# Maps rule index -> sequential number under which the rule is reported.
ShowProgress.rules_shown = {}
1370
1371
def main():
  """Traverse the DFA in worker processes and print the compressed result.

  Parses options, loads the DFA from the XML file, creates the validator and
  the compressors, then distributes traversal tasks over a process pool.
  Compressed instructions go to stdout (sorted); progress information and
  final counters go to stderr.
  """
  # We are keeping these global to share state graph and compressors
  # between workers spawned by multiprocess. Passing them every time is slow.
  global options, xml_file
  global dfa
  global worker_validator
  options, xml_file = ParseOptions()
  dfa = dfa_parser.ParseXml(xml_file)
  worker_validator = validator.Validator(
      validator_dll=options.validator_dll,
      decoder_dll=options.decoder_dll)
  PrepareCompressors()

  initial_state = dfa.initial_state
  assert initial_state.is_accepting
  assert not initial_state.any_byte

  log = sys.stderr.write
  log('{} states\n'.format(len(dfa.states)))

  num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

  # We can't just write 'num_suffixes[dfa.initial_state]' because
  # initial state is accepting.
  total_instructions = 0
  for transition in dfa.initial_state.forward_transitions.values():
    total_instructions += num_suffixes[transition.to_state]
  log('{} regular instructions total\n'.format(total_instructions))

  tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)
  log('{} tasks\n'.format(len(tasks)))

  pool = multiprocessing.Pool()

  total = 0
  num_valid = 0
  full_output = set()
  for prefix, count, valid_count, output, trace in pool.imap(Worker, tasks):
    log('Prefix: ' + ', '.join(hex(byte) for byte in prefix) + '\n')
    total += count
    num_valid += valid_count
    full_output.update(output)
    # Replay the compressions already done by the worker.
    for rule, instruction in trace:
      ShowProgress(rule, instruction)

  # Final pass over the merged output of all workers.
  final_lines = sorted(Compressed(full_output, compressors, ShowProgress))
  for instruction in final_lines:
    sys.stdout.write('{}\n'.format(instruction))

  log('{} instructions were processed\n'.format(total))
  log('{} valid instructions\n'.format(num_valid))
1423
1424
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()
OLDNEW
« no previous file with comments | « no previous file | src/trusted/validator_ragel/testdata/32bit_regular.golden » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698