src/trusted/validator_ragel/compress_regular_instructions.py - Issue 49183002: Regular instructions golden file test.

Side by Side Diff: src/trusted/validator_ragel/compress_regular_instructions.py

Issue 49183002: Regular instructions golden file test. Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # Copyright (c) 2013 The Native Client Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """

	6 Traverse the validator's DFA, collect all "normal" instruction and then

	7 compress output. Note: "anybyte fields" (immediates and displacements)

	8 are always filled with zeros. Otherwise processing of sextillions (sic!)

	9 of possibilities will take too long.

	10

	11 Each rule is applied only when all variants are accepted by validator.

	12 The following compression rules are present:

	13

	14 1. Compress ModR/M (+SIB & displacement).

	15 Instruction: 00 00 add %al,(%rax)

	16 ...

	17 Instruction: 00 ff add %bh,%bh

	18 becomes

	19 Instruction: 00 XX add [%al..%bh],[%al..%bh or memory]

	20

	21 1a. Compress ModR/M (+SIB & displacement) memory-only.

	22 Instruction: f0 01 00 lock add %eax,(%eax)

	23 ...

	24 Instruction: f0 01 bf 00 00 00 00 lock add %edi,0x0(%edi)

	25 becomes

	26 Instruction: f0 01 XX lock add [%eax..edi],[memory]

	27

	28 1b. Compress ModR/M register only.

	29 Instruction: 66 0f 50 c0 movmskpd %xmm0,%eax

	30 ...

	31 Instruction: 66 0f 50 ff movmskpd %xmm7,%edi

	32 becomes

	33 Instruction: 66 0f 50 XX movmskpd [%xmm0..%xmm7],[%eax..edi]

	34

	35 2. Compress ModR/M (+SIB & displacement) with opcode extension.

	36 Instruction: 0f 90 00 seto (%eax)

	37 ...

	38 Instruction: 0f 90 c7 seto %bh

	39 becomes

	40 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	41

	42 2a. Compress ModR/M (+SIB & displacement) memory-only with opcode extension.

	43 Instruction: f0 ff 00 lock incl (%eax)

	44 ...

	45 Instruction: f0 ff 84 ff 00 00 00 00 lock incl 0x0(%edi,%edi,8)

	46 becomes

	47 Instruction: f0 ff XX/1 lock decl [memory]

	48

	49 2b. Compress ModR/M register-only with opcode extension.

	50 Instruction: 0f 71 d0 00 psrlw $0x0,%mm0

	51 ...

	52 Instruction: 0f 71 d7 00 psrlw $0x0,%mm7

	53 becomes

	54 Instruction: 66 0f 71 XX/2 00 psrlw $0x0,[%mm0..%mm7]

	55

	56 3. Compress register-in-opcode.

	57 Instruction: d9 c0 fld %st(0)

	58 ...

	59 Instruction: d9 c7 fld %st(7)

	60 becomes

	61 Instruction: Instruction: d9 c[0..7] fld [%st(0)..%st(7)]

	62

	63 Only applies if all possible register accesses are accepted by validator.

	64

	65 4. Special compressor for "set" instruction.

	66 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	67 ...

	68 Instruction: 0f 90 XX/7 seto [%al..%bh or memory]

	69 becomes

	70 Instruction: 0f 90 XX seto [%al..%bh or memory]

	71 """

	72

	73 import itertools

	74 import multiprocessing

	75 import optparse

	76 import os

	77 import re

	78 import subprocess

	79 import sys

	80 import tempfile

	81 import traceback

	82

	83 import dfa_parser

	84 import dfa_traversal

	85 import validator

	86

	87

	88 # Register names in 'natual' order (as defined by IA32/x86-64 ABI)

	89 #

	90 # X86-64 ABI splits all registers in groups of 8 because it uses 3-bit field

	91 # in opcode, ModR/M, and/or SIB bytes to encode them.

	92 #

	93 # In most cases there are 16 registers of a given kind and two such groups,

	94 # but there are couple of exceptions:

	95 # 1. There are 20 8-bit registers and three groups (two of them overlap)

	96 # 2. There are eight X87 and MMX registers thus two groups are identical

	97 #

	98 # We use typical register from a group to name the whole group. Most groups

	99 # use first register, but 'spl' group uses fifth register because it's first

	100 # four registers are the same as 'al' group. We use mnemonic name 'mmalt'

	101 # to represent the "evil mirror" of the 'mm0' group.

	102 REGISTERS = {

	103 'al': [ 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh' ],

	104 'spl': [ 'al', 'cl', 'dl', 'bl', 'spl', 'bpl', 'sil', 'dil' ],

	105 'ax': [ 'ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di' ],

	106 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi' ],

	107 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi' ],

	108 'r8b': [ 'r{}b'.format(N) for N in range(8,16) ],

	109 'r8w': [ 'r{}w'.format(N) for N in range(8,16) ],

	110 'r8d': [ 'r{}d'.format(N) for N in range(8,16) ],

	111 'r8': [ 'r{}'.format(N) for N in range(8,16) ],

	112 'mm0': [ 'mm{}'.format(N) for N in range(8) ],

	113 'mmalt': [ 'mm{}'.format(N) for N in range(8) ],

	114 'st(0)': [ 'st({})'.format(N) for N in range(8) ],

	115 'xmm0': [ 'xmm{}'.format(N) for N in range(8) ],

	116 'xmm8': [ 'xmm{}'.format(N) for N in range(8,16) ],

	117 'ymm0': [ 'ymm{}'.format(N) for N in range(8) ],

	118 'ymm8': [ 'ymm{}'.format(N) for N in range(8,16) ]

	119 }

	120

	121

	122 NOP = 0x90

	123

	124

	125 def PadToBundleSize(bytes):

	126 assert len(bytes) <= validator.BUNDLE_SIZE

	127 return bytes + [NOP] * (validator.BUNDLE_SIZE - len(bytes))

	128

	129

	130 # In x86-64 mode we have so-called 'restricted register' which is used to

	131 # tie two groups together. Some instructions require particular value to

	132 # be stored in this variable, while some accept any non-special restricted

	133 # register (%ebp and %esp are special because they can only be accepted by

	134 # a few 'special' instructions).

	135 #

	136 # You can find more details in the "NaCl SFI model on x86-64 systems" manual.

	137 #

	138 # We try to feed all possible 'restricted registers' into validator and then

	139 # classify the instruction using this map. If set of acceptable 'restricted

	140 # registers' is not here, then it's an error in validator.

	141 ACCEPTABLE_X86_64_INPUTS = {

	142 0x00001: 'input_rr=%eax',

	143 0x00002: 'input_rr=%ecx',

	144 0x00004: 'input_rr=%edx',

	145 0x00008: 'input_rr=%ebx',

	146 0x00010: 'input_rr=%esp',

	147 0x00020: 'input_rr=%ebp',

	148 0x00040: 'input_rr=%esi',

	149 0x00080: 'input_rr=%edi',

	150 0x00100: 'input_rr=%r8d',

	151 0x00200: 'input_rr=%r9d',

	152 0x00400: 'input_rr=%r10d',

	153 0x00800: 'input_rr=%r11d',

	154 0x01000: 'input_rr=%r12d',

	155 0x02000: 'input_rr=%r13d',

	156 0x04000: 'input_rr=%r14d',

	157 0x08000: 'input_rr=%r15d',

	158 0x1ffcf: 'input_rr=any_nonspecial'

	159 }

	160

	161 # Any instruction must produce either None or one of fifteen registers as an

	162 # output 'restricted register' value. 'r15d' is NOT acceptable as an output.

	163 ACCEPTABLE_X86_64_OUTPUT_REGISTERS = tuple(

	164 '%' + reg for reg in (REGISTERS['eax'] + REGISTERS['r8d'])[0:-1])

	165

	166

	167 def ValidateInstruction(instruction, validator_inst):

	168 bundle = ''.join(map(chr, PadToBundleSize(instruction)))

	169 if options.bitness == 32:

	170 result = validator_inst.ValidateChunk(bundle, bitness=32)

	171 return result, []

	172 else:

	173 valid_inputs = 0

	174 known_final_rr = None

	175 output_rr = None

	176 # Note that iteration order is aligned with ACCEPTABLE_X86_64_INPUTS array

	177 # above.

	178 for bit, initial_rr in enumerate(validator.ALL_REGISTERS + [None]):

	179 valid, final_rr = validator_inst.ValidateAndGetFinalRestrictedRegister(

	180 bundle, len(instruction), initial_rr)

	181 if valid:

	182 # final_rr should not depend on input_rr

	183 assert valid_inputs == 0 or known_final_rr == final_rr

	184 valid_inputs \|= 1 << bit

	185 known_final_rr = final_rr

	186 # If nothing is accepted then instruction is not valid. Easy and simple.

	187 if valid_inputs == 0: return False, []

	188 # If returned value in unacceptable we'll get IndexError here and this

	189 # test will fail

	190 if known_final_rr is not None:

	191 output_rr = ACCEPTABLE_X86_64_OUTPUT_REGISTERS[known_final_rr]

	192 # If collected valid_inputs are unacceptable we'll get KeyError here and

	193 # this test will fail

	194 return True, [ACCEPTABLE_X86_64_INPUTS[valid_inputs],

	195 'output_rr={}'.format(output_rr)]

	196

	197

	198 class WorkerState(object):

	199 def __init__(self, prefix, validator):

	200 self.total_instructions = 0

	201 self.num_valid = 0

	202 self.validator = validator

	203 self.output = set()

	204 self.trace = []

	205

	206

	207 def ReceiveInstruction(self, bytes):

	208 self.total_instructions += 1

	209 result, notes = ValidateInstruction(bytes, self.validator)

	210 if result:

	211 self.num_valid += 1

	212 dis = self.validator.DisassembleChunk(

	213 ''.join(map(chr, bytes)),

	214 bitness=options.bitness)

	215 for line_nr in xrange(len(dis)):

	216 dis[line_nr] = str(dis[line_nr])

	217 assert dis[line_nr][0:17] == 'Instruction(0x' + str(line_nr) + ': '

	218 assert dis[line_nr][-1:] == ')'

	219 dis[line_nr] = dis[line_nr][17:-1]

	220 # If %rip is involved then comment will be different depending on the

	221 # instruction length. Eliminate it.

	222 if '(%rip)' in dis[0]:

	223 dis[0] = re.sub(' # 0x[ ][0-9a-fA-F]', '', dis[0])

	224 # Zero displacements are represented as 0x0 for all instructions except

	225 # jumps where they disassembled as non-zero due to %eip/%rip-relative

	226 # addressing. We replace this displacement with %eip/%rip to simplify

	227 # compression.

	228 if ' 0x' in dis[0] and ' 0x0' not in dis[0]:

	229 for bytes in xrange(1, 16):

	230 dis[0] = re.sub(

	231 '(' + '(?:[0-9a-fA-F][0-9a-fA-F] ){' + str(bytes) + '} .* )' +

	232 hex(bytes) + '(.*)',

	233 '\\1%eip\\2' if options.bitness == 32 else '\\1%rip\\2',

	234 dis[0]);

	235 dis[0] = 'Instruction: ' + dis[0]

	236 dis += notes

	237 self.output.add('; '.join(dis))

	238

	239

	240 def RecordTrace(self, compressor_nr, instruction):

	241 self.trace.append((compressor_nr, instruction))

	242

	243

	244 # Compressor has three slots: regex (which picks apart given instruction),

	245 # subst (which is used to denote compressed version) and replacements (which

	246 # are used to generate set of instructions from a given code).

	247 #

	248 # Example compressor:

	249 # regex = '.?[0-9a-fA-F]([0-7]) \\w (%e(?:[abcd]x\|[sb]p\|[sd]i)).*()'

	250 # subst = ('[0-7]', '[%eax..%edi]', ' # register in opcode')

	251 # replacements = ((0, '%eax'), (1, '%ecx'), (2, '%edx'), (3, '%ebx')

	252 # (4, '%esp'), (5, '%ebp'), (6, '%esi'), (7, '%edi'))

	253 #

	254 # When faced with instriuction '40 inc %eax' it will capture the following

	255 # pieces of said instruction: '4[0] inc [%eax]'.

	256 #

	257 # Then it will produce the following eight instructions:

	258 # '40 inc %eax'

	259 # '41 inc %ecx'

	260 # '42 inc %edx'

	261 # '43 inc %ebx'

	262 # '44 inc %esp'

	263 # '45 inc %ebp'

	264 # '46 inc %esi'

	265 # '47 inc %edi'

	266 #

	267 # If all these instructions can be found in a set of instructions then

	268 # compressor will remove them from said set and will insert one replacement

	269 # "compressed instruction" '4[0-7] inc [%eax..%edi] # register in opcode'.

	270 #

	271 # Note that last group is only used in the replacement. It's used to grab marks

	272 # added by previous compressors and to replace them with a new mark.

	273 class Compressor(object):

	274 __slots__ = [

	275 'regex',

	276 'subst',

	277 'replacements'

	278 ]

	279

	280 def __init__(self, regex, subst, replacements=None):

	281 self.regex = re.compile(regex)

	282 self.subst = subst

	283 self.replacements = [] if replacements is None else replacements

	284

	285

	286 def CompressionTemplate(instruction, match, mark):

	287 """ Replace all match groups with the mark. """

	288 pos = 0

	289 format_str = ''

	290 for group in range(1, len(match.groups())):

	291 format_str += instruction[pos:match.start(group)] + mark

	292 pos = match.end(group)

	293 return format_str + instruction[pos:match.start(len(match.groups()))]

	294

	295

	296 def CompressOneMatch(instructions, instruction, match, compressor):

	297 format_str = CompressionTemplate(instruction, match, '{}')

	298 subset = set()

	299 for replacement in compressor.replacements:

	300 replacement_str = format_str.format(*replacement)

	301 if not replacement_str in instructions:

	302 return (False, instructions)

	303 subset.add(replacement_str)

	304 instructions -= subset

	305 instructions.add((format_str + '{}').format(*compressor.subst))

	306 return (True, instructions)

	307

	308

	309 def CompressOneInstruction(instructions, compressors, split, cache):

	310 sorted_instructions = (sorted(i for i in instructions if i > split) +

	311 sorted(i for i in instructions if i < split))

	312 for instruction in sorted_instructions:

	313 if instruction in cache:

	314 compressors_list = cache[instruction]

	315 for compressor_nr, match, compressor in compressors_list:

	316 result, instructions = CompressOneMatch(

	317 instructions, instruction, match, compressor)

	318 if result:

	319 return (instructions, compressor_nr, instruction)

	320 else:

	321 compressors_list = []

	322 for compressor_nr, compressor in enumerate(compressors):

	323 match = compressor.regex.match(instruction)

	324 if match:

	325 compressors_list.append((compressor_nr, match, compressor))

	326 result, instructions = CompressOneMatch(

	327 instructions, instruction, match, compressor)

	328 if result:

	329 return (instructions, compressor_nr, instruction)

	330 cache[instruction] = compressors_list

	331 return (instructions, False, False)

	332

	333

	334 def Compressed(instructions, compressors, show_progress):

	335 split = ''

	336 cache = {}

	337 while True:

	338 instructions, rule, split = CompressOneInstruction(

	339 instructions, compressors, split, cache)

	340 if rule is False: break

	341 show_progress(rule, split)

	342 return instructions

	343

	344

	345 def Worker((prefix, state_index)):

	346 worker_state = WorkerState(prefix, worker_validator)

	347

	348 try:

	349 dfa_traversal.TraverseTree(

	350 dfa.states[state_index],

	351 final_callback=worker_state.ReceiveInstruction,

	352 prefix=prefix,

	353 anyfield=0)

	354 if (prefix[0] != 0x0f or prefix[1] != 0x0f): # Skip 3DNow! instructions

	355 worker_state.output = Compressed(set(worker_state.output),

	356 compressors,

	357 worker_state.RecordTrace)

	358 except Exception as e:

	359 traceback.print_exc() # because multiprocessing imap swallows traceback

	360 raise

	361

	362 return (

	363 prefix,

	364 worker_state.total_instructions,

	365 worker_state.num_valid,

	366 worker_state.output,

	367 worker_state.trace)

	368

	369

	370 def ParseOptions():

	371 parser = optparse.OptionParser(usage='%prog [options] xmlfile')

	372

	373 parser.add_option('--bitness',

	374 choices=['32', '64'],

	375 help='The subarchitecture: 32 or 64')

	376 parser.add_option('--validator_dll',

	377 help='Path to librdfa_validator_dll')

	378 parser.add_option('--decoder_dll',

	379 help='Path to librdfa_decoder_dll')

	380

	381 options, args = parser.parse_args()

	382 options.bitness = int(options.bitness)

	383

	384 if len(args) != 1:

	385 parser.error('specify one xml file')

	386

	387 (xml_file, ) = args

	388

	389 return options, xml_file

	390

	391

	392 # Version suitable for use in regular expressions

	393 REGISTERS_RE = REGISTERS.copy()

	394 REGISTERS_RE['st(0)'] = [ 'st\${}\$'.format(N) for N in range(8) ]

	395 REGISTERS_RE['st\$0\$'] = REGISTERS_RE['st(0)']

	396

	397 # Index names in 'natual' order (as defined by IA32/x86-64 ABI)

	398 INDEXES = {

	399 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'eiz', 'ebp', 'esi', 'edi' ],

	400 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'riz', 'rbp', 'rsi', 'rdi' ],

	401 'r8': [ 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15' ]

	402 }

	403 # Register which can not be used as base in 64-bit mode in all incarnations

	404 X86_64_BASE_REGISTERS = set([

	405 '%spl', '%bpl', '%r15b',

	406 '%sp', '%bp', '%r15w',

	407 '%esp', '%ebp', '%r15d',

	408 '%rsp', '%rbp', '%r15',

	409 '%rip'

	410 ])

	411

	412

	413 def ModRMRegisterReplacements(rm, reg=None, writes_to='rm', opcode_bits=0,

	414 register_write='ignore'):

	415 """Creates replacement tuples list for register-to-register instructions

	416

	417 Args:

	418 rm: rm operand kind (see REGISTERS array)

	419 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	420 writes_to: three-state selector

	421 'reg' - instruction uses rm as source, reg as destination

	422 'rm' - instruction uses reg as source, rm as destination

	423 'both' - instruction writes to both reg and rm

	424 opcode_bits: opcode extensions code (used when reg is None)

	425 register_write: three-state selector

	426 'sandbox' - instruction can be used to produce "restricted register"

	427 'protect' - instruction can damage output, protect "special registers"

	428 'ignore' - instruction does not affect it's operands (e.g. test) or

	429 is used with non-GP registers (X87, MMX, XMM, etc)

	430

	431 Returns:

	432 List of replacement tuples

	433 """

	434 # Reg field can be used either as reg or as opcode extension, but not both

	435 assert reg is None or opcode_bits == 0

	436 # writes_to == 'both' makes no sense if reg field is an opcode extension

	437 assert reg is not None or writes_to != 'both'

	438

	439 output_key = (options.bitness, reg, rm, writes_to, opcode_bits,

	440 register_write)

	441 if output_key in ModRMRegisterReplacements.replacements:

	442 return ModRMRegisterReplacements.replacements[output_key]

	443

	444 replacements = []

	445

	446 # Two upper bits of ModR/M byte (mod field) must be equal to 11

	447 # This gives us range from 0xc0 to 0xff but we are going from the

	448 # end make rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).
	halyavin 2013/11/14 07:07:50 make -> to make make -> to make khim 2013/11/15 12:11:27 Done. Show quoted text On 2013/11/14 07:07:50, halyavin wrote: > make -> to make Done.
	449 if reg is None:

	450 # reg field is used as opcode extension

	451 byte_range = [byte

	452 for byte in range(0xff, 0xbf, -1)

	453 if (byte >> 3) & 0x7 == opcode_bits]

	454 else:

	455 byte_range = range(0xff, 0xbf, -1)

	456

	457 for modrm in byte_range:

	458 # Parse ModRM

	459 mod_field = (modrm >> 6) & 0x03

	460 reg_field = (modrm >> 3) & 0x07

	461 rm_field = (modrm & 0x07)

	462 if reg is not None:

	463 reg_text = '%' + REGISTERS[reg][reg_field]

	464 byte_text = '{:02x}'.format(modrm)

	465 rm_text = '%' + REGISTERS[rm][rm_field]

	466 replacement = [byte_text]

	467 if reg is None:
	halyavin 2013/11/14 07:07:50 We can make code longer but easier to understand w We can make code longer but easier to understand with respect to None/not None cases. if reg is None: assert writes_to == 'rm' input, output = None, rm_text replacement.append(output) else: reg_text = '%' + REGISTERS[reg][reg_field] if writes_to == 'reg': input, output = rm_text, reg_text else: # rm, both input, output = reg_text, rm_text replacement.extend([input, output]) Then we don't need reg_text and rm_text anymore. if writes_to == 'both' and reg_text in X86_64_BASE_REGISTERS can be replaced with if writes_to == 'both' and input in X86_64_BASE_REGISTERS We can then extract the if to input, output = AppendOperandsReplacement( replacement, rm_text, reg, reg_field, writes_to) input -> index_32 in ModRMMemoryReplacements khim 2013/11/15 12:11:27 Done. Show quoted text On 2013/11/14 07:07:50, halyavin wrote: > We can make code longer but easier to understand with respect to None/not None > cases. > > if reg is None: > assert writes_to == 'rm' > input, output = None, rm_text > replacement.append(output) > else: > reg_text = '%' + REGISTERS[reg][reg_field] > if writes_to == 'reg': > input, output = rm_text, reg_text > else: # rm, both > input, output = reg_text, rm_text > replacement.extend([input, output]) > Then we don't need reg_text and rm_text anymore. > if writes_to == 'both' and reg_text in X86_64_BASE_REGISTERS > can be replaced with > if writes_to == 'both' and input in X86_64_BASE_REGISTERS > > We can then extract the if to > input, output = AppendOperandsReplacement( > replacement, rm_text, reg, reg_field, writes_to) > > input -> index_32 in ModRMMemoryReplacements Done.
	468 replacement.append(rm_text)

	469 else:

	470 replacement.append(rm_text if writes_to == 'reg' else reg_text)

	471 replacement.append(reg_text if writes_to == 'reg' else rm_text)

	472 if options.bitness == 64:

	473 replacement.append('any_nonspecial')

	474 output = reg_text if writes_to == 'reg' else rm_text

	475 replacement.append(output if register_write == 'sandbox' else None)

	476 # Do not generate replacements that will violate sandboxing rules.

	477 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:

	478 continue

	479 if register_write == 'sandbox' and output == '%r15d':

	480 continue

	481 if writes_to == 'both' and reg_text in X86_64_BASE_REGISTERS:

	482 continue

	483 replacements.append(tuple(replacement))

	484 ModRMRegisterReplacements.replacements[output_key] = tuple(replacements)

	485 return ModRMRegisterReplacements.replacements[output_key]

	486 ModRMRegisterReplacements.replacements = {}

	487

	488

	489 def ModRMMemoryReplacements(reg=None, writes_to='rm', opcode_bits=0,

	490 memory_accessed=True, register_write='ignore',

	491 base_r8=False, index_r8=False):

	492 """Creates replacement tuples list for register-to-memory instructions

	493

	494 Args:

	495 rm: rm operand kind (see REGISTERS array)

	496 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	497 writes_to: three-state selector

	498 'reg' - instruction uses rm as source, reg as destination

	499 'rm' - instruction uses reg as source, rm as destination

	500 'both' - instruction writes to both reg and rm

	501 opcode_bits: opcode extensions code (used when reg is None)

	502 memory_accessed: True if instruction accesses memory

	503 register_write: three-state selector

	504 'sandbox' - instruction can be used to produce "restricted register"

	505 'protect' - instruction can damage output, protect "special registers"

	506 'ignore' - instruction does not affect it's operands (e.g. test) or

	507 is used with non-GP registers (X87, MMX, XMM, etc)

	508 index_r8: True if REX.X bit in the instruction set to 1

	509

	510 Returns:

	511 List of replacement tuples

	512 """

	513 # Reg field can be used either as reg or as opcode extension, but not both

	514 assert reg is None or opcode_bits == 0

	515 # writes_to == 'both' makes no sense if reg field is an opcode extension

	516 assert reg is not None or writes_to != 'both'

	517

	518 output_key = (options.bitness, reg, writes_to, opcode_bits,

	519 base_r8, index_r8, memory_accessed, register_write)

	520 if output_key in ModRMMemoryReplacements.replacements:

	521 return ModRMMemoryReplacements.replacements[output_key]

	522

	523 if options.bitness == 32:

	524 base = 'eax'

	525 index = 'eax'

	526 else:

	527 base = 'r8' if base_r8 else 'rax'

	528 index = 'r8' if index_r8 else 'rax'

	529 input = 'r8d' if index_r8 else 'eax'

	530

	531 replacements = []

	532

	533 # Two upper bits of ModR/M byte (mod field) must be equal to 00, 01, or 10

	534 # This gives us range from 0x00 to 0xbf but we are going from the end to make

	535 # rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).

	536 if reg is None:

	537 # reg field is used as opcode extension

	538 byte_range = [byte

	539 for byte in range(0xbf, -1, -1)

	540 if byte & 0x38 == opcode_bits << 3]
	halyavin 2013/11/14 07:07:50 Change as in previous function. Change as in previous function. khim 2013/11/15 12:11:27 Done. Show quoted text On 2013/11/14 07:07:50, halyavin wrote: > Change as in previous function. Done.
	541 else:

	542 byte_range = range(0xbf, -1, -1)

	543

	544 for modrm in byte_range:

	545 # Parse ModRM

	546 mod_field = (modrm >> 6) & 0x03

	547 reg_field = (modrm >> 3) & 0x07

	548 rm_field = (modrm & 0x07)

	549 if reg is not None:

	550 reg_text = '%' + REGISTERS[reg][reg_field]

	551 # If RM field != %rsp then there are no index

	552 if rm_field != validator.REG_RSP:

	553 base_text = '%' + REGISTERS[base][rm_field]

	554 # If RM field == %rbp and MOD field is zero then it's absolute address

	555 # in 32-bit mode and %rip-based address in 64-bit mode

	556 if mod_field == 0 and rm_field == validator.REG_RBP:
	halyavin 2013/11/14 07:07:50 Extract into function: bytes_text, rm_text, base Extract into function: bytes_text, rm_text, base_text = BaseOnlyMemoryOperand( mod_field, base, rm_field) khim 2013/11/15 12:11:27 Done. Show quoted text On 2013/11/14 07:07:50, halyavin wrote: > Extract into function: > bytes_text, rm_text, base_text = BaseOnlyMemoryOperand( > mod_field, base, rm_field) Done.
	557 bytes_text = '{:02x} 00 00 00 00'.format(modrm)

	558 rm_text = '0x0' if options.bitness == 32 else '0x0(%rip)'

	559 base_text = '%rip'

	560 # Memory access with just a base register

	561 elif mod_field == 0:

	562 bytes_text = '{:02x}'.format(modrm)

	563 rm_text = '({})'.format(base_text)

	564 # Memory access with base and 8bit offset

	565 elif mod_field == 1:

	566 bytes_text = '{:02x} 00'.format(modrm)

	567 rm_text = '0x0({})'.format(base_text)

	568 # Memory access with base and 32bit offset

	569 else: # mod_field == 2

	570 bytes_text = '{:02x} 00 00 00 00'.format(modrm)

	571 rm_text = '0x0({})'.format(base_text)

	572 replacement = [bytes_text]

	573 if reg is None:

	574 replacement.append(rm_text)

	575 else:

	576 replacement.append(rm_text if writes_to == 'reg' else reg_text)

	577 replacement.append(reg_text if writes_to == 'reg' else rm_text)

	578 if options.bitness == 64:

	579 replacement.append('any_nonspecial')

	580 output = reg_text if writes_to == 'reg' else None

	581 replacement.append(output if register_write == 'sandbox' else None)

	582 # Do not generate replacements that will violate sandboxing rules.

	583 if memory_accessed and base_text not in X86_64_BASE_REGISTERS:

	584 continue

	585 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:

	586 continue

	587 if register_write == 'sandbox' and output == '%r15d':

	588 continue

	589 if writes_to == 'both' and reg_text in X86_64_BASE_REGISTERS:

	590 continue

	591 replacement = tuple(replacement)

	592 replacements.append(replacement)

	593 else:

	594 # If RM field == %rsp then we have SIB byte

	595 for sib in xrange(0x100):

	596 scale_field = (sib >> 6) & 0x03

	597 index_field = (sib >> 3) & 0x07

	598 base_field = (sib & 0x07)

	599 index_text = '%' + INDEXES[index][index_field]

	600 base_text = '%' + REGISTERS[base][base_field]

	601 scale_text = str(1 << scale_field)

	602 # If BASE is %rbp and MOD == 0 then index with 32bit offset is used

	603 if mod_field == 0 and base_field == validator.REG_RBP:
	halyavin 2013/11/14 07:07:50 Extract into function: bytes_text, rm_text, base Extract into function: bytes_text, rm_text, base_text = SIBMemoryOperand( mod_field, base, base_field, index_text, scale_text) khim 2013/11/15 12:11:27 Done. Show quoted text On 2013/11/14 07:07:50, halyavin wrote: > Extract into function: > bytes_text, rm_text, base_text = SIBMemoryOperand( > mod_field, base, base_field, index_text, scale_text) Done.
	604 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	605 # In 64-bit mode this case is displayed as simple absolute address

	606 # In 32-bit mode there are another, shorter, form, but it's used

	607 # for %rip-relative addressing in 64-bit mode

	608 if (options.bitness == 64 and

	609 index_field == validator.REG_RSP and
	halyavin 2013/11/14 07:07:50 can we use index_text in ['%esp', '%rsp'] here? can we use index_text in ['%esp', '%rsp'] here? khim 2013/11/15 12:11:27 Yes, we can. Not %rsp, of course, but %riz. Show quoted text On 2013/11/14 07:07:50, halyavin wrote: > can we use index_text in ['%esp', '%rsp'] here? Yes, we can. Not %rsp, of course, but %riz.
	610 scale_field == 0 and index != 'r8'):

	611 rm_text = '0x0'

	612 else:

	613 rm_text = '0x0(,{},{})'.format(index_text, scale_text)

	614 base_text = ''

	615 # Memory access with base and index (no offset)

	616 elif mod_field == 0:

	617 bytes_text = '{:02x} {:02x}'.format(modrm, sib)

	618 rm_text = '({},{},{})'.format(base_text, index_text, scale_text)

	619 # Memory access with base, index and 8bit offset

	620 elif mod_field == 1:

	621 bytes_text = '{:02x} {:02x} 00'.format(modrm, sib)

	622 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	623 # Memory access with base, index and 32bit offset

	624 elif mod_field == 2:

	625 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	626 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	627 # Pretty-printing of access via %rsp

	628 if (scale_field == 0 and index != 'r8' and

	629 base_field == validator.REG_RSP and
	halyavin 2013/11/14 07:07:50 can we use index_text in ['%esp', '%rsp'] here? can we use index_text in ['%esp', '%rsp'] here? khim 2013/11/15 12:11:27 Yes, we can do that, too. Show quoted text On 2013/11/14 07:07:50, halyavin wrote: > can we use index_text in ['%esp', '%rsp'] here? Yes, we can do that, too.
	630 index_field == validator.REG_RSP):

	631 rm_text = ('0x0({})' if mod_field else '({})').format(base_text)

	632 replacement = [bytes_text]

	633 if reg is None:

	634 replacement.append(rm_text)

	635 else:

	636 replacement.append(rm_text if writes_to == 'reg' else reg_text)

	637 replacement.append(reg_text if writes_to == 'reg' else rm_text)

	638 if options.bitness == 64:

	639 if not memory_accessed or index_text == '%riz':

	640 replacement.append('any_nonspecial')

	641 else:

	642 replacement.append('%' + REGISTERS[input][index_field])

	643 # Currently xchg can not used used for sandboxing

	644 output = reg_text if writes_to == 'reg' else None

	645 replacement.append(output if register_write == 'sandbox' else None)

	646 # Do not generate replacements that will violate sandboxing rules.

	647 if memory_accessed:

	648 if base_text not in X86_64_BASE_REGISTERS: continue

	649 if index_text in X86_64_BASE_REGISTERS - set(['%r15']): continue

	650 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:

	651 continue

	652 if register_write == 'sandbox' and output == '%r15d':

	653 continue

	654 if (writes_to == 'both' and

	655 reg_text in X86_64_BASE_REGISTERS): continue

	656 replacements.append(tuple(replacement))

	657 ModRMMemoryReplacements.replacements[output_key] = tuple(replacements)

	658 return ModRMMemoryReplacements.replacements[output_key]

	659 ModRMMemoryReplacements.replacements = {}

	660

	661

	662 def PrepareCompressors():

	663 global compressors

	664 global main_compressors

	665 global register_compressors

	666 global memory_compressors

	667

	668 # "Larger" compressors should be tried first, then "smaller" ones.

	669 main_compressors = []

	670 register_compressors = []

	671 memory_compressors = []

	672 extra_compressors = []

	673

	674 # Map from "REX bit off" group of registers to "REX bit on" group of registers

	675 r8 = {

	676 'al': 'r8b',

	677 'ax': 'r8w',

	678 'eax': 'r8d',

	679 'rax': 'r8',

	680 'mm0': 'mmalt',

	681 'xmm0': 'xmm8',

	682 'ymm0': 'ymm8'

	683 }

	684

	685 if options.bitness == 32:

	686 register_kinds = ('al', 'ax', 'eax', 'mm0', 'xmm0', 'ymm0')

	687 register_kind_pairs = (

	688 ( 'al', 'al'),

	689 ( 'ax', 'al'),

	690 ( 'ax', 'ax'),

	691 ( 'eax', 'al'),

	692 ( 'eax', 'ax'),

	693 ( 'eax', 'eax'),

	694 ( 'eax', 'mm0'),

	695 ( 'mm0', 'eax'),

	696 ( 'eax', 'xmm0'),

	697 ('xmm0', 'eax'),

	698 ( 'mm0', 'mm0'),

	699 ( 'mm0', 'xmm0'),

	700 ('xmm0', 'mm0'),

	701 ('xmm0', 'xmm0'),

	702 ('xmm0', 'ymm0'),

	703 ('ymm0', 'xmm0'),

	704 ('ymm0', 'ymm0')

	705 )

	706 else:

	707 register_kinds = ('al', 'spl', 'ax', 'eax', 'rax', 'mm0', 'xmm0', 'ymm0',

	708 'r8b', 'r8w', 'r8d', 'r8', 'mmalt', 'xmm8', 'ymm8')

	709 register_kind_pairs = (

	710 ( 'al', 'al'),

	711 ( 'spl', 'spl'), ( 'spl', 'r8b'), ( 'r8b', 'spl'), ( 'r8b', 'r8b'),

	712 ( 'ax', 'al'),

	713 ( 'ax', 'spl'), ( 'ax', 'r8b'), ( 'r8w', 'spl'), ( 'r8w', 'r8b'),

	714 ( 'ax', 'ax'), ( 'ax', 'r8w'), ( 'r8w', 'ax'), ( 'r8w', 'r8w'),

	715 ( 'eax', 'al'),

	716 ( 'eax', 'spl'), ( 'eax', 'r8b'), ( 'r8d', 'spl'), ( 'r8d', 'r8b'),

	717 ( 'eax', 'ax'), ( 'eax', 'r8w'), ( 'r8d', 'ax'), ( 'r8d', 'r8w'),

	718 ( 'eax', 'eax'), ( 'eax', 'r8d'), ( 'r8d', 'eax'), ( 'r8d', 'r8d'),

	719 ( 'rax', 'al'),

	720 ( 'rax', 'spl'), ( 'rax', 'r8b'), ( 'r8', 'spl'), ( 'r8', 'r8b'),

	721 ( 'rax', 'ax'), ( 'rax', 'r8w'), ( 'r8', 'ax'), ( 'r8', 'r8w'),

	722 ( 'rax', 'eax'), ( 'rax', 'r8d'), ( 'r8', 'eax'), ( 'r8', 'r8d'),

	723 ( 'rax', 'rax'), ( 'rax', 'r8'), ( 'r8', 'rax'), ( 'r8', 'r8'),

	724 ( 'eax', 'mm0'), ( 'eax','mmalt'), ( 'r8d', 'mm0'), ( 'eax', 'mmalt'),

	725 ( 'rax', 'mm0'), ( 'rax','mmalt'), ( 'r8', 'mm0'), ( 'r8', 'mmalt'),

	726 ( 'mm0', 'eax'), ('mmalt', 'eax'), ( 'mm0', 'r8d'), ('mmalt', 'r8d'),

	727 ( 'mm0', 'rax'), ('mmalt', 'rax'), ( 'mm0', 'r8'), ('mmalt', 'r8'),

	728 ( 'eax', 'xmm0'), ( 'eax', 'xmm8'), ( 'r8d', 'xmm0'), ( 'r8d', 'xmm8'),

	729 ( 'rax', 'xmm0'), ( 'rax', 'xmm8'), ( 'r8', 'xmm0'), ( 'r8', 'xmm8'),

	730 ('xmm0', 'eax'), ('xmm0', 'r8d'), ('xmm8', 'eax'), ('xmm8', 'r8d'),

	731 ('xmm0', 'rax'), ('xmm0', 'r8'), ('xmm8', 'rax'), ('xmm8', 'r8'),

	732 ( 'mm0', 'mm0'), ('mmalt', 'mm0'), ( 'mm0','mmalt'), ('mmalt','mmalt'),

	733 ( 'mm0', 'xmm0'), ('mmalt','xmm0'), ( 'mm0', 'xmm8'), ('mmalt', 'xmm8'),

	734 ('xmm0', 'mm0'), ('xmm8', 'mm0'), ('xmm0','mmalt'), ('xmm8', 'mmalt'),

	735 ('xmm0', 'xmm0'), ('xmm0', 'xmm8'), ('xmm8', 'xmm0'), ('xmm8', 'xmm8'),

	736 ('xmm0', 'ymm0'), ('xmm0', 'ymm8'), ('xmm8', 'ymm0'), ('xmm8', 'ymm8'),

	737 ('ymm0', 'xmm0'), ('ymm0', 'xmm8'), ('ymm8', 'xmm0'), ('ymm8', 'xmm8'),

	738 ('ymm0', 'ymm0'), ('ymm0', 'ymm8'), ('ymm8', 'ymm0'), ('ymm8', 'ymm8')

	739 )

	740

	741 # Largest compressors: both reg and rm fields are used

	742 for reg, rm in register_kind_pairs:

	743 start_reg = REGISTERS[reg][0]

	744 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	745 start_rm = REGISTERS[rm][0]

	746 end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]

	747 instruction_kinds = [

	748 # Normal instructions with two operands (rm to reg)

	749 ({'writes_to':'reg'}, '', ' # rm to reg', ''),

	750 # Normal instructions with two operands (reg to rm)

	751 ({'writes_to':'rm'}, '', ' # reg to rm', '')

	752 ]

	753 # Lea in 64 bit mode is truly unique instruction for now

	754 if options.bitness == 64 and reg in ('eax', 'r8d', 'rax', 'r8'):

	755 instruction_kinds = [

	756 ({'writes_to':'reg', 'memory_accessed':False,

	757 'register_write':'sandbox' if reg in ('eax', 'r8d') else 'protect'},

	758 ' # lea', ' # rm to reg; lea', ' # lea')] + instruction_kinds

	759 # There are few more forms in 64 bit case (rm to reg)

	760 if options.bitness == 64 and reg in ('eax', 'r8d'):

	761 # Zero-extending version.

	762 instruction_kinds.append(

	763 ({'writes_to':'reg', 'register_write':'sandbox'},

	764 '', ' # rm to reg', ''))

	765 # More forms in 64 bit case (reg to rm)

	766 if options.bitness == 64 and rm in ('eax', 'r8d'):

	767 # Zero-extending version.

	768 instruction_kinds.append(

	769 ({'writes_to':'rm', 'register_write':'sandbox'},

	770 '', ' # reg to rm', ''))

	771 # Zero-extending xchg/xadd

	772 instruction_kinds.append(

	773 ({'writes_to':'both', 'register_write':'sandbox'},

	774 ' # write to both',

	775 ' # reg to rm; write to both',

	776 ' # write to both'))

	777 # Still more forms for 64 bit case (rm to reg).

	778 if options.bitness == 64 and reg in ('al', 'spl', 'ax', 'eax', 'rax',

	779 'r8b', 'r8w', 'r8d', 'r8'):

	780 # Dangerous instructions (rm to reg)

	781 instruction_kinds.append(

	782 ({'writes_to':'reg', 'register_write':'protect'},

	783 '', ' # rm to reg', ''))

	784 # Still more forms for 64 bit case (reg to rm)

	785 if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',

	786 'r8b', 'r8w', 'r8d', 'r8'):

	787 # Dangerous instructions (reg to rm)

	788 instruction_kinds.append(

	789 ({'writes_to':'rm', 'register_write':'protect'},

	790 '', ' # reg to rm', ''))

	791 # Dangerous xchg/xadd

	792 instruction_kinds.append(

	793 ({'writes_to':'both', 'register_write':'protect'},

	794 ' # write to both',

	795 ' # reg to rm; write to both',

	796 ' # write to both'))

	797 # 3DNow! instructions

	798 instruction_kinds.append(

	799 ({'writes_to':'reg', '3dnow':'yes'}, '', ' # rm to reg', ''))

	800 for args, notes, notes_register, notes_memory in instruction_kinds:

	801 regex = '(?: 00)*'

	802 # Additional byte is opcode extension with 3DNow! instructions.

	803 if '3dnow' in args:

	804 regex = ' [0-9a-fA-F][0-9a-fA-F]'

	805 args.pop('3dnow')

	806 regex += ' (?:lock )?\\w* (?:\\$0x0,\|\\$0x0,\\$0x0,\|%cl,\|%xmm0,)?'

	807 # We only need to process ModR/M+SIB '04 04' or '04 07' here

	808 if options.bitness == 32:

	809 regex_mem = '\$%esp,%eax,1\$'

	810 else:

	811 regex_mem = '\$(?:%rsp\|%r15),(?:%rax\|%r8),1\$'

	812 output = None

	813 output_note = None

	814 if args['writes_to'] == 'reg':

	815 regex += '(%' + REGISTERS[rm][0] + '\|' + regex_mem + ')'

	816 regex += ',(%' + REGISTERS[reg][0] + ')'

	817 if 'register_write' in args and args['register_write'] == 'sandbox':

	818 assert reg in ('eax', 'r8d')

	819 output = '%' + reg + '\|None'

	820 output_note = '[%eax..%edi]' if reg == 'eax' else '[%r8d..%r14d]'

	821 subst = (

	822 'XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),

	823 '[%{}..%{}]'.format(start_reg, end_reg), notes)

	824 subst_register = (

	825 'XX', '[%{}..%{}]'.format(start_rm, end_rm),

	826 '[%{}..%{}]'.format(start_reg, end_reg), notes_register)

	827 subst_memory = (

	828 'XX', '[memory]',

	829 '[%{}..%{}]'.format(start_reg, end_reg), notes_memory)

	830 else:

	831 regex += '(%' + REGISTERS[reg][0] + ')'

	832 regex += ',(%' + REGISTERS[rm][0] + '\|' + regex_mem + ')'

	833 if 'register_write'in args and args['register_write'] == 'sandbox':

	834 assert rm in ('eax', 'r8d')

	835 output = '%' + rm + '\|None'

	836 output_note = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'

	837 subst = (

	838 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	839 '[%{}..%{} or memory]'.format(start_rm, end_rm), notes)

	840 subst_register = (

	841 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	842 '[%{}..%{}]'.format(start_rm, end_rm), notes_register)

	843 subst_memory = (

	844 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	845 '[memory]', notes_memory)

	846 regex += '.*'

	847 if options.bitness == 64:

	848 regex += '; input_rr=(%eax\|%r8d\|any_nonspecial)'

	849 regex += '; output_rr=({})'.format(output)

	850 if 'memory_accessed' in args:

	851 input_note = 'any_nonspecial'

	852 input_note_r8 = 'any_nonspecial'

	853 else:

	854 input_note = '[%eax..%edi]'

	855 input_note_r8 = '[%r8d..%r15d]'

	856 subst_r8 = subst[0:-1] + (input_note_r8, output_note) + subst[-1:]

	857 subst = subst[0:-1] + (input_note, output_note) + subst[-1:]

	858 subst_memory_r8 = subst_memory[0:-1] + (

	859 input_note_r8, output_note) + subst_memory[-1:]

	860 subst_memory = subst_memory[0:-1] + (

	861 input_note, output_note) + subst_memory[-1:]

	862 subst_register = subst_register[0:-1] + (

	863 'any_nonspecial', output_note) + subst_register[-1:]

	864 regex += '()'

	865 base_r8 = rm in r8.values()

	866 memory_replacement = ModRMMemoryReplacements(

	867 reg=reg, base_r8=base_r8, **args)

	868 memory_compressors.append(Compressor(

	869 '.*?(04 0[47])' + regex, subst_memory, memory_replacement))

	870 if options.bitness == 64:

	871 memory_replacement_r8 = ModRMMemoryReplacements(

	872 reg=reg, base_r8=base_r8, index_r8=True, **args)

	873 memory_compressors.append(Compressor(

	874 '.*?(04 0[47])' + regex, subst_memory_r8, memory_replacement_r8))

	875 # Instructions with no memory access are instructions which are doing

	876 # something with memory address (e.g. lea) and as such they don't have

	877 # non-memory forms.

	878 if not 'memory_accessed' in args:

	879 register_replacement = ModRMRegisterReplacements(rm=rm, reg=reg, **args)

	880 register_compressors.append(Compressor(

	881 '.*?(c0)' + regex, subst_register, register_replacement))

	882 main_replacement = register_replacement + memory_replacement

	883 main_compressors.append(Compressor(

	884 '.*?(04 0[47])' + regex, subst, main_replacement))

	885 if options.bitness == 64:

	886 main_replacement_r8 = register_replacement + memory_replacement_r8

	887 main_compressors.append(Compressor(

	888 '.*?(04 0[47])' + regex, subst_r8, main_replacement_r8))

	889

	890 # Smaller compressors: only rm field is used.

	891 for rm in register_kinds:

	892 start_rm = REGISTERS[rm][0]

	893 end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]

	894 for opcode_bits in xrange(8):

	895 XX_byte_mark = 'XX/' + str(opcode_bits)

	896 instruction_kinds = [

	897 # The most basic form

	898 ({}, '', '', '')

	899 ]

	900 if options.bitness == 64:

	901 # No memory access (e.g. prefetch)

	902 instruction_kinds = [

	903 ({'memory_accessed':False}, '', '', '')] + instruction_kinds

	904 # More forms in 64 bit case.

	905 if options.bitness == 64 and rm in ('eax', 'r8d'):

	906 # Zero-extending version.

	907 instruction_kinds.append(

	908 ({'register_write':'sandbox'}, '', '', ''))

	909 # Still more forms for 64 bit case (reg to rm).

	910 if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',

	911 'r8b', 'r8w', 'r8d', 'r8'):

	912 # Dangerous instructions.

	913 instruction_kinds.append(

	914 ({'register_write':'protect'}, '', '', ''))

	915 for args, notes, notes_register, notes_memory in instruction_kinds:

	916 subst = (XX_byte_mark, '[%{}..%{} or memory]'.format(start_rm, end_rm),

	917 notes)

	918 subst_register = (XX_byte_mark, '[%{}..%{}]'.format(start_rm, end_rm),

	919 notes_register)

	920 subst_memory = (XX_byte_mark, '[memory]',

	921 notes_memory)

	922 regex = ('(?: 00)* (?:lock )?\\w* (?:\\$0x0,\|%cl,)?'

	923 '(%' + REGISTERS[rm][0] + '\|' + regex_mem + ').*')

	924 output = None

	925 output_note = None

	926 if options.bitness == 64:

	927 if 'register_write' in args and args['register_write'] == 'sandbox':

	928 assert rm in ('eax', 'r8d')

	929 output = '%' + rm + '\|None'

	930 output_note = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'

	931 regex += '; input_rr=(%eax\|%r8d\|any_nonspecial)'

	932 regex += '; output_rr=({})'.format(output)

	933 if 'memory_accessed' in args:

	934 input_note = 'any_nonspecial'

	935 input_note_r8 = 'any_nonspecial'

	936 else:

	937 input_note = '[%eax..%edi]'

	938 input_note_r8 = '[%r8d..%r15d]'

	939 subst_r8 = subst[0:-1] + (input_note_r8, output_note) + subst[-1:]

	940 subst = subst[0:-1] + (input_note, output_note) + subst[-1:]

	941 subst_memory_r8 = subst_memory[0:-1] + (

	942 input_note_r8, output_note) + subst_memory[-1:]

	943 subst_memory = subst_memory[0:-1] + (

	944 input_note, output_note) + subst_memory[-1:]

	945 subst_register = subst_register[0:-1] + (

	946 'any_nonspecial', output_note) + subst_register[-1:]

	947 regex += '()'

	948 base_r8 = rm in r8.values()

	949 memory_replacement = ModRMMemoryReplacements(

	950 reg=None, base_r8=base_r8, opcode_bits=opcode_bits, **args)

	951 memory_compressors.append(Compressor(

	952 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	953 subst_memory, memory_replacement))

	954 if options.bitness == 64:

	955 memory_replacement_r8 = ModRMMemoryReplacements(

	956 reg=None, base_r8=base_r8, index_r8=True, opcode_bits=opcode_bits,

	957 **args)

	958 memory_compressors.append(Compressor(

	959 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	960 subst_memory_r8, memory_replacement_r8))

	961 # Instructions with no memory access are instructions which are doing

	962 # something with memory address (e.g. prefetch) and as such they don't

	963 # have non-memory forms.

	964 if not 'memory_accessed' in args:

	965 register_replacement = ModRMRegisterReplacements(

	966 reg=None, rm=rm, opcode_bits=opcode_bits, **args)

	967 register_compressors.append(Compressor(

	968 '.?({:02x})'.format(0xc0 + opcode_bits 8) + regex,

	969 subst_register, register_replacement))

	970 main_replacement = register_replacement + memory_replacement

	971 main_compressors.append(Compressor(

	972 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	973 subst, main_replacement))

	974 if options.bitness == 64:

	975 main_replacement_r8 = register_replacement + memory_replacement_r8

	976 main_compressors.append(Compressor(

	977 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	978 subst_r8, main_replacement_r8))

	979

	980 # Even smaller compressors: only low 3 bits of opcode are used.

	981 for reg in register_kinds + ('st(0)',):

	982 start_reg = REGISTERS[reg][0]

	983 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	984 for opcode in xrange(8):

	985 for text1, text2, nibble in (

	986 ('[0..7]', '[8..f]', xrange(8)),

	987 ('[012367]', '[89abef]', (0, 1, 2, 3, 6, 7)),

	988 ('[0..6]', '[8..e]', xrange(7))

	989 ):

	990 # Note that we use 2nd line here to avoid ambiguity when opcode is 0x00

	991 extra_compressors.append(Compressor(

	992 '.?[0-9a-fA-F](1)(?: 00)'

	993 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	994 '(%(?:' + REGISTERS_RE[reg][1] + ')).*()',

	995 (text1, '[%{}..%{}]'.format(start_reg, end_reg), ''),

	996 tuple(('{:x}'.format(n), '%' + REGISTERS[reg][n])

	997 for n in nibble)))

	998 extra_compressors.append(Compressor(

	999 '.?[0-9a-fA-F](8)(?: 00)'

	1000 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	1001 '(%(?:' + REGISTERS_RE[reg][0] + ')).*()',

	1002 (text2, '[%{}..%{}]'.format(start_reg, end_reg), ''),

	1003 tuple(('{:x}'.format(n + 8), '%' + REGISTERS[reg][n])

	1004 for n in nibble)))

	1005 # Another version for 64 bit case

	1006 if options.bitness == 64 and reg in ('eax', 'r8d'):

	1007 extra_compressors.append(Compressor(

	1008 '.?[0-9a-fA-F](1)(?: 00)'

	1009 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	1010 '(%(?:' + REGISTERS_RE[reg][1] + ')).*'

	1011 'output_rr=(%(?:'+ REGISTERS_RE[reg][1] + ')).*()',

	1012 tuple([text1] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +

	1013 ['']),

	1014 tuple(['{:x}'.format(n)] + ['%' + REGISTERS[reg][n]] * 2

	1015 for n in nibble)))

	1016 extra_compressors.append(Compressor(

	1017 '.?[0-9a-fA-F](8)(?: 00)'

	1018 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	1019 '(%(?:' + REGISTERS_RE[reg][0] + ')).*'

	1020 'output_rr=(%(?:'+ REGISTERS_RE[reg][0] + ')).*()',

	1021 tuple([text2] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +

	1022 ['']),

	1023 tuple(['{:x}'.format(n + 8)] + ['%' + REGISTERS[reg][n]] * 2

	1024 for n in nibble)))

	1025 compressors = (main_compressors + memory_compressors + register_compressors +

	1026 extra_compressors)

	1027

	1028 # Special compressors: will handle some cosmetic issues.

	1029 #

	1030 # SETxx ignores reg field and thus are described as many separate instructions

	1031 compressors.append(Compressor(

	1032 '.0f 9[0-9a-fA-F] XX(/[0-7]) set.()', ('', ''),

	1033 [('/' + str(i), ) for i in range(8)]))

	1034 # BSWAP is described with opcode "0f c8+r", not "0f /1" in manual

	1035 if options.bitness == 32:

	1036 compressors.append(Compressor(

	1037 '.(XX/1) bswap.ax.*()', ('c[8..f]', ''), [('XX/1', )]))

	1038 else:

	1039 compressors.append(Compressor(

	1040 '.(XX/1) bswap.ax.*()', ('c[89abef]', ''), [('XX/1', )]))

	1041 compressors.append(Compressor(

	1042 '.(XX/1) bswap.r8.*()', ('c[8..e]', ''), [('XX/1', )]))

	1043 # Add mark '# write to both' to certain versions of CMPXCHG, XADD, and XCHG

	1044 if options.bitness == 64:

	1045 compressors.append(Compressor(

	1046 '.* (?:cmpxchg\|xadd\|xchg).%al\\.\\.%bh[^#]()$',

	1047 (' # write to both', ), ((), )))

	1048 # "and $0xe0,[%eax..%edi]" is treated specially which means that we list all

	1049 # versions of and "[$0x1..$0xff],[%eax..%edi]" separately here.

	1050 # Without this rule these ands comprise 2/3 of the whole output!

	1051 if options.bitness == 32:

	1052 compressors.append(Compressor(

	1053 '.*83 (e0 01 and \\$0x1,%eax)()',

	1054 ('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', ' # special and'),

	1055 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS['eax'][r]), )

	1056 for i in range(0x01, 0x100) for r in range(8)] +

	1057 [('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', )]))

	1058 else:

	1059 for reg in ('eax', 'r8d'):

	1060 start_reg = REGISTERS[reg][0]

	1061 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	1062 for index_reg in ('eax', 'r8d'):

	1063 start_index = REGISTERS[index_reg][0]

	1064 end_index = REGISTERS[index_reg][-1]

	1065 compressors.append(Compressor(

	1066 '.83 (e0 01 and \\$0x1,%' + reg + ').'

	1067 'input_rr=(any_nonspecial); output_rr=(%' + reg + ')()',

	1068 ('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,

	1069 end_reg), '[%{}..%{}]'.format(start_index, end_index),

	1070 '[%{}..%{}]'.format(start_reg, end_reg),

	1071 ' # special and'),

	1072 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS[reg][r]),

	1073 'any_nonspecial', '%' + REGISTERS[reg][r])

	1074 for i in range(0x01, 0x100) for r in range(7 + (reg == 'eax'))] +

	1075 [('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,

	1076 end_reg), '[%{}..%{}]'.format(start_index, end_index),

	1077 '[%{}..%{}]'.format(start_reg, end_reg))]))

	1078

	1079 # "and $e0" and similar are used to align %rsp. All negative values are

	1080 # accepted by validator and there are 127 of these.

	1081 # Consolidate them into one line.

	1082 if options.bitness == 64:

	1083 compressors.append(Compressor(

	1084 '.(?:81\|83) (?:e4\|e5) (80) (?:00 00 00 \|) and \\$0x(80),%r[bs]p.()',

	1085 ('[80..ff]', '[80..ff]', ' # alignment and'),

	1086 [('{:02x}'.format(i), '{:02x}'.format(i)) for i in range(0x80, 0x100)]))

	1087

	1088 # Merge memory and non-memory access

	1089 if options.bitness == 32:

	1090 letters_and_registers = (('b', 'al', ''), ('w', 'ax', ''), ('l', 'eax', ''))

	1091 else:

	1092 letters_and_registers = (

	1093 ('b', 'al', 'eax'), ('b', 'spl', 'eax'), ('b', 'r8b', 'r8d'),

	1094 ('w', 'ax', 'eax'), ('w', 'r8w', 'r8d'),

	1095 ('l', 'eax', 'eax'), ('l', 'r8d', 'r8d'),

	1096 ('q', 'rax', 'eax'), ('q', 'r8', 'r8d')

	1097 )

	1098 for letter, reg, out_reg in letters_and_registers:

	1099 start_reg = REGISTERS[reg][0]

	1100 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	1101 all_regs = '[%{}..%{}]'.format(start_reg, end_reg)

	1102 regs_mark = '[%{}..%{} or memory]'.format(start_reg, end_reg)

	1103 if options.bitness == 64:

	1104 start_out = REGISTERS[out_reg][0]

	1105 end_out = REGISTERS[out_reg][-1 if out_reg[0:2] != 'r8' else -2]

	1106 out_regs = '[%{}..%{}]'.format(start_out, end_out)

	1107 for notes in ('', ' # rm to reg', ' # reg to rm'):

	1108 compressors.append(Compressor(

	1109 '.* \\w(' + letter + ') .(\\[memory]).*()()',

	1110 ('[{}]?'.format(letter), regs_mark, '', ''),

	1111 ((letter, '[memory]', ''), ('', all_regs, notes))))

	1112 if options.bitness == 64:

	1113 for index_reg in ('eax', 'r8d'):

	1114 start_index = REGISTERS[index_reg][0]

	1115 end_index = REGISTERS[index_reg][-1]

	1116 index_regs = '[%{}..%{}]'.format(start_index, end_index)

	1117 for output_rrs in ((None, out_regs), (out_regs, None), (None, None)):

	1118 compressors.append(Compressor(

	1119 '.* \\w(' + letter + ') .(\\[memory]).*; '

	1120 'input_rr=(\\[%[a-z0-9]..%[a-z0-9]\\]); '

	1121 'output_rr=(\\[%[a-z0-9]..%[a-z0-9]\\]\|None)()()',

	1122 ('[{}]?'.format(letter), regs_mark, index_regs,

	1123 output_rrs[0] if output_rrs[0] is not None else output_rrs[1],

	1124 '', ''),

	1125 ((letter, '[memory]', index_regs, output_rrs[0], ''),

	1126 ('', all_regs, 'any_nonspecial', output_rrs[1], notes))))

	1127

	1128 # REX compressors

	1129 if options.bitness == 64:

	1130 # First pretty complex set of compressors to combine versions of REX with

	1131 # three lowest bits in different states.

	1132 register_kind_pairs = (

	1133 ( None, None),

	1134 ( 'al', 'al'), ( 'al', None), (None, 'al'),

	1135 ( 'ax', 'al'), ( 'al', 'ax'),

	1136 ( 'ax', 'ax'), ( 'ax', None), (None, 'ax'),

	1137 ( 'eax', 'al'), ( 'al', 'eax'),

	1138 ( 'eax', 'ax'), ( 'ax', 'eax'),

	1139 ( 'eax', 'eax'), ( 'eax', None), (None, 'eax'),

	1140 ( 'rax', 'al'), ( 'al', 'rax'),

	1141 ( 'rax', 'ax'), ( 'ax', 'rax'),

	1142 ( 'rax', 'eax'), ( 'eax', 'rax'),

	1143 ( 'rax', 'rax'), ( 'rax', None), (None, 'rax'),

	1144 ( 'eax', 'mm0'), ( 'mm0', 'eax'),

	1145 ( 'rax', 'mm0'), ( 'mm0', 'rax'),

	1146 ( 'mm0', 'eax'), ( 'eax', 'mm0'),

	1147 ( 'mm0', 'rax'), ( 'rax', 'mm0'),

	1148 ( 'eax', 'xmm0'),

	1149 ( 'rax', 'xmm0'),

	1150 ('xmm0', 'eax'),

	1151 ('xmm0', 'rax'),

	1152 ( 'mm0', 'mm0'), ( 'mm0', None), (None, 'mm0'),

	1153 ( 'mm0', 'xmm0'),

	1154 ('xmm0', 'mm0'),

	1155 ('xmm0', 'xmm0'),

	1156 ('xmm0', 'ymm0'), ('xmm0', None), (None, 'xmm0'),

	1157 ('ymm0', 'xmm0'),

	1158 ('ymm0', 'ymm0'), ('ymm0', None), (None, 'ymm0'),

	1159 )

	1160 for reg, rm in register_kind_pairs:

	1161 for last_reg, last_rm in ((-1, -1), (-1, -2), (-2, -1), (-2, -2)):

	1162 if reg:

	1163 start_reg = REGISTERS[reg][0]

	1164 start_reg8 = REGISTERS[r8[reg]][0]

	1165 end_reg = REGISTERS[reg][-1]

	1166 end_reg0 = 'dil' if reg == 'al' else end_reg

	1167 end_reg8 = REGISTERS[r8[reg]][last_reg]

	1168 reg_regex = '\\[(%' + start_reg + '\\.\\.%' + end_reg + ')]'

	1169 reg_regex0 = '\\[(%' + start_reg + '\\.\\.%' + end_reg0 + ')]'

	1170 elif last_reg == -2:

	1171 continue

	1172 if rm:

	1173 start_rm = REGISTERS[rm][0]

	1174 start_rm8 = REGISTERS[r8[rm]][0]

	1175 end_rm = REGISTERS[rm][-1]

	1176 end_rm0 = 'dil' if rm == 'al' else end_rm

	1177 end_rm8 = REGISTERS[r8[rm]][last_rm]

	1178 rm_regex = ('\\[(%' + start_rm + '\\.\\.%' + end_rm + ')'

	1179 '(?: or memory)?]')

	1180 rm_regex0 = ('\\[(%' + start_rm + '\\.\\.%' + end_rm0 + ')'

	1181 '(?: or memory)?]')

	1182 elif last_rm == -2:

	1183 continue

	1184 for rexw in (True, False):

	1185 for input_rr in (True, False):

	1186 for output_rr in (True, False) if reg or rm else (None, ):

	1187 for rm_to_reg in (True, False) if reg and rm else (None, ):

	1188 # Legacy prefixes

	1189 regex = '.:(?: 26\| 2e\| 36\| 3e\| 64\| 65\| 66\| 67\| f0\| f2\| f3)'

	1190 # REX

	1191 regex += '( 48).' if rexw else '( 40\|).'

	1192 # Replacement text

	1193 replacement_tuple = (

	1194 ' [REX:48..4f]' if rexw else ' [REX:40..47]?', )

	1195 if reg:

	1196 replacement_regs = '%{}..%{}'.format(start_reg, end_reg8)

	1197 if rm:

	1198 replacement_rms = '%{}..%{}'.format(start_rm, end_rm8)

	1199 # Instruction arguments

	1200 if not reg and not rm:

	1201 pass

	1202 elif not reg and rm:

	1203 if rexw:

	1204 regex += rm_regex0 + '.*'

	1205 else:

	1206 regex += rm_regex + '.*'

	1207 replacement_tuple += (replacement_rms, )

	1208 elif reg and not rm:

	1209 if rexw:

	1210 regex += reg_regex0 + '.*'

	1211 else:

	1212 regex += reg_regex + '.*'

	1213 replacement_tuple += (replacement_regs, )

	1214 elif rm_to_reg:

	1215 if rexw:

	1216 regex += rm_regex0 + ',' + reg_regex0 + '.*'

	1217 else:

	1218 regex += rm_regex + ',' + reg_regex + '.*'

	1219 replacement_tuple += (replacement_rms, replacement_regs)

	1220 else:

	1221 if rexw:

	1222 regex += reg_regex0 + ',' + rm_regex0 + '.*'

	1223 else:

	1224 regex += reg_regex + ',' + rm_regex + '.*'

	1225 replacement_tuple += (replacement_regs, replacement_rms)

	1226 # Input and output restricted registers

	1227 if input_rr:

	1228 regex += 'input_rr=\\[(%eax\\.\\.%edi)].*'

	1229 replacement_tuple += ('%eax..%r15d', )

	1230 if output_rr:

	1231 regex += 'output_rr=\\[(%eax\\.\\.%edi)].*'

	1232 replacement_tuple += ('%eax..%r14d', )

	1233 regex += '()'

	1234 replacement_tuple += ('', )

	1235 # Replacement cases

	1236 replacement_tuples = ()

	1237 for byte in (range(0x48, 0x50)

	1238 if rexw

	1239 else range(0x40, 0x48) + ['']):

	1240 replacement_case = (

	1241 ' {:02x}'.format(byte) if byte else byte, )

	1242 if byte:

	1243 if rm:

	1244 if byte & 0x1:

	1245 replacement_rms = '%{}..%{}'.format(start_rm8, end_rm8)

	1246 else:

	1247 replacement_rms = '%{}..%{}'.format(start_rm, end_rm0)

	1248 if byte & 0x2:

	1249 replacement_index = '%r8d..%r15d'

	1250 else:

	1251 replacement_index = '%eax..%edi'

	1252 if reg:

	1253 if byte & 0x4:

	1254 replacement_regs = '%{}..%{}'.format(start_reg8,

	1255 end_reg8)

	1256 else:

	1257 replacement_regs = '%{}..%{}'.format(start_reg,

	1258 end_reg0)

	1259 else:

	1260 if rm:

	1261 replacement_rms = '%{}..%{}'.format(start_rm, end_rm)

	1262 replacement_index = '%eax..%edi'

	1263 if reg:

	1264 replacement_regs = '%{}..%{}'.format(start_reg, end_reg)

	1265 if not reg and not rm:

	1266 pass

	1267 elif not reg and rm:

	1268 replacement_case += (replacement_rms, )

	1269 if byte:

	1270 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'

	1271 else:

	1272 final_rr = '%eax..%edi'

	1273 elif reg and not rm:

	1274 replacement_case += (replacement_regs, )

	1275 if byte:

	1276 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'

	1277 else:

	1278 final_rr = '%eax..%edi'

	1279 elif rm_to_reg:

	1280 replacement_case += (replacement_rms, replacement_regs)

	1281 if byte:

	1282 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'

	1283 else:

	1284 final_rr = '%eax..%edi'

	1285 else:

	1286 replacement_case += (replacement_regs, replacement_rms)

	1287 if byte:

	1288 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'

	1289 else:

	1290 final_rr = '%eax..%edi'

	1291 if input_rr: replacement_case += (replacement_index, )

	1292 if output_rr: replacement_case += (final_rr, )

	1293 replacement_tuples += (replacement_case, )

	1294 compressors.append(Compressor(

	1295 regex, replacement_tuple, replacement_tuples))

	1296 # This is pretty simple compressor to combine two lines with different REX.W

	1297 # bits (only if they are otherwise identical).

	1298 compressors.append(Compressor(

	1299 '.(\\[REX:40\\.\\.47]\\?).()', ('[REX:40..4f]?', ''),

	1300 (('[REX:40..47]?', ), ('[REX:48..4f]', ))))

	1301

	1302

	1303 def ShowProgress(rule, instruction):

	1304 if rule not in ShowProgress.rules_shown:

	1305 first_print = True

	1306 ShowProgress.rules_shown[rule]=len(ShowProgress.rules_shown)

	1307 else:

	1308 first_print = False

	1309 print >> sys.stderr, '-------- Compressed --------'

	1310 print >> sys.stderr, 'Rule:', ShowProgress.rules_shown[rule]

	1311 print >> sys.stderr, '--------'

	1312 compressor = compressors[rule]

	1313 match = compressor.regex.match(instruction)

	1314 assert match

	1315 format_str = CompressionTemplate(instruction, match, '{{{}}}')

	1316 replacements = sorted(format_str.format(*replacement)

	1317 for replacement in compressor.replacements)

	1318 if len(compressor.replacements) <= 4 or first_print:

	1319 for replacement in replacements:

	1320 print >> sys.stderr, replacement

	1321 else:

	1322 print >> sys.stderr, replacements[0]

	1323 print >> sys.stderr, "..."

	1324 print >> sys.stderr, replacements[-1]

	1325 print >> sys.stderr, '--------'

	1326 print >> sys.stderr, 'Compressed', (

	1327 format_str + '{{{}}}').format(*compressor.subst)

	1328 ShowProgress.rules_shown = {}

	1329

	1330

	1331 def main():

	1332 # We are keeping these global to share state graph and compressors

	1333 # between workers spawned by multiprocess. Passing them every time is slow.

	1334 global options, xml_file

	1335 global dfa

	1336 global worker_validator

	1337 options, xml_file = ParseOptions()

	1338 dfa = dfa_parser.ParseXml(xml_file)

	1339 worker_validator = validator.Validator(

	1340 validator_dll=options.validator_dll,

	1341 decoder_dll=options.decoder_dll)

	1342 PrepareCompressors()

	1343

	1344 assert dfa.initial_state.is_accepting

	1345 assert not dfa.initial_state.any_byte

	1346

	1347 print >> sys.stderr, len(dfa.states), 'states'

	1348

	1349 num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

	1350

	1351 # We can't just write 'num_suffixes[dfa.initial_state]' because

	1352 # initial state is accepting.

	1353 total_instructions = sum(

	1354 num_suffixes[t.to_state]

	1355 for t in dfa.initial_state.forward_transitions.values())

	1356 print >> sys.stderr, total_instructions, 'regular instructions total'

	1357

	1358 tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)

	1359 print >> sys.stderr, len(tasks), 'tasks'

	1360

	1361 pool = multiprocessing.Pool()

	1362

	1363 results = pool.imap(Worker, tasks)

	1364

	1365 total = 0

	1366 num_valid = 0

	1367 full_output = set()

	1368 for prefix, count, valid_count, output, trace in results:

	1369 print >> sys.stderr, 'Prefix:', ', '.join(map(hex, prefix))

	1370 total += count

	1371 num_valid += valid_count

	1372 full_output \|= output

	1373 for rule, instruction in trace:

	1374 ShowProgress(rule, instruction)

	1375 for instruction in sorted(Compressed(full_output,

	1376 compressors,

	1377 ShowProgress)):

	1378 print instruction

	1379

	1380 print >> sys.stderr, total, 'instructions were processed'

	1381 print >> sys.stderr, num_valid, 'valid instructions'

	1382

	1383

	1384 if __name__ == '__main__':

	1385 main()

OLD	NEW

« no previous file with comments | « no previous file | src/trusted/validator_ragel/testdata/32bit_regular.golden » ('j') | no next file with comments »