src/trusted/validator_ragel/compress_regular_instructions.py - Issue 49183002: Regular instructions golden file test.

Side by Side Diff: src/trusted/validator_ragel/compress_regular_instructions.py

Issue 49183002: Regular instructions golden file test. Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # Copyright (c) 2013 The Native Client Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """

	6 Traverse the validator's DFA, collect all "normal" instruction and then

	7 compress output. Note: "anybyte fields" (immediates and displacements)

	8 are always filled with zeros. Otherwise processing of sextillions (sic!)

	9 of possibilities will take too long.

	10

	11 Each rule is applied only when all variants are accepted by validator.

	12 The following compression rules are present:

	13

	14 1. Compress ModR/M (+SIB & displacement).

	15 Instruction: 00 00 add %al,(%rax)

	16 ...

	17 Instruction: 00 ff add %bh,%bh

	18 becomes

	19 Instruction: 00 XX add [%al..%bh],[%al..%bh or memory]

	20

	21 1a. Compress ModR/M (+SIB & displacement) memory-only.

	22 Instruction: f0 01 00 lock add %eax,(%eax)

	23 ...

	24 Instruction: f0 01 bf 00 00 00 00 lock add %edi,0x0(%edi)

	25 becomes

	26 Instruction: f0 01 XX lock add [%eax..edi],[memory]

	27

	28 1b. Compress ModR/M register only.

	29 Instruction: 66 0f 50 c0 movmskpd %xmm0,%eax

	30 ...

	31 Instruction: 66 0f 50 ff movmskpd %xmm7,%edi

	32 becomes

	33 Instruction: 66 0f 50 XX movmskpd [%xmm0..%xmm7],[%eax..edi]

	34

	35 2. Compress ModR/M (+SIB & displacement) with opcode extension.

	36 Instruction: 0f 90 00 seto (%eax)

	37 ...

	38 Instruction: 0f 90 c7 seto %bh

	39 becomes

	40 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	41

	42 2a. Compress ModR/M (+SIB & displacement) memory-only with opcode extension.

	43 Instruction: f0 ff 00 lock incl (%eax)

	44 ...

	45 Instruction: f0 ff 84 ff 00 00 00 00 lock incl 0x0(%edi,%edi,8)

	46 becomes

	47 Instruction: f0 ff XX/1 lock decl [memory]

	48

	49 2b. Compress ModR/M register-only with opcode extension.

	50 Instruction: 0f 71 d0 00 psrlw $0x0,%mm0

	51 ...

	52 Instruction: 0f 71 d7 00 psrlw $0x0,%mm7

	53 becomes

	54 Instruction: 66 0f 71 XX/2 00 psrlw $0x0,[%mm0..%mm7]

	55

	56 3. Compress register-in-opcode.

	57 Instruction: d9 c0 fld %st(0)

	58 ...

	59 Instruction: d9 c7 fld %st(7)

	60 becomes

	61 Instruction: Instruction: d9 c[0..7] fld [%st(0)..%st(7)]

	62

	63 Only applies if all possible register accesses are accepted by validator.

	64

	65 4. Special compressor for "set" instruction.

	66 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	67 ...

	68 Instruction: 0f 90 XX/7 seto [%al..%bh or memory]

	69 becomes

	70 Instruction: 0f 90 XX seto [%al..%bh or memory]

	71 """

	72

	73 import itertools

	74 import multiprocessing

	75 import optparse

	76 import os

	77 import re

	78 import subprocess

	79 import sys

	80 import tempfile

	81 import traceback

	82

	83 import dfa_parser

	84 import dfa_traversal

	85 import validator

	86

	87

	88 # Register names in 'natual' order (as defined by IA32/x86-64 ABI)

	89 #

	90 # X86-64 ABI splits all registers in groups of 8 because it uses 3-bit field

	91 # in opcode, ModR/M, and/or SIB bytes to encode them.

	92 #

	93 # In most cases there are 16 registers of a given kind and two such groups,

	94 # but there are couple of exceptions:

	95 # 1. There are 20 8-bit registers and three groups (two of them overlap)

	96 # 2. There are eight X87 and MMX registers thus two groups are identical

	97 #

	98 # We use typical register from a group to name the whole group. Most groups

	99 # use first register, but 'spl' group uses fifth register because it's first

	100 # four registers are the same as 'al' group. We use mnemonic name 'mmalt'

	101 # to represent the "evil mirror" of the 'mm0' group.

	102 REGISTERS = {

	103 'al': [ 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh' ],

	104 'spl': [ 'al', 'cl', 'dl', 'bl', 'spl', 'bpl', 'sil', 'dil' ],

	105 'ax': [ 'ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di' ],

	106 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi' ],

	107 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi' ],

	108 'r8b': [ 'r{}b'.format(N) for N in range(8,16) ],

	109 'r8w': [ 'r{}w'.format(N) for N in range(8,16) ],

	110 'r8d': [ 'r{}d'.format(N) for N in range(8,16) ],

	111 'r8': [ 'r{}'.format(N) for N in range(8,16) ],

	112 'mm0': [ 'mm{}'.format(N) for N in range(8) ],

	113 'mmalt': [ 'mm{}'.format(N) for N in range(8) ],

	114 'st(0)': [ 'st({})'.format(N) for N in range(8) ],

	115 'xmm0': [ 'xmm{}'.format(N) for N in range(8) ],

	116 'xmm8': [ 'xmm{}'.format(N) for N in range(8,16) ],

	117 'ymm0': [ 'ymm{}'.format(N) for N in range(8) ],

	118 'ymm8': [ 'ymm{}'.format(N) for N in range(8,16) ]

	119 }

	120

	121

	122 NOP = 0x90

	123

	124

	125 def PadToBundleSize(bytes):

	126 assert len(bytes) <= validator.BUNDLE_SIZE

	127 return bytes + [NOP] * (validator.BUNDLE_SIZE - len(bytes))

	128

	129

	130 # In x86-64 mode we have so-called 'restricted register' which is used to

	131 # tie two groups together. Some instructions require particular value to

	132 # be stored in this variable, while some accept any non-special restricted

	133 # register (%ebp and %esp are special because they can only be accepted by

	134 # a few 'special' instructions).

	135 #

	136 # You can find more details in the "NaCl SFI model on x86-64 systems" manual.

	137 #

	138 # We try to feed all possible 'restricted registers' into validator and then

	139 # classify the instruction using this map. If set of acceptable 'restricted

	140 # registers' is not here, then it's an error in validator.

	141 ACCEPTABLE_X86_64_INPUTS = {

	142 0x00001: 'input_rr=%eax',

	143 0x00002: 'input_rr=%ecx',

	144 0x00004: 'input_rr=%edx',

	145 0x00008: 'input_rr=%ebx',

	146 0x00010: 'input_rr=%esp',

	147 0x00020: 'input_rr=%ebp',

	148 0x00040: 'input_rr=%esi',

	149 0x00080: 'input_rr=%edi',

	150 0x00100: 'input_rr=%r8d',

	151 0x00200: 'input_rr=%r9d',

	152 0x00400: 'input_rr=%r10d',

	153 0x00800: 'input_rr=%r11d',

	154 0x01000: 'input_rr=%r12d',

	155 0x02000: 'input_rr=%r13d',

	156 0x04000: 'input_rr=%r14d',

	157 0x08000: 'input_rr=%r15d',

	158 0x1ffcf: 'input_rr=any_nonspecial'

	159 }

	160

	161 # Any instruction must produce either None or one of fifteen registers as an

	162 # output 'restricted register' value. 'r15d' is NOT acceptable as an output.

	163 ACCEPTABLE_X86_64_OUTPUT_REGISTERS = tuple(

	164 '%' + reg for reg in (REGISTERS['eax'] + REGISTERS['r8d'])[0:-1])

	165

	166

	167 def ValidateInstruction(instruction, validator_inst):

	168 bundle = ''.join(map(chr, PadToBundleSize(instruction)))

	169 if options.bitness == 32:

	170 result = validator_inst.ValidateChunk(bundle, bitness=32)

	171 return result, []

	172 else:

	173 valid_inputs = 0

	174 known_final_rr = None

	175 output_rr = None

	176 # Note that iteration order is aligned with ACCEPTABLE_X86_64_INPUTS array

	177 # above.

	178 for bit, initial_rr in enumerate(validator.ALL_REGISTERS + [None]):

	179 valid, final_rr = validator_inst.ValidateAndGetFinalRestrictedRegister(

	180 bundle, len(instruction), initial_rr)

	181 if valid:

	182 # final_rr should not depend on input_rr

	183 assert valid_inputs == 0 or known_final_rr == final_rr

	184 valid_inputs \|= 1 << bit

	185 known_final_rr = final_rr

	186 # If nothing is accepted then instruction is not valid. Easy and simple.

	187 if valid_inputs == 0: return False, []

	188 # If returned value in unacceptable we'll get IndexError here and this

	189 # test will fail

	190 if known_final_rr is not None:

	191 output_rr = ACCEPTABLE_X86_64_OUTPUT_REGISTERS[known_final_rr]

	192 # If collected valid_inputs are unacceptable we'll get KeyError here and

	193 # this test will fail

	194 return True, [ACCEPTABLE_X86_64_INPUTS[valid_inputs],

	195 'output_rr={}'.format(output_rr)]

	196

	197

	198 class WorkerState(object):

	199 def __init__(self, prefix, validator):

	200 self.total_instructions = 0

	201 self.num_valid = 0

	202 self.validator = validator

	203 self.output = set()

	204 self.trace = []

	205

	206

	207 def ReceiveInstruction(self, bytes):

	208 self.total_instructions += 1

	209 result, notes = ValidateInstruction(bytes, self.validator)

	210 if result:

	211 self.num_valid += 1

	212 dis = self.validator.DisassembleChunk(

	213 ''.join(map(chr, bytes)),

	214 bitness=options.bitness)

	215 for line_nr in xrange(len(dis)):

	216 dis[line_nr] = str(dis[line_nr])

	217 assert dis[line_nr][0:17] == 'Instruction(0x' + str(line_nr) + ': '

	218 assert dis[line_nr][-1:] == ')'

	219 dis[line_nr] = dis[line_nr][17:-1]

	220 # If %rip is involved then comment will be different depending on the

	221 # instruction length. Eliminate it.

	222 if '(%rip)' in dis[0]:

	223 dis[0] = re.sub(' # 0x[ ][0-9a-fA-F]', '', dis[0])

	224 # Zero displacements are represented as 0x0 for all instructions except

	225 # jumps where they disassembled as non-zero due to %eip/%rip-relative

	226 # addressing. We replace this displacement with %eip/%rip to simplify

	227 # compression.

	228 if ' 0x' in dis[0] and ' 0x0' not in dis[0]:

	229 for bytes in xrange(1, 16):

	230 dis[0] = re.sub(

	231 '(' + '(?:[0-9a-fA-F][0-9a-fA-F] ){' + str(bytes) + '} .* )' +

	232 hex(bytes) + '(.*)',

	233 '\\1%eip\\2' if options.bitness == 32 else '\\1%rip\\2',

	234 dis[0]);

	235 dis[0] = 'Instruction: ' + dis[0]

	236 dis += notes

	237 self.output.add('; '.join(dis))

	238

	239

	240 def RecordTrace(self, compressor_nr, instruction):

	241 self.trace.append((compressor_nr, instruction))

	242

	243

	244 # Compressor has three slots: regex (which picks apart given instruction),

	245 # subst (which is used to denote compressed version) and replacements (which

	246 # are used to generate set of instructions from a given code).

	247 #

	248 # Example compressor:

	249 # regex = '.?[0-9a-fA-F]([0-7]) \\w (%e(?:[abcd]x\|[sb]p\|[sd]i)).*()'

	250 # subst = ('[0-7]', '[%eax..%edi]', ' # register in opcode')

	251 # replacements = ((0, '%eax'), (1, '%ecx'), (2, '%edx'), (3, '%ebx')

	252 # (4, '%esp'), (5, '%ebp'), (6, '%esi'), (7, '%edi'))

	253 #

	254 # When faced with instriuction '40 inc %eax' it will capture the following

	255 # pieces of said instruction: '4[0] inc [%eax]'.

	256 #

	257 # Then it will produce the following eight instructions:

	258 # '40 inc %eax'

	259 # '41 inc %ecx'

	260 # '42 inc %edx'

	261 # '43 inc %ebx'

	262 # '44 inc %esp'

	263 # '45 inc %ebp'

	264 # '46 inc %esi'

	265 # '47 inc %edi'

	266 #

	267 # If all these instructions can be found in a set of instructions then

	268 # compressor will remove them from said set and will insert one replacement

	269 # "compressed instruction" '4[0-7] inc [%eax..%edi] # register in opcode'.

	270 #

	271 # Note that last group is only used in the replacement. It's used to grab marks

	272 # added by previous compressors and to replace them with a new mark.

	273 class Compressor(object):

	274 __slots__ = [

	275 'regex',

	276 'subst',

	277 'replacements'

	278 ]

	279

	280 def __init__(self, regex, subst, replacements=None):

	281 self.regex = re.compile(regex)

	282 self.subst = subst

	283 self.replacements = [] if replacements is None else replacements

	284

	285

	286 def CompressionTemplate(instruction, match, mark):

	287 """ Replace all match groups with the mark. """

	288 pos = 0

	289 format_str = ''

	290 for group in range(1, len(match.groups())):

	291 format_str += instruction[pos:match.start(group)] + mark

	292 pos = match.end(group)

	293 return format_str + instruction[pos:match.start(len(match.groups()))]

	294

	295

	296 def CompressOneMatch(instructions, instruction, match, compressor):

	297 format_str = CompressionTemplate(instruction, match, '{}')

	298 subset = set()

	299 for replacement in compressor.replacements:

	300 replacement_str = format_str.format(*replacement)

	301 if not replacement_str in instructions:

	302 return (False, instructions)

	303 subset.add(replacement_str)

	304 instructions -= subset

	305 instructions.add((format_str + '{}').format(*compressor.subst))

	306 return (True, instructions)

	307

	308

	309 def CompressOneInstruction(instructions, compressors, split, cache):

	310 sorted_instructions = (sorted(i for i in instructions if i > split) +

	311 sorted(i for i in instructions if i < split))

	312 for instruction in sorted_instructions:

	313 if instruction in cache:

	314 compressors_list = cache[instruction]

	315 for compressor_nr, match, compressor in compressors_list:

	316 result, instructions = CompressOneMatch(

	317 instructions, instruction, match, compressor)

	318 if result:

	319 return (instructions, compressor_nr, instruction)

	320 else:

	321 compressors_list = []

	322 for compressor_nr, compressor in enumerate(compressors):

	323 match = compressor.regex.match(instruction)

	324 if match:

	325 compressors_list.append((compressor_nr, match, compressor))

	326 result, instructions = CompressOneMatch(

	327 instructions, instruction, match, compressor)

	328 if result:

	329 return (instructions, compressor_nr, instruction)

	330 cache[instruction] = compressors_list

	331 return (instructions, False, False)

	332

	333

	334 def Compressed(instructions, compressors, show_progress):

	335 split = ''

	336 cache = {}

	337 while True:

	338 instructions, rule, split = CompressOneInstruction(

	339 instructions, compressors, split, cache)

	340 if rule is False: break

	341 show_progress(rule, split)

	342 return instructions

	343

	344

	345 def Worker((prefix, state_index)):

	346 worker_state = WorkerState(prefix, worker_validator)

	347

	348 try:

	349 dfa_traversal.TraverseTree(

	350 dfa.states[state_index],

	351 final_callback=worker_state.ReceiveInstruction,

	352 prefix=prefix,

	353 anyfield=0)

	354 if (prefix[0] != 0x0f or prefix[1] != 0x0f): # Skip 3DNow! instructions

	355 worker_state.output = Compressed(set(worker_state.output),

	356 compressors,

	357 worker_state.RecordTrace)

	358 except Exception as e:

	359 traceback.print_exc() # because multiprocessing imap swallows traceback

	360 raise

	361

	362 return (

	363 prefix,

	364 worker_state.total_instructions,

	365 worker_state.num_valid,

	366 worker_state.output,

	367 worker_state.trace)

	368

	369

	370 def ParseOptions():

	371 parser = optparse.OptionParser(usage='%prog [options] xmlfile')

	372

	373 parser.add_option('--bitness',

	374 choices=['32', '64'],

	375 help='The subarchitecture: 32 or 64')

	376 parser.add_option('--validator_dll',

	377 help='Path to librdfa_validator_dll')

	378 parser.add_option('--decoder_dll',

	379 help='Path to librdfa_decoder_dll')

	380

	381 options, args = parser.parse_args()

	382 options.bitness = int(options.bitness)

	383

	384 if len(args) != 1:

	385 parser.error('specify one xml file')

	386

	387 (xml_file, ) = args

	388

	389 return options, xml_file

	390

	391

	392 # Version suitable for use in regular expressions

	393 REGISTERS_RE = REGISTERS.copy()

	394 REGISTERS_RE['st(0)'] = [ 'st\${}\$'.format(N) for N in range(8) ]

	395 REGISTERS_RE['st\$0\$'] = REGISTERS_RE['st(0)']

	396

	397 # Index names in 'natual' order (as defined by IA32/x86-64 ABI)

	398 INDEXES = {

	399 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'eiz', 'ebp', 'esi', 'edi' ],

	400 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'riz', 'rbp', 'rsi', 'rdi' ],

	401 'r8': [ 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15' ]

	402 }

	403 # Register which can not be used as base in 64-bit mode in all incarnations

	404 X86_64_BASE_REGISTERS = set([

	405 '%spl', '%bpl', '%r15b',

	406 '%sp', '%bp', '%r15w',

	407 '%esp', '%ebp', '%r15d',

	408 '%rsp', '%rbp', '%r15',

	409 '%rip'

	410 ])

	411

	412

	413 def InstructionIsDangerous(input, output, register_write,

	414 writes_to, memory_accessed=False,

	415 base_text='%riz', index_text='%riz'):

	416 """ Check if instruction with given replacements will be dangerous

	417

	418 Args:

	419 input: input argument

	420 output: output argument

	421 register_write: three-state selector

	422 'sandbox' - instruction can be used to produce "restricted register"

	423 'protect' - instruction can damage output, protect "special registers"

	424 'ignore' - instruction does not affect it's operands (e.g. test) or

	425 is used with non-GP registers (X87, MMX, XMM, etc)

	426 memory_accessed: True if instruction accesses memory

	427 base: base register (if memory is accessed)

	428 index: index register (if memory is accessed)

	429

	430 Returns:

	431 True if instruction should be rejected by validator

	432 """

	433 if memory_accessed:

	434 if base_text not in X86_64_BASE_REGISTERS:

	435 return True

	436 if index_text in X86_64_BASE_REGISTERS - set(['%r15']):

	437 return True

	438 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:

	439 return True

	440 if register_write == 'sandbox' and output == '%r15d':

	441 return True

	442 if writes_to == 'both' and input in X86_64_BASE_REGISTERS:

	443 return True

	444 return False

	445

	446

	447 def AppendOperandsReplacement(replacement, rm_text, reg, modrm, writes_to):

	448 """ Appends replacement text to replacement list

	449

	450 Args:

	451 replacement: replacement list

	452 rm_text: replacement for rm field

	453 reg: register kind (or None if reg field is used as opcode extension)

	454 modrm: modrm byte

	455 writes_to: three-state selector

	456 'reg' - instruction uses rm as source, reg as destination

	457 'rm' - instruction uses reg as source, rm as destination

	458 'both' - instruction writes to both reg and rm

	459

	460 Returns:

	461 input: textual representation of input argument

	462 output: textual representation of output argument

	463

	464 Side-effect:

	465 output (if reg is None) or (input, output) tuple (if reg is not None)

	466 are added to replacement list.

	467 """

	468 if reg is None:

	469 assert writes_to == 'rm'

	470 input, output = None, rm_text

	471 replacement.append(output)

	472 else:

	473 reg_field = (modrm >> 3) & 0x07

	474 reg_text = '%' + REGISTERS[reg][reg_field]

	475 if writes_to == 'reg':

	476 input, output = rm_text, reg_text

	477 else: # rm, both

	478 input, output = reg_text, rm_text

	479 replacement.extend([input, output])

	480 return input, output

	481

	482

	483 def ModRMRegisterReplacements(rm, reg=None, writes_to='rm', opcode_bits=0,

	484 register_write='ignore'):

	485 """Creates replacement tuples list for register-to-register instructions

	486

	487 Args:

	488 rm: rm operand kind (see REGISTERS array)

	489 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	490 writes_to: three-state selector

	491 'reg' - instruction uses rm as source, reg as destination

	492 'rm' - instruction uses reg as source, rm as destination

	493 'both' - instruction writes to both reg and rm

	494 opcode_bits: opcode extensions code (used when reg is None)

	495 register_write: three-state selector

	496 'sandbox' - instruction can be used to produce "restricted register"

	497 'protect' - instruction can damage output, protect "special registers"

	498 'ignore' - instruction does not affect it's operands (e.g. test) or

	499 is used with non-GP registers (X87, MMX, XMM, etc)

	500 Returns:

	501 List of replacement tuples

	502 """

	503 # Reg field can be used either as reg or as opcode extension, but not both

	504 assert reg is None or opcode_bits == 0

	505

	506 output_key = (options.bitness, reg, rm, writes_to, opcode_bits,

	507 register_write)

	508 if output_key in ModRMRegisterReplacements.replacements:

	509 return ModRMRegisterReplacements.replacements[output_key]

	510

	511 replacements = []

	512

	513 # Two upper bits of ModR/M byte (mod field) must be equal to 11

	514 # This gives us range from 0xc0 to 0xff but we are going from the

	515 # end to make rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).

	516 if reg is None:

	517 # reg field is used as opcode extension

	518 byte_range = [byte

	519 for byte in range(0xff, 0xbf, -1)

	520 if (byte >> 3) & 0x7 == opcode_bits]

	521 else:

	522 byte_range = range(0xff, 0xbf, -1)

	523

	524 for modrm in byte_range:

	525 rm_field = (modrm & 0x07)

	526 rm_text = '%' + REGISTERS[rm][rm_field]

	527 byte_text = '{:02x}'.format(modrm)

	528 replacement = [byte_text]

	529 input, output = AppendOperandsReplacement(

	530 replacement, rm_text, reg, modrm, writes_to)

	531 if options.bitness == 64:

	532 replacement.append('any_nonspecial') # input_rr

	533 replacement.append(output if register_write == 'sandbox' else None)

	534 if InstructionIsDangerous(input, output, register_write, writes_to):

	535 continue

	536 replacements.append(tuple(replacement))

	537 ModRMRegisterReplacements.replacements[output_key] = tuple(replacements)

	538 return ModRMRegisterReplacements.replacements[output_key]

	539 ModRMRegisterReplacements.replacements = {}

	540

	541

	542 def BaseOnlyMemoryOperand(modrm, base):

	543 """Creates replacement tuples list for register-to-memory instructions

	544 (base only, no SIB)

	545

	546 Args:

	547 modrm: modrm byte

	548 base: register kind for base

	549 Returns:

	550 bytes_text: replacement for "bytes" group

	551 rm_text: textual representation of "rm" argument

	552 base_text: textual representation of "base" register

	553 """

	554 mod_field = (modrm >> 6) & 0x03

	555 rm_field = (modrm & 0x07)

	556 base_text = '%' + REGISTERS[base][rm_field]

	557 # If RM field == %rbp and MOD field is zero then it's absolute address

	558 # in 32-bit mode and %rip-based address in 64-bit mode

	559 if mod_field == 0 and rm_field == validator.REG_RBP:

	560 bytes_text = '{:02x} 00 00 00 00'.format(modrm)

	561 rm_text = '0x0' if options.bitness == 32 else '0x0(%rip)'

	562 base_text = '%eiz' if options.bitness == 32 else '%rip'

	563 # Memory access with just a base register

	564 elif mod_field == 0:

	565 bytes_text = '{:02x}'.format(modrm)

	566 rm_text = '({})'.format(base_text)

	567 # Memory access with base and 8bit offset

	568 elif mod_field == 1:

	569 bytes_text = '{:02x} 00'.format(modrm)

	570 rm_text = '0x0({})'.format(base_text)

	571 # Memory access with base and 32bit offset

	572 else: # mod_field == 2

	573 bytes_text = '{:02x} 00 00 00 00'.format(modrm)

	574 rm_text = '0x0({})'.format(base_text)

	575 return bytes_text, rm_text, base_text

	576

	577

	578 def SIBMemoryOperand(modrm, sib, base, index):

	579 """Creates replacement tuples list for register-to-memory instructions

	580 (base only, no SIB)

	581

	582 Args:

	583 modrm: modrm byte

	584 base: register kind for base

	585 Returns:

	586 bytes_text: replacement for "bytes" group

	587 rm_text: textual representation of "rm" argument

	588 base_text: textual representation of "base" register

	589 index_text: textual representation of "index" register

	590 """

	591 mod_field = (modrm >> 6) & 0x03

	592 scale_field = (sib >> 6) & 0x03

	593 index_field = (sib >> 3) & 0x07

	594 base_field = (sib & 0x07)

	595 index_text = '%' + INDEXES[index][index_field]

	596 base_text = '%' + REGISTERS[base][base_field]

	597 scale_text = str(1 << scale_field)

	598 # If BASE is %rbp and MOD == 0 then index with 32bit offset is used

	599 if mod_field == 0 and base_field == validator.REG_RBP:

	600 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	601 # In 64-bit mode this case is displayed as simple absolute address

	602 # In 32-bit mode there are another, shorter, form, but it's used
	halyavin 2013/11/18 14:13:10 Remove this comment since it confuses the reader. Remove this comment since it confuses the reader. khim 2013/11/19 09:26:48 Done. Show quoted text On 2013/11/18 14:13:10, halyavin wrote: > Remove this comment since it confuses the reader. Done.
	603 # for %rip-relative addressing in 64-bit mode

	604 if (options.bitness == 64 and

	605 index_text == '%riz' and

	606 scale_text == '1'):

	607 rm_text = '0x0'

	608 else:

	609 rm_text = '0x0(,{},{})'.format(index_text, scale_text)

	610 # There are no base in this case

	611 base_text = '%eiz' if options.bitness == 32 else '%riz'

	612 # Memory access with base and index (no offset)

	613 elif mod_field == 0:

	614 bytes_text = '{:02x} {:02x}'.format(modrm, sib)

	615 rm_text = '({},{},{})'.format(base_text, index_text, scale_text)

	616 # Memory access with base, index and 8bit offset

	617 elif mod_field == 1:

	618 bytes_text = '{:02x} {:02x} 00'.format(modrm, sib)

	619 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	620 # Memory access with base, index and 32bit offset

	621 elif mod_field == 2:

	622 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	623 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	624 # Pretty-printing of access via %rsp (or %r12)

	625 if (base_field == validator.REG_RSP and

	626 index_text in ('%eiz', '%riz') and

	627 scale_text == '1'):

	628 if mod_field == 0: # no offset

	629 rm_text = '({})'.format(base_text)

	630 else: # 8-bit or 32-bit offset

	631 rm_text = '0x0({})'.format(base_text)

	632 return bytes_text, rm_text, base_text, index_text

	633

	634

	635 def ModRMMemoryReplacements(reg=None, writes_to='rm', opcode_bits=0,

	636 memory_accessed=True, register_write='ignore',

	637 base_r8=False, index_r8=False):

	638 """Creates replacement tuples list for register-to-memory instructions

	639

	640 Args:

	641 rm: rm operand kind (see REGISTERS array)

	642 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	643 writes_to: three-state selector

	644 'reg' - instruction uses rm as source, reg as destination

	645 'rm' - instruction uses reg as source, rm as destination

	646 'both' - instruction writes to both reg and rm

	647 opcode_bits: opcode extensions code (used when reg is None)

	648 memory_accessed: True if instruction accesses memory

	649 register_write: three-state selector

	650 'sandbox' - instruction can be used to produce "restricted register"

	651 'protect' - instruction can damage output, protect "special registers"

	652 'ignore' - instruction does not affect it's operands (e.g. test) or

	653 is used with non-GP registers (X87, MMX, XMM, etc)

	654 index_r8: True if REX.X bit in the instruction set to 1

	655

	656 Returns:

	657 List of replacement tuples

	658 """

	659 # Reg field can be used either as reg or as opcode extension, but not both

	660 assert reg is None or opcode_bits == 0

	661

	662 output_key = (options.bitness, reg, writes_to, opcode_bits,

	663 base_r8, index_r8, memory_accessed, register_write)

	664 if output_key in ModRMMemoryReplacements.replacements:

	665 return ModRMMemoryReplacements.replacements[output_key]

	666

	667 if options.bitness == 32:

	668 base = 'eax'

	669 index = 'eax'

	670 else:

	671 base = 'r8' if base_r8 else 'rax'

	672 index = 'r8' if index_r8 else 'rax'

	673

	674 replacements = []

	675

	676 # Two upper bits of ModR/M byte (mod field) must be equal to 00, 01, or 10

	677 # This gives us range from 0x00 to 0xbf but we are going from the end to make

	678 # rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).

	679 if reg is None:

	680 # reg field is used as opcode extension

	681 byte_range = [byte

	682 for byte in range(0xbf, -1, -1)

	683 if (byte >> 3) & 0x7 == opcode_bits]

	684 else:

	685 byte_range = range(0xbf, -1, -1)

	686

	687 for modrm in byte_range:

	688 # If RM field != %rsp then there are no SIB byte

	689 if (modrm & 0x07) != validator.REG_RSP:

	690 bytes_text, rm_text, base_text = BaseOnlyMemoryOperand(modrm, base)

	691 replacement = [bytes_text]

	692 input, output = AppendOperandsReplacement(

	693 replacement, rm_text, reg, modrm, writes_to)

	694 if options.bitness == 64:

	695 replacement.append('any_nonspecial')

	696 # xchg with memory can not be used to sandbox it's operand, only

	697 # instriuction which explicitly writes to reg operand can do that

	698 if writes_to == 'reg' and register_write == 'sandbox':
	halyavin 2013/11/19 08:57:50 The same as below The same as below khim 2013/11/19 09:26:48 Done. Show quoted text On 2013/11/19 08:57:50, halyavin wrote: > The same as below Done.
	699 replacement.append(output)

	700 else:

	701 replacement.append(None)

	702 if InstructionIsDangerous(input, output, register_write, writes_to,

	703 memory_accessed, base_text):

	704 continue

	705 replacement = tuple(replacement)

	706 replacements.append(replacement)
	halyavin 2013/11/19 08:57:50 We wanted to join this lines. We wanted to join this lines. khim 2013/11/19 09:26:48 Done. Show quoted text On 2013/11/19 08:57:50, halyavin wrote: > We wanted to join this lines. Done.
	707 else:

	708 # If RM field == %rsp then we have SIB byte

	709 for sib in xrange(0x100):

	710 bytes_text, rm_text, base_text, index_text = SIBMemoryOperand(

	711 modrm, sib, base, index)

	712 replacement = [bytes_text]

	713 input, output = AppendOperandsReplacement(

	714 replacement, rm_text, reg, modrm, writes_to)

	715 if options.bitness == 64:

	716 if not memory_accessed or index_text == '%riz':

	717 replacement.append('any_nonspecial')

	718 else:

	719 if index_r8:

	720 # Convert %r8 to %r8d, %r9 to %r9d, etc

	721 replacement.append(index_text + 'd')

	722 else:

	723 # Convert %rax to %eax, %rsp to %esp, etc

	724 replacement.append('%e' + index_text[2:])

	725 # xchg with memory can not be used to sandbox it's operand, only
	halyavin 2013/11/18 14:13:10 Move comment to the else clause. Move comment to the else clause. khim 2013/11/19 09:26:48 Done. Show quoted text On 2013/11/18 14:13:10, halyavin wrote: > Move comment to the else clause. Done.
	726 # instruction which explicitly writes to reg operand can do that

	727 if writes_to == 'reg' and register_write == 'sandbox':
	halyavin 2013/11/18 14:13:10 Add comment that writes_to == 'reg' means that out Add comment that writes_to == 'reg' means that output is a register. khim 2013/11/19 09:26:48 Done. Show quoted text On 2013/11/18 14:13:10, halyavin wrote: > Add comment that writes_to == 'reg' means that output is a register. Done.
	728 replacement.append(output)

	729 else:

	730 replacement.append(None)

	731 if InstructionIsDangerous(input, output, register_write, writes_to,

	732 memory_accessed, base_text, index_text):

	733 continue

	734 replacements.append(tuple(replacement))

	735 ModRMMemoryReplacements.replacements[output_key] = tuple(replacements)

	736 return ModRMMemoryReplacements.replacements[output_key]

	737 ModRMMemoryReplacements.replacements = {}

	738

	739

	740 def PrepareCompressors():

	741 global compressors

	742 global main_compressors

	743 global register_compressors

	744 global memory_compressors

	745

	746 # "Larger" compressors should be tried first, then "smaller" ones.

	747 main_compressors = []

	748 register_compressors = []

	749 memory_compressors = []

	750 extra_compressors = []

	751

	752 # Map from "REX bit off" group of registers to "REX bit on" group of registers

	753 r8 = {

	754 'al': 'r8b',

	755 'ax': 'r8w',

	756 'eax': 'r8d',

	757 'rax': 'r8',

	758 'mm0': 'mmalt',

	759 'xmm0': 'xmm8',

	760 'ymm0': 'ymm8'

	761 }

	762

	763 if options.bitness == 32:

	764 register_kinds = ('al', 'ax', 'eax', 'mm0', 'xmm0', 'ymm0')

	765 register_kind_pairs = (

	766 ( 'al', 'al'),

	767 ( 'ax', 'al'),

	768 ( 'ax', 'ax'),

	769 ( 'eax', 'al'),

	770 ( 'eax', 'ax'),

	771 ( 'eax', 'eax'),

	772 ( 'eax', 'mm0'),

	773 ( 'mm0', 'eax'),

	774 ( 'eax', 'xmm0'),

	775 ('xmm0', 'eax'),

	776 ( 'mm0', 'mm0'),

	777 ( 'mm0', 'xmm0'),

	778 ('xmm0', 'mm0'),

	779 ('xmm0', 'xmm0'),

	780 ('xmm0', 'ymm0'),

	781 ('ymm0', 'xmm0'),

	782 ('ymm0', 'ymm0')

	783 )

	784 else:

	785 register_kinds = ('al', 'spl', 'ax', 'eax', 'rax', 'mm0', 'xmm0', 'ymm0',

	786 'r8b', 'r8w', 'r8d', 'r8', 'mmalt', 'xmm8', 'ymm8')

	787 register_kind_pairs = (

	788 ( 'al', 'al'),

	789 ( 'spl', 'spl'), ( 'spl', 'r8b'), ( 'r8b', 'spl'), ( 'r8b', 'r8b'),

	790 ( 'ax', 'al'),

	791 ( 'ax', 'spl'), ( 'ax', 'r8b'), ( 'r8w', 'spl'), ( 'r8w', 'r8b'),

	792 ( 'ax', 'ax'), ( 'ax', 'r8w'), ( 'r8w', 'ax'), ( 'r8w', 'r8w'),

	793 ( 'eax', 'al'),

	794 ( 'eax', 'spl'), ( 'eax', 'r8b'), ( 'r8d', 'spl'), ( 'r8d', 'r8b'),

	795 ( 'eax', 'ax'), ( 'eax', 'r8w'), ( 'r8d', 'ax'), ( 'r8d', 'r8w'),

	796 ( 'eax', 'eax'), ( 'eax', 'r8d'), ( 'r8d', 'eax'), ( 'r8d', 'r8d'),

	797 ( 'rax', 'al'),

	798 ( 'rax', 'spl'), ( 'rax', 'r8b'), ( 'r8', 'spl'), ( 'r8', 'r8b'),

	799 ( 'rax', 'ax'), ( 'rax', 'r8w'), ( 'r8', 'ax'), ( 'r8', 'r8w'),

	800 ( 'rax', 'eax'), ( 'rax', 'r8d'), ( 'r8', 'eax'), ( 'r8', 'r8d'),

	801 ( 'rax', 'rax'), ( 'rax', 'r8'), ( 'r8', 'rax'), ( 'r8', 'r8'),

	802 ( 'eax', 'mm0'), ( 'eax','mmalt'), ( 'r8d', 'mm0'), ( 'eax', 'mmalt'),

	803 ( 'rax', 'mm0'), ( 'rax','mmalt'), ( 'r8', 'mm0'), ( 'r8', 'mmalt'),

	804 ( 'mm0', 'eax'), ('mmalt', 'eax'), ( 'mm0', 'r8d'), ('mmalt', 'r8d'),

	805 ( 'mm0', 'rax'), ('mmalt', 'rax'), ( 'mm0', 'r8'), ('mmalt', 'r8'),

	806 ( 'eax', 'xmm0'), ( 'eax', 'xmm8'), ( 'r8d', 'xmm0'), ( 'r8d', 'xmm8'),

	807 ( 'rax', 'xmm0'), ( 'rax', 'xmm8'), ( 'r8', 'xmm0'), ( 'r8', 'xmm8'),

	808 ('xmm0', 'eax'), ('xmm0', 'r8d'), ('xmm8', 'eax'), ('xmm8', 'r8d'),

	809 ('xmm0', 'rax'), ('xmm0', 'r8'), ('xmm8', 'rax'), ('xmm8', 'r8'),

	810 ( 'mm0', 'mm0'), ('mmalt', 'mm0'), ( 'mm0','mmalt'), ('mmalt','mmalt'),

	811 ( 'mm0', 'xmm0'), ('mmalt','xmm0'), ( 'mm0', 'xmm8'), ('mmalt', 'xmm8'),

	812 ('xmm0', 'mm0'), ('xmm8', 'mm0'), ('xmm0','mmalt'), ('xmm8', 'mmalt'),

	813 ('xmm0', 'xmm0'), ('xmm0', 'xmm8'), ('xmm8', 'xmm0'), ('xmm8', 'xmm8'),

	814 ('xmm0', 'ymm0'), ('xmm0', 'ymm8'), ('xmm8', 'ymm0'), ('xmm8', 'ymm8'),

	815 ('ymm0', 'xmm0'), ('ymm0', 'xmm8'), ('ymm8', 'xmm0'), ('ymm8', 'xmm8'),

	816 ('ymm0', 'ymm0'), ('ymm0', 'ymm8'), ('ymm8', 'ymm0'), ('ymm8', 'ymm8')

	817 )

	818

	819 # Largest compressors: both reg and rm fields are used

	820 for reg, rm in register_kind_pairs:

	821 start_reg = REGISTERS[reg][0]

	822 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	823 start_rm = REGISTERS[rm][0]

	824 end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]

	825 instruction_kinds = [

	826 # Normal instructions with two operands (rm to reg)

	827 ({'writes_to':'reg'}, '', ' # rm to reg', ''),

	828 # Normal instructions with two operands (reg to rm)

	829 ({'writes_to':'rm'}, '', ' # reg to rm', '')

	830 ]

	831 # Lea in 64 bit mode is truly unique instruction for now

	832 if options.bitness == 64 and reg in ('eax', 'r8d', 'rax', 'r8'):

	833 instruction_kinds = [

	834 ({'writes_to':'reg', 'memory_accessed':False,

	835 'register_write':'sandbox' if reg in ('eax', 'r8d') else 'protect'},

	836 ' # lea', ' # rm to reg; lea', ' # lea')] + instruction_kinds

	837 # There are few more forms in 64 bit case (rm to reg)

	838 if options.bitness == 64 and reg in ('eax', 'r8d'):

	839 # Zero-extending version.

	840 instruction_kinds.append(

	841 ({'writes_to':'reg', 'register_write':'sandbox'},

	842 '', ' # rm to reg', ''))

	843 # More forms in 64 bit case (reg to rm)

	844 if options.bitness == 64 and rm in ('eax', 'r8d'):

	845 # Zero-extending version.

	846 instruction_kinds.append(

	847 ({'writes_to':'rm', 'register_write':'sandbox'},

	848 '', ' # reg to rm', ''))

	849 # Zero-extending xchg/xadd

	850 instruction_kinds.append(

	851 ({'writes_to':'both', 'register_write':'sandbox'},

	852 ' # write to both',

	853 ' # reg to rm; write to both',

	854 ' # write to both'))

	855 # Still more forms for 64 bit case (rm to reg).

	856 if options.bitness == 64 and reg in ('al', 'spl', 'ax', 'eax', 'rax',

	857 'r8b', 'r8w', 'r8d', 'r8'):

	858 # Dangerous instructions (rm to reg)

	859 instruction_kinds.append(

	860 ({'writes_to':'reg', 'register_write':'protect'},

	861 '', ' # rm to reg', ''))

	862 # Still more forms for 64 bit case (reg to rm)

	863 if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',

	864 'r8b', 'r8w', 'r8d', 'r8'):

	865 # Dangerous instructions (reg to rm)

	866 instruction_kinds.append(

	867 ({'writes_to':'rm', 'register_write':'protect'},

	868 '', ' # reg to rm', ''))

	869 # Dangerous xchg/xadd

	870 instruction_kinds.append(

	871 ({'writes_to':'both', 'register_write':'protect'},

	872 ' # write to both',

	873 ' # reg to rm; write to both',

	874 ' # write to both'))

	875 # 3DNow! instructions

	876 instruction_kinds.append(

	877 ({'writes_to':'reg', '3dnow':'yes'}, '', ' # rm to reg', ''))

	878 for args, notes, notes_register, notes_memory in instruction_kinds:

	879 regex = '(?: 00)*'

	880 # Additional byte is opcode extension with 3DNow! instructions.

	881 if '3dnow' in args:

	882 regex = ' [0-9a-fA-F][0-9a-fA-F]'

	883 args.pop('3dnow')

	884 regex += ' (?:lock )?\\w* (?:\\$0x0,\|\\$0x0,\\$0x0,\|%cl,\|%xmm0,)?'

	885 # We only need to process ModR/M+SIB '04 04' or '04 07' here

	886 if options.bitness == 32:

	887 regex_mem = '\$%esp,%eax,1\$'

	888 else:

	889 regex_mem = '\$(?:%rsp\|%r15),(?:%rax\|%r8),1\$'

	890 output = None

	891 output_note = None

	892 if args['writes_to'] == 'reg':

	893 regex += '(%' + REGISTERS[rm][0] + '\|' + regex_mem + ')'

	894 regex += ',(%' + REGISTERS[reg][0] + ')'

	895 if 'register_write' in args and args['register_write'] == 'sandbox':

	896 assert reg in ('eax', 'r8d')

	897 output = '%' + reg + '\|None'

	898 output_note = '[%eax..%edi]' if reg == 'eax' else '[%r8d..%r14d]'

	899 subst = (

	900 'XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),

	901 '[%{}..%{}]'.format(start_reg, end_reg), notes)

	902 subst_register = (

	903 'XX', '[%{}..%{}]'.format(start_rm, end_rm),

	904 '[%{}..%{}]'.format(start_reg, end_reg), notes_register)

	905 subst_memory = (

	906 'XX', '[memory]',

	907 '[%{}..%{}]'.format(start_reg, end_reg), notes_memory)

	908 else:

	909 regex += '(%' + REGISTERS[reg][0] + ')'

	910 regex += ',(%' + REGISTERS[rm][0] + '\|' + regex_mem + ')'

	911 if 'register_write'in args and args['register_write'] == 'sandbox':

	912 assert rm in ('eax', 'r8d')

	913 output = '%' + rm + '\|None'

	914 output_note = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'

	915 subst = (

	916 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	917 '[%{}..%{} or memory]'.format(start_rm, end_rm), notes)

	918 subst_register = (

	919 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	920 '[%{}..%{}]'.format(start_rm, end_rm), notes_register)

	921 subst_memory = (

	922 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	923 '[memory]', notes_memory)

	924 regex += '.*'

	925 if options.bitness == 64:

	926 regex += '; input_rr=(%eax\|%r8d\|any_nonspecial)'

	927 regex += '; output_rr=({})'.format(output)

	928 if 'memory_accessed' in args:

	929 input_note = 'any_nonspecial'

	930 input_note_r8 = 'any_nonspecial'

	931 else:

	932 input_note = '[%eax..%edi]'

	933 input_note_r8 = '[%r8d..%r15d]'

	934 subst_r8 = subst[0:-1] + (input_note_r8, output_note) + subst[-1:]

	935 subst = subst[0:-1] + (input_note, output_note) + subst[-1:]

	936 subst_memory_r8 = subst_memory[0:-1] + (

	937 input_note_r8, output_note) + subst_memory[-1:]

	938 subst_memory = subst_memory[0:-1] + (

	939 input_note, output_note) + subst_memory[-1:]

	940 subst_register = subst_register[0:-1] + (

	941 'any_nonspecial', output_note) + subst_register[-1:]

	942 regex += '()'

	943 base_r8 = rm in r8.values()

	944 memory_replacement = ModRMMemoryReplacements(

	945 reg=reg, base_r8=base_r8, **args)

	946 memory_compressors.append(Compressor(

	947 '.*?(04 0[47])' + regex, subst_memory, memory_replacement))

	948 if options.bitness == 64:

	949 memory_replacement_r8 = ModRMMemoryReplacements(

	950 reg=reg, base_r8=base_r8, index_r8=True, **args)

	951 memory_compressors.append(Compressor(

	952 '.*?(04 0[47])' + regex, subst_memory_r8, memory_replacement_r8))

	953 # Instructions with no memory access are instructions which are doing

	954 # something with memory address (e.g. lea) and as such they don't have

	955 # non-memory forms.

	956 if not 'memory_accessed' in args:

	957 register_replacement = ModRMRegisterReplacements(rm=rm, reg=reg, **args)

	958 register_compressors.append(Compressor(

	959 '.*?(c0)' + regex, subst_register, register_replacement))

	960 main_replacement = register_replacement + memory_replacement

	961 main_compressors.append(Compressor(

	962 '.*?(04 0[47])' + regex, subst, main_replacement))

	963 if options.bitness == 64:

	964 main_replacement_r8 = register_replacement + memory_replacement_r8

	965 main_compressors.append(Compressor(

	966 '.*?(04 0[47])' + regex, subst_r8, main_replacement_r8))

	967

	968 # Smaller compressors: only rm field is used.

	969 for rm in register_kinds:

	970 start_rm = REGISTERS[rm][0]

	971 end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]

	972 for opcode_bits in xrange(8):

	973 XX_byte_mark = 'XX/' + str(opcode_bits)

	974 instruction_kinds = [

	975 # The most basic form

	976 ({}, '', '', '')

	977 ]

	978 if options.bitness == 64:

	979 # No memory access (e.g. prefetch)

	980 instruction_kinds = [

	981 ({'memory_accessed':False}, '', '', '')] + instruction_kinds

	982 # More forms in 64 bit case.

	983 if options.bitness == 64 and rm in ('eax', 'r8d'):

	984 # Zero-extending version.

	985 instruction_kinds.append(

	986 ({'register_write':'sandbox'}, '', '', ''))

	987 # Still more forms for 64 bit case (reg to rm).

	988 if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',

	989 'r8b', 'r8w', 'r8d', 'r8'):

	990 # Dangerous instructions.

	991 instruction_kinds.append(

	992 ({'register_write':'protect'}, '', '', ''))

	993 for args, notes, notes_register, notes_memory in instruction_kinds:

	994 subst = (XX_byte_mark, '[%{}..%{} or memory]'.format(start_rm, end_rm),

	995 notes)

	996 subst_register = (XX_byte_mark, '[%{}..%{}]'.format(start_rm, end_rm),

	997 notes_register)

	998 subst_memory = (XX_byte_mark, '[memory]',

	999 notes_memory)

	1000 regex = ('(?: 00)* (?:lock )?\\w* (?:\\$0x0,\|%cl,)?'

	1001 '(%' + REGISTERS[rm][0] + '\|' + regex_mem + ').*')

	1002 output = None

	1003 output_note = None

	1004 if options.bitness == 64:

	1005 if 'register_write' in args and args['register_write'] == 'sandbox':

	1006 assert rm in ('eax', 'r8d')

	1007 output = '%' + rm + '\|None'

	1008 output_note = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'

	1009 regex += '; input_rr=(%eax\|%r8d\|any_nonspecial)'

	1010 regex += '; output_rr=({})'.format(output)

	1011 if 'memory_accessed' in args:

	1012 input_note = 'any_nonspecial'

	1013 input_note_r8 = 'any_nonspecial'

	1014 else:

	1015 input_note = '[%eax..%edi]'

	1016 input_note_r8 = '[%r8d..%r15d]'

	1017 subst_r8 = subst[0:-1] + (input_note_r8, output_note) + subst[-1:]

	1018 subst = subst[0:-1] + (input_note, output_note) + subst[-1:]

	1019 subst_memory_r8 = subst_memory[0:-1] + (

	1020 input_note_r8, output_note) + subst_memory[-1:]

	1021 subst_memory = subst_memory[0:-1] + (

	1022 input_note, output_note) + subst_memory[-1:]

	1023 subst_register = subst_register[0:-1] + (

	1024 'any_nonspecial', output_note) + subst_register[-1:]

	1025 regex += '()'

	1026 base_r8 = rm in r8.values()

	1027 memory_replacement = ModRMMemoryReplacements(

	1028 reg=None, base_r8=base_r8, opcode_bits=opcode_bits, **args)

	1029 memory_compressors.append(Compressor(

	1030 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	1031 subst_memory, memory_replacement))

	1032 if options.bitness == 64:

	1033 memory_replacement_r8 = ModRMMemoryReplacements(

	1034 reg=None, base_r8=base_r8, index_r8=True, opcode_bits=opcode_bits,

	1035 **args)

	1036 memory_compressors.append(Compressor(

	1037 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	1038 subst_memory_r8, memory_replacement_r8))

	1039 # Instructions with no memory access are instructions which are doing

	1040 # something with memory address (e.g. prefetch) and as such they don't

	1041 # have non-memory forms.

	1042 if not 'memory_accessed' in args:

	1043 register_replacement = ModRMRegisterReplacements(

	1044 reg=None, rm=rm, opcode_bits=opcode_bits, **args)

	1045 register_compressors.append(Compressor(

	1046 '.?({:02x})'.format(0xc0 + opcode_bits 8) + regex,

	1047 subst_register, register_replacement))

	1048 main_replacement = register_replacement + memory_replacement

	1049 main_compressors.append(Compressor(

	1050 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	1051 subst, main_replacement))

	1052 if options.bitness == 64:

	1053 main_replacement_r8 = register_replacement + memory_replacement_r8

	1054 main_compressors.append(Compressor(

	1055 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	1056 subst_r8, main_replacement_r8))

	1057

	1058 # Even smaller compressors: only low 3 bits of opcode are used.

	1059 for reg in register_kinds + ('st(0)',):

	1060 start_reg = REGISTERS[reg][0]

	1061 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	1062 for opcode in xrange(8):

	1063 for text1, text2, nibble in (

	1064 ('[0..7]', '[8..f]', xrange(8)),

	1065 ('[012367]', '[89abef]', (0, 1, 2, 3, 6, 7)),

	1066 ('[0..6]', '[8..e]', xrange(7))

	1067 ):

	1068 # Note that we use 2nd line here to avoid ambiguity when opcode is 0x00

	1069 extra_compressors.append(Compressor(

	1070 '.?[0-9a-fA-F](1)(?: 00)'

	1071 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	1072 '(%(?:' + REGISTERS_RE[reg][1] + ')).*()',

	1073 (text1, '[%{}..%{}]'.format(start_reg, end_reg), ''),

	1074 tuple(('{:x}'.format(n), '%' + REGISTERS[reg][n])

	1075 for n in nibble)))

	1076 extra_compressors.append(Compressor(

	1077 '.?[0-9a-fA-F](8)(?: 00)'

	1078 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	1079 '(%(?:' + REGISTERS_RE[reg][0] + ')).*()',

	1080 (text2, '[%{}..%{}]'.format(start_reg, end_reg), ''),

	1081 tuple(('{:x}'.format(n + 8), '%' + REGISTERS[reg][n])

	1082 for n in nibble)))

	1083 # Another version for 64 bit case

	1084 if options.bitness == 64 and reg in ('eax', 'r8d'):

	1085 extra_compressors.append(Compressor(

	1086 '.?[0-9a-fA-F](1)(?: 00)'

	1087 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	1088 '(%(?:' + REGISTERS_RE[reg][1] + ')).*'

	1089 'output_rr=(%(?:'+ REGISTERS_RE[reg][1] + ')).*()',

	1090 tuple([text1] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +

	1091 ['']),

	1092 tuple(['{:x}'.format(n)] + ['%' + REGISTERS[reg][n]] * 2

	1093 for n in nibble)))

	1094 extra_compressors.append(Compressor(

	1095 '.?[0-9a-fA-F](8)(?: 00)'

	1096 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	1097 '(%(?:' + REGISTERS_RE[reg][0] + ')).*'

	1098 'output_rr=(%(?:'+ REGISTERS_RE[reg][0] + ')).*()',

	1099 tuple([text2] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +

	1100 ['']),

	1101 tuple(['{:x}'.format(n + 8)] + ['%' + REGISTERS[reg][n]] * 2

	1102 for n in nibble)))

	1103 compressors = (main_compressors + memory_compressors + register_compressors +

	1104 extra_compressors)

	1105

	1106 # Special compressors: will handle some cosmetic issues.

	1107 #

	1108 # SETxx ignores reg field and thus are described as many separate instructions

	1109 compressors.append(Compressor(

	1110 '.0f 9[0-9a-fA-F] XX(/[0-7]) set.()', ('', ''),

	1111 [('/' + str(i), ) for i in range(8)]))

	1112 # BSWAP is described with opcode "0f c8+r", not "0f /1" in manual

	1113 if options.bitness == 32:

	1114 compressors.append(Compressor(

	1115 '.(XX/1) bswap.ax.*()', ('c[8..f]', ''), [('XX/1', )]))

	1116 else:

	1117 compressors.append(Compressor(

	1118 '.(XX/1) bswap.ax.*()', ('c[89abef]', ''), [('XX/1', )]))

	1119 compressors.append(Compressor(

	1120 '.(XX/1) bswap.r8.*()', ('c[8..e]', ''), [('XX/1', )]))

	1121 # Add mark '# write to both' to certain versions of CMPXCHG, XADD, and XCHG

	1122 if options.bitness == 64:

	1123 compressors.append(Compressor(

	1124 '.* (?:cmpxchg\|xadd\|xchg).%al\\.\\.%bh[^#]()$',

	1125 (' # write to both', ), ((), )))

	1126 # "and $0xe0,[%eax..%edi]" is treated specially which means that we list all

	1127 # versions of and "[$0x1..$0xff],[%eax..%edi]" separately here.

	1128 # Without this rule these ands comprise 2/3 of the whole output!

	1129 if options.bitness == 32:

	1130 compressors.append(Compressor(

	1131 '.*83 (e0 01 and \\$0x1,%eax)()',

	1132 ('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', ' # special and'),

	1133 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS['eax'][r]), )

	1134 for i in range(0x01, 0x100) for r in range(8)] +

	1135 [('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', )]))

	1136 else:

	1137 for reg in ('eax', 'r8d'):

	1138 start_reg = REGISTERS[reg][0]

	1139 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	1140 for index_reg in ('eax', 'r8d'):

	1141 start_index = REGISTERS[index_reg][0]

	1142 end_index = REGISTERS[index_reg][-1]

	1143 compressors.append(Compressor(

	1144 '.83 (e0 01 and \\$0x1,%' + reg + ').'

	1145 'input_rr=(any_nonspecial); output_rr=(%' + reg + ')()',

	1146 ('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,

	1147 end_reg), '[%{}..%{}]'.format(start_index, end_index),

	1148 '[%{}..%{}]'.format(start_reg, end_reg),

	1149 ' # special and'),

	1150 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS[reg][r]),

	1151 'any_nonspecial', '%' + REGISTERS[reg][r])

	1152 for i in range(0x01, 0x100) for r in range(7 + (reg == 'eax'))] +

	1153 [('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,

	1154 end_reg), '[%{}..%{}]'.format(start_index, end_index),

	1155 '[%{}..%{}]'.format(start_reg, end_reg))]))

	1156

	1157 # "and $e0" and similar are used to align %rsp. All negative values are

	1158 # accepted by validator and there are 127 of these.

	1159 # Consolidate them into one line.

	1160 if options.bitness == 64:

	1161 compressors.append(Compressor(

	1162 '.(?:81\|83) (?:e4\|e5) (80) (?:00 00 00 \|) and \\$0x(80),%r[bs]p.()',

	1163 ('[80..ff]', '[80..ff]', ' # alignment and'),

	1164 [('{:02x}'.format(i), '{:02x}'.format(i)) for i in range(0x80, 0x100)]))

	1165

	1166 # Merge memory and non-memory access

	1167 if options.bitness == 32:

	1168 letters_and_registers = (('b', 'al', ''), ('w', 'ax', ''), ('l', 'eax', ''))

	1169 else:

	1170 letters_and_registers = (

	1171 ('b', 'al', 'eax'), ('b', 'spl', 'eax'), ('b', 'r8b', 'r8d'),

	1172 ('w', 'ax', 'eax'), ('w', 'r8w', 'r8d'),

	1173 ('l', 'eax', 'eax'), ('l', 'r8d', 'r8d'),

	1174 ('q', 'rax', 'eax'), ('q', 'r8', 'r8d')

	1175 )

	1176 for letter, reg, out_reg in letters_and_registers:

	1177 start_reg = REGISTERS[reg][0]

	1178 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	1179 all_regs = '[%{}..%{}]'.format(start_reg, end_reg)

	1180 regs_mark = '[%{}..%{} or memory]'.format(start_reg, end_reg)

	1181 if options.bitness == 64:

	1182 start_out = REGISTERS[out_reg][0]

	1183 end_out = REGISTERS[out_reg][-1 if out_reg[0:2] != 'r8' else -2]

	1184 out_regs = '[%{}..%{}]'.format(start_out, end_out)

	1185 for notes in ('', ' # rm to reg', ' # reg to rm'):

	1186 compressors.append(Compressor(

	1187 '.* \\w(' + letter + ') .(\\[memory]).*()()',

	1188 ('[{}]?'.format(letter), regs_mark, '', ''),

	1189 ((letter, '[memory]', ''), ('', all_regs, notes))))

	1190 if options.bitness == 64:

	1191 for index_reg in ('eax', 'r8d'):

	1192 start_index = REGISTERS[index_reg][0]

	1193 end_index = REGISTERS[index_reg][-1]

	1194 index_regs = '[%{}..%{}]'.format(start_index, end_index)

	1195 for output_rrs in ((None, out_regs), (out_regs, None), (None, None)):

	1196 compressors.append(Compressor(

	1197 '.* \\w(' + letter + ') .(\\[memory]).*; '

	1198 'input_rr=(\\[%[a-z0-9]..%[a-z0-9]\\]); '

	1199 'output_rr=(\\[%[a-z0-9]..%[a-z0-9]\\]\|None)()()',

	1200 ('[{}]?'.format(letter), regs_mark, index_regs,

	1201 output_rrs[0] if output_rrs[0] is not None else output_rrs[1],

	1202 '', ''),

	1203 ((letter, '[memory]', index_regs, output_rrs[0], ''),

	1204 ('', all_regs, 'any_nonspecial', output_rrs[1], notes))))

	1205

	1206 # REX compressors

	1207 if options.bitness == 64:

	1208 # First pretty complex set of compressors to combine versions of REX with

	1209 # three lowest bits in different states.

	1210 register_kind_pairs = (

	1211 ( None, None),

	1212 ( 'al', 'al'), ( 'al', None), (None, 'al'),

	1213 ( 'ax', 'al'), ( 'al', 'ax'),

	1214 ( 'ax', 'ax'), ( 'ax', None), (None, 'ax'),

	1215 ( 'eax', 'al'), ( 'al', 'eax'),

	1216 ( 'eax', 'ax'), ( 'ax', 'eax'),

	1217 ( 'eax', 'eax'), ( 'eax', None), (None, 'eax'),

	1218 ( 'rax', 'al'), ( 'al', 'rax'),

	1219 ( 'rax', 'ax'), ( 'ax', 'rax'),

	1220 ( 'rax', 'eax'), ( 'eax', 'rax'),

	1221 ( 'rax', 'rax'), ( 'rax', None), (None, 'rax'),

	1222 ( 'eax', 'mm0'), ( 'mm0', 'eax'),

	1223 ( 'rax', 'mm0'), ( 'mm0', 'rax'),

	1224 ( 'mm0', 'eax'), ( 'eax', 'mm0'),

	1225 ( 'mm0', 'rax'), ( 'rax', 'mm0'),

	1226 ( 'eax', 'xmm0'),

	1227 ( 'rax', 'xmm0'),

	1228 ('xmm0', 'eax'),

	1229 ('xmm0', 'rax'),

	1230 ( 'mm0', 'mm0'), ( 'mm0', None), (None, 'mm0'),

	1231 ( 'mm0', 'xmm0'),

	1232 ('xmm0', 'mm0'),

	1233 ('xmm0', 'xmm0'),

	1234 ('xmm0', 'ymm0'), ('xmm0', None), (None, 'xmm0'),

	1235 ('ymm0', 'xmm0'),

	1236 ('ymm0', 'ymm0'), ('ymm0', None), (None, 'ymm0'),

	1237 )

	1238 for reg, rm in register_kind_pairs:

	1239 for last_reg, last_rm in ((-1, -1), (-1, -2), (-2, -1), (-2, -2)):

	1240 if reg:

	1241 start_reg = REGISTERS[reg][0]

	1242 start_reg8 = REGISTERS[r8[reg]][0]

	1243 end_reg = REGISTERS[reg][-1]

	1244 end_reg0 = 'dil' if reg == 'al' else end_reg

	1245 end_reg8 = REGISTERS[r8[reg]][last_reg]

	1246 reg_regex = '\\[(%' + start_reg + '\\.\\.%' + end_reg + ')]'

	1247 reg_regex0 = '\\[(%' + start_reg + '\\.\\.%' + end_reg0 + ')]'

	1248 elif last_reg == -2:

	1249 continue

	1250 if rm:

	1251 start_rm = REGISTERS[rm][0]

	1252 start_rm8 = REGISTERS[r8[rm]][0]

	1253 end_rm = REGISTERS[rm][-1]

	1254 end_rm0 = 'dil' if rm == 'al' else end_rm

	1255 end_rm8 = REGISTERS[r8[rm]][last_rm]

	1256 rm_regex = ('\\[(%' + start_rm + '\\.\\.%' + end_rm + ')'

	1257 '(?: or memory)?]')

	1258 rm_regex0 = ('\\[(%' + start_rm + '\\.\\.%' + end_rm0 + ')'

	1259 '(?: or memory)?]')

	1260 elif last_rm == -2:

	1261 continue

	1262 for rexw in (True, False):

	1263 for input_rr in (True, False):

	1264 for output_rr in (True, False) if reg or rm else (None, ):

	1265 for rm_to_reg in (True, False) if reg and rm else (None, ):

	1266 # Legacy prefixes

	1267 regex = '.:(?: 26\| 2e\| 36\| 3e\| 64\| 65\| 66\| 67\| f0\| f2\| f3)'

	1268 # REX

	1269 regex += '( 48).' if rexw else '( 40\|).'

	1270 # Replacement text

	1271 replacement_tuple = (

	1272 ' [REX:48..4f]' if rexw else ' [REX:40..47]?', )

	1273 if reg:

	1274 replacement_regs = '%{}..%{}'.format(start_reg, end_reg8)

	1275 if rm:

	1276 replacement_rms = '%{}..%{}'.format(start_rm, end_rm8)

	1277 # Instruction arguments

	1278 if not reg and not rm:

	1279 pass

	1280 elif not reg and rm:

	1281 if rexw:

	1282 regex += rm_regex0 + '.*'

	1283 else:

	1284 regex += rm_regex + '.*'

	1285 replacement_tuple += (replacement_rms, )

	1286 elif reg and not rm:

	1287 if rexw:

	1288 regex += reg_regex0 + '.*'

	1289 else:

	1290 regex += reg_regex + '.*'

	1291 replacement_tuple += (replacement_regs, )

	1292 elif rm_to_reg:

	1293 if rexw:

	1294 regex += rm_regex0 + ',' + reg_regex0 + '.*'

	1295 else:

	1296 regex += rm_regex + ',' + reg_regex + '.*'

	1297 replacement_tuple += (replacement_rms, replacement_regs)

	1298 else:

	1299 if rexw:

	1300 regex += reg_regex0 + ',' + rm_regex0 + '.*'

	1301 else:

	1302 regex += reg_regex + ',' + rm_regex + '.*'

	1303 replacement_tuple += (replacement_regs, replacement_rms)

	1304 # Input and output restricted registers

	1305 if input_rr:

	1306 regex += 'input_rr=\\[(%eax\\.\\.%edi)].*'

	1307 replacement_tuple += ('%eax..%r15d', )

	1308 if output_rr:

	1309 regex += 'output_rr=\\[(%eax\\.\\.%edi)].*'

	1310 replacement_tuple += ('%eax..%r14d', )

	1311 regex += '()'

	1312 replacement_tuple += ('', )

	1313 # Replacement cases

	1314 replacement_tuples = ()

	1315 for byte in (range(0x48, 0x50)

	1316 if rexw

	1317 else range(0x40, 0x48) + ['']):

	1318 replacement_case = (

	1319 ' {:02x}'.format(byte) if byte else byte, )

	1320 if byte:

	1321 if rm:

	1322 if byte & 0x1:

	1323 replacement_rms = '%{}..%{}'.format(start_rm8, end_rm8)

	1324 else:

	1325 replacement_rms = '%{}..%{}'.format(start_rm, end_rm0)

	1326 if byte & 0x2:

	1327 replacement_index = '%r8d..%r15d'

	1328 else:

	1329 replacement_index = '%eax..%edi'

	1330 if reg:

	1331 if byte & 0x4:

	1332 replacement_regs = '%{}..%{}'.format(start_reg8,

	1333 end_reg8)

	1334 else:

	1335 replacement_regs = '%{}..%{}'.format(start_reg,

	1336 end_reg0)

	1337 else:

	1338 if rm:

	1339 replacement_rms = '%{}..%{}'.format(start_rm, end_rm)

	1340 replacement_index = '%eax..%edi'

	1341 if reg:

	1342 replacement_regs = '%{}..%{}'.format(start_reg, end_reg)

	1343 if not reg and not rm:

	1344 pass

	1345 elif not reg and rm:

	1346 replacement_case += (replacement_rms, )

	1347 if byte:

	1348 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'

	1349 else:

	1350 final_rr = '%eax..%edi'

	1351 elif reg and not rm:

	1352 replacement_case += (replacement_regs, )

	1353 if byte:

	1354 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'

	1355 else:

	1356 final_rr = '%eax..%edi'

	1357 elif rm_to_reg:

	1358 replacement_case += (replacement_rms, replacement_regs)

	1359 if byte:

	1360 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'

	1361 else:

	1362 final_rr = '%eax..%edi'

	1363 else:

	1364 replacement_case += (replacement_regs, replacement_rms)

	1365 if byte:

	1366 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'

	1367 else:

	1368 final_rr = '%eax..%edi'

	1369 if input_rr: replacement_case += (replacement_index, )

	1370 if output_rr: replacement_case += (final_rr, )

	1371 replacement_tuples += (replacement_case, )

	1372 compressors.append(Compressor(

	1373 regex, replacement_tuple, replacement_tuples))

	1374 # This is pretty simple compressor to combine two lines with different REX.W

	1375 # bits (only if they are otherwise identical).

	1376 compressors.append(Compressor(

	1377 '.(\\[REX:40\\.\\.47]\\?).()', ('[REX:40..4f]?', ''),

	1378 (('[REX:40..47]?', ), ('[REX:48..4f]', ))))

	1379

	1380

	1381 def ShowProgress(rule, instruction):

	1382 if rule not in ShowProgress.rules_shown:

	1383 first_print = True

	1384 ShowProgress.rules_shown[rule]=len(ShowProgress.rules_shown)

	1385 else:

	1386 first_print = False

	1387 print >> sys.stderr, '-------- Compressed --------'

	1388 print >> sys.stderr, 'Rule:', ShowProgress.rules_shown[rule]

	1389 print >> sys.stderr, '--------'

	1390 compressor = compressors[rule]

	1391 match = compressor.regex.match(instruction)

	1392 assert match

	1393 format_str = CompressionTemplate(instruction, match, '{{{}}}')

	1394 replacements = sorted(format_str.format(*replacement)

	1395 for replacement in compressor.replacements)

	1396 if len(compressor.replacements) <= 4 or first_print:

	1397 for replacement in replacements:

	1398 print >> sys.stderr, replacement

	1399 else:

	1400 print >> sys.stderr, replacements[0]

	1401 print >> sys.stderr, '...'

	1402 print >> sys.stderr, replacements[-1]

	1403 print >> sys.stderr, '--------'

	1404 print >> sys.stderr, 'Compressed', (

	1405 format_str + '{{{}}}').format(*compressor.subst)

	1406 ShowProgress.rules_shown = {}

	1407

	1408

	1409 def main():

	1410 # We are keeping these global to share state graph and compressors

	1411 # between workers spawned by multiprocess. Passing them every time is slow.

	1412 global options, xml_file

	1413 global dfa

	1414 global worker_validator

	1415 options, xml_file = ParseOptions()

	1416 dfa = dfa_parser.ParseXml(xml_file)

	1417 worker_validator = validator.Validator(

	1418 validator_dll=options.validator_dll,

	1419 decoder_dll=options.decoder_dll)

	1420 PrepareCompressors()

	1421

	1422 assert dfa.initial_state.is_accepting

	1423 assert not dfa.initial_state.any_byte

	1424

	1425 print >> sys.stderr, len(dfa.states), 'states'

	1426

	1427 num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

	1428

	1429 # We can't just write 'num_suffixes[dfa.initial_state]' because

	1430 # initial state is accepting.

	1431 total_instructions = sum(

	1432 num_suffixes[t.to_state]

	1433 for t in dfa.initial_state.forward_transitions.values())

	1434 print >> sys.stderr, total_instructions, 'regular instructions total'

	1435

	1436 tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)

	1437 print >> sys.stderr, len(tasks), 'tasks'

	1438

	1439 pool = multiprocessing.Pool()

	1440

	1441 results = pool.imap(Worker, tasks)

	1442

	1443 total = 0

	1444 num_valid = 0

	1445 full_output = set()

	1446 for prefix, count, valid_count, output, trace in results:

	1447 print >> sys.stderr, 'Prefix:', ', '.join(map(hex, prefix))

	1448 total += count

	1449 num_valid += valid_count

	1450 full_output \|= output

	1451 for rule, instruction in trace:

	1452 ShowProgress(rule, instruction)

	1453 for instruction in sorted(Compressed(full_output,

	1454 compressors,

	1455 ShowProgress)):

	1456 print instruction

	1457

	1458 print >> sys.stderr, total, 'instructions were processed'

	1459 print >> sys.stderr, num_valid, 'valid instructions'

	1460

	1461

	1462 if __name__ == '__main__':

	1463 main()

OLD	NEW

« no previous file with comments | « no previous file | src/trusted/validator_ragel/testdata/32bit_regular.golden » ('j') | no next file with comments »