src/trusted/validator_ragel/compress_regular_instructions.py - Issue 49183002: Regular instructions golden file test.

Side by Side Diff: src/trusted/validator_ragel/compress_regular_instructions.py

Issue 49183002: Regular instructions golden file test. Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # Copyright (c) 2013 The Native Client Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """

	6 Traverse the validator's DFA, collect all "normal" instruction and then

	7 compress output. Note: "anybyte fields" (immediates and displacements)

	8 are always filled with zeros. Otherwise processing of sextillions (sic!)

	9 of possibilities will take too long.

	10

	11 Each rule is applied only when all variants are accepted by validator.

	12 The following compression rules are present:

	13

	14 1. Compress ModR/M (+SIB & displacement).

	15 Instruction: 00 00 add %al,(%rax)

	16 ...

	17 Instruction: 00 ff add %bh,%bh

	18 becomes

	19 Instruction: 00 XX add [%al..%bh],[%al..%bh or memory]

	20

	21 1a. Compress ModR/M (+SIB & displacement) memory-only.

	22 Instruction: f0 01 00 lock add %eax,(%eax)

	23 ...

	24 Instruction: f0 01 bf 00 00 00 00 lock add %edi,0x0(%edi)

	25 becomes

	26 Instruction: f0 01 XX lock add [%eax..edi],[memory]

	27

	28 1b. Compress ModR/M register only.

	29 Instruction: 66 0f 50 c0 movmskpd %xmm0,%eax

	30 ...

	31 Instruction: 66 0f 50 ff movmskpd %xmm7,%edi

	32 becomes

	33 Instruction: 66 0f 50 XX movmskpd [%xmm0..%xmm7],[%eax..edi]

	34

	35 2. Compress ModR/M (+SIB & displacement) with opcode extension.

	36 Instruction: 0f 90 00 seto (%eax)

	37 ...

	38 Instruction: 0f 90 c7 seto %bh

	39 becomes

	40 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	41

	42 2a. Compress ModR/M (+SIB & displacement) memory-only with opcode extension.

	43 Instruction: f0 ff 00 lock incl (%eax)

	44 ...

	45 Instruction: f0 ff 84 ff 00 00 00 00 lock incl 0x0(%edi,%edi,8)

	46 becomes

	47 Instruction: f0 ff XX/1 lock decl [memory]

	48

	49 2b. Compress ModR/M register-only with opcode extension.

	50 Instruction: 0f 71 d0 00 psrlw $0x0,%mm0

	51 ...

	52 Instruction: 0f 71 d7 00 psrlw $0x0,%mm7

	53 becomes

	54 Instruction: 66 0f 71 XX/2 00 psrlw $0x0,[%mm0..%mm7]

	55

	56 3. Compress register-in-opcode.

	57 Instruction: d9 c0 fld %st(0)

	58 ...

	59 Instruction: d9 c7 fld %st(7)

	60 becomes

	61 Instruction: Instruction: d9 c[0..7] fld [%st(0)..%st(7)]

	62

	63 Only applies if all possible register accesses are accepted by validator.

	64

	65 4. Special compressor for "set" instruction.

	66 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	67 ...

	68 Instruction: 0f 90 XX/7 seto [%al..%bh or memory]

	69 becomes

	70 Instruction: 0f 90 XX seto [%al..%bh or memory]

	71 """

	72

	73 import itertools

	74 import multiprocessing

	75 import optparse

	76 import os

	77 import re

	78 import subprocess

	79 import sys

	80 import tempfile

	81 import traceback

	82

	83 import dfa_parser

	84 import dfa_traversal

	85 import validator

	86

	87

	88 # Register names in 'natual' order (as defined by IA32/x86-64 ABI)

	89 #

	90 # X86-64 ABI splits all registers in groups of 8 because it uses 3-bit field

	91 # in opcode, ModR/M, and/or SIB bytes to encode them.

	92 #

	93 # In most cases there are 16 registers of a given kind and two such groups,

	94 # but there are couple of exceptions:

	95 # 1. There are 20 8-bit registers and three groups (two of them overlap)

	96 # 2. There are eight X87 and MMX registers thus two groups are identical

	97 #

	98 # We use typical register from a group to name the whole group. Most groups

	99 # use first register, but 'spl' group uses fifth register because it's first

	100 # four registers are the same as 'al' group. We use mnemonic name 'mmalt'

	101 # to represent the "evil mirror" of the 'mm0' group.

	102 REGISTERS = {

	103 'al': [ 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh' ],

	104 'spl': [ 'al', 'cl', 'dl', 'bl', 'spl', 'bpl', 'sil', 'dil' ],

	105 'ax': [ 'ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di' ],

	106 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi' ],

	107 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi' ],

	108 'r8b': [ 'r{}b'.format(N) for N in range(8,16) ],

	109 'r8w': [ 'r{}w'.format(N) for N in range(8,16) ],

	110 'r8d': [ 'r{}d'.format(N) for N in range(8,16) ],

	111 'r8': [ 'r{}'.format(N) for N in range(8,16) ],

	112 'mm0': [ 'mm{}'.format(N) for N in range(8) ],

	113 'mmalt': [ 'mm{}'.format(N) for N in range(8) ],

	114 'st(0)': [ 'st({})'.format(N) for N in range(8) ],

	115 'xmm0': [ 'xmm{}'.format(N) for N in range(8) ],

	116 'xmm8': [ 'xmm{}'.format(N) for N in range(8,16) ],

	117 'ymm0': [ 'ymm{}'.format(N) for N in range(8) ],

	118 'ymm8': [ 'ymm{}'.format(N) for N in range(8,16) ]

	119 }

	120

	121

	122 NOP = 0x90

	123

	124

	125 def PadToBundleSize(bytes):

	126 assert len(bytes) <= validator.BUNDLE_SIZE

	127 return bytes + [NOP] * (validator.BUNDLE_SIZE - len(bytes))

	128

	129

	130 # In x86-64 mode we have so-called 'restricted register' which is used to

	131 # tie two groups together. Some instructions require particular value to

	132 # be stored in this variable, while some accept any non-special restricted

	133 # register (%ebp and %esp are special because they can only be accepted by

	134 # a few 'special' instructions).

	135 #

	136 # You can find more details in the "NaCl SFI model on x86-64 systems" manual.

	137 #

	138 # We try to feed all possible 'restricted registers' into validator and then

	139 # classify the instruction using this map. If set of acceptable 'restricted

	140 # registers' is not here, then it's an error in validator.

	141 ACCEPTABLE_X86_64_INPUTS = {

	142 0x00001: 'input_rr=%eax',

	143 0x00002: 'input_rr=%ecx',

	144 0x00004: 'input_rr=%edx',

	145 0x00008: 'input_rr=%ebx',

	146 0x00010: 'input_rr=%esp',

	147 0x00020: 'input_rr=%ebp',

	148 0x00040: 'input_rr=%esi',

	149 0x00080: 'input_rr=%edi',

	150 0x00100: 'input_rr=%r8d',

	151 0x00200: 'input_rr=%r9d',

	152 0x00400: 'input_rr=%r10d',

	153 0x00800: 'input_rr=%r11d',

	154 0x01000: 'input_rr=%r12d',

	155 0x02000: 'input_rr=%r13d',

	156 0x04000: 'input_rr=%r14d',

	157 0x08000: 'input_rr=%r15d',

	158 0x1ffcf: 'input_rr=any_nonspecial'

	159 }

	160

	161 # Any instruction must produce either None or one of fifteen registers as an

	162 # output 'restricted register' value. 'r15d' is NOT acceptable as an output.

	163 ACCEPTABLE_X86_64_OUTPUT_REGISTERS = tuple(

	164 '%' + reg for reg in (REGISTERS['eax'] + REGISTERS['r8d'])[0:-1])

	165

	166

	167 def ValidateInstruction(instruction, validator_inst):

	168 bundle = ''.join(map(chr, PadToBundleSize(instruction)))

	169 if options.bitness == 32:

	170 result = validator_inst.ValidateChunk(bundle, bitness=32)

	171 return result, []

	172 else:

	173 valid_inputs = 0

	174 known_final_rr = None

	175 output_rr = None

	176 # Note that iteration order is aligned with ACCEPTABLE_X86_64_INPUTS array

	177 # above.

	178 for bit, initial_rr in enumerate(validator.ALL_REGISTERS + [None]):

	179 valid, final_rr = validator_inst.ValidateAndGetFinalRestrictedRegister(

	180 bundle, len(instruction), initial_rr)

	181 if valid:

	182 # final_rr should not depend on input_rr

	183 assert valid_inputs == 0 or known_final_rr == final_rr

	184 valid_inputs \|= 1 << bit

	185 known_final_rr = final_rr

	186 # If nothing is accepted then instruction is not valid. Easy and simple.

	187 if valid_inputs == 0: return False, []

	188 # If returned value in unacceptable we'll get IndexError here and this

	189 # test will fail

	190 if known_final_rr is not None:

	191 output_rr = ACCEPTABLE_X86_64_OUTPUT_REGISTERS[known_final_rr]

	192 # If collected valid_inputs are unacceptable we'll get KeyError here and

	193 # this test will fail

	194 return True, [ACCEPTABLE_X86_64_INPUTS[valid_inputs],

	195 'output_rr={}'.format(output_rr)]

	196

	197

	198 class WorkerState(object):

	199 def __init__(self, prefix, validator):

	200 self.total_instructions = 0

	201 self.num_valid = 0

	202 self.validator = validator

	203 self.output = set()

	204 self.trace = []

	205

	206

	207 def ReceiveInstruction(self, bytes):

	208 self.total_instructions += 1

	209 result, notes = ValidateInstruction(bytes, self.validator)

	210 if result:

	211 self.num_valid += 1

	212 dis = self.validator.DisassembleChunk(

	213 ''.join(map(chr, bytes)),

	214 bitness=options.bitness)

	215 for line_nr in xrange(len(dis)):

	216 dis[line_nr] = str(dis[line_nr])

	217 assert dis[line_nr][0:17] == 'Instruction(0x' + str(line_nr) + ': '

	218 assert dis[line_nr][-1:] == ')'

	219 dis[line_nr] = dis[line_nr][17:-1]

	220 # If %rip is involved then comment will be different depending on the

	221 # instruction length. Eliminate it.

	222 if '(%rip)' in dis[0]:

	223 dis[0] = re.sub(' # 0x[ ][0-9a-fA-F]', '', dis[0])

	224 # Zero displacements are represented as 0x0 for all instructions except

	225 # jumps where they disassembled as non-zero due to %eip/%rip-relative

	226 # addressing. We replace this displacement with %eip/%rip to simplify

	227 # compression.

	228 if ' 0x' in dis[0] and ' 0x0' not in dis[0]:

	229 for bytes in xrange(1, 16):

	230 dis[0] = re.sub(

	231 '(' + '(?:[0-9a-fA-F][0-9a-fA-F] ){' + str(bytes) + '} .* )' +

	232 hex(bytes) + '(.*)',

	233 '\\1%eip\\2' if options.bitness == 32 else '\\1%rip\\2',

	234 dis[0]);

	235 dis[0] = 'Instruction: ' + dis[0]

	236 dis += notes

	237 self.output.add('; '.join(dis))

	238

	239

	240 def RecordTrace(self, compressor_nr, instruction):

	241 self.trace.append((compressor_nr, instruction))

	242

	243

	244 # Compressor has three slots: regex (which picks apart given instruction),

	245 # subst (which is used to denote compressed version) and replacements (which

	246 # are used to generate set of instructions from a given code).

	247 #

	248 # Example compressor:

	249 # regex = '.?[0-9a-fA-F]([0-7]) \\w (%e(?:[abcd]x\|[sb]p\|[sd]i)).*()'

	250 # subst = ('[0-7]', '[%eax..%edi]', ' # register in opcode')

	251 # replacements = ((0, '%eax'), (1, '%ecx'), (2, '%edx'), (3, '%ebx')

	252 # (4, '%esp'), (5, '%ebp'), (6, '%esi'), (7, '%edi'))

	253 #

	254 # When faced with instriuction '40 inc %eax' it will capture the following

	255 # pieces of said instruction: '4[0] inc [%eax]'.

	256 #

	257 # Then it will produce the following eight instructions:

	258 # '40 inc %eax'

	259 # '41 inc %ecx'

	260 # '42 inc %edx'

	261 # '43 inc %ebx'

	262 # '44 inc %esp'

	263 # '45 inc %ebp'

	264 # '46 inc %esi'

	265 # '47 inc %edi'

	266 #

	267 # If all these instructions can be found in a set of instructions then

	268 # compressor will remove them from said set and will insert one replacement

	269 # "compressed instruction" '4[0-7] inc [%eax..%edi] # register in opcode'.

	270 #

	271 # Note that last group is only used in the replacement. It's used to grab marks

	272 # added by previous compressors and to replace them with a new mark.

	273 class Compressor(object):

	274 __slots__ = [

	275 'regex',

	276 'subst',

	277 'replacements'

	278 ]

	279

	280 def __init__(self, regex, subst, replacements=None):

	281 self.regex = re.compile(regex)

	282 self.subst = subst

	283 self.replacements = [] if replacements is None else replacements

	284

	285

	286 def CompressionTemplate(instruction, match, mark):

	287 """ Replace all match groups with the mark. """

	288 pos = 0

	289 format_str = ''

	290 for group in range(1, len(match.groups())):

	291 format_str += instruction[pos:match.start(group)] + mark

	292 pos = match.end(group)

	293 return format_str + instruction[pos:match.start(len(match.groups()))]

	294

	295

	296 def CompressOneMatch(instructions, instruction, match, compressor):

	297 format_str = CompressionTemplate(instruction, match, '{}')

	298 subset = set()

	299 for replacement in compressor.replacements:

	300 replacement_str = format_str.format(*replacement)

	301 if not replacement_str in instructions:

	302 return (False, instructions)

	303 subset.add(replacement_str)

	304 instructions -= subset

	305 instructions.add((format_str + '{}').format(*compressor.subst))

	306 return (True, instructions)

	307

	308

	309 def CompressOneInstruction(instructions, compressors, split, cache):

	310 sorted_instructions = (sorted(i for i in instructions if i > split) +

	311 sorted(i for i in instructions if i < split))

	312 for instruction in sorted_instructions:

	313 if instruction in cache:

	314 compressors_list = cache[instruction]

	315 for compressor_nr, match, compressor in compressors_list:

	316 result, instructions = CompressOneMatch(

	317 instructions, instruction, match, compressor)

	318 if result:

	319 return (instructions, compressor_nr, instruction)

	320 else:

	321 compressors_list = []

	322 for compressor_nr, compressor in enumerate(compressors):

	323 match = compressor.regex.match(instruction)

	324 if match:

	325 compressors_list.append((compressor_nr, match, compressor))

	326 result, instructions = CompressOneMatch(

	327 instructions, instruction, match, compressor)

	328 if result:

	329 return (instructions, compressor_nr, instruction)

	330 cache[instruction] = compressors_list

	331 return (instructions, False, False)

	332

	333

	334 def Compressed(instructions, compressors, show_progress):

	335 split = ''

	336 cache = {}

	337 while True:

	338 instructions, rule, split = CompressOneInstruction(

	339 instructions, compressors, split, cache)

	340 if rule is False: break

	341 show_progress(rule, split)

	342 return instructions

	343

	344

	345 def Worker((prefix, state_index)):

	346 worker_state = WorkerState(prefix, worker_validator)

	347

	348 try:

	349 dfa_traversal.TraverseTree(

	350 dfa.states[state_index],

	351 final_callback=worker_state.ReceiveInstruction,

	352 prefix=prefix,

	353 anyfield=0)

	354 if (prefix[0] != 0x0f or prefix[1] != 0x0f): # Skip 3DNow! instructions

	355 worker_state.output = Compressed(set(worker_state.output),

	356 compressors,

	357 worker_state.RecordTrace)

	358 except Exception as e:

	359 traceback.print_exc() # because multiprocessing imap swallows traceback

	360 raise

	361

	362 return (

	363 prefix,

	364 worker_state.total_instructions,

	365 worker_state.num_valid,

	366 worker_state.output,

	367 worker_state.trace)

	368

	369

	370 def ParseOptions():

	371 parser = optparse.OptionParser(usage='%prog [options] xmlfile')

	372

	373 parser.add_option('--bitness',

	374 choices=['32', '64'],

	375 help='The subarchitecture: 32 or 64')

	376 parser.add_option('--validator_dll',

	377 help='Path to librdfa_validator_dll')

	378 parser.add_option('--decoder_dll',

	379 help='Path to librdfa_decoder_dll')

	380

	381 options, args = parser.parse_args()

	382 options.bitness = int(options.bitness)

	383

	384 if len(args) != 1:

	385 parser.error('specify one xml file')

	386

	387 (xml_file, ) = args

	388

	389 return options, xml_file

	390

	391

	392 # Version suitable for use in regular expressions

	393 REGISTERS_RE = REGISTERS.copy()

	394 REGISTERS_RE['st(0)'] = [ 'st\${}\$'.format(N) for N in range(8) ]

	395 REGISTERS_RE['st\$0\$'] = REGISTERS_RE['st(0)']

	396

	397 # Index names in 'natual' order (as defined by IA32/x86-64 ABI)

	398 INDEXES = {

	399 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'eiz', 'ebp', 'esi', 'edi' ],

	400 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'riz', 'rbp', 'rsi', 'rdi' ],

	401 'r8': [ 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15' ]

	402 }

	403 # Register which can not be used as base in 64-bit mode in all incarnations

	404 X86_64_BASE_REGISTERS = set([

	405 '%spl', '%bpl', '%r15b',

	406 '%sp', '%bp', '%r15w',

	407 '%esp', '%ebp', '%r15d',

	408 '%rsp', '%rbp', '%r15',

	409 '%rip'

	410 ])

	411

	412

	413 def ModRMRegisterReplacements(rm, reg=None, writes_to='rm', opcode_bits=0,

	414 memory_accessed=True, register_write='ignore'):

	415 """Creates replacement tuples list for register-to-register instructions

	416

	417 Args:

	418 rm: rm operand kind (see REGISTERS array)

	419 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	420 writes_to: three-state selector

	421 'reg' - instruction uses rm as source, reg as destination

	422 'rm' - instruction uses reg as source, rm as destination

	423 'both' - instruction writes to both reg and rm

	424 opcode_bits: opcode extensions code (used when reg is None)

	425 memory_accessed: True if instruction accesses memory
	halyavin 2013/11/12 11:44:06 Not used. Default to True makes it even more misle Not used. Default to True makes it even more misleading. khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > Not used. Default to True makes it even more misleading. Done.
	426 register_write: three-state selector

	427 'sandbox' - instruction can be used to produce "restricted register"

	428 'protect' - instruction can damage output, protect "special registers"

	429 'ignore' - instruction does not affect it's operands (e.g. test) or

	430 is used with non-GP registers (X87, MMX, XMM, etc)

	431

	432 Returns:

	433 List of replacement tuples

	434 """

	435 output_key = (options.bitness, reg, rm, writes_to, opcode_bits,

	436 memory_accessed, register_write)

	437 if output_key in ModRMRegisterReplacements.replacements:

	438 return ModRMRegisterReplacements.replacements[output_key]

	439

	440 replacements = []

	441
	halyavin 2013/11/12 11:44:06 # Two upper bits of ModR/M byte (mod field) must b # Two upper bits of ModR/M byte (mod field) must be equal to 11. khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > # Two upper bits of ModR/M byte (mod field) must be equal to 11. Done.
	442 if reg is None:

	443 # reg field is used as opcode extension

	444 byte_range = [byte

	445 for byte in range(0xff, 0xbf, -1)

	446 if byte & 0x38 == opcode_bits << 3]
	halyavin 2013/11/12 11:44:06 if (byte >> 3) & 0x7 == opcode_bits if (byte >> 3) & 0x7 == opcode_bits khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > if (byte >> 3) & 0x7 == opcode_bits Done.
	447 else:

	448 byte_range = range(0xff, 0xbf, -1)

	449

	450 for modrm in byte_range:

	451 # Parse ModRM

	452 mod_field = (modrm & 0xc0) >> 6
	halyavin 2013/11/12 11:44:06 (modrm >> 6) & 0x3 (modrm >> 6) & 0x3 khim 2013/11/13 00:32:00 I don't really like it: reight now it's easy to se Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > (modrm >> 6) & 0x3 I don't really like it: reight now it's easy to see that we've grabbed all the bits from ModR/M because 0xc0 \| 0x38 \| 0x7 == 0xff. But whatever, I'll not fight for that. Done.
	453 reg_field = (modrm & 0x38) >> 3
	halyavin 2013/11/12 11:44:06 (modrm >> 3) & 0x7 (modrm >> 3) & 0x7 khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > (modrm >> 3) & 0x7 Done.
	454 rm_field = (modrm & 0x07)

	455 if reg is not None:

	456 reg_text = '%' + REGISTERS[reg][reg_field]

	457 bytes = '{:02x}'.format(modrm)
	halyavin 2013/11/12 11:44:06 bytes->byte_text bytes->byte_text khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > bytes->byte_text Done.
	458 rm_text = '%' + REGISTERS[rm][rm_field]

	459 replacement = [bytes]
	halyavin 2013/11/12 11:44:06 It is not clear to me which variables can be equal It is not clear to me which variables can be equal to None in code below. I think some asserts will help to clear this up. khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > It is not clear to me which variables can be equal to None in code below. I > think some asserts will help to clear this up. Done.
	460 if reg is None:

	461 replacement.append(rm_text)

	462 else:

	463 replacement.append(rm_text if writes_to == 'reg' else reg_text)
	halyavin 2013/11/12 11:44:06 Can any on these 2 fields equal to None? Can any on these 2 fields equal to None? khim 2013/11/13 00:32:00 Hmm... Reg can be None - that's why we are checkin Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > Can any on these 2 fields equal to None? Hmm... Reg can be None - that's why we are checking for that case two lines above! Rm can not be none (there are actually couple of such instructions but this will just complicate logic immensely for little benefit thus we don't handle this case here).
	464 replacement.append(reg_text if writes_to == 'reg' else rm_text)

	465 if options.bitness == 64:

	466 replacement.append('any_nonspecial')

	467 output = reg_text if writes_to == 'reg' else rm_text
	halyavin 2013/11/12 11:44:06 Can output equal to None? Can output equal to None? khim 2013/11/13 00:32:00 Sure. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > Can output equal to None? Sure.
	468 replacement.append(output if register_write == 'sandbox' else None)
	halyavin 2013/11/12 11:44:06 Why we are pushing None here? I thought that repla Why we are pushing None here? I thought that replacement is an array of strings without None values since they will lead to an exception during replacement. khim 2013/11/13 00:32:00 Replacement uses .format(...) which means None bec Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > Why we are pushing None here? I thought that replacement is an array of strings > without None values since they will lead to an exception during replacement. Replacement uses .format(...) which means None becomes 'None'
	469 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:
	halyavin 2013/11/12 11:44:06 # Do not generate replacements that will violate s # Do not generate replacements that will violate sandboxing rules. khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > # Do not generate replacements that will violate sandboxing rules. Done.
	470 continue

	471 if register_write == 'sandbox' and output == '%r15d':

	472 continue

	473 if writes_to == 'both' and reg_text in X86_64_BASE_REGISTERS:
	halyavin 2013/11/12 11:44:06 Can reg_text equal to None here? Can reg_text equal to None here? khim 2013/11/13 00:32:00 Makes no sense, really but should be handled corre Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > Can reg_text equal to None here? Makes no sense, really but should be handled correctly. Added assert to clarify the situation.
	474 continue

	475 replacement = tuple(replacement)

	476 replacements.append(replacement)
	halyavin 2013/11/12 11:44:06 We can join these 2 lines to avoid changing type o We can join these 2 lines to avoid changing type of replacement variable. khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > We can join these 2 lines to avoid changing type of replacement variable. Done.
	477 ModRMRegisterReplacements.replacements[output_key] = tuple(replacements)

	478 return ModRMRegisterReplacements.replacements[output_key]

	479 ModRMRegisterReplacements.replacements = {}

	480

	481

	482 def ModRMMemoryReplacements(rm, reg=None, writes_to='rm', opcode_bits=0,

	483 memory_accessed=True, register_write='ignore',

	484 index_r8=False):

	485 """Creates replacement tuples list for register-to-register instructions
	halyavin 2013/11/12 11:44:06 Copy paste error: "register-to-register" Copy paste error: "register-to-register" khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > Copy paste error: "register-to-register" Done.
	486

	487 Args:

	488 rm: rm operand kind (see REGISTERS array)

	489 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	490 writes_to: three-state selector

	491 'reg' - instruction uses rm as source, reg as destination

	492 'rm' - instruction uses reg as source, rm as destination

	493 'both' - instruction writes to both reg and rm

	494 opcode_bits: opcode extensions code (used when reg is None)

	495 memory_accessed: True if instruction accesses memory

	496 register_write: three-state selector

	497 'sandbox' - instruction can be used to produce "restricted register"

	498 'protect' - instruction can damage output, protect "special registers"

	499 'ignore' - instruction does not affect it's operands (e.g. test) or

	500 is used with non-GP registers (X87, MMX, XMM, etc)

	501 index_r8: True if REX.X bit in the instruction set to 1

	502

	503 Returns:

	504 List of replacement tuples

	505 """

	506 output_key = (options.bitness, reg, rm, writes_to, opcode_bits, index_r8,

	507 memory_accessed, register_write)

	508 if output_key in ModRMMemoryReplacements.replacements:

	509 return ModRMMemoryReplacements.replacements[output_key]

	510

	511 if options.bitness == 32:

	512 base = 'eax'

	513 index = 'eax'

	514 else:

	515 base = 'r8' if '8' in rm or rm == 'mmalt' else 'rax'
	halyavin 2013/11/12 11:44:06 We wanted to replace it with the access to diction We wanted to replace it with the access to dictionary. khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > We wanted to replace it with the access to dictionary. Done.
	516 index = 'r8' if index_r8 else 'rax'

	517 input = 'r8d' if index_r8 else 'eax'

	518

	519 replacements = []

	520
	halyavin 2013/11/12 11:44:06 # Two upper bits in ModR/M byte (mod field) must b # Two upper bits in ModR/M byte (mod field) must be equal to 00, 01 or 10. khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > # Two upper bits in ModR/M byte (mod field) must be equal to 00, 01 or 10. Done.
	521 if reg is None:

	522 # reg field is used as opcode extension

	523 byte_range = [byte

	524 for byte in range(0xbf, -1, -1)

	525 if byte & 0x38 == opcode_bits << 3]

	526 else:

	527 byte_range = range(0xbf, -1, -1)

	528

	529 for modrm in byte_range:

	530 # Parse ModRM

	531 mod_field = (modrm & 0xc0) >> 6

	532 reg_field = (modrm & 0x38) >> 3

	533 rm_field = (modrm & 0x07)

	534 if reg is not None:

	535 reg_text = '%' + REGISTERS[reg][reg_field]

	536 # If RM field != %rsp then there are no index

	537 if rm_field != validator.REG_RSP:

	538 base_text = '%' + REGISTERS[base][rm_field]

	539 # If RM field == %rbp and MOD fiels is zero then it's absolute address
	halyavin 2013/11/12 11:44:06 fiels->field fiels->field khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > fiels->field Done.
	540 if mod_field == 0 and rm_field == validator.REG_RBP:

	541 bytes = '{:02x} 00 00 00 00'.format(modrm)

	542 rm_text = '0x0' if options.bitness == 32 else '0x0(%rip)'

	543 base_text = '%rip'

	544 # Memory access with just a base register

	545 elif mod_field == 0:

	546 bytes = '{:02x}'.format(modrm)

	547 rm_text = '({})'.format(base_text)

	548 # Memory access with base and 8bit offset

	549 elif mod_field == 1:

	550 bytes = '{:02x} 00'.format(modrm)

	551 rm_text = '0x0({})'.format(base_text)

	552 # Memory access with base and 32bit offset

	553 else: # mod_field == 2

	554 bytes = '{:02x} 00 00 00 00'.format(modrm)

	555 rm_text = '0x0({})'.format(base_text)

	556 replacement = [bytes]

	557 if reg is None:

	558 replacement.append(rm_text)

	559 else:

	560 replacement.append(rm_text if writes_to == 'reg' else reg_text)

	561 replacement.append(reg_text if writes_to == 'reg' else rm_text)

	562 if options.bitness == 64:

	563 replacement.append('any_nonspecial')

	564 output = reg_text if writes_to == 'reg' else None

	565 replacement.append(output if register_write == 'sandbox' else None)

	566 if memory_accessed and base_text not in X86_64_BASE_REGISTERS:

	567 continue

	568 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:

	569 continue

	570 if register_write == 'sandbox' and output == '%r15d':

	571 continue

	572 if writes_to == 'both' and reg_text in X86_64_BASE_REGISTERS:

	573 continue

	574 replacement = tuple(replacement)

	575 replacements.append(replacement)

	576 else:

	577 # If RM field == %rsp then we have SIB byte

	578 for sib in xrange(0x100):

	579 scale_field = (sib & 0xc0) >> 6

	580 index_field = (sib & 0x38) >> 3

	581 base_field = (sib & 0x07)

	582 index_text = '%' + INDEXES[index][index_field]

	583 base_text = '%' + REGISTERS[base][base_field]

	584 scale_text = pow(2, scale_field)
	halyavin 2013/11/12 11:44:06 Use shift instead and apply str(). Use shift instead and apply str(). khim 2013/11/13 00:32:00 str() is not really needed, but to make sure "..._ Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > Use shift instead and apply str(). str() is not really needed, but to make sure "..._text" actually text... It's not time-critical part of the code thus probably Ok
	585 # If BASE is %rbp and MOD == 0 then index with 32bit offset is used

	586 if mod_field == 0 and base_field == validator.REG_RBP:

	587 bytes = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	588 if (options.bitness == 32 or
	halyavin 2013/11/12 11:44:06 Reverting this if will reduce amount of negative l Reverting this if will reduce amount of negative logic here. khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > Reverting this if will reduce amount of negative logic here. Done.
	589 index_field != validator.REG_RSP or

	590 scale_field != 0 or index[0:2] == 'r8'):
	halyavin 2013/11/12 11:44:06 Can index be equal to r8d/r8w/r8b here? Can index be equal to r8d/r8w/r8b here? khim 2013/11/13 00:32:00 Apparently not. We don't support this form of lea Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > Can index be equal to r8d/r8w/r8b here? Apparently not. We don't support this form of lea and all other instructions are forbidden because of security rules.
	591 rm_text = '0x0(,{},{})'.format(index_text, scale_text)

	592 else:

	593 rm_text = '0x0'

	594 base_text = ''

	595 # Memory access with base and index (no offset)

	596 elif mod_field == 0:

	597 bytes = '{:02x} {:02x}'.format(modrm, sib)

	598 rm_text = '({},{},{})'.format(base_text, index_text, scale_text)

	599 # Memory access with base, index and 8bit offset

	600 elif mod_field == 1:

	601 bytes = '{:02x} {:02x} 00'.format(modrm, sib)

	602 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	603 # Memory access with base, index and 32bit offset

	604 elif mod_field == 2:

	605 bytes = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	606 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	607 # Pretty-printing of access via %rsp

	608 if (scale_field == 0 and index != 'r8' and

	609 base_field == validator.REG_RSP and

	610 index_field == validator.REG_RSP):

	611 #index_text = 'any_nonspecial'

	612 rm_text = ('0x0({})' if mod_field else '({})').format(base_text)

	613 if index_text == "%riz":
	halyavin 2013/11/12 11:44:06 Use singular quotes. Use singular quotes. khim 2013/11/13 00:32:00 Done. Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > Use singular quotes. Done.
	614 index_text = 'any_nonspecial'
	halyavin 2013/11/12 11:44:06 Why we need this replacement? Why we need this replacement? khim 2013/11/13 00:32:00 Because i've copy-pasted check from another place Show quoted text On 2013/11/12 11:44:06, halyavin wrote: > Why we need this replacement? Because i've copy-pasted check from another place and it expected to see 'any_nonspecial' there. Fixed.
	615 replacement = [bytes]

	616 if reg is None:

	617 replacement.append(rm_text)

	618 else:

	619 replacement.append(rm_text if writes_to == 'reg' else reg_text)

	620 replacement.append(reg_text if writes_to == 'reg' else rm_text)

	621 if options.bitness == 64:

	622 if not memory_accessed or index_text == 'any_nonspecial':

	623 replacement.append('any_nonspecial')

	624 else:

	625 replacement.append('%' + REGISTERS[input][index_field])

	626 # Currently xchg can not used used for sandboxing

	627 output = reg_text if writes_to == 'reg' else None

	628 replacement.append(output if register_write == 'sandbox' else None)

	629 if memory_accessed:

	630 if base_text not in X86_64_BASE_REGISTERS: continue

	631 if index_text in X86_64_BASE_REGISTERS - set(['%r15']): continue

	632 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:

	633 continue

	634 if register_write == 'sandbox' and output == '%r15d':

	635 continue

	636 if (writes_to == 'both' and

	637 reg_text in X86_64_BASE_REGISTERS): continue

	638 replacement = tuple(replacement)

	639 replacements.append(replacement)

	640 ModRMMemoryReplacements.replacements[output_key] = tuple(replacements)

	641 return ModRMMemoryReplacements.replacements[output_key]

	642 ModRMMemoryReplacements.replacements = {}

	643

	644

	645 def PrepareCompressors():

	646 global compressors

	647 global main_compressors

	648 global register_compressors

	649 global memory_compressors

	650

	651 # "Larger" compressors should be tried first, then "smaller" ones.

	652 main_compressors = []

	653 register_compressors = []

	654 memory_compressors = []

	655 extra_compressors = []

	656

	657 if options.bitness == 32:

	658 register_kinds = ('al', 'ax', 'eax', 'mm0', 'xmm0', 'ymm0')

	659 register_kind_pairs = (

	660 ( 'al', 'al'),

	661 ( 'ax', 'al'),

	662 ( 'ax', 'ax'),

	663 ( 'eax', 'al'),

	664 ( 'eax', 'ax'),

	665 ( 'eax', 'eax'),

	666 ( 'eax', 'mm0'),

	667 ( 'mm0', 'eax'),

	668 ( 'eax', 'xmm0'),

	669 ('xmm0', 'eax'),

	670 ( 'mm0', 'mm0'),

	671 ( 'mm0', 'xmm0'),

	672 ('xmm0', 'mm0'),

	673 ('xmm0', 'xmm0'),

	674 ('xmm0', 'ymm0'),

	675 ('ymm0', 'xmm0'),

	676 ('ymm0', 'ymm0')

	677 )

	678 else:

	679 register_kinds = ('al', 'spl', 'ax', 'eax', 'rax', 'mm0', 'xmm0', 'ymm0',

	680 'r8b', 'r8w', 'r8d', 'r8', 'mmalt', 'xmm8', 'ymm8')

	681 register_kind_pairs = (

	682 ( 'al', 'al'),

	683 ( 'spl', 'spl'), ( 'spl', 'r8b'), ( 'r8b', 'spl'), ( 'r8b', 'r8b'),

	684 ( 'ax', 'al'),

	685 ( 'ax', 'spl'), ( 'ax', 'r8b'), ( 'r8w', 'spl'), ( 'r8w', 'r8b'),

	686 ( 'ax', 'ax'), ( 'ax', 'r8w'), ( 'r8w', 'ax'), ( 'r8w', 'r8w'),

	687 ( 'eax', 'al'),

	688 ( 'eax', 'spl'), ( 'eax', 'r8b'), ( 'r8d', 'spl'), ( 'r8d', 'r8b'),

	689 ( 'eax', 'ax'), ( 'eax', 'r8w'), ( 'r8d', 'ax'), ( 'r8d', 'r8w'),

	690 ( 'eax', 'eax'), ( 'eax', 'r8d'), ( 'r8d', 'eax'), ( 'r8d', 'r8d'),

	691 ( 'rax', 'al'),

	692 ( 'rax', 'spl'), ( 'rax', 'r8b'), ( 'r8', 'spl'), ( 'r8', 'r8b'),

	693 ( 'rax', 'ax'), ( 'rax', 'r8w'), ( 'r8', 'ax'), ( 'r8', 'r8w'),

	694 ( 'rax', 'eax'), ( 'rax', 'r8d'), ( 'r8', 'eax'), ( 'r8', 'r8d'),

	695 ( 'rax', 'rax'), ( 'rax', 'r8'), ( 'r8', 'rax'), ( 'r8', 'r8'),

	696 ( 'eax', 'mm0'), ( 'eax','mmalt'), ( 'r8d', 'mm0'), ( 'eax', 'mmalt'),

	697 ( 'rax', 'mm0'), ( 'rax','mmalt'), ( 'r8', 'mm0'), ( 'r8', 'mmalt'),

	698 ( 'mm0', 'eax'), ('mmalt', 'eax'), ( 'mm0', 'r8d'), ('mmalt', 'r8d'),

	699 ( 'mm0', 'rax'), ('mmalt', 'rax'), ( 'mm0', 'r8'), ('mmalt', 'r8'),

	700 ( 'eax', 'xmm0'), ( 'eax', 'xmm8'), ( 'r8d', 'xmm0'), ( 'r8d', 'xmm8'),

	701 ( 'rax', 'xmm0'), ( 'rax', 'xmm8'), ( 'r8', 'xmm0'), ( 'r8', 'xmm8'),

	702 ('xmm0', 'eax'), ('xmm0', 'r8d'), ('xmm8', 'eax'), ('xmm8', 'r8d'),

	703 ('xmm0', 'rax'), ('xmm0', 'r8'), ('xmm8', 'rax'), ('xmm8', 'r8'),

	704 ( 'mm0', 'mm0'), ('mmalt', 'mm0'), ( 'mm0','mmalt'), ('mmalt','mmalt'),

	705 ( 'mm0', 'xmm0'), ('mmalt','xmm0'), ( 'mm0', 'xmm8'), ('mmalt', 'xmm8'),

	706 ('xmm0', 'mm0'), ('xmm8', 'mm0'), ('xmm0','mmalt'), ('xmm8', 'mmalt'),

	707 ('xmm0', 'xmm0'), ('xmm0', 'xmm8'), ('xmm8', 'xmm0'), ('xmm8', 'xmm8'),

	708 ('xmm0', 'ymm0'), ('xmm0', 'ymm8'), ('xmm8', 'ymm0'), ('xmm8', 'ymm8'),

	709 ('ymm0', 'xmm0'), ('ymm0', 'xmm8'), ('ymm8', 'xmm0'), ('ymm8', 'xmm8'),

	710 ('ymm0', 'ymm0'), ('ymm0', 'ymm8'), ('ymm8', 'ymm0'), ('ymm8', 'ymm8')

	711 )

	712

	713 # Largest compressors: both reg and rm fields are used

	714 for reg, rm in register_kind_pairs:

	715 start_reg = REGISTERS[reg][0]

	716 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	717 start_rm = REGISTERS[rm][0]

	718 end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]

	719 instruction_kinds = [

	720 # Normal instructions with two operands (rm to reg)

	721 ({'writes_to':'reg'}, '', ' # rm to reg', ''),

	722 # Normal instructions with two operands (reg to rm)

	723 ({'writes_to':'rm'}, '', ' # reg to rm', '')

	724 ]

	725 # Lea in 64 bit mode is truly unique instruction for now

	726 if options.bitness == 64 and reg in ('eax', 'r8d', 'rax', 'r8'):

	727 instruction_kinds = [

	728 ({'writes_to':'reg', 'memory_accessed':False,

	729 'register_write':'sandbox' if reg in ('eax', 'r8d') else 'protect'},

	730 ' # lea', ' # rm to reg; lea', ' # lea')] + instruction_kinds

	731 # There are few more forms in 64 bit case (rm to reg)

	732 if options.bitness == 64 and reg in ('eax', 'r8d'):

	733 # Zero-extending version.

	734 instruction_kinds.append(

	735 ({'writes_to':'reg', 'register_write':'sandbox'},

	736 '', ' # rm to reg', ''))

	737 # More forms in 64 bit case (reg to rm)

	738 if options.bitness == 64 and rm in ('eax', 'r8d'):

	739 # Zero-extending version.

	740 instruction_kinds.append(

	741 ({'writes_to':'rm', 'register_write':'sandbox'},

	742 '', ' # reg to rm', ''))

	743 # Zero-extending xchg/xadd

	744 instruction_kinds.append(

	745 ({'writes_to':'both', 'register_write':'sandbox'},

	746 ' # write to both',

	747 ' # reg to rm; write to both',

	748 ' # write to both'))

	749 # Still more forms for 64 bit case (rm to reg).

	750 if options.bitness == 64 and reg in ('al', 'spl', 'ax', 'eax', 'rax',

	751 'r8b', 'r8w', 'r8d', 'r8'):

	752 # Dangerous instructions (rm to reg)

	753 instruction_kinds.append(

	754 ({'writes_to':'reg', 'register_write':'protect'},

	755 '', ' # rm to reg', ''))

	756 # Still more forms for 64 bit case (reg to rm)

	757 if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',

	758 'r8b', 'r8w', 'r8d', 'r8'):

	759 # Dangerous instructions (reg to rm)

	760 instruction_kinds.append(

	761 ({'writes_to':'rm', 'register_write':'protect'},

	762 '', ' # reg to rm', ''))

	763 # Dangerous xchg/xadd

	764 instruction_kinds.append(

	765 ({'writes_to':'both', 'register_write':'protect'},

	766 ' # write to both',

	767 ' # reg to rm; write to both',

	768 ' # write to both'))

	769 # 3DNow! instructions

	770 instruction_kinds.append(

	771 ({'writes_to':'reg', '3dnow':'yes'}, '', ' # rm to reg', ''))

	772 if options.bitness == 64:

	773 instruction_kinds_index_r8 = []

	774 for instruction_kind in instruction_kinds:

	775 args = instruction_kind[0].copy()

	776 args['index_r8'] = True

	777 instruction_kinds_index_r8.append((args, ) + instruction_kind[1:])

	778 instruction_kinds.extend(instruction_kinds_index_r8)

	779 for args, notes, notes_register, notes_memory in instruction_kinds:

	780 regex = '(?: 00)*'

	781 # Additional byte is opcode extension with 3DNow! instructions.

	782 if '3dnow' in args:

	783 regex = ' [0-9a-fA-F][0-9a-fA-F]'

	784 args.pop('3dnow')

	785 regex += ' (?:lock )?\\w* (?:\\$0x0,\|\\$0x0,\\$0x0,\|%cl,\|%xmm0,)?'

	786 # We only need to process ModR/M+SIB '04 04' or '04 07' here

	787 if options.bitness == 32:

	788 regex_mem = '\$%esp,%eax,1\$'

	789 else:

	790 regex_mem = '\$(?:%rsp\|%r15),(?:%rax\|%r8),1\$'

	791 output = None

	792 output_note = None

	793 if args['writes_to'] == 'reg':

	794 regex += '(%' + REGISTERS[rm][0] + '\|' + regex_mem + ')'

	795 regex += ',(%' + REGISTERS[reg][0] + ')'

	796 if 'register_write' in args and args['register_write'] == 'sandbox':

	797 assert reg in ('eax', 'r8d')

	798 output = '%' + reg + '\|None'

	799 output_note = '[%eax..%edi]' if reg == 'eax' else '[%r8d..%r14d]'

	800 subst = (

	801 'XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),

	802 '[%{}..%{}]'.format(start_reg, end_reg), notes)

	803 subst_register = (

	804 'XX', '[%{}..%{}]'.format(start_rm, end_rm),

	805 '[%{}..%{}]'.format(start_reg, end_reg), notes_register)

	806 subst_memory = (

	807 'XX', '[memory]',

	808 '[%{}..%{}]'.format(start_reg, end_reg), notes_memory)

	809 else:

	810 regex += '(%' + REGISTERS[reg][0] + ')'

	811 regex += ',(%' + REGISTERS[rm][0] + '\|' + regex_mem + ')'

	812 if 'register_write'in args and args['register_write'] == 'sandbox':

	813 assert rm in ('eax', 'r8d')

	814 output = '%' + rm + '\|None'

	815 output_note = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'

	816 subst = (

	817 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	818 '[%{}..%{} or memory]'.format(start_rm, end_rm), notes)

	819 subst_register = (

	820 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	821 '[%{}..%{}]'.format(start_rm, end_rm), notes_register)

	822 subst_memory = (

	823 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	824 '[memory]', notes_memory)

	825 regex += '.*'

	826 if options.bitness == 64:

	827 regex += '; input_rr=(%eax\|%r8d\|any_nonspecial)'

	828 regex += '; output_rr=({})'.format(output)

	829 if 'memory_accessed' in args:

	830 input_note = 'any_nonspecial'

	831 else:

	832 input_note = '[%r8d..%r15d]' if 'index_r8' in args else '[%eax..%edi]'

	833 subst = subst[0:-1] + (input_note, output_note) + subst[-1:]

	834 subst_register = subst_register[0:-1] + (

	835 'any_nonspecial', output_note) + subst_register[-1:]

	836 subst_memory = subst_memory[0:-1] + (

	837 input_note, output_note) + subst_memory[-1:]

	838 regex += '()'

	839 if not 'index_r8' in args:

	840 register_replacement = ModRMRegisterReplacements(rm=rm, reg=reg, **args)

	841 memory_replacement = ModRMMemoryReplacements(rm=rm, reg=reg, **args)

	842 main_replacement = register_replacement + memory_replacement

	843 main_compressors.append(Compressor(

	844 '.*?(04 0[47])' + regex, subst, main_replacement))

	845 memory_compressors.append(Compressor(

	846 '.*?(04 0[47])' + regex, subst_memory, memory_replacement))

	847 register_compressors.append(Compressor(

	848 '.*?(c0)' + regex, subst_register, register_replacement))

	849

	850 # Smaller compressors: only rm field is used.

	851 for rm in register_kinds:

	852 start_rm = REGISTERS[rm][0]

	853 end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]

	854 for opcode_bits in xrange(8):

	855 XX_byte_mark = 'XX/' + str(opcode_bits)

	856 instruction_kinds = [

	857 # The most basic form

	858 ({}, '', '', '')

	859 ]

	860 if options.bitness == 64:

	861 # No memory access (e.g. prefetch)

	862 instruction_kinds = [

	863 ({'memory_accessed':False}, '', '', '')] + instruction_kinds

	864 # More forms in 64 bit case.

	865 if options.bitness == 64 and rm in ('eax', 'r8d'):

	866 # Zero-extending version.

	867 instruction_kinds.append(

	868 ({'register_write':'sandbox'}, '', '', ''))

	869 # Still more forms for 64 bit case (reg to rm).

	870 if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',

	871 'r8b', 'r8w', 'r8d', 'r8'):

	872 # Dangerous instructions.

	873 instruction_kinds.append(

	874 ({'register_write':'protect'}, '', '', ''))

	875 if options.bitness == 64:

	876 instruction_kinds_index_r8 = []

	877 for instruction_kind in instruction_kinds:

	878 args = instruction_kind[0].copy()

	879 args['index_r8'] = True

	880 instruction_kinds_index_r8.append((args, ) + instruction_kind[1:])

	881 instruction_kinds.extend(instruction_kinds_index_r8)

	882 for args, notes, notes_register, notes_memory in instruction_kinds:

	883 subst = (XX_byte_mark, '[%{}..%{} or memory]'.format(start_rm, end_rm),

	884 notes)

	885 subst_register = (XX_byte_mark, '[%{}..%{}]'.format(start_rm, end_rm),

	886 notes_register)

	887 subst_memory = (XX_byte_mark, '[memory]',

	888 notes_memory)

	889 regex = ('(?: 00)* (?:lock )?\\w* (?:\\$0x0,\|%cl,)?'

	890 '(%' + REGISTERS[rm][0] + '\|' + regex_mem + ').*')

	891 output = None

	892 output_note = None

	893 if options.bitness == 64:

	894 if 'register_write' in args and args['register_write'] == 'sandbox':

	895 assert rm in ('eax', 'r8d')

	896 output = '%' + rm + '\|None'

	897 output_note = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'

	898 regex += '; input_rr=(%eax\|%r8d\|any_nonspecial)'

	899 regex += '; output_rr=({})'.format(output)

	900 if 'memory_accessed' in args:

	901 input_note = 'any_nonspecial'

	902 elif 'index_r8' in args:

	903 input_note = '[%r8d..%r15d]'

	904 else:

	905 input_note = '[%eax..%edi]'

	906 subst = subst[0:-1] + (input_note, output_note) + subst[-1:]

	907 subst_register = subst_register[0:-1] + (

	908 'any_nonspecial', output_note) + subst_register[-1:]

	909 subst_memory = subst_memory[0:-1] + (

	910 input_note, output_note) + subst_memory[-1:]

	911 regex += '()'

	912 if not 'index_r8' in args:

	913 register_replacement = ModRMRegisterReplacements(

	914 reg=None, rm=rm, opcode_bits=opcode_bits, **args)

	915 memory_replacement = ModRMMemoryReplacements(

	916 reg=None, rm=rm, opcode_bits=opcode_bits, **args)

	917 main_replacement = register_replacement + memory_replacement

	918 main_compressors.append(Compressor(

	919 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	920 subst, main_replacement))

	921 memory_compressors.append(Compressor(

	922 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	923 subst_memory, memory_replacement))

	924 register_compressors.append(Compressor(

	925 '.?({:02x})'.format(0xc0 + opcode_bits 8) + regex,

	926 subst_register, register_replacement))

	927

	928 # Even smaller compressors: only low 3 bits of opcode are used.

	929 for reg in register_kinds + ('st(0)',):

	930 start_reg = REGISTERS[reg][0]

	931 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	932 for opcode in xrange(8):

	933 for text1, text2, nibble in (

	934 ('[0..7]', '[8..f]', xrange(8)),

	935 ('[012367]', '[89abef]', (0, 1, 2, 3, 6, 7)),

	936 ('[0..6]', '[8..e]', xrange(7))

	937 ):

	938 # Operand is encoded in opcode

	939 extra_compressors.append(Compressor(

	940 '.?[0-9a-fA-F](1)(?: 00)'

	941 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	942 '(%(?:' + REGISTERS_RE[reg][1] + ')).*()',

	943 (text1, '[%{}..%{}]'.format(start_reg, end_reg), ''),

	944 tuple(('{:x}'.format(n), '%' + REGISTERS[reg][n])

	945 for n in nibble)))

	946 extra_compressors.append(Compressor(

	947 '.?[0-9a-fA-F](8)(?: 00)'

	948 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	949 '(%(?:' + REGISTERS_RE[reg][0] + ')).*()',

	950 (text2, '[%{}..%{}]'.format(start_reg, end_reg), ''),

	951 tuple(('{:x}'.format(n + 8), '%' + REGISTERS[reg][n])

	952 for n in nibble)))

	953 # Another version for 64 bit case

	954 if options.bitness == 64 and reg in ('eax', 'r8d'):

	955 # Operand is encoded in opcode and output

	956 extra_compressors.append(Compressor(

	957 '.?[0-9a-fA-F](1)(?: 00)'

	958 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	959 '(%(?:' + REGISTERS_RE[reg][1] + ')).*'

	960 'output_rr=(%(?:'+ REGISTERS_RE[reg][1] + ')).*()',

	961 tuple([text1] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +

	962 ['']),

	963 tuple(['{:x}'.format(n)] + ['%' + REGISTERS[reg][n]] * 2

	964 for n in nibble)))

	965 extra_compressors.append(Compressor(

	966 '.?[0-9a-fA-F](8)(?: 00)'

	967 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	968 '(%(?:' + REGISTERS_RE[reg][0] + ')).*'

	969 'output_rr=(%(?:'+ REGISTERS_RE[reg][0] + ')).*()',

	970 tuple([text2] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +

	971 ['']),

	972 tuple(['{:x}'.format(n + 8)] + ['%' + REGISTERS[reg][n]] * 2

	973 for n in nibble)))

	974 compressors = (main_compressors + memory_compressors + register_compressors +

	975 extra_compressors)

	976

	977 # Special compressors: will handle some cosmetic issues.

	978 #

	979 # SETxx ignores reg field and thus are described as many separate instructions

	980 compressors.append(Compressor(

	981 '.0f 9[0-9a-fA-F] XX(/[0-7]) set.()', ('', ''),

	982 [('/' + str(i), ) for i in range(8)]))

	983 # BSWAP is described with opcode "0f c8+r", not "0f /1" in manual

	984 if options.bitness == 32:

	985 compressors.append(Compressor(

	986 '.(XX/1) bswap.ax.*()', ('c[8..f]', ''), [('XX/1', )]))

	987 else:

	988 compressors.append(Compressor(

	989 '.(XX/1) bswap.ax.*()', ('c[89abef]', ''), [('XX/1', )]))

	990 compressors.append(Compressor(

	991 '.(XX/1) bswap.r8.*()', ('c[8..e]', ''), [('XX/1', )]))

	992 # Add mark '# write to both' to certain versions of CMPXCHG, XADD, and XCHG

	993 if options.bitness == 64:

	994 compressors.append(Compressor(

	995 '.* (?:cmpxchg\|xadd\|xchg).%al\\.\\.%bh[^#]()$',

	996 (' # write to both', ), ((), )))

	997 # "and $0xe0,[%eax..%edi]" is treated specially which means that we list all

	998 # versions of and "[$0x1..$0xff],[%eax..%edi]" separately here.

	999 # Without this rule these ands comprise 2/3 of the whole output!

	1000 if options.bitness == 32:

	1001 compressors.append(Compressor(

	1002 '.*83 (e0 01 and \\$0x1,%eax)()',

	1003 ('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', ' # special and'),

	1004 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS['eax'][r]), )

	1005 for i in range(0x01, 0x100) for r in range(8)] +

	1006 [('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', )]))

	1007 else:

	1008 for reg in ('eax', 'r8d'):

	1009 start_reg = REGISTERS[reg][0]

	1010 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	1011 for index_reg in ('eax', 'r8d'):

	1012 start_index = REGISTERS[index_reg][0]

	1013 end_index = REGISTERS[index_reg][-1]

	1014 compressors.append(Compressor(

	1015 '.83 (e0 01 and \\$0x1,%' + reg + ').'

	1016 'input_rr=(any_nonspecial); output_rr=(%' + reg + ')()',

	1017 ('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,

	1018 end_reg), '[%{}..%{}]'.format(start_index, end_index),

	1019 '[%{}..%{}]'.format(start_reg, end_reg),

	1020 ' # special and'),

	1021 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS[reg][r]),

	1022 'any_nonspecial', '%' + REGISTERS[reg][r])

	1023 for i in range(0x01, 0x100) for r in range(7 + (reg == 'eax'))] +

	1024 [('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,

	1025 end_reg), '[%{}..%{}]'.format(start_index, end_index),

	1026 '[%{}..%{}]'.format(start_reg, end_reg))]))

	1027

	1028 # "and $e0" and similar are used to align %rsp. All negative values are

	1029 # accepted by validator and there are 127 of these.

	1030 # Consolidate them into one line.

	1031 if options.bitness == 64:

	1032 compressors.append(Compressor(

	1033 '.(?:81\|83) (?:e4\|e5) (80) (?:00 00 00 \|) and \\$0x(80),%r[bs]p.()',

	1034 ('[80..ff]', '[80..ff]', ' # alignment and'),

	1035 [('{:02x}'.format(i), '{:02x}'.format(i)) for i in range(0x80, 0x100)]))

	1036

	1037 # Merge memory and non-memory access

	1038 if options.bitness == 32:

	1039 letters_and_registers = (('b', 'al', ''), ('w', 'ax', ''), ('l', 'eax', ''))

	1040 else:

	1041 letters_and_registers = (

	1042 ('b', 'al', 'eax'), ('b', 'spl', 'eax'), ('b', 'r8b', 'r8d'),

	1043 ('w', 'ax', 'eax'), ('w', 'r8w', 'r8d'),

	1044 ('l', 'eax', 'eax'), ('l', 'r8d', 'r8d'),

	1045 ('q', 'rax', 'eax'), ('q', 'r8', 'r8d')

	1046 )

	1047 for letter, reg, out_reg in letters_and_registers:

	1048 start_reg = REGISTERS[reg][0]

	1049 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	1050 all_regs = '[%{}..%{}]'.format(start_reg, end_reg)

	1051 regs_mark = '[%{}..%{} or memory]'.format(start_reg, end_reg)

	1052 if options.bitness == 64:

	1053 start_out = REGISTERS[out_reg][0]

	1054 end_out = REGISTERS[out_reg][-1 if out_reg[0:2] != 'r8' else -2]

	1055 out_regs = '[%{}..%{}]'.format(start_out, end_out)

	1056 for notes in ('', ' # rm to reg', ' # reg to rm'):

	1057 compressors.append(Compressor(

	1058 '.* \\w(' + letter + ') .(\\[memory]).*()()',

	1059 ('[{}]?'.format(letter), regs_mark, '', ''),

	1060 ((letter, '[memory]', ''), ('', all_regs, notes))))

	1061 if options.bitness == 64:

	1062 for index_reg in ('eax', 'r8d'):

	1063 start_index = REGISTERS[index_reg][0]

	1064 end_index = REGISTERS[index_reg][-1]

	1065 index_regs = '[%{}..%{}]'.format(start_index, end_index)

	1066 for output_rrs in ((None, out_regs), (out_regs, None), (None, None)):

	1067 compressors.append(Compressor(

	1068 '.* \\w(' + letter + ') .(\\[memory]).*; '

	1069 'input_rr=(\\[%[a-z0-9]..%[a-z0-9]\\]); '

	1070 'output_rr=(\\[%[a-z0-9]..%[a-z0-9]\\]\|None)()()',

	1071 ('[{}]?'.format(letter), regs_mark, index_regs,

	1072 output_rrs[0] if output_rrs[0] is not None else output_rrs[1],

	1073 '', ''),

	1074 ((letter, '[memory]', index_regs, output_rrs[0], ''),

	1075 ('', all_regs, 'any_nonspecial', output_rrs[1], notes))))

	1076

	1077 # REX compressors

	1078 if options.bitness == 64:

	1079 # First pretty complex set of compressors to combine versions of REX with

	1080 # three lowest bits in different states.

	1081 register_kind_pairs = (

	1082 ( None, None),

	1083 ( 'al', 'al'), ( 'al', None), (None, 'al'),

	1084 ( 'ax', 'al'), ( 'al', 'ax'),

	1085 ( 'ax', 'ax'), ( 'ax', None), (None, 'ax'),

	1086 ( 'eax', 'al'), ( 'al', 'eax'),

	1087 ( 'eax', 'ax'), ( 'ax', 'eax'),

	1088 ( 'eax', 'eax'), ( 'eax', None), (None, 'eax'),

	1089 ( 'rax', 'al'), ( 'al', 'rax'),

	1090 ( 'rax', 'ax'), ( 'ax', 'rax'),

	1091 ( 'rax', 'eax'), ( 'eax', 'rax'),

	1092 ( 'rax', 'rax'), ( 'rax', None), (None, 'rax'),

	1093 ( 'eax', 'mm0'), ( 'mm0', 'eax'),

	1094 ( 'rax', 'mm0'), ( 'mm0', 'rax'),

	1095 ( 'mm0', 'eax'), ( 'eax', 'mm0'),

	1096 ( 'mm0', 'rax'), ( 'rax', 'mm0'),

	1097 ( 'eax', 'xmm0'),

	1098 ( 'rax', 'xmm0'),

	1099 ('xmm0', 'eax'),

	1100 ('xmm0', 'rax'),

	1101 ( 'mm0', 'mm0'), ( 'mm0', None), (None, 'mm0'),

	1102 ( 'mm0', 'xmm0'),

	1103 ('xmm0', 'mm0'),

	1104 ('xmm0', 'xmm0'),

	1105 ('xmm0', 'ymm0'), ('xmm0', None), (None, 'xmm0'),

	1106 ('ymm0', 'xmm0'),

	1107 ('ymm0', 'ymm0'), ('ymm0', None), (None, 'ymm0'),

	1108 )

	1109 r8 = {

	1110 'al': 'r8b',

	1111 'ax': 'r8w',

	1112 'eax': 'r8d',

	1113 'rax': 'r8',

	1114 'mm0': 'mmalt',

	1115 'xmm0': 'xmm8',

	1116 'ymm0': 'ymm8'

	1117 }

	1118 for reg, rm in register_kind_pairs:

	1119 for last_reg, last_rm in ((-1, -1), (-1, -2), (-2, -1), (-2, -2)):

	1120 if reg:

	1121 start_reg = REGISTERS[reg][0]

	1122 start_reg8 = REGISTERS[r8[reg]][0]

	1123 end_reg = REGISTERS[reg][-1]

	1124 end_reg0 = 'dil' if reg == 'al' else end_reg

	1125 end_reg8 = REGISTERS[r8[reg]][last_reg]

	1126 reg_regex = '\\[(%' + start_reg + '\\.\\.%' + end_reg + ')]'

	1127 reg_regex0 = '\\[(%' + start_reg + '\\.\\.%' + end_reg0 + ')]'

	1128 elif last_reg == -2:

	1129 continue

	1130 if rm:

	1131 start_rm = REGISTERS[rm][0]

	1132 start_rm8 = REGISTERS[r8[rm]][0]

	1133 end_rm = REGISTERS[rm][-1]

	1134 end_rm0 = 'dil' if rm == 'al' else end_rm

	1135 end_rm8 = REGISTERS[r8[rm]][last_rm]

	1136 rm_regex = ('\\[(%' + start_rm + '\\.\\.%' + end_rm + ')'

	1137 '(?: or memory)?]')

	1138 rm_regex0 = ('\\[(%' + start_rm + '\\.\\.%' + end_rm0 + ')'

	1139 '(?: or memory)?]')

	1140 elif last_rm == -2:

	1141 continue

	1142 for rexw in (True, False):

	1143 for input_rr in (True, False):

	1144 for output_rr in (True, False) if reg or rm else (None, ):

	1145 for rm_to_reg in (True, False) if reg and rm else (None, ):

	1146 # Legacy prefixes

	1147 regex = '.:(?: 26\| 2e\| 36\| 3e\| 64\| 65\| 66\| 67\| f0\| f2\| f3)'

	1148 # REX

	1149 regex += '( 48).' if rexw else '( 40\|).'

	1150 # Replacement text

	1151 replacement_tuple = (

	1152 ' [REX:48..4f]' if rexw else ' [REX:40..47]?', )

	1153 if reg:

	1154 replacement_regs = '%{}..%{}'.format(start_reg, end_reg8)

	1155 if rm:

	1156 replacement_rms = '%{}..%{}'.format(start_rm, end_rm8)

	1157 # Instruction arguments

	1158 if not reg and not rm:

	1159 pass

	1160 elif not reg and rm:

	1161 if rexw:

	1162 regex += rm_regex0 + '.*'

	1163 else:

	1164 regex += rm_regex + '.*'

	1165 replacement_tuple += (replacement_rms, )

	1166 elif reg and not rm:

	1167 if rexw:

	1168 regex += reg_regex0 + '.*'

	1169 else:

	1170 regex += reg_regex + '.*'

	1171 replacement_tuple += (replacement_regs, )

	1172 elif rm_to_reg:

	1173 if rexw:

	1174 regex += rm_regex0 + ',' + reg_regex0 + '.*'

	1175 else:

	1176 regex += rm_regex + ',' + reg_regex + '.*'

	1177 replacement_tuple += (replacement_rms, replacement_regs)

	1178 else:

	1179 if rexw:

	1180 regex += reg_regex0 + ',' + rm_regex0 + '.*'

	1181 else:

	1182 regex += reg_regex + ',' + rm_regex + '.*'

	1183 replacement_tuple += (replacement_regs, replacement_rms)

	1184 # Input and output restricted registers

	1185 if input_rr:

	1186 regex += 'input_rr=\\[(%eax\\.\\.%edi)].*'

	1187 replacement_tuple += ('%eax..%r15d', )

	1188 if output_rr:

	1189 regex += 'output_rr=\\[(%eax\\.\\.%edi)].*'

	1190 replacement_tuple += ('%eax..%r14d', )

	1191 regex += '()'

	1192 replacement_tuple += ('', )

	1193 # Replacement cases

	1194 replacement_tuples = ()

	1195 for byte in (range(0x48, 0x50)

	1196 if rexw

	1197 else range(0x40, 0x48) + ['']):

	1198 replacement_case = (

	1199 ' {:02x}'.format(byte) if byte else byte, )

	1200 if byte:

	1201 if rm:

	1202 if byte & 0x1:

	1203 replacement_rms = '%{}..%{}'.format(start_rm8, end_rm8)

	1204 else:

	1205 replacement_rms = '%{}..%{}'.format(start_rm, end_rm0)

	1206 if byte & 0x2:

	1207 replacement_index = '%r8d..%r15d'

	1208 else:

	1209 replacement_index = '%eax..%edi'

	1210 if reg:

	1211 if byte & 0x4:

	1212 replacement_regs = '%{}..%{}'.format(start_reg8,

	1213 end_reg8)

	1214 else:

	1215 replacement_regs = '%{}..%{}'.format(start_reg,

	1216 end_reg0)

	1217 else:

	1218 if rm:

	1219 replacement_rms = '%{}..%{}'.format(start_rm, end_rm)

	1220 replacement_index = '%eax..%edi'

	1221 if reg:

	1222 replacement_regs = '%{}..%{}'.format(start_reg, end_reg)

	1223 if not reg and not rm:

	1224 pass

	1225 elif not reg and rm:

	1226 replacement_case += (replacement_rms, )

	1227 if byte:

	1228 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'

	1229 else:

	1230 final_rr = '%eax..%edi'

	1231 elif reg and not rm:

	1232 replacement_case += (replacement_regs, )

	1233 if byte:

	1234 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'

	1235 else:

	1236 final_rr = '%eax..%edi'

	1237 elif rm_to_reg:

	1238 replacement_case += (replacement_rms, replacement_regs)

	1239 if byte:

	1240 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'

	1241 else:

	1242 final_rr = '%eax..%edi'

	1243 else:

	1244 replacement_case += (replacement_regs, replacement_rms)

	1245 if byte:

	1246 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'

	1247 else:

	1248 final_rr = '%eax..%edi'

	1249 if input_rr: replacement_case += (replacement_index, )

	1250 if output_rr: replacement_case += (final_rr, )

	1251 replacement_tuples += (replacement_case, )

	1252 compressors.append(Compressor(

	1253 regex, replacement_tuple, replacement_tuples))

	1254 # This is pretty simple compressor to combine two lines with different REX.W

	1255 # bits (only if they are otherwise identical).

	1256 compressors.append(Compressor(

	1257 '.(\\[REX:40\\.\\.47]\\?).()', ('[REX:40..4f]?', ''),

	1258 (('[REX:40..47]?', ), ('[REX:48..4f]', ))))

	1259

	1260

	1261 def ShowProgress(rule, instruction):

	1262 if rule not in ShowProgress.rules_shown:

	1263 first_print = True

	1264 ShowProgress.rules_shown[rule]=len(ShowProgress.rules_shown)

	1265 else:

	1266 first_print = False

	1267 print >> sys.stderr, '-------- Compressed --------'

	1268 print >> sys.stderr, 'Rule:', ShowProgress.rules_shown[rule]

	1269 print >> sys.stderr, '--------'

	1270 compressor = compressors[rule]

	1271 match = compressor.regex.match(instruction)

	1272 assert match

	1273 format_str = CompressionTemplate(instruction, match, '{{{}}}')

	1274 replacements = sorted(format_str.format(*replacement)

	1275 for replacement in compressor.replacements)

	1276 if len(compressor.replacements) <= 4 or first_print:

	1277 for replacement in replacements:

	1278 print >> sys.stderr, replacement

	1279 else:

	1280 print >> sys.stderr, replacements[0]

	1281 print >> sys.stderr, "..."

	1282 print >> sys.stderr, replacements[-1]

	1283 print >> sys.stderr, '--------'

	1284 print >> sys.stderr, 'Compressed', (

	1285 format_str + '{{{}}}').format(*compressor.subst)

	1286 ShowProgress.rules_shown = {}

	1287

	1288

	1289 def main():

	1290 # We are keeping these global to share state graph and compressors

	1291 # between workers spawned by multiprocess. Passing them every time is slow.

	1292 global options, xml_file

	1293 global dfa

	1294 global worker_validator

	1295 options, xml_file = ParseOptions()

	1296 dfa = dfa_parser.ParseXml(xml_file)

	1297 worker_validator = validator.Validator(

	1298 validator_dll=options.validator_dll,

	1299 decoder_dll=options.decoder_dll)

	1300 PrepareCompressors()

	1301

	1302 assert dfa.initial_state.is_accepting

	1303 assert not dfa.initial_state.any_byte

	1304

	1305 print >> sys.stderr, len(dfa.states), 'states'

	1306

	1307 num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

	1308

	1309 # We can't just write 'num_suffixes[dfa.initial_state]' because

	1310 # initial state is accepting.

	1311 total_instructions = sum(

	1312 num_suffixes[t.to_state]

	1313 for t in dfa.initial_state.forward_transitions.values())

	1314 print >> sys.stderr, total_instructions, 'regular instructions total'

	1315

	1316 tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)

	1317 print >> sys.stderr, len(tasks), 'tasks'

	1318

	1319 pool = multiprocessing.Pool()

	1320

	1321 results = pool.imap(Worker, tasks)

	1322

	1323 total = 0

	1324 num_valid = 0

	1325 full_output = set()

	1326 for prefix, count, valid_count, output, trace in results:

	1327 print >> sys.stderr, 'Prefix:', ', '.join(map(hex, prefix))

	1328 total += count

	1329 num_valid += valid_count

	1330 full_output \|= output

	1331 for rule, instruction in trace:

	1332 ShowProgress(rule, instruction)

	1333 for instruction in sorted(Compressed(full_output,

	1334 compressors,

	1335 ShowProgress)):

	1336 print instruction

	1337

	1338 print >> sys.stderr, total, 'instructions were processed'

	1339 print >> sys.stderr, num_valid, 'valid instructions'

	1340

	1341

	1342 if __name__ == '__main__':

	1343 main()

OLD	NEW

« no previous file with comments | « no previous file | src/trusted/validator_ragel/testdata/32bit_regular.golden » ('j') | no next file with comments »