src/trusted/validator_ragel/compress_regular_instructions.py - Issue 49183002: Regular instructions golden file test.

Side by Side Diff: src/trusted/validator_ragel/compress_regular_instructions.py

Issue 49183002: Regular instructions golden file test. Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/

Patch Set: Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # Copyright (c) 2013 The Native Client Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """

	6 Traverse the validator's DFA, collect all "normal" instruction and then

	7 compress output. Note: "anybyte fields" (immediates and displacements)

	8 are always filled with zeros. Otherwise processing of sextillions (sic!)

	9 of possibilities will take too long.

	10

	11 Each rule is applied only when all variants are accepted by validator.

	12 The following compression rules are present:

	13

	14 1. Compress ModR/M (+SIB & displacement).

	15 Instruction: 00 00 add %al,(%rax)

	16 ...

	17 Instruction: 00 ff add %bh,%bh

	18 becomes

	19 Instruction: 00 XX add [%al..%bh],[%al..%bh or memory]

	20

	21 1a. Compress ModR/M (+SIB & displacement) memory-only.

	22 Instruction: f0 01 00 lock add %eax,(%eax)

	23 ...

	24 Instruction: f0 01 bf 00 00 00 00 lock add %edi,0x0(%edi)

	25 becomes

	26 Instruction: f0 01 XX lock add [%eax..edi],[memory]

	27

	28 1b. Compress ModR/M register only.

	29 Instruction: 66 0f 50 c0 movmskpd %xmm0,%eax

	30 ...

	31 Instruction: 66 0f 50 ff movmskpd %xmm7,%edi

	32 becomes

	33 Instruction: 66 0f 50 XX movmskpd [%xmm0..%xmm7],[%eax..edi]

	34

	35 2. Compress ModR/M (+SIB & displacement) with opcode extension.

	36 Instruction: 0f 90 00 seto (%eax)

	37 ...

	38 Instruction: 0f 90 c7 seto %bh

	39 becomes

	40 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	41

	42 2a. Compress ModR/M (+SIB & displacement) memory-only with opcode extension.

	43 Instruction: f0 ff 00 lock incl (%eax)

	44 ...

	45 Instruction: f0 ff 84 ff 00 00 00 00 lock incl 0x0(%edi,%edi,8)

	46 becomes

	47 Instruction: f0 ff XX/1 lock decl [memory]

	48

	49 2b. Compress ModR/M register-only with opcode extension.

	50 Instruction: 0f 71 d0 00 psrlw $0x0,%mm0

	51 ...

	52 Instruction: 0f 71 d7 00 psrlw $0x0,%mm7

	53 becomes

	54 Instruction: 66 0f 71 XX/2 00 psrlw $0x0,[%mm0..%mm7]

	55

	56 3. Compress register-in-opcode.

	57 Instruction: d9 c0 fld %st(0)

	58 ...

	59 Instruction: d9 c7 fld %st(7)

	60 becomes

	61 Instruction: Instruction: d9 c[0..7] fld [%st(0)..%st(7)]

	62

	63 Only applies if all possible register accesses are accepted by validator.

	64

	65 4. Special compressor for "set" instruction.

	66 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	67 ...

	68 Instruction: 0f 90 XX/7 seto [%al..%bh or memory]

	69 becomes

	70 Instruction: 0f 90 XX seto [%al..%bh or memory]

	71 """

	72

	73 import itertools

	74 import multiprocessing

	75 import optparse

	76 import os

	77 import re

	78 import subprocess

	79 import sys

	80 import tempfile

	81 import traceback

	82

	83 import dfa_parser

	84 import dfa_traversal

	85 import validator

	86

	87

	88 # Register names in 'natual' order (as defined by IA32/x86-64 ABI)

	89 #

	90 # X86-64 ABI splits all registers in groups of 8 because it uses 3-bit field

	91 # in opcode, ModR/M, and/or SIB bytes to encode them.

	92 #

	93 # In most cases there are 16 registers of a given kind and two such groups,

	94 # but there are couple of exceptions:

	95 # 1. There are 20 8-bit registers and three groups (two of them overlap)

	96 # 2. There are eight X87 and MMX registers thus two groups are identical

	97 #

	98 # We use typical register from a group to name the whole group. Most groups

	99 # use first register, but 'spl' group uses fifth register because it's first

	100 # four registers are the same as 'al' group. We use mnemonic name 'mmalt'

	101 # to represent the "evil mirror" of the 'mm0' group.

	102 REGISTERS = {

	103 'al': [ 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh' ],

	104 'spl': [ 'al', 'cl', 'dl', 'bl', 'spl', 'bpl', 'sil', 'dil' ],

	105 'ax': [ 'ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di' ],

	106 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi' ],

	107 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi' ],

	108 'r8b': [ 'r{}b'.format(N) for N in range(8,16) ],

	109 'r8w': [ 'r{}w'.format(N) for N in range(8,16) ],

	110 'r8d': [ 'r{}d'.format(N) for N in range(8,16) ],

	111 'r8': [ 'r{}'.format(N) for N in range(8,16) ],

	112 'mm0': [ 'mm{}'.format(N) for N in range(8) ],

	113 'mmalt': [ 'mm{}'.format(N) for N in range(8) ],

	114 'st(0)': [ 'st({})'.format(N) for N in range(8) ],

	115 'xmm0': [ 'xmm{}'.format(N) for N in range(8) ],

	116 'xmm8': [ 'xmm{}'.format(N) for N in range(8,16) ],

	117 'ymm0': [ 'ymm{}'.format(N) for N in range(8) ],

	118 'ymm8': [ 'ymm{}'.format(N) for N in range(8,16) ]

	119 }

	120

	121

	122 NOP = 0x90

	123

	124

	125 def PadToBundleSize(bytes):

	126 assert len(bytes) <= validator.BUNDLE_SIZE

	127 return bytes + [NOP] * (validator.BUNDLE_SIZE - len(bytes))

	128

	129

	130 # In x86-64 mode we have so-called 'restricted register' which is used to

	131 # tie two groups together. Some instructions require particular value to

	132 # be stored in this variable, while some accept any non-special restricted

	133 # register (%ebp and %esp are special because they can only be accepted by

	134 # a few 'special' instructions).

	135 #

	136 # You can find more details in the "NaCl SFI model on x86-64 systems" manual.

	137 #

	138 # We try to feed all possible 'restricted registers' into validator and then

	139 # classify the instruction using this map. If set of acceptable 'restricted

	140 # registers' is not here, then it's an error in validator.

	141 ACCEPTABLE_X86_64_INPUTS = {

	142 0x00001: 'input_rr=%eax',

	143 0x00002: 'input_rr=%ecx',

	144 0x00004: 'input_rr=%edx',

	145 0x00008: 'input_rr=%ebx',

	146 0x00010: 'input_rr=%esp',

	147 0x00020: 'input_rr=%ebp',

	148 0x00040: 'input_rr=%esi',

	149 0x00080: 'input_rr=%edi',

	150 0x00100: 'input_rr=%r8d',

	151 0x00200: 'input_rr=%r9d',

	152 0x00400: 'input_rr=%r10d',

	153 0x00800: 'input_rr=%r11d',

	154 0x01000: 'input_rr=%r12d',

	155 0x02000: 'input_rr=%r13d',

	156 0x04000: 'input_rr=%r14d',

	157 0x08000: 'input_rr=%r15d',

	158 0x1ffcf: 'input_rr=any_nonspecial'

	159 }

	160

	161 # Any instruction must produce either None or one of fifteen registers as an

	162 # output 'restricted register' value. 'r15d' is NOT acceptable as an output.

	163 ACCEPTABLE_X86_64_OUTPUT_REGISTERS = tuple(

	164 '%' + reg for reg in (REGISTERS['eax'] + REGISTERS['r8d'])[0:-1])

	165

	166

	167 def ValidateInstruction(instruction, validator_inst):

	168 bundle = ''.join(map(chr, PadToBundleSize(instruction)))

	169 if options.bitness == 32:

	170 result = validator_inst.ValidateChunk(bundle, bitness=32)

	171 return result, []

	172 else:

	173 valid_inputs = 0

	174 known_final_rr = None

	175 output_rr = None

	176 # Note that iteration order is aligned with ACCEPTABLE_X86_64_INPUTS array

	177 # above.

	178 for bit, initial_rr in enumerate(validator.ALL_REGISTERS + [None]):

	179 valid, final_rr = validator_inst.ValidateAndGetFinalRestrictedRegister(

	180 bundle, len(instruction), initial_rr)

	181 if valid:

	182 # final_rr should not depend on input_rr

	183 assert valid_inputs == 0 or known_final_rr == final_rr

	184 valid_inputs \|= 1 << bit

	185 known_final_rr = final_rr

	186 # If nothing is accepted then instruction is not valid. Easy and simple.

	187 if valid_inputs == 0: return False, []

	188 # If returned value in unacceptable we'll get IndexError here and this

	189 # test will fail

	190 if known_final_rr is not None:

	191 output_rr = ACCEPTABLE_X86_64_OUTPUT_REGISTERS[known_final_rr]

	192 # If collected valid_inputs are unacceptable we'll get KeyError here and

	193 # this test will fail

	194 return True, [ACCEPTABLE_X86_64_INPUTS[valid_inputs],

	195 'output_rr={}'.format(output_rr)]

	196

	197

	198 class WorkerState(object):

	199 def __init__(self, prefix, validator):

	200 self.total_instructions = 0

	201 self.num_valid = 0

	202 self.validator = validator

	203 self.output = set()

	204 self.trace = []

	205

	206

	207 def ReceiveInstruction(self, bytes):

	208 self.total_instructions += 1

	209 result, notes = ValidateInstruction(bytes, self.validator)

	210 if result:

	211 self.num_valid += 1

	212 dis = self.validator.DisassembleChunk(

	213 ''.join(map(chr, bytes)),

	214 bitness=options.bitness)

	215 for line_nr in xrange(len(dis)):

	216 dis[line_nr] = str(dis[line_nr])

	217 assert dis[line_nr][0:17] == 'Instruction(0x' + str(line_nr) + ': '

	218 assert dis[line_nr][-1:] == ')'

	219 dis[line_nr] = dis[line_nr][17:-1]

	220 # If %rip is involved then comment will be different depending on the

	221 # instruction length. Eliminate it.

	222 if '(%rip)' in dis[0]:

	223 dis[0] = re.sub(' # 0x[ ][0-9a-fA-F]', '', dis[0])

	224 # Zero displacements are represented as 0x0 for all instructions except

	225 # jumps where they disassembled as non-zero due to %eip/%rip-relative

	226 # addressing. We replace this displacement with %eip/%rip to simplify

	227 # compression.

	228 if ' 0x' in dis[0] and ' 0x0' not in dis[0]:

	229 for bytes in xrange(1, 16):

	230 dis[0] = re.sub(

	231 '(' + '(?:[0-9a-fA-F][0-9a-fA-F] ){' + str(bytes) + '} .* )' +

	232 hex(bytes) + '(.*)',

	233 '\\1%eip\\2' if options.bitness == 32 else '\\1%rip\\2',

	234 dis[0]);

	235 dis[0] = 'Instruction: ' + dis[0]

	236 dis += notes

	237 self.output.add('; '.join(dis))

	238

	239

	240 def RecordTrace(self, compressor_nr, instruction):

	241 self.trace.append((compressor_nr, instruction))

	242

	243

	244 # Compressor has three slots: regex (which picks apart given instruction),

	245 # subst (which is used to denote compressed version) and replacements (which

	246 # are used to generate set of instructions from a given code).

	247 #

	248 # Example compressor:

	249 # regex = '.?[0-9a-fA-F]([0-7]) \\w (%e(?:[abcd]x\|[sb]p\|[sd]i)).*()'

	250 # subst = ('[0-7]', '[%eax..%edi]', ' # register in opcode')

	251 # replacements = ((0, '%eax'), (1, '%ecx'), (2, '%edx'), (3, '%ebx')

	252 # (4, '%esp'), (5, '%ebp'), (6, '%esi'), (7, '%edi'))

	253 #

	254 # When faced with instriuction '40 inc %eax' it will capture the following

	255 # pieces of said instruction: '4[0] inc [%eax]'.

	256 #

	257 # Then it will produce the following eight instructions:

	258 # '40 inc %eax'

	259 # '41 inc %ecx'

	260 # '42 inc %edx'

	261 # '43 inc %ebx'

	262 # '44 inc %esp'

	263 # '45 inc %ebp'

	264 # '46 inc %esi'

	265 # '47 inc %edi'

	266 #

	267 # If all these instructions can be found in a set of instructions then

	268 # compressor will remove them from said set and will insert one replacement

	269 # "compressed instruction" '4[0-7] inc [%eax..%edi] # register in opcode'.

	270 #

	271 # Note that last group is only used in the replacement. It's used to grab marks

	272 # added by previous compressors and to replace them with a new mark.

	273 class Compressor(object):

	274 __slots__ = [

	275 'regex',

	276 'subst',

	277 'replacements'

	278 ]

	279

	280 def __init__(self, regex, subst, replacements=None):

	281 self.regex = re.compile(regex)

	282 self.subst = subst

	283 self.replacements = [] if replacements is None else replacements

	284

	285

	286 def CompressionTemplate(instruction, match, mark):

	287 """ Replace all match groups with the mark. """

	288 pos = 0

	289 format_str = ''

	290 for group in range(1, len(match.groups())):

	291 format_str += instruction[pos:match.start(group)] + mark

	292 pos = match.end(group)

	293 return format_str + instruction[pos:match.start(len(match.groups()))]

	294

	295

	296 def CompressOneMatch(instructions, instruction, match, compressor):

	297 format_str = CompressionTemplate(instruction, match, '{}')

	298 subset = set()

	299 for replacement in compressor.replacements:

	300 replacement_str = format_str.format(*replacement)

	301 if not replacement_str in instructions:

	302 return (False, instructions)

	303 subset.add(replacement_str)

	304 instructions -= subset

	305 instructions.add((format_str + '{}').format(*compressor.subst))

	306 return (True, instructions)

	307

	308

	309 def CompressOneInstruction(instructions, compressors, split, cache):

	310 sorted_instructions = (sorted(i for i in instructions if i > split) +

	311 sorted(i for i in instructions if i < split))

	312 for instruction in sorted_instructions:

	313 if instruction in cache:

	314 compressors_list = cache[instruction]

	315 for compressor_nr, match, compressor in compressors_list:

	316 result, instructions = CompressOneMatch(

	317 instructions, instruction, match, compressor)

	318 if result:

	319 return (instructions, compressor_nr, instruction)

	320 else:

	321 compressors_list = []

	322 for compressor_nr, compressor in enumerate(compressors):

	323 match = compressor.regex.match(instruction)

	324 if match:

	325 compressors_list.append((compressor_nr, match, compressor))

	326 result, instructions = CompressOneMatch(

	327 instructions, instruction, match, compressor)

	328 if result:

	329 return (instructions, compressor_nr, instruction)

	330 cache[instruction] = compressors_list

	331 return (instructions, False, False)

	332

	333

	334 def Compressed(instructions, compressors, show_progress):

	335 split = ''

	336 cache = {}

	337 while True:

	338 instructions, rule, split = CompressOneInstruction(

	339 instructions, compressors, split, cache)

	340 if rule is False: break

	341 show_progress(rule, split)

	342 return instructions

	343

	344

	345 def Worker((prefix, state_index)):

	346 worker_state = WorkerState(prefix, worker_validator)

	347

	348 try:

	349 dfa_traversal.TraverseTree(

	350 dfa.states[state_index],

	351 final_callback=worker_state.ReceiveInstruction,

	352 prefix=prefix,

	353 anyfield=0)

	354 if (prefix[0] != 0x0f or prefix[1] != 0x0f): # Skip 3DNow! instructions

	355 worker_state.output = Compressed(set(worker_state.output),

	356 compressors,

	357 worker_state.RecordTrace)

	358 except Exception as e:

	359 traceback.print_exc() # because multiprocessing imap swallows traceback

	360 raise

	361

	362 return (

	363 prefix,

	364 worker_state.total_instructions,

	365 worker_state.num_valid,

	366 worker_state.output,

	367 worker_state.trace)

	368

	369

	370 def ParseOptions():

	371 parser = optparse.OptionParser(usage='%prog [options] xmlfile')

	372

	373 parser.add_option('--bitness',

	374 choices=['32', '64'],

	375 help='The subarchitecture: 32 or 64')

	376 parser.add_option('--validator_dll',

	377 help='Path to librdfa_validator_dll')

	378 parser.add_option('--decoder_dll',

	379 help='Path to librdfa_decoder_dll')

	380

	381 options, args = parser.parse_args()

	382 options.bitness = int(options.bitness)

	383

	384 if len(args) != 1:

	385 parser.error('specify one xml file')

	386

	387 (xml_file, ) = args

	388

	389 return options, xml_file

	390

	391

	392 # Version suitable for use in regular expressions

	393 REGISTERS_RE = REGISTERS.copy()

	394 REGISTERS_RE['st(0)'] = [ 'st\${}\$'.format(N) for N in range(8) ]

	395 REGISTERS_RE['st\$0\$'] = REGISTERS_RE['st(0)']

	396

	397 # Index names in 'natual' order (as defined by IA32/x86-64 ABI)

	398 INDEXES = {

	399 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'eiz', 'ebp', 'esi', 'edi' ],

	400 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'riz', 'rbp', 'rsi', 'rdi' ],

	401 'r8': [ 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15' ]

	402 }

	403 # Register which can not be used as base in 64-bit mode in all incarnations

	404 X86_64_BASE_REGISTERS = set([

	405 '%spl', '%bpl', '%r15b',

	406 '%sp', '%bp', '%r15w',

	407 '%esp', '%ebp', '%r15d',

	408 '%rsp', '%rbp', '%r15',

	409 '%rip'

	410 ])

	411

	412

	413 def InstructionIsDangerous(input, output, register_write,

	414 writes_to, memory_accessed=False,

	415 base_text='%r15', index_text='%r15'):
	halyavin 2013/11/18 09:50:47 Can we use %riz as default value instead? Can we use %riz as default value instead? khim 2013/11/18 12:10:21 Hmm... Yes, I just wanted to use "always valid" re Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > Can we use %riz as default value instead? Hmm... Yes, I just wanted to use "always valid" register as default, but it's not important for base.
	416 """ Check if instruction with given replacements will be dangerous

	417

	418 Args:

	419 input: input argument

	420 output: output argument

	421 register_write: three-state selector

	422 'sandbox' - instruction can be used to produce "restricted register"

	423 'protect' - instruction can damage output, protect "special registers"

	424 'ignore' - instruction does not affect it's operands (e.g. test) or

	425 is used with non-GP registers (X87, MMX, XMM, etc)

	426 memory_accessed: True if instruction accesses memory

	427 base: base register (if memory is accessed)

	428 index: index register (if memory is accessed)

	429

	430 Returns:

	431 True if instruction should be rejected by validator

	432 """

	433 if memory_accessed:

	434 if base_text not in X86_64_BASE_REGISTERS:

	435 return True

	436 if index_text in X86_64_BASE_REGISTERS - set(['%r15']):
	halyavin 2013/11/18 09:50:47 Why index_text can be %r15? Is (%r15,%r15,) valid? Why index_text can be %r15? Is (%r15,%r15,) valid? If you need neutral value, use %riz. khim 2013/11/18 12:10:21 That's two separate questions :-) Yes, I need a ne Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > Why index_text can be %r15? Is (%r15,%r15,) valid? If you need neutral value, > use %riz. That's two separate questions :-) Yes, I need a neutral value for index and I wanted to use something valid for base since I initially had memory_accessed=True by default, but the fact that (%r15,%r15,1) is valid is just a quirk of our automata: we don't allow modification of %r15 (it must be disallowed even if it's not used as restricted register for memory access!) thus it's safe to accept %r15 as index. Note: ValidateInstruction actually verifies that %r15 is never produced as restricted register.
	437 return True

	438 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:

	439 return True

	440 if register_write == 'sandbox' and output == '%r15d':

	441 return True

	442 if (writes_to == 'both' and input in X86_64_BASE_REGISTERS):
	halyavin 2013/11/18 09:50:47 We don't need brackets here. We don't need brackets here. khim 2013/11/18 12:10:21 Done. Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > We don't need brackets here. Done.
	443 return True

	444 return False

	445

	446

	447 def AppendOperandsReplacement(replacement, rm_text, reg, modrm, writes_to):

	448 """ Appends replacement text to replacement list

	449

	450 Args:

	451 replacement: replacement list

	452 rm_text: replacement for rm field

	453 reg: register kind (or None if reg field is used as opcode extension)

	454 reg_field: value of reg field in ModR/M byte (is only used if reg != None)
	halyavin 2013/11/18 09:50:47 You changed the argument from reg_field to modrm. You changed the argument from reg_field to modrm. khim 2013/11/18 12:10:21 Done. Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > You changed the argument from reg_field to modrm. Done.
	455 writes_to: three-state selector

	456 'reg' - instruction uses rm as source, reg as destination

	457 'rm' - instruction uses reg as source, rm as destination

	458 'both' - instruction writes to both reg and rm

	459

	460 Returns:

	461 input: textual representation of input argument

	462 output: textual representation of output argument

	463

	464 Side-effect:

	465 output (if reg is None) or (input, output) tuple (if reg is not None)

	466 are added to replacement list.

	467 """

	468 reg_field = (modrm >> 3) & 0x07
	halyavin 2013/11/18 09:50:47 Move this line to else clause. Move this line to else clause. khim 2013/11/18 12:10:21 Done. Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > Move this line to else clause. Done.
	469 if reg is None:

	470 assert writes_to == 'rm'

	471 input, output = None, rm_text

	472 replacement.append(output)

	473 else:

	474 reg_text = '%' + REGISTERS[reg][reg_field]

	475 if writes_to == 'reg':

	476 input, output = rm_text, reg_text

	477 else: # rm, both

	478 input, output = reg_text, rm_text

	479 replacement.extend([input, output])

	480 return input, output

	481

	482

	483 def ModRMRegisterReplacements(rm, reg=None, writes_to='rm', opcode_bits=0,

	484 register_write='ignore'):

	485 """Creates replacement tuples list for register-to-register instructions

	486

	487 Args:

	488 rm: rm operand kind (see REGISTERS array)

	489 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	490 writes_to: three-state selector

	491 'reg' - instruction uses rm as source, reg as destination

	492 'rm' - instruction uses reg as source, rm as destination

	493 'both' - instruction writes to both reg and rm

	494 opcode_bits: opcode extensions code (used when reg is None)

	495 register_write: three-state selector

	496 'sandbox' - instruction can be used to produce "restricted register"

	497 'protect' - instruction can damage output, protect "special registers"

	498 'ignore' - instruction does not affect it's operands (e.g. test) or

	499 is used with non-GP registers (X87, MMX, XMM, etc)

	500 Returns:

	501 List of replacement tuples

	502 """

	503 # Reg field can be used either as reg or as opcode extension, but not both

	504 assert reg is None or opcode_bits == 0

	505

	506 output_key = (options.bitness, reg, rm, writes_to, opcode_bits,

	507 register_write)

	508 if output_key in ModRMRegisterReplacements.replacements:

	509 return ModRMRegisterReplacements.replacements[output_key]

	510

	511 replacements = []

	512

	513 # Two upper bits of ModR/M byte (mod field) must be equal to 11

	514 # This gives us range from 0xc0 to 0xff but we are going from the

	515 # end to make rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).

	516 if reg is None:

	517 # reg field is used as opcode extension

	518 byte_range = [byte

	519 for byte in range(0xff, 0xbf, -1)

	520 if (byte >> 3) & 0x7 == opcode_bits]

	521 else:

	522 byte_range = range(0xff, 0xbf, -1)

	523

	524 for modrm in byte_range:

	525 rm_field = (modrm & 0x07)

	526 rm_text = '%' + REGISTERS[rm][rm_field]

	527 byte_text = '{:02x}'.format(modrm)

	528 replacement = [byte_text]

	529 input, output = AppendOperandsReplacement(

	530 replacement, rm_text, reg, modrm, writes_to)

	531 if options.bitness == 64:

	532 replacement.append('any_nonspecial') # input_rr

	533 replacement.append(output if register_write == 'sandbox' else None)

	534 if InstructionIsDangerous(input, output, register_write, writes_to):

	535 continue

	536 replacements.append(tuple(replacement))

	537 ModRMRegisterReplacements.replacements[output_key] = tuple(replacements)

	538 return ModRMRegisterReplacements.replacements[output_key]

	539 ModRMRegisterReplacements.replacements = {}

	540

	541

	542 def BaseOnlyMemoryOperand(modrm, base):

	543 """Creates replacement tuples list for register-to-memory instructions

	544 (base only, no SIB)

	545

	546 Args:

	547 modrm: modrm byte

	548 base: register kind for base

	549 Returns:

	550 bytes_text: replacement for "bytes" group

	551 rm_text: textual representation of "rm" argument

	552 base_text: textual representation of "base" register

	553 """

	554 mod_field = (modrm >> 6) & 0x03

	555 rm_field = (modrm & 0x07)

	556 base_text = '%' + REGISTERS[base][rm_field]

	557 # If RM field == %rbp and MOD field is zero then it's absolute address

	558 # in 32-bit mode and %rip-based address in 64-bit mode

	559 if mod_field == 0 and rm_field == validator.REG_RBP:

	560 bytes_text = '{:02x} 00 00 00 00'.format(modrm)

	561 rm_text = '0x0' if options.bitness == 32 else '0x0(%rip)'

	562 base_text = '%rip'
	halyavin 2013/11/18 09:50:47 Use %riz or %eiz in 32-bit case. Use %riz or %eiz in 32-bit case. khim 2013/11/18 12:10:21 Done. Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > Use %riz or %eiz in 32-bit case. Done.
	563 # Memory access with just a base register

	564 elif mod_field == 0:

	565 bytes_text = '{:02x}'.format(modrm)

	566 rm_text = '({})'.format(base_text)

	567 # Memory access with base and 8bit offset

	568 elif mod_field == 1:

	569 bytes_text = '{:02x} 00'.format(modrm)

	570 rm_text = '0x0({})'.format(base_text)

	571 # Memory access with base and 32bit offset

	572 else: # mod_field == 2

	573 bytes_text = '{:02x} 00 00 00 00'.format(modrm)

	574 rm_text = '0x0({})'.format(base_text)

	575 return bytes_text, rm_text, base_text

	576

	577

	578 def SIBMemoryOperand(modrm, sib, base, index):

	579 """Creates replacement tuples list for register-to-memory instructions

	580 (base only, no SIB)

	581

	582 Args:

	583 modrm: modrm byte

	584 base: register kind for base

	585 Returns:

	586 bytes_text: replacement for "bytes" group

	587 rm_text: textual representation of "rm" argument

	588 base_text: textual representation of "base" register

	589 index_text: textual representation of "base" register
	halyavin 2013/11/18 09:50:47 base->index base->index khim 2013/11/18 12:10:21 Done. Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > base->index Done.
	590 """

	591 mod_field = (modrm >> 6) & 0x03

	592 scale_field = (sib >> 6) & 0x03

	593 index_field = (sib >> 3) & 0x07

	594 base_field = (sib & 0x07)

	595 index_text = '%' + INDEXES[index][index_field]

	596 base_text = '%' + REGISTERS[base][base_field]

	597 scale_text = str(1 << scale_field)

	598 # If BASE is %rbp and MOD == 0 then index with 32bit offset is used

	599 if mod_field == 0 and base_field == validator.REG_RBP:

	600 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	601 # In 64-bit mode this case is displayed as simple absolute address

	602 # In 32-bit mode there are another, shorter, form, but it's used

	603 # for %rip-relative addressing in 64-bit mode

	604 if (options.bitness == 64 and

	605 index_text == '%riz' and

	606 scale_field == 0):
	halyavin 2013/11/18 09:50:47 scale_text == '1'. It will make easier to understa scale_text == '1'. It will make easier to understand that this form is equivalent to one in else branch. khim 2013/11/18 12:10:21 Done. Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > scale_text == '1'. It will make easier to understand that this form is > equivalent to one in else branch. Done.
	607 rm_text = '0x0'

	608 else:

	609 rm_text = '0x0(,{},{})'.format(index_text, scale_text)

	610 # There are no base in this case

	611 base_text = ''
	halyavin 2013/11/18 09:50:47 Use %riz/%eiz. Use %riz/%eiz. khim 2013/11/18 12:10:21 Done. Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > Use %riz/%eiz. Done.
	612 # Memory access with base and index (no offset)

	613 elif mod_field == 0:

	614 bytes_text = '{:02x} {:02x}'.format(modrm, sib)

	615 rm_text = '({},{},{})'.format(base_text, index_text, scale_text)

	616 # Memory access with base, index and 8bit offset

	617 elif mod_field == 1:

	618 bytes_text = '{:02x} {:02x} 00'.format(modrm, sib)

	619 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	620 # Memory access with base, index and 32bit offset

	621 elif mod_field == 2:

	622 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	623 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	624 # Pretty-printing of access via %rsp (or %r12)

	625 if (base_field == validator.REG_RSP and

	626 index_text in ('%eiz', '%riz') and

	627 scale_field == 0):
	halyavin 2013/11/18 09:50:47 scale_text == '1' scale_text == '1' khim 2013/11/18 12:10:21 Done. Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > scale_text == '1' Done.
	628 rm_text = ('0x0({})' if mod_field else '({})').format(base_text)
	halyavin 2013/11/18 09:50:47 if mod_field in (1, 2): # 8-bit or 32-bit offset if mod_field in (1, 2): # 8-bit or 32-bit offset rm_text = '0x0({})'.format(base_text) else: # mod_field == 0, no offset. rm_text = '({})'.format(base_text) khim 2013/11/18 12:10:21 Grr. Four lines instead of one. Done. Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > if mod_field in (1, 2): # 8-bit or 32-bit offset > rm_text = '0x0({})'.format(base_text) > else: # mod_field == 0, no offset. > rm_text = '({})'.format(base_text) Grr. Four lines instead of one. Done.
	629 return bytes_text, rm_text, base_text, index_text

	630

	631

	632 def ModRMMemoryReplacements(reg=None, writes_to='rm', opcode_bits=0,

	633 memory_accessed=True, register_write='ignore',

	634 base_r8=False, index_r8=False):

	635 """Creates replacement tuples list for register-to-memory instructions

	636

	637 Args:

	638 rm: rm operand kind (see REGISTERS array)

	639 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	640 writes_to: three-state selector

	641 'reg' - instruction uses rm as source, reg as destination

	642 'rm' - instruction uses reg as source, rm as destination

	643 'both' - instruction writes to both reg and rm

	644 opcode_bits: opcode extensions code (used when reg is None)

	645 memory_accessed: True if instruction accesses memory

	646 register_write: three-state selector

	647 'sandbox' - instruction can be used to produce "restricted register"

	648 'protect' - instruction can damage output, protect "special registers"

	649 'ignore' - instruction does not affect it's operands (e.g. test) or

	650 is used with non-GP registers (X87, MMX, XMM, etc)

	651 index_r8: True if REX.X bit in the instruction set to 1

	652

	653 Returns:

	654 List of replacement tuples

	655 """

	656 # Reg field can be used either as reg or as opcode extension, but not both

	657 assert reg is None or opcode_bits == 0

	658

	659 output_key = (options.bitness, reg, writes_to, opcode_bits,

	660 base_r8, index_r8, memory_accessed, register_write)

	661 if output_key in ModRMMemoryReplacements.replacements:

	662 return ModRMMemoryReplacements.replacements[output_key]

	663

	664 if options.bitness == 32:

	665 base = 'eax'

	666 index = 'eax'

	667 else:

	668 base = 'r8' if base_r8 else 'rax'

	669 index = 'r8' if index_r8 else 'rax'

	670

	671 replacements = []

	672

	673 # Two upper bits of ModR/M byte (mod field) must be equal to 00, 01, or 10

	674 # This gives us range from 0x00 to 0xbf but we are going from the end to make

	675 # rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).

	676 if reg is None:

	677 # reg field is used as opcode extension

	678 byte_range = [byte

	679 for byte in range(0xbf, -1, -1)

	680 if (byte >> 3) & 0x7 == opcode_bits]

	681 else:

	682 byte_range = range(0xbf, -1, -1)

	683

	684 for modrm in byte_range:

	685 # If RM field != %rsp then there are no SIB byte

	686 if (modrm & 0x07) != validator.REG_RSP:

	687 bytes_text, rm_text, base_text = BaseOnlyMemoryOperand(modrm, base)

	688 replacement = [bytes_text]

	689 input, output = AppendOperandsReplacement(

	690 replacement, rm_text, reg, modrm, writes_to)

	691 if options.bitness == 64:

	692 replacement.append('any_nonspecial')

	693 # xchg with memory can not be used to sandbox it's operand, only

	694 # instriuction which explicitly writes to reg operand can do that

	695 if writes_to == 'reg' and register_write == 'sandbox':

	696 replacement.append(output)

	697 else:

	698 replacement.append(None)

	699 if InstructionIsDangerous(input, output, register_write, writes_to,

	700 memory_accessed, base_text):

	701 continue

	702 replacement = tuple(replacement)

	703 replacements.append(replacement)

	704 else:

	705 # If RM field == %rsp then we have SIB byte

	706 for sib in xrange(0x100):

	707 bytes_text, rm_text, base_text, index_text = SIBMemoryOperand(

	708 modrm, sib, base, index)

	709 replacement = [bytes_text]

	710 input, output = AppendOperandsReplacement(

	711 replacement, rm_text, reg, modrm, writes_to)

	712 if options.bitness == 64:

	713 if not memory_accessed or index_text == '%riz':

	714 replacement.append('any_nonspecial')

	715 else:

	716 if index_r8:

	717 # Convert %r8 to %r8, %r9 to %r9d, etc
	halyavin 2013/11/18 09:50:47 Convert %r8 to %r8d Convert %r8 to %r8d khim 2013/11/18 12:10:21 Done. Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > Convert %r8 to %r8d Done.
	718 replacement.append(index_text + 'd')

	719 else:

	720 # Convert %rax to %eax, %rsp to %esp, etc

	721 replacement.append('%e' + index_text[2:])

	722 # xchg with memory can not be used to sandbox it's operand, only

	723 # instriuction which explicitly write to reg operand can do that
	halyavin 2013/11/18 09:50:47 instriuction->instruction instriuction->instruction khim 2013/11/18 12:10:21 Done. Show quoted text On 2013/11/18 09:50:47, halyavin wrote: > instriuction->instruction Done.
	724 if writes_to == 'reg' and register_write == 'sandbox':

	725 replacement.append(output)

	726 else:

	727 replacement.append(None)

	728 if InstructionIsDangerous(input, output, register_write, writes_to,

	729 memory_accessed, base_text, index_text):

	730 continue

	731 replacements.append(tuple(replacement))

	732 ModRMMemoryReplacements.replacements[output_key] = tuple(replacements)

	733 return ModRMMemoryReplacements.replacements[output_key]

	734 ModRMMemoryReplacements.replacements = {}

	735

	736

	737 def PrepareCompressors():

	738 global compressors

	739 global main_compressors

	740 global register_compressors

	741 global memory_compressors

	742

	743 # "Larger" compressors should be tried first, then "smaller" ones.

	744 main_compressors = []

	745 register_compressors = []

	746 memory_compressors = []

	747 extra_compressors = []

	748

	749 # Map from "REX bit off" group of registers to "REX bit on" group of registers

	750 r8 = {

	751 'al': 'r8b',

	752 'ax': 'r8w',

	753 'eax': 'r8d',

	754 'rax': 'r8',

	755 'mm0': 'mmalt',

	756 'xmm0': 'xmm8',

	757 'ymm0': 'ymm8'

	758 }

	759

	760 if options.bitness == 32:

	761 register_kinds = ('al', 'ax', 'eax', 'mm0', 'xmm0', 'ymm0')

	762 register_kind_pairs = (

	763 ( 'al', 'al'),

	764 ( 'ax', 'al'),

	765 ( 'ax', 'ax'),

	766 ( 'eax', 'al'),

	767 ( 'eax', 'ax'),

	768 ( 'eax', 'eax'),

	769 ( 'eax', 'mm0'),

	770 ( 'mm0', 'eax'),

	771 ( 'eax', 'xmm0'),

	772 ('xmm0', 'eax'),

	773 ( 'mm0', 'mm0'),

	774 ( 'mm0', 'xmm0'),

	775 ('xmm0', 'mm0'),

	776 ('xmm0', 'xmm0'),

	777 ('xmm0', 'ymm0'),

	778 ('ymm0', 'xmm0'),

	779 ('ymm0', 'ymm0')

	780 )

	781 else:

	782 register_kinds = ('al', 'spl', 'ax', 'eax', 'rax', 'mm0', 'xmm0', 'ymm0',

	783 'r8b', 'r8w', 'r8d', 'r8', 'mmalt', 'xmm8', 'ymm8')

	784 register_kind_pairs = (

	785 ( 'al', 'al'),

	786 ( 'spl', 'spl'), ( 'spl', 'r8b'), ( 'r8b', 'spl'), ( 'r8b', 'r8b'),

	787 ( 'ax', 'al'),

	788 ( 'ax', 'spl'), ( 'ax', 'r8b'), ( 'r8w', 'spl'), ( 'r8w', 'r8b'),

	789 ( 'ax', 'ax'), ( 'ax', 'r8w'), ( 'r8w', 'ax'), ( 'r8w', 'r8w'),

	790 ( 'eax', 'al'),

	791 ( 'eax', 'spl'), ( 'eax', 'r8b'), ( 'r8d', 'spl'), ( 'r8d', 'r8b'),

	792 ( 'eax', 'ax'), ( 'eax', 'r8w'), ( 'r8d', 'ax'), ( 'r8d', 'r8w'),

	793 ( 'eax', 'eax'), ( 'eax', 'r8d'), ( 'r8d', 'eax'), ( 'r8d', 'r8d'),

	794 ( 'rax', 'al'),

	795 ( 'rax', 'spl'), ( 'rax', 'r8b'), ( 'r8', 'spl'), ( 'r8', 'r8b'),

	796 ( 'rax', 'ax'), ( 'rax', 'r8w'), ( 'r8', 'ax'), ( 'r8', 'r8w'),

	797 ( 'rax', 'eax'), ( 'rax', 'r8d'), ( 'r8', 'eax'), ( 'r8', 'r8d'),

	798 ( 'rax', 'rax'), ( 'rax', 'r8'), ( 'r8', 'rax'), ( 'r8', 'r8'),

	799 ( 'eax', 'mm0'), ( 'eax','mmalt'), ( 'r8d', 'mm0'), ( 'eax', 'mmalt'),

	800 ( 'rax', 'mm0'), ( 'rax','mmalt'), ( 'r8', 'mm0'), ( 'r8', 'mmalt'),

	801 ( 'mm0', 'eax'), ('mmalt', 'eax'), ( 'mm0', 'r8d'), ('mmalt', 'r8d'),

	802 ( 'mm0', 'rax'), ('mmalt', 'rax'), ( 'mm0', 'r8'), ('mmalt', 'r8'),

	803 ( 'eax', 'xmm0'), ( 'eax', 'xmm8'), ( 'r8d', 'xmm0'), ( 'r8d', 'xmm8'),

	804 ( 'rax', 'xmm0'), ( 'rax', 'xmm8'), ( 'r8', 'xmm0'), ( 'r8', 'xmm8'),

	805 ('xmm0', 'eax'), ('xmm0', 'r8d'), ('xmm8', 'eax'), ('xmm8', 'r8d'),

	806 ('xmm0', 'rax'), ('xmm0', 'r8'), ('xmm8', 'rax'), ('xmm8', 'r8'),

	807 ( 'mm0', 'mm0'), ('mmalt', 'mm0'), ( 'mm0','mmalt'), ('mmalt','mmalt'),

	808 ( 'mm0', 'xmm0'), ('mmalt','xmm0'), ( 'mm0', 'xmm8'), ('mmalt', 'xmm8'),

	809 ('xmm0', 'mm0'), ('xmm8', 'mm0'), ('xmm0','mmalt'), ('xmm8', 'mmalt'),

	810 ('xmm0', 'xmm0'), ('xmm0', 'xmm8'), ('xmm8', 'xmm0'), ('xmm8', 'xmm8'),

	811 ('xmm0', 'ymm0'), ('xmm0', 'ymm8'), ('xmm8', 'ymm0'), ('xmm8', 'ymm8'),

	812 ('ymm0', 'xmm0'), ('ymm0', 'xmm8'), ('ymm8', 'xmm0'), ('ymm8', 'xmm8'),

	813 ('ymm0', 'ymm0'), ('ymm0', 'ymm8'), ('ymm8', 'ymm0'), ('ymm8', 'ymm8')

	814 )

	815

	816 # Largest compressors: both reg and rm fields are used

	817 for reg, rm in register_kind_pairs:

	818 start_reg = REGISTERS[reg][0]

	819 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	820 start_rm = REGISTERS[rm][0]

	821 end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]

	822 instruction_kinds = [

	823 # Normal instructions with two operands (rm to reg)

	824 ({'writes_to':'reg'}, '', ' # rm to reg', ''),

	825 # Normal instructions with two operands (reg to rm)

	826 ({'writes_to':'rm'}, '', ' # reg to rm', '')

	827 ]

	828 # Lea in 64 bit mode is truly unique instruction for now

	829 if options.bitness == 64 and reg in ('eax', 'r8d', 'rax', 'r8'):

	830 instruction_kinds = [

	831 ({'writes_to':'reg', 'memory_accessed':False,

	832 'register_write':'sandbox' if reg in ('eax', 'r8d') else 'protect'},

	833 ' # lea', ' # rm to reg; lea', ' # lea')] + instruction_kinds

	834 # There are few more forms in 64 bit case (rm to reg)

	835 if options.bitness == 64 and reg in ('eax', 'r8d'):

	836 # Zero-extending version.

	837 instruction_kinds.append(

	838 ({'writes_to':'reg', 'register_write':'sandbox'},

	839 '', ' # rm to reg', ''))

	840 # More forms in 64 bit case (reg to rm)

	841 if options.bitness == 64 and rm in ('eax', 'r8d'):

	842 # Zero-extending version.

	843 instruction_kinds.append(

	844 ({'writes_to':'rm', 'register_write':'sandbox'},

	845 '', ' # reg to rm', ''))

	846 # Zero-extending xchg/xadd

	847 instruction_kinds.append(

	848 ({'writes_to':'both', 'register_write':'sandbox'},

	849 ' # write to both',

	850 ' # reg to rm; write to both',

	851 ' # write to both'))

	852 # Still more forms for 64 bit case (rm to reg).

	853 if options.bitness == 64 and reg in ('al', 'spl', 'ax', 'eax', 'rax',

	854 'r8b', 'r8w', 'r8d', 'r8'):

	855 # Dangerous instructions (rm to reg)

	856 instruction_kinds.append(

	857 ({'writes_to':'reg', 'register_write':'protect'},

	858 '', ' # rm to reg', ''))

	859 # Still more forms for 64 bit case (reg to rm)

	860 if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',

	861 'r8b', 'r8w', 'r8d', 'r8'):

	862 # Dangerous instructions (reg to rm)

	863 instruction_kinds.append(

	864 ({'writes_to':'rm', 'register_write':'protect'},

	865 '', ' # reg to rm', ''))

	866 # Dangerous xchg/xadd

	867 instruction_kinds.append(

	868 ({'writes_to':'both', 'register_write':'protect'},

	869 ' # write to both',

	870 ' # reg to rm; write to both',

	871 ' # write to both'))

	872 # 3DNow! instructions

	873 instruction_kinds.append(

	874 ({'writes_to':'reg', '3dnow':'yes'}, '', ' # rm to reg', ''))

	875 for args, notes, notes_register, notes_memory in instruction_kinds:

	876 regex = '(?: 00)*'

	877 # Additional byte is opcode extension with 3DNow! instructions.

	878 if '3dnow' in args:

	879 regex = ' [0-9a-fA-F][0-9a-fA-F]'

	880 args.pop('3dnow')

	881 regex += ' (?:lock )?\\w* (?:\\$0x0,\|\\$0x0,\\$0x0,\|%cl,\|%xmm0,)?'

	882 # We only need to process ModR/M+SIB '04 04' or '04 07' here

	883 if options.bitness == 32:

	884 regex_mem = '\$%esp,%eax,1\$'

	885 else:

	886 regex_mem = '\$(?:%rsp\|%r15),(?:%rax\|%r8),1\$'

	887 output = None

	888 output_note = None

	889 if args['writes_to'] == 'reg':

	890 regex += '(%' + REGISTERS[rm][0] + '\|' + regex_mem + ')'

	891 regex += ',(%' + REGISTERS[reg][0] + ')'

	892 if 'register_write' in args and args['register_write'] == 'sandbox':

	893 assert reg in ('eax', 'r8d')

	894 output = '%' + reg + '\|None'

	895 output_note = '[%eax..%edi]' if reg == 'eax' else '[%r8d..%r14d]'

	896 subst = (

	897 'XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),

	898 '[%{}..%{}]'.format(start_reg, end_reg), notes)

	899 subst_register = (

	900 'XX', '[%{}..%{}]'.format(start_rm, end_rm),

	901 '[%{}..%{}]'.format(start_reg, end_reg), notes_register)

	902 subst_memory = (

	903 'XX', '[memory]',

	904 '[%{}..%{}]'.format(start_reg, end_reg), notes_memory)

	905 else:

	906 regex += '(%' + REGISTERS[reg][0] + ')'

	907 regex += ',(%' + REGISTERS[rm][0] + '\|' + regex_mem + ')'

	908 if 'register_write'in args and args['register_write'] == 'sandbox':

	909 assert rm in ('eax', 'r8d')

	910 output = '%' + rm + '\|None'

	911 output_note = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'

	912 subst = (

	913 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	914 '[%{}..%{} or memory]'.format(start_rm, end_rm), notes)

	915 subst_register = (

	916 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	917 '[%{}..%{}]'.format(start_rm, end_rm), notes_register)

	918 subst_memory = (

	919 'XX', '[%{}..%{}]'.format(start_reg, end_reg),

	920 '[memory]', notes_memory)

	921 regex += '.*'

	922 if options.bitness == 64:

	923 regex += '; input_rr=(%eax\|%r8d\|any_nonspecial)'

	924 regex += '; output_rr=({})'.format(output)

	925 if 'memory_accessed' in args:

	926 input_note = 'any_nonspecial'

	927 input_note_r8 = 'any_nonspecial'

	928 else:

	929 input_note = '[%eax..%edi]'

	930 input_note_r8 = '[%r8d..%r15d]'

	931 subst_r8 = subst[0:-1] + (input_note_r8, output_note) + subst[-1:]

	932 subst = subst[0:-1] + (input_note, output_note) + subst[-1:]

	933 subst_memory_r8 = subst_memory[0:-1] + (

	934 input_note_r8, output_note) + subst_memory[-1:]

	935 subst_memory = subst_memory[0:-1] + (

	936 input_note, output_note) + subst_memory[-1:]

	937 subst_register = subst_register[0:-1] + (

	938 'any_nonspecial', output_note) + subst_register[-1:]

	939 regex += '()'

	940 base_r8 = rm in r8.values()

	941 memory_replacement = ModRMMemoryReplacements(

	942 reg=reg, base_r8=base_r8, **args)

	943 memory_compressors.append(Compressor(

	944 '.*?(04 0[47])' + regex, subst_memory, memory_replacement))

	945 if options.bitness == 64:

	946 memory_replacement_r8 = ModRMMemoryReplacements(

	947 reg=reg, base_r8=base_r8, index_r8=True, **args)

	948 memory_compressors.append(Compressor(

	949 '.*?(04 0[47])' + regex, subst_memory_r8, memory_replacement_r8))

	950 # Instructions with no memory access are instructions which are doing

	951 # something with memory address (e.g. lea) and as such they don't have

	952 # non-memory forms.

	953 if not 'memory_accessed' in args:

	954 register_replacement = ModRMRegisterReplacements(rm=rm, reg=reg, **args)

	955 register_compressors.append(Compressor(

	956 '.*?(c0)' + regex, subst_register, register_replacement))

	957 main_replacement = register_replacement + memory_replacement

	958 main_compressors.append(Compressor(

	959 '.*?(04 0[47])' + regex, subst, main_replacement))

	960 if options.bitness == 64:

	961 main_replacement_r8 = register_replacement + memory_replacement_r8

	962 main_compressors.append(Compressor(

	963 '.*?(04 0[47])' + regex, subst_r8, main_replacement_r8))

	964

	965 # Smaller compressors: only rm field is used.

	966 for rm in register_kinds:

	967 start_rm = REGISTERS[rm][0]

	968 end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]

	969 for opcode_bits in xrange(8):

	970 XX_byte_mark = 'XX/' + str(opcode_bits)

	971 instruction_kinds = [

	972 # The most basic form

	973 ({}, '', '', '')

	974 ]

	975 if options.bitness == 64:

	976 # No memory access (e.g. prefetch)

	977 instruction_kinds = [

	978 ({'memory_accessed':False}, '', '', '')] + instruction_kinds

	979 # More forms in 64 bit case.

	980 if options.bitness == 64 and rm in ('eax', 'r8d'):

	981 # Zero-extending version.

	982 instruction_kinds.append(

	983 ({'register_write':'sandbox'}, '', '', ''))

	984 # Still more forms for 64 bit case (reg to rm).

	985 if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',

	986 'r8b', 'r8w', 'r8d', 'r8'):

	987 # Dangerous instructions.

	988 instruction_kinds.append(

	989 ({'register_write':'protect'}, '', '', ''))

	990 for args, notes, notes_register, notes_memory in instruction_kinds:

	991 subst = (XX_byte_mark, '[%{}..%{} or memory]'.format(start_rm, end_rm),

	992 notes)

	993 subst_register = (XX_byte_mark, '[%{}..%{}]'.format(start_rm, end_rm),

	994 notes_register)

	995 subst_memory = (XX_byte_mark, '[memory]',

	996 notes_memory)

	997 regex = ('(?: 00)* (?:lock )?\\w* (?:\\$0x0,\|%cl,)?'

	998 '(%' + REGISTERS[rm][0] + '\|' + regex_mem + ').*')

	999 output = None

	1000 output_note = None

	1001 if options.bitness == 64:

	1002 if 'register_write' in args and args['register_write'] == 'sandbox':

	1003 assert rm in ('eax', 'r8d')

	1004 output = '%' + rm + '\|None'

	1005 output_note = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'

	1006 regex += '; input_rr=(%eax\|%r8d\|any_nonspecial)'

	1007 regex += '; output_rr=({})'.format(output)

	1008 if 'memory_accessed' in args:

	1009 input_note = 'any_nonspecial'

	1010 input_note_r8 = 'any_nonspecial'

	1011 else:

	1012 input_note = '[%eax..%edi]'

	1013 input_note_r8 = '[%r8d..%r15d]'

	1014 subst_r8 = subst[0:-1] + (input_note_r8, output_note) + subst[-1:]

	1015 subst = subst[0:-1] + (input_note, output_note) + subst[-1:]

	1016 subst_memory_r8 = subst_memory[0:-1] + (

	1017 input_note_r8, output_note) + subst_memory[-1:]

	1018 subst_memory = subst_memory[0:-1] + (

	1019 input_note, output_note) + subst_memory[-1:]

	1020 subst_register = subst_register[0:-1] + (

	1021 'any_nonspecial', output_note) + subst_register[-1:]

	1022 regex += '()'

	1023 base_r8 = rm in r8.values()

	1024 memory_replacement = ModRMMemoryReplacements(

	1025 reg=None, base_r8=base_r8, opcode_bits=opcode_bits, **args)

	1026 memory_compressors.append(Compressor(

	1027 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	1028 subst_memory, memory_replacement))

	1029 if options.bitness == 64:

	1030 memory_replacement_r8 = ModRMMemoryReplacements(

	1031 reg=None, base_r8=base_r8, index_r8=True, opcode_bits=opcode_bits,

	1032 **args)

	1033 memory_compressors.append(Compressor(

	1034 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	1035 subst_memory_r8, memory_replacement_r8))

	1036 # Instructions with no memory access are instructions which are doing

	1037 # something with memory address (e.g. prefetch) and as such they don't

	1038 # have non-memory forms.

	1039 if not 'memory_accessed' in args:

	1040 register_replacement = ModRMRegisterReplacements(

	1041 reg=None, rm=rm, opcode_bits=opcode_bits, **args)

	1042 register_compressors.append(Compressor(

	1043 '.?({:02x})'.format(0xc0 + opcode_bits 8) + regex,

	1044 subst_register, register_replacement))

	1045 main_replacement = register_replacement + memory_replacement

	1046 main_compressors.append(Compressor(

	1047 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	1048 subst, main_replacement))

	1049 if options.bitness == 64:

	1050 main_replacement_r8 = register_replacement + memory_replacement_r8

	1051 main_compressors.append(Compressor(

	1052 '.?({:02x} 0[47])'.format(0x04 + opcode_bits 8) + regex,

	1053 subst_r8, main_replacement_r8))

	1054

	1055 # Even smaller compressors: only low 3 bits of opcode are used.

	1056 for reg in register_kinds + ('st(0)',):

	1057 start_reg = REGISTERS[reg][0]

	1058 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	1059 for opcode in xrange(8):

	1060 for text1, text2, nibble in (

	1061 ('[0..7]', '[8..f]', xrange(8)),

	1062 ('[012367]', '[89abef]', (0, 1, 2, 3, 6, 7)),

	1063 ('[0..6]', '[8..e]', xrange(7))

	1064 ):

	1065 # Note that we use 2nd line here to avoid ambiguity when opcode is 0x00

	1066 extra_compressors.append(Compressor(

	1067 '.?[0-9a-fA-F](1)(?: 00)'

	1068 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	1069 '(%(?:' + REGISTERS_RE[reg][1] + ')).*()',

	1070 (text1, '[%{}..%{}]'.format(start_reg, end_reg), ''),

	1071 tuple(('{:x}'.format(n), '%' + REGISTERS[reg][n])

	1072 for n in nibble)))

	1073 extra_compressors.append(Compressor(

	1074 '.?[0-9a-fA-F](8)(?: 00)'

	1075 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	1076 '(%(?:' + REGISTERS_RE[reg][0] + ')).*()',

	1077 (text2, '[%{}..%{}]'.format(start_reg, end_reg), ''),

	1078 tuple(('{:x}'.format(n + 8), '%' + REGISTERS[reg][n])

	1079 for n in nibble)))

	1080 # Another version for 64 bit case

	1081 if options.bitness == 64 and reg in ('eax', 'r8d'):

	1082 extra_compressors.append(Compressor(

	1083 '.?[0-9a-fA-F](1)(?: 00)'

	1084 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	1085 '(%(?:' + REGISTERS_RE[reg][1] + ')).*'

	1086 'output_rr=(%(?:'+ REGISTERS_RE[reg][1] + ')).*()',

	1087 tuple([text1] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +

	1088 ['']),

	1089 tuple(['{:x}'.format(n)] + ['%' + REGISTERS[reg][n]] * 2

	1090 for n in nibble)))

	1091 extra_compressors.append(Compressor(

	1092 '.?[0-9a-fA-F](8)(?: 00)'

	1093 ' \\w* (?:\\$0x0,\|%ax,\|%st,)?'

	1094 '(%(?:' + REGISTERS_RE[reg][0] + ')).*'

	1095 'output_rr=(%(?:'+ REGISTERS_RE[reg][0] + ')).*()',

	1096 tuple([text2] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +

	1097 ['']),

	1098 tuple(['{:x}'.format(n + 8)] + ['%' + REGISTERS[reg][n]] * 2

	1099 for n in nibble)))

	1100 compressors = (main_compressors + memory_compressors + register_compressors +

	1101 extra_compressors)

	1102

	1103 # Special compressors: will handle some cosmetic issues.

	1104 #

	1105 # SETxx ignores reg field and thus are described as many separate instructions

	1106 compressors.append(Compressor(

	1107 '.0f 9[0-9a-fA-F] XX(/[0-7]) set.()', ('', ''),

	1108 [('/' + str(i), ) for i in range(8)]))

	1109 # BSWAP is described with opcode "0f c8+r", not "0f /1" in manual

	1110 if options.bitness == 32:

	1111 compressors.append(Compressor(

	1112 '.(XX/1) bswap.ax.*()', ('c[8..f]', ''), [('XX/1', )]))

	1113 else:

	1114 compressors.append(Compressor(

	1115 '.(XX/1) bswap.ax.*()', ('c[89abef]', ''), [('XX/1', )]))

	1116 compressors.append(Compressor(

	1117 '.(XX/1) bswap.r8.*()', ('c[8..e]', ''), [('XX/1', )]))

	1118 # Add mark '# write to both' to certain versions of CMPXCHG, XADD, and XCHG

	1119 if options.bitness == 64:

	1120 compressors.append(Compressor(

	1121 '.* (?:cmpxchg\|xadd\|xchg).%al\\.\\.%bh[^#]()$',

	1122 (' # write to both', ), ((), )))

	1123 # "and $0xe0,[%eax..%edi]" is treated specially which means that we list all

	1124 # versions of and "[$0x1..$0xff],[%eax..%edi]" separately here.

	1125 # Without this rule these ands comprise 2/3 of the whole output!

	1126 if options.bitness == 32:

	1127 compressors.append(Compressor(

	1128 '.*83 (e0 01 and \\$0x1,%eax)()',

	1129 ('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', ' # special and'),

	1130 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS['eax'][r]), )

	1131 for i in range(0x01, 0x100) for r in range(8)] +

	1132 [('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', )]))

	1133 else:

	1134 for reg in ('eax', 'r8d'):

	1135 start_reg = REGISTERS[reg][0]

	1136 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	1137 for index_reg in ('eax', 'r8d'):

	1138 start_index = REGISTERS[index_reg][0]

	1139 end_index = REGISTERS[index_reg][-1]

	1140 compressors.append(Compressor(

	1141 '.83 (e0 01 and \\$0x1,%' + reg + ').'

	1142 'input_rr=(any_nonspecial); output_rr=(%' + reg + ')()',

	1143 ('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,

	1144 end_reg), '[%{}..%{}]'.format(start_index, end_index),

	1145 '[%{}..%{}]'.format(start_reg, end_reg),

	1146 ' # special and'),

	1147 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS[reg][r]),

	1148 'any_nonspecial', '%' + REGISTERS[reg][r])

	1149 for i in range(0x01, 0x100) for r in range(7 + (reg == 'eax'))] +

	1150 [('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,

	1151 end_reg), '[%{}..%{}]'.format(start_index, end_index),

	1152 '[%{}..%{}]'.format(start_reg, end_reg))]))

	1153

	1154 # "and $e0" and similar are used to align %rsp. All negative values are

	1155 # accepted by validator and there are 127 of these.

	1156 # Consolidate them into one line.

	1157 if options.bitness == 64:

	1158 compressors.append(Compressor(

	1159 '.(?:81\|83) (?:e4\|e5) (80) (?:00 00 00 \|) and \\$0x(80),%r[bs]p.()',

	1160 ('[80..ff]', '[80..ff]', ' # alignment and'),

	1161 [('{:02x}'.format(i), '{:02x}'.format(i)) for i in range(0x80, 0x100)]))

	1162

	1163 # Merge memory and non-memory access

	1164 if options.bitness == 32:

	1165 letters_and_registers = (('b', 'al', ''), ('w', 'ax', ''), ('l', 'eax', ''))

	1166 else:

	1167 letters_and_registers = (

	1168 ('b', 'al', 'eax'), ('b', 'spl', 'eax'), ('b', 'r8b', 'r8d'),

	1169 ('w', 'ax', 'eax'), ('w', 'r8w', 'r8d'),

	1170 ('l', 'eax', 'eax'), ('l', 'r8d', 'r8d'),

	1171 ('q', 'rax', 'eax'), ('q', 'r8', 'r8d')

	1172 )

	1173 for letter, reg, out_reg in letters_and_registers:

	1174 start_reg = REGISTERS[reg][0]

	1175 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	1176 all_regs = '[%{}..%{}]'.format(start_reg, end_reg)

	1177 regs_mark = '[%{}..%{} or memory]'.format(start_reg, end_reg)

	1178 if options.bitness == 64:

	1179 start_out = REGISTERS[out_reg][0]

	1180 end_out = REGISTERS[out_reg][-1 if out_reg[0:2] != 'r8' else -2]

	1181 out_regs = '[%{}..%{}]'.format(start_out, end_out)

	1182 for notes in ('', ' # rm to reg', ' # reg to rm'):

	1183 compressors.append(Compressor(

	1184 '.* \\w(' + letter + ') .(\\[memory]).*()()',

	1185 ('[{}]?'.format(letter), regs_mark, '', ''),

	1186 ((letter, '[memory]', ''), ('', all_regs, notes))))

	1187 if options.bitness == 64:

	1188 for index_reg in ('eax', 'r8d'):

	1189 start_index = REGISTERS[index_reg][0]

	1190 end_index = REGISTERS[index_reg][-1]

	1191 index_regs = '[%{}..%{}]'.format(start_index, end_index)

	1192 for output_rrs in ((None, out_regs), (out_regs, None), (None, None)):

	1193 compressors.append(Compressor(

	1194 '.* \\w(' + letter + ') .(\\[memory]).*; '

	1195 'input_rr=(\\[%[a-z0-9]..%[a-z0-9]\\]); '

	1196 'output_rr=(\\[%[a-z0-9]..%[a-z0-9]\\]\|None)()()',

	1197 ('[{}]?'.format(letter), regs_mark, index_regs,

	1198 output_rrs[0] if output_rrs[0] is not None else output_rrs[1],

	1199 '', ''),

	1200 ((letter, '[memory]', index_regs, output_rrs[0], ''),

	1201 ('', all_regs, 'any_nonspecial', output_rrs[1], notes))))

	1202

	1203 # REX compressors

	1204 if options.bitness == 64:

	1205 # First pretty complex set of compressors to combine versions of REX with

	1206 # three lowest bits in different states.

	1207 register_kind_pairs = (

	1208 ( None, None),

	1209 ( 'al', 'al'), ( 'al', None), (None, 'al'),

	1210 ( 'ax', 'al'), ( 'al', 'ax'),

	1211 ( 'ax', 'ax'), ( 'ax', None), (None, 'ax'),

	1212 ( 'eax', 'al'), ( 'al', 'eax'),

	1213 ( 'eax', 'ax'), ( 'ax', 'eax'),

	1214 ( 'eax', 'eax'), ( 'eax', None), (None, 'eax'),

	1215 ( 'rax', 'al'), ( 'al', 'rax'),

	1216 ( 'rax', 'ax'), ( 'ax', 'rax'),

	1217 ( 'rax', 'eax'), ( 'eax', 'rax'),

	1218 ( 'rax', 'rax'), ( 'rax', None), (None, 'rax'),

	1219 ( 'eax', 'mm0'), ( 'mm0', 'eax'),

	1220 ( 'rax', 'mm0'), ( 'mm0', 'rax'),

	1221 ( 'mm0', 'eax'), ( 'eax', 'mm0'),

	1222 ( 'mm0', 'rax'), ( 'rax', 'mm0'),

	1223 ( 'eax', 'xmm0'),

	1224 ( 'rax', 'xmm0'),

	1225 ('xmm0', 'eax'),

	1226 ('xmm0', 'rax'),

	1227 ( 'mm0', 'mm0'), ( 'mm0', None), (None, 'mm0'),

	1228 ( 'mm0', 'xmm0'),

	1229 ('xmm0', 'mm0'),

	1230 ('xmm0', 'xmm0'),

	1231 ('xmm0', 'ymm0'), ('xmm0', None), (None, 'xmm0'),

	1232 ('ymm0', 'xmm0'),

	1233 ('ymm0', 'ymm0'), ('ymm0', None), (None, 'ymm0'),

	1234 )

	1235 for reg, rm in register_kind_pairs:

	1236 for last_reg, last_rm in ((-1, -1), (-1, -2), (-2, -1), (-2, -2)):

	1237 if reg:

	1238 start_reg = REGISTERS[reg][0]

	1239 start_reg8 = REGISTERS[r8[reg]][0]

	1240 end_reg = REGISTERS[reg][-1]

	1241 end_reg0 = 'dil' if reg == 'al' else end_reg

	1242 end_reg8 = REGISTERS[r8[reg]][last_reg]

	1243 reg_regex = '\\[(%' + start_reg + '\\.\\.%' + end_reg + ')]'

	1244 reg_regex0 = '\\[(%' + start_reg + '\\.\\.%' + end_reg0 + ')]'

	1245 elif last_reg == -2:

	1246 continue

	1247 if rm:

	1248 start_rm = REGISTERS[rm][0]

	1249 start_rm8 = REGISTERS[r8[rm]][0]

	1250 end_rm = REGISTERS[rm][-1]

	1251 end_rm0 = 'dil' if rm == 'al' else end_rm

	1252 end_rm8 = REGISTERS[r8[rm]][last_rm]

	1253 rm_regex = ('\\[(%' + start_rm + '\\.\\.%' + end_rm + ')'

	1254 '(?: or memory)?]')

	1255 rm_regex0 = ('\\[(%' + start_rm + '\\.\\.%' + end_rm0 + ')'

	1256 '(?: or memory)?]')

	1257 elif last_rm == -2:

	1258 continue

	1259 for rexw in (True, False):

	1260 for input_rr in (True, False):

	1261 for output_rr in (True, False) if reg or rm else (None, ):

	1262 for rm_to_reg in (True, False) if reg and rm else (None, ):

	1263 # Legacy prefixes

	1264 regex = '.:(?: 26\| 2e\| 36\| 3e\| 64\| 65\| 66\| 67\| f0\| f2\| f3)'

	1265 # REX

	1266 regex += '( 48).' if rexw else '( 40\|).'

	1267 # Replacement text

	1268 replacement_tuple = (

	1269 ' [REX:48..4f]' if rexw else ' [REX:40..47]?', )

	1270 if reg:

	1271 replacement_regs = '%{}..%{}'.format(start_reg, end_reg8)

	1272 if rm:

	1273 replacement_rms = '%{}..%{}'.format(start_rm, end_rm8)

	1274 # Instruction arguments

	1275 if not reg and not rm:

	1276 pass

	1277 elif not reg and rm:

	1278 if rexw:

	1279 regex += rm_regex0 + '.*'

	1280 else:

	1281 regex += rm_regex + '.*'

	1282 replacement_tuple += (replacement_rms, )

	1283 elif reg and not rm:

	1284 if rexw:

	1285 regex += reg_regex0 + '.*'

	1286 else:

	1287 regex += reg_regex + '.*'

	1288 replacement_tuple += (replacement_regs, )

	1289 elif rm_to_reg:

	1290 if rexw:

	1291 regex += rm_regex0 + ',' + reg_regex0 + '.*'

	1292 else:

	1293 regex += rm_regex + ',' + reg_regex + '.*'

	1294 replacement_tuple += (replacement_rms, replacement_regs)

	1295 else:

	1296 if rexw:

	1297 regex += reg_regex0 + ',' + rm_regex0 + '.*'

	1298 else:

	1299 regex += reg_regex + ',' + rm_regex + '.*'

	1300 replacement_tuple += (replacement_regs, replacement_rms)

	1301 # Input and output restricted registers

	1302 if input_rr:

	1303 regex += 'input_rr=\\[(%eax\\.\\.%edi)].*'

	1304 replacement_tuple += ('%eax..%r15d', )

	1305 if output_rr:

	1306 regex += 'output_rr=\\[(%eax\\.\\.%edi)].*'

	1307 replacement_tuple += ('%eax..%r14d', )

	1308 regex += '()'

	1309 replacement_tuple += ('', )

	1310 # Replacement cases

	1311 replacement_tuples = ()

	1312 for byte in (range(0x48, 0x50)

	1313 if rexw

	1314 else range(0x40, 0x48) + ['']):

	1315 replacement_case = (

	1316 ' {:02x}'.format(byte) if byte else byte, )

	1317 if byte:

	1318 if rm:

	1319 if byte & 0x1:

	1320 replacement_rms = '%{}..%{}'.format(start_rm8, end_rm8)

	1321 else:

	1322 replacement_rms = '%{}..%{}'.format(start_rm, end_rm0)

	1323 if byte & 0x2:

	1324 replacement_index = '%r8d..%r15d'

	1325 else:

	1326 replacement_index = '%eax..%edi'

	1327 if reg:

	1328 if byte & 0x4:

	1329 replacement_regs = '%{}..%{}'.format(start_reg8,

	1330 end_reg8)

	1331 else:

	1332 replacement_regs = '%{}..%{}'.format(start_reg,

	1333 end_reg0)

	1334 else:

	1335 if rm:

	1336 replacement_rms = '%{}..%{}'.format(start_rm, end_rm)

	1337 replacement_index = '%eax..%edi'

	1338 if reg:

	1339 replacement_regs = '%{}..%{}'.format(start_reg, end_reg)

	1340 if not reg and not rm:

	1341 pass

	1342 elif not reg and rm:

	1343 replacement_case += (replacement_rms, )

	1344 if byte:

	1345 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'

	1346 else:

	1347 final_rr = '%eax..%edi'

	1348 elif reg and not rm:

	1349 replacement_case += (replacement_regs, )

	1350 if byte:

	1351 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'

	1352 else:

	1353 final_rr = '%eax..%edi'

	1354 elif rm_to_reg:

	1355 replacement_case += (replacement_rms, replacement_regs)

	1356 if byte:

	1357 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'

	1358 else:

	1359 final_rr = '%eax..%edi'

	1360 else:

	1361 replacement_case += (replacement_regs, replacement_rms)

	1362 if byte:

	1363 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'

	1364 else:

	1365 final_rr = '%eax..%edi'

	1366 if input_rr: replacement_case += (replacement_index, )

	1367 if output_rr: replacement_case += (final_rr, )

	1368 replacement_tuples += (replacement_case, )

	1369 compressors.append(Compressor(

	1370 regex, replacement_tuple, replacement_tuples))

	1371 # This is pretty simple compressor to combine two lines with different REX.W

	1372 # bits (only if they are otherwise identical).

	1373 compressors.append(Compressor(

	1374 '.(\\[REX:40\\.\\.47]\\?).()', ('[REX:40..4f]?', ''),

	1375 (('[REX:40..47]?', ), ('[REX:48..4f]', ))))

	1376

	1377

	1378 def ShowProgress(rule, instruction):

	1379 if rule not in ShowProgress.rules_shown:

	1380 first_print = True

	1381 ShowProgress.rules_shown[rule]=len(ShowProgress.rules_shown)

	1382 else:

	1383 first_print = False

	1384 print >> sys.stderr, '-------- Compressed --------'

	1385 print >> sys.stderr, 'Rule:', ShowProgress.rules_shown[rule]

	1386 print >> sys.stderr, '--------'

	1387 compressor = compressors[rule]

	1388 match = compressor.regex.match(instruction)

	1389 assert match

	1390 format_str = CompressionTemplate(instruction, match, '{{{}}}')

	1391 replacements = sorted(format_str.format(*replacement)

	1392 for replacement in compressor.replacements)

	1393 if len(compressor.replacements) <= 4 or first_print:

	1394 for replacement in replacements:

	1395 print >> sys.stderr, replacement

	1396 else:

	1397 print >> sys.stderr, replacements[0]

	1398 print >> sys.stderr, '...'

	1399 print >> sys.stderr, replacements[-1]

	1400 print >> sys.stderr, '--------'

	1401 print >> sys.stderr, 'Compressed', (

	1402 format_str + '{{{}}}').format(*compressor.subst)

	1403 ShowProgress.rules_shown = {}

	1404

	1405

	1406 def main():

	1407 # We are keeping these global to share state graph and compressors

	1408 # between workers spawned by multiprocess. Passing them every time is slow.

	1409 global options, xml_file

	1410 global dfa

	1411 global worker_validator

	1412 options, xml_file = ParseOptions()

	1413 dfa = dfa_parser.ParseXml(xml_file)

	1414 worker_validator = validator.Validator(

	1415 validator_dll=options.validator_dll,

	1416 decoder_dll=options.decoder_dll)

	1417 PrepareCompressors()

	1418

	1419 assert dfa.initial_state.is_accepting

	1420 assert not dfa.initial_state.any_byte

	1421

	1422 print >> sys.stderr, len(dfa.states), 'states'

	1423

	1424 num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

	1425

	1426 # We can't just write 'num_suffixes[dfa.initial_state]' because

	1427 # initial state is accepting.

	1428 total_instructions = sum(

	1429 num_suffixes[t.to_state]

	1430 for t in dfa.initial_state.forward_transitions.values())

	1431 print >> sys.stderr, total_instructions, 'regular instructions total'

	1432

	1433 tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)

	1434 print >> sys.stderr, len(tasks), 'tasks'

	1435

	1436 pool = multiprocessing.Pool()

	1437

	1438 results = pool.imap(Worker, tasks)

	1439

	1440 total = 0

	1441 num_valid = 0

	1442 full_output = set()

	1443 for prefix, count, valid_count, output, trace in results:

	1444 print >> sys.stderr, 'Prefix:', ', '.join(map(hex, prefix))

	1445 total += count

	1446 num_valid += valid_count

	1447 full_output \|= output

	1448 for rule, instruction in trace:

	1449 ShowProgress(rule, instruction)

	1450 for instruction in sorted(Compressed(full_output,

	1451 compressors,

	1452 ShowProgress)):

	1453 print instruction

	1454

	1455 print >> sys.stderr, total, 'instructions were processed'

	1456 print >> sys.stderr, num_valid, 'valid instructions'

	1457

	1458

	1459 if __name__ == '__main__':

	1460 main()

OLD	NEW

« no previous file with comments | « no previous file | src/trusted/validator_ragel/testdata/32bit_regular.golden » ('j') | no next file with comments »