src/trusted/validator_ragel/compress_regular_instructions.py - Issue 49183002: Regular instructions golden file test.

Side by Side Diff: src/trusted/validator_ragel/compress_regular_instructions.py

Issue 49183002: Regular instructions golden file test. Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/

Patch Set: Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # Copyright (c) 2013 The Native Client Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """

	6 Traverse the validator's DFA, collect all "normal" instructions and then

	7 compress output. Note: "anybyte fields" (immediates and displacements)

	8 are always filled with zeros. Otherwise processing of sextillions (sic!)

	9 of possibilities will take too long.

	10

	11 Each rule is applied only when all variants are accepted by validator.

	12 The following compression rules are present:

	13

	14 1. Compress ModR/M (+SIB & displacement).

	15 Instruction: 00 00 add %al,(%rax)

	16 ...

	17 Instruction: 00 ff add %bh,%bh

	18 becomes

	19 Instruction: 00 XX add [%al..%bh],[%al..%bh or memory]

	20

	21 1a. Compress ModR/M (+SIB & displacement) memory-only.

	22 Instruction: f0 01 00 lock add %eax,(%eax)

	23 ...

	24 Instruction: f0 01 bf 00 00 00 00 lock add %edi,0x0(%edi)

	25 becomes

	26 Instruction: f0 01 XX lock add [%eax..edi],[memory]

	27

	28 1b. Compress ModR/M register only.

	29 Instruction: 66 0f 50 c0 movmskpd %xmm0,%eax

	30 ...

	31 Instruction: 66 0f 50 ff movmskpd %xmm7,%edi

	32 becomes

	33 Instruction: 66 0f 50 XX movmskpd [%xmm0..%xmm7],[%eax..edi]

	34

	35 2. Compress ModR/M (+SIB & displacement) with opcode extension.

	36 Instruction: 0f 90 00 seto (%eax)

	37 ...

	38 Instruction: 0f 90 c7 seto %bh

	39 becomes

	40 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	41

	42 2a. Compress ModR/M (+SIB & displacement) memory-only with opcode extension.

	43 Instruction: f0 ff 00 lock incl (%eax)

	44 ...

	45 Instruction: f0 ff 84 ff 00 00 00 00 lock incl 0x0(%edi,%edi,8)

	46 becomes

	47 Instruction: f0 ff XX/1 lock decl [memory]

	48

	49 2b. Compress ModR/M register-only with opcode extension.

	50 Instruction: 0f 71 d0 00 psrlw $0x0,%mm0

	51 ...

	52 Instruction: 0f 71 d7 00 psrlw $0x0,%mm7

	53 becomes

	54 Instruction: 66 0f 71 XX/2 00 psrlw $0x0,[%mm0..%mm7]

	55

	56 3. Compress register-in-opcode.

	57 Instruction: d9 c0 fld %st(0)

	58 ...

	59 Instruction: d9 c7 fld %st(7)

	60 becomes

	61 Instruction: Instruction: d9 c[0..7] fld [%st(0)..%st(7)]

	62

	63 Only applies if all possible register accesses are accepted by validator.

	64

	65 4. Special compressor for "set" instruction.

	66 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	67 ...

	68 Instruction: 0f 90 XX/7 seto [%al..%bh or memory]

	69 becomes

	70 Instruction: 0f 90 XX seto [%al..%bh or memory]

	71 """

	72

	73 import itertools

	74 import multiprocessing

	75 import optparse

	76 import os

	77 import re

	78 import subprocess

	79 import sys

	80 import tempfile

	81 import traceback

	82

	83 import dfa_parser

	84 import dfa_traversal

	85 import validator

	86

	87

	88 # Register names in 'natual' order (as defined by IA32/x86-64 ABI)

	89 #

	90 # X86-64 ABI splits all registers in groups of 8 because it uses 3-bit field

	91 # in opcode, ModR/M, and/or SIB bytes to encode them.

	92 #

	93 # In most cases there are 16 registers of a given kind and two such groups,

	94 # but there are couple of exceptions:

	95 # 1. There are 20 8-bit registers and three groups (two of them overlap)

	96 # 2. There are eight X87 and MMX registers thus two groups are identical

	97 #

	98 # We use typical register from a group to name the whole group. Most groups

	99 # are named by the first register but 'spl' group is named by the fifth register

	100 # because it's first four registers are the same as 'al' group.

	101 # We use mnemonic name 'mmalt' to represent the "evil mirror" of the 'mm0'

	102 # group: it the same as 'mm0', but implies use of the appropriate REX bit.

	103 REGISTERS = {

	104 'al': [ 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh' ],

	105 'spl': [ 'al', 'cl', 'dl', 'bl', 'spl', 'bpl', 'sil', 'dil' ],

	106 'ax': [ 'ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di' ],

	107 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi' ],

	108 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi' ],

	109 'r8b': [ 'r{}b'.format(N) for N in range(8,16) ],

	110 'r8w': [ 'r{}w'.format(N) for N in range(8,16) ],

	111 'r8d': [ 'r{}d'.format(N) for N in range(8,16) ],

	112 'r8': [ 'r{}'.format(N) for N in range(8,16) ],

	113 'mm0': [ 'mm{}'.format(N) for N in range(8) ],

	114 'mmalt': [ 'mm{}'.format(N) for N in range(8) ],

	115 'st(0)': [ 'st({})'.format(N) for N in range(8) ],

	116 'xmm0': [ 'xmm{}'.format(N) for N in range(8) ],

	117 'xmm8': [ 'xmm{}'.format(N) for N in range(8,16) ],

	118 'ymm0': [ 'ymm{}'.format(N) for N in range(8) ],

	119 'ymm8': [ 'ymm{}'.format(N) for N in range(8,16) ]

	120 }

	121

	122

	123 NOP = 0x90

	124

	125

	126 def PadToBundleSize(bytes):

	127 assert len(bytes) <= validator.BUNDLE_SIZE

	128 return bytes + [NOP] * (validator.BUNDLE_SIZE - len(bytes))

	129

	130

	131 # In x86-64 mode we have so-called 'restricted register' which is used to

	132 # tie two groups together. Some instructions require particular value to

	133 # be stored in this variable, while some accept any non-special restricted

	134 # register (%ebp and %esp are special because they can only be accepted by

	135 # a few 'special' instructions).

	136 #

	137 # You can find more details in the "NaCl SFI model on x86-64 systems" manual.

	138 #

	139 # We try to feed all possible 'restricted registers' into validator and then

	140 # classify the instruction using this map. If set of acceptable 'restricted

	141 # registers' is not here, then it's an error in validator.

	142 ACCEPTABLE_X86_64_INPUTS = {

	143 0x00001: 'input_rr=%eax',

	144 0x00002: 'input_rr=%ecx',

	145 0x00004: 'input_rr=%edx',

	146 0x00008: 'input_rr=%ebx',

	147 0x00010: 'input_rr=%esp',

	148 0x00020: 'input_rr=%ebp',

	149 0x00040: 'input_rr=%esi',

	150 0x00080: 'input_rr=%edi',

	151 0x00100: 'input_rr=%r8d',

	152 0x00200: 'input_rr=%r9d',

	153 0x00400: 'input_rr=%r10d',

	154 0x00800: 'input_rr=%r11d',

	155 0x01000: 'input_rr=%r12d',

	156 0x02000: 'input_rr=%r13d',

	157 0x04000: 'input_rr=%r14d',

	158 0x08000: 'input_rr=%r15d',

	159 0x1ffcf: 'input_rr=any_nonspecial'

	160 }

	161

	162 # Any instruction must produce either None or one of fifteen registers as an

	163 # output 'restricted register' value. 'r15d' is NOT acceptable as an output.

	164 ACCEPTABLE_X86_64_OUTPUT_REGISTERS = tuple(

	165 '%' + reg for reg in (REGISTERS['eax'] + REGISTERS['r8d'])[0:-1])

	166

	167

	168 def ValidateInstruction(instruction, validator_inst):

	169 bundle = ''.join(map(chr, PadToBundleSize(instruction)))

	170 if options.bitness == 32:

	171 result = validator_inst.ValidateChunk(bundle, bitness=32)

	172 return result, []

	173 else:

	174 valid_inputs = 0

	175 known_final_rr = None

	176 output_rr = None

	177 # Note that iteration order is aligned with ACCEPTABLE_X86_64_INPUTS array

	178 # above.

	179 for bit, initial_rr in enumerate(validator.ALL_REGISTERS + [None]):

	180 valid, final_rr = validator_inst.ValidateAndGetFinalRestrictedRegister(

	181 bundle, len(instruction), initial_rr)

	182 if valid:

	183 # final_rr should not depend on input_rr

	184 assert valid_inputs == 0 or known_final_rr == final_rr

	185 valid_inputs \|= 1 << bit

	186 known_final_rr = final_rr

	187 # If nothing is accepted then instruction is not valid. Easy and simple.

	188 if valid_inputs == 0: return False, []

	189 # If returned value in unacceptable we'll get IndexError here and this

	190 # test will fail

	191 if known_final_rr is not None:

	192 output_rr = ACCEPTABLE_X86_64_OUTPUT_REGISTERS[known_final_rr]

	193 # If collected valid_inputs are unacceptable we'll get KeyError here and

	194 # this test will fail

	195 return True, [ACCEPTABLE_X86_64_INPUTS[valid_inputs],

	196 'output_rr={}'.format(output_rr)]

	197

	198

	199 class WorkerState(object):

	200 def __init__(self, prefix, validator):

	201 self.total_instructions = 0

	202 self.num_valid = 0

	203 self.validator = validator

	204 self.output = set()

	205 self.trace = []

	206

	207

	208 def ReceiveInstruction(self, bytes):

	209 self.total_instructions += 1

	210 result, notes = ValidateInstruction(bytes, self.validator)

	211 if result:

	212 self.num_valid += 1

	213 dis = self.validator.DisassembleChunk(

	214 ''.join(map(chr, bytes)),

	215 bitness=options.bitness)

	216 for line_nr in xrange(len(dis)):

	217 dis[line_nr] = str(dis[line_nr])

	218 assert dis[line_nr][0:17] == 'Instruction(0x' + str(line_nr) + ': '

	219 assert dis[line_nr][-1:] == ')'

	220 dis[line_nr] = dis[line_nr][17:-1]

	221 # If %rip is involved then comment will be different depending on the

	222 # instruction length. Eliminate it.

	223 if '(%rip)' in dis[0]:

	224 dis[0] = re.sub(' # 0x[ ][0-9a-fA-F]', '', dis[0])

	225 # Zero displacements are represented as 0x0 for all instructions except

	226 # jumps where they disassembled as non-zero due to %eip/%rip-relative

	227 # addressing. We replace this displacement with %eip/%rip to simplify

	228 # compression.

	229 if ' 0x' in dis[0] and ' 0x0' not in dis[0]:

	230 for bytes in xrange(1, 16):

	231 dis[0] = re.sub(

	232 '(' + '(?:[0-9a-fA-F][0-9a-fA-F] ){' + str(bytes) + '} .* )' +

	233 hex(bytes) + '(.*)',

	234 r'\1%eip\2' if options.bitness == 32 else r'\1%rip\2',

	235 dis[0]);

	236 dis[0] = 'Instruction: ' + dis[0]

	237 dis += notes

	238 self.output.add('; '.join(dis))

	239

	240

	241 def RecordTrace(self, compressor_nr, instruction):

	242 self.trace.append((compressor_nr, instruction))

	243

	244

	245 # Compressor has three slots: regex (which picks apart given instruction),

	246 # subst (which is used to create the compressed version) and replacements (which

	247 # are used to generate set of instructions from a given code).

	248 #

	249 # Example compressor:

	250 # regex = r'.?[0-9a-fA-F]([0-7]) \w (%e(?:[abcd]x\|[sb]p\|[sd]i)).*()'

	251 # subst = ('[0-7]', '[%eax..%edi]', ' # register in opcode')

	252 # replacements = ((0, '%eax', ''), (1, '%ecx', ''),

	253 # (2, '%edx', ''), (3, '%ebx', '')

	254 # (4, '%esp', ''), (5, '%ebp', ''),

	255 # (6, '%esi', ''), (7, '%edi', ''))

	256 #

	257 # When faced with instruction '40 inc %eax' it will capture the following

	258 # pieces of said instruction: '4{0} inc {%eax}{}'.

	259 #

	260 # Then it will replace groups to produce the following 8 instructions:

	261 # '40 inc %eax'

	262 # '41 inc %ecx'

	263 # '42 inc %edx'

	264 # '43 inc %ebx'

	265 # '44 inc %esp'

	266 # '45 inc %ebp'

	267 # '46 inc %esi'

	268 # '47 inc %edi'

	269 #

	270 # If all these instructions can be found in a set of instructions then

	271 # compressor will remove them from said set and will substitute them with

	272 # one "compressed instruction" '4[0-7] inc [%eax..%edi] # register in opcode'.

	273 class Compressor(object):

	274 __slots__ = [

	275 'regex',

	276 'subst',

	277 'replacements'

	278 ]

	279

	280 def __init__(self, regex, subst, replacements):

	281 self.regex = re.compile(regex)

	282 self.subst = subst

	283 self.replacements = replacements

	284

	285

	286 def CompressionTemplate(instruction, match, mark):

	287 """ Replace all match groups with the mark. """

	288 pos = 0

	289 format_str = ''

	290 for group in range(0, len(match.groups())):

	291 format_str += instruction[pos:match.start(group + 1)] + mark

	292 pos = match.end(group + 1)

	293 return format_str + instruction[pos:]

	294

	295

	296 def CompressOneMatch(instructions, instruction, match, compressor):

	297 format_str = CompressionTemplate(instruction, match, '{}')

	298 subset = set()

	299 for replacement in compressor.replacements:

	300 replacement_str = format_str.format(*replacement)

	301 if not replacement_str in instructions:

	302 return (False, instructions)

	303 subset.add(replacement_str)

	304 assert instruction in subset

	305 instructions -= subset

	306 instructions.add(format_str.format(*compressor.subst))

	307 return (True, instructions)

	308

	309

	310 def CompressOneInstruction(instructions, compressors, split, cache):

	311 sorted_instructions = (sorted(i for i in instructions if i > split) +

	312 sorted(i for i in instructions if i < split))

	313 for instruction in sorted_instructions:

	314 if instruction in cache:

	315 compressors_list = cache[instruction]

	316 for compressor_nr, match, compressor in compressors_list:

	317 result, instructions = CompressOneMatch(

	318 instructions, instruction, match, compressor)

	319 if result:

	320 return (instructions, compressor_nr, instruction)

	321 else:

	322 compressors_list = []

	323 for compressor_nr, compressor in enumerate(compressors):

	324 match = compressor.regex.match(instruction)

	325 if match:

	326 compressors_list.append((compressor_nr, match, compressor))

	327 result, instructions = CompressOneMatch(

	328 instructions, instruction, match, compressor)

	329 if result:

	330 return (instructions, compressor_nr, instruction)

	331 cache[instruction] = compressors_list

	332 return (instructions, False, False)

	333

	334

	335 def Compressed(instructions, compressors, show_progress):

	336 split = ''

	337 cache = {}

	338 while True:

	339 instructions, rule, split = CompressOneInstruction(

	340 instructions, compressors, split, cache)

	341 if rule is False: break

	342 show_progress(rule, split)

	343 return instructions

	344

	345

	346 def Worker((prefix, state_index)):

	347 worker_state = WorkerState(prefix, worker_validator)

	348

	349 try:

	350 dfa_traversal.TraverseTree(

	351 dfa.states[state_index],

	352 final_callback=worker_state.ReceiveInstruction,

	353 prefix=prefix,

	354 anyfield=0)

	355 if (prefix[0] != 0x0f or prefix[1] != 0x0f): # Skip 3DNow! instructions

	356 worker_state.output = Compressed(set(worker_state.output),

	357 compressors,

	358 worker_state.RecordTrace)

	359 except Exception as e:

	360 traceback.print_exc() # because multiprocessing imap swallows traceback

	361 raise

	362

	363 return (

	364 prefix,

	365 worker_state.total_instructions,

	366 worker_state.num_valid,

	367 worker_state.output,

	368 worker_state.trace)

	369

	370

	371 def ParseOptions():

	372 parser = optparse.OptionParser(usage='%prog [options] xmlfile')

	373

	374 parser.add_option('--bitness',

	375 choices=['32', '64'],

	376 help='The subarchitecture: 32 or 64')

	377 parser.add_option('--validator_dll',

	378 help='Path to librdfa_validator_dll')

	379 parser.add_option('--decoder_dll',

	380 help='Path to librdfa_decoder_dll')

	381

	382 options, args = parser.parse_args()

	383 options.bitness = int(options.bitness)

	384

	385 if len(args) != 1:

	386 parser.error('specify one xml file')

	387

	388 (xml_file, ) = args

	389

	390 return options, xml_file

	391

	392

	393 # Version suitable for use in regular expressions

	394 REGISTERS_RE = REGISTERS.copy()

	395 REGISTERS_RE['st(0)'] = [ r'st${}$'.format(N) for N in range(8) ]

	396

	397 # Index names in 'natual' order (as defined by IA32/x86-64 ABI)

	398 INDEXES = {

	399 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'eiz', 'ebp', 'esi', 'edi' ],

	400 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'riz', 'rbp', 'rsi', 'rdi' ],

	401 'r8': [ 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15' ]

	402 }

	403 # Register which can be used as base in 64-bit mode in all their incarnations.

	404 X86_64_BASE_REGISTERS = set([

	405 '%spl', '%bpl', '%r15b',

	406 '%sp', '%bp', '%r15w',

	407 '%esp', '%ebp', '%r15d',

	408 '%rsp', '%rbp', '%r15',

	409 '%rip'

	410 ])

	411

	412

	413 def InstructionIsDangerous(input, output, register_write,

	414 writes_to, memory_accessed=False,

	415 base_text='%riz', index_text='%riz'):

	416 """ Check if instruction with given replacements will be dangerous

	417

	418 Args:

	419 input: input argument

	420 output: output argument

	421 register_write: three-state selector

	422 'sandbox' - instruction can be used to produce "restricted register"

	423 'protect' - instruction can damage output, protect "special registers"

	424 'ignore' - instruction does not affect it's operands (e.g. test) or

	425 is used with non-GP registers (X87, MMX, XMM, etc)

	426 memory_accessed: True if instruction accesses memory

	427 base: base register (if memory is accessed)

	428 index: index register (if memory is accessed)

	429

	430 Returns:

	431 True if instruction should be rejected by validator

	432 """

	433 if memory_accessed:

	434 if base_text not in X86_64_BASE_REGISTERS:

	435 return True

	436 # Surprisingly enough %r15 is considered "valid index register" by our

	437 # validator: it can not be ever produced as "restricted register" which

	438 # means that such instruction can not ever occur in real program (it's

	439 # guaranteed because ACCEPTABLE_X86_64_OUTPUT_REGISTERS excludes it), but

	440 # when instruction is tested in isolation it's allowed.

	441 if index_text in X86_64_BASE_REGISTERS - set(['%r15']):

	442 return True

	443 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:

	444 return True

	445 if register_write == 'sandbox' and output == '%r15d':

	446 return True

	447 if writes_to == 'both' and input in X86_64_BASE_REGISTERS:

	448 return True

	449 return False

	450

	451

	452 def AppendOperandsReplacement(replacement, rm_text, reg, modrm, writes_to):

	453 """ Appends replacement text to replacement list

	454

	455 Args:

	456 replacement: replacement list

	457 rm_text: replacement for rm field

	458 reg: register kind (or None if reg field is used as opcode extension)

	459 modrm: modrm byte

	460 writes_to: three-state selector

	461 'reg' - instruction uses rm as source, reg as destination

	462 'rm' - instruction uses reg as source, rm as destination

	463 'both' - instruction writes to both reg and rm

	464

	465 Returns:

	466 input: textual representation of input argument

	467 output: textual representation of output argument

	468

	469 Side-effect:

	470 output (if reg is None) or (input, output) tuple (if reg is not None)

	471 are added to replacement list.

	472 """

	473 if reg is None:

	474 assert writes_to == 'rm'

	475 input, output = None, rm_text

	476 replacement.append(output)

	477 else:

	478 reg_field = (modrm >> 3) & 0x07

	479 reg_text = '%' + REGISTERS[reg][reg_field]

	480 if writes_to == 'reg':

	481 input, output = rm_text, reg_text

	482 else: # rm, both

	483 input, output = reg_text, rm_text

	484 replacement.extend([input, output])

	485 return input, output

	486

	487

	488 def ModRMRegisterReplacements(rm, reg=None, writes_to='rm', opcode_bits=0,

	489 register_write='ignore', note=''):

	490 """Creates replacement tuples list for register-to-register instructions

	491

	492 Args:

	493 rm: rm operand kind (see REGISTERS array)

	494 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	495 writes_to: three-state selector

	496 'reg' - instruction uses rm as source, reg as destination

	497 'rm' - instruction uses reg as source, rm as destination

	498 'both' - instruction writes to both reg and rm

	499 opcode_bits: opcode extensions code (used when reg is None)

	500 register_write: three-state selector

	501 'sandbox' - instruction can be used to produce "restricted register"

	502 'protect' - instruction can damage output, protect "special registers"

	503 'ignore' - instruction does not affect it's operands (e.g. test) or

	504 is used with non-GP registers (X87, MMX, XMM, etc)

	505 note: note to include at the end of input instruction

	506 Returns:

	507 List of replacement tuples

	508 """

	509 # Reg field can be used either as reg or as opcode extension, but not both

	510 assert reg is None or opcode_bits == 0

	511

	512 output_key = (options.bitness, reg, rm, writes_to, opcode_bits,

	513 register_write)

	514 if output_key in ModRMRegisterReplacements.replacements:

	515 return ModRMRegisterReplacements.replacements[output_key]

	516

	517 replacements = []

	518

	519 # Two upper bits of ModR/M byte (mod field) must be equal to 11

	520 # This gives us range from 0xc0 to 0xff but we are going from the

	521 # end to make rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).

	522 if reg is None:

	523 # reg field is used as opcode extension

	524 byte_range = [byte

	525 for byte in range(0xff, 0xbf, -1)

	526 if (byte >> 3) & 0x7 == opcode_bits]

	527 else:

	528 byte_range = range(0xff, 0xbf, -1)

	529

	530 for modrm in byte_range:

	531 rm_field = (modrm & 0x07)

	532 rm_text = '%' + REGISTERS[rm][rm_field]

	533 byte_text = '{:02x}'.format(modrm)

	534 replacement = [byte_text]

	535 input, output = AppendOperandsReplacement(

	536 replacement, rm_text, reg, modrm, writes_to)

	537 if options.bitness == 64:

	538 replacement.append('any_nonspecial') # input_rr

	539 replacement.append(output if register_write == 'sandbox' else None)

	540 if InstructionIsDangerous(input, output, register_write, writes_to):

	541 continue

	542 replacement.append(note)

	543 replacements.append(tuple(replacement))

	544 ModRMRegisterReplacements.replacements[output_key] = tuple(replacements)

	545 return ModRMRegisterReplacements.replacements[output_key]

	546 ModRMRegisterReplacements.replacements = {}

	547

	548

	549 def BaseOnlyMemoryOperand(modrm, base):

	550 """Creates replacement tuples list for register-to-memory instructions

	551 (base only, no SIB)

	552

	553 Args:

	554 modrm: modrm byte

	555 base: register kind for base

	556 Returns:

	557 bytes_text: replacement for "bytes" group

	558 rm_text: textual representation of "rm" argument

	559 base_text: textual representation of "base" register

	560 """

	561 mod_field = (modrm >> 6) & 0x03

	562 rm_field = (modrm & 0x07)

	563 base_text = '%' + REGISTERS[base][rm_field]

	564 # If RM field == %rbp and MOD field is zero then it's absolute address

	565 # in 32-bit mode and %rip-based address in 64-bit mode

	566 if mod_field == 0 and rm_field == validator.REG_RBP:

	567 bytes_text = '{:02x} 00 00 00 00'.format(modrm)

	568 rm_text = '0x0' if options.bitness == 32 else '0x0(%rip)'

	569 base_text = '%eiz' if options.bitness == 32 else '%rip'

	570 # Memory access with just a base register

	571 elif mod_field == 0:

	572 bytes_text = '{:02x}'.format(modrm)

	573 rm_text = '({})'.format(base_text)

	574 # Memory access with base and 8bit offset

	575 elif mod_field == 1:

	576 bytes_text = '{:02x} 00'.format(modrm)

	577 rm_text = '0x0({})'.format(base_text)

	578 # Memory access with base and 32bit offset

	579 else: # mod_field == 2

	580 bytes_text = '{:02x} 00 00 00 00'.format(modrm)

	581 rm_text = '0x0({})'.format(base_text)

	582 return bytes_text, rm_text, base_text

	583

	584

	585 def SIBMemoryOperand(modrm, sib, base, index):

	586 """Creates replacement tuples list for register-to-memory instructions

	587 (base only, no SIB)

	588

	589 Args:

	590 modrm: modrm byte

	591 base: register kind for base

	592 Returns:

	593 bytes_text: replacement for "bytes" group

	594 rm_text: textual representation of "rm" argument

	595 base_text: textual representation of "base" register

	596 index_text: textual representation of "index" register

	597 """

	598 mod_field = (modrm >> 6) & 0x03

	599 scale_field = (sib >> 6) & 0x03

	600 index_field = (sib >> 3) & 0x07

	601 base_field = (sib & 0x07)

	602 index_text = '%' + INDEXES[index][index_field]

	603 base_text = '%' + REGISTERS[base][base_field]

	604 scale_text = str(1 << scale_field)

	605 # If BASE is %rbp and MOD == 0 then index with 32bit offset is used

	606 if mod_field == 0 and base_field == validator.REG_RBP:

	607 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	608 if (options.bitness == 64 and

	609 index_text == '%riz' and

	610 scale_text == '1'):

	611 rm_text = '0x0'

	612 else:

	613 rm_text = '0x0(,{},{})'.format(index_text, scale_text)

	614 # There are no base in this case

	615 base_text = '%eiz' if options.bitness == 32 else '%riz'

	616 # Memory access with base and index (no offset)

	617 elif mod_field == 0:

	618 bytes_text = '{:02x} {:02x}'.format(modrm, sib)

	619 rm_text = '({},{},{})'.format(base_text, index_text, scale_text)

	620 # Memory access with base, index and 8bit offset

	621 elif mod_field == 1:

	622 bytes_text = '{:02x} {:02x} 00'.format(modrm, sib)

	623 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	624 # Memory access with base, index and 32bit offset

	625 elif mod_field == 2:

	626 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	627 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	628 # Pretty-printing of access via %rsp (or %r12)

	629 if (base_field == validator.REG_RSP and

	630 index_text in ('%eiz', '%riz') and

	631 scale_text == '1'):

	632 if mod_field == 0: # no offset

	633 rm_text = '({})'.format(base_text)

	634 else: # 8-bit or 32-bit offset

	635 rm_text = '0x0({})'.format(base_text)

	636 return bytes_text, rm_text, base_text, index_text

	637

	638

	639 def ModRMMemoryReplacements(reg=None, writes_to='rm', opcode_bits=0,

	640 memory_accessed=True, register_write='ignore',

	641 base_r8=False, index_r8=False, note=''):

	642 """Creates replacement tuples list for register-to-memory instructions

	643

	644 Args:

	645 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	646 writes_to: three-state selector

	647 'reg' - instruction uses rm as source, reg as destination

	648 'rm' - instruction uses reg as source, rm as destination

	649 'both' - instruction writes to both reg and rm

	650 opcode_bits: opcode extensions code (used when reg is None)

	651 memory_accessed: True if instruction accesses memory

	652 register_write: three-state selector

	653 'sandbox' - instruction can be used to produce "restricted register"

	654 'protect' - instruction can damage output, protect "special registers"

	655 'ignore' - instruction does not affect it's operands (e.g. test) or

	656 is used with non-GP registers (X87, MMX, XMM, etc)

	657 index_r8: True if REX.X bit in the instruction set to 1

	658 note: note to include at the end of input instruction

	659

	660 Returns:

	661 List of replacement tuples

	662 """

	663 # Reg field can be used either as reg or as opcode extension, but not both

	664 assert reg is None or opcode_bits == 0

	665

	666 output_key = (options.bitness, reg, writes_to, opcode_bits,

	667 base_r8, index_r8, memory_accessed, register_write)

	668 if output_key in ModRMMemoryReplacements.replacements:

	669 return ModRMMemoryReplacements.replacements[output_key]

	670

	671 if options.bitness == 32:

	672 base = 'eax'

	673 index = 'eax'

	674 else:

	675 base = 'r8' if base_r8 else 'rax'

	676 index = 'r8' if index_r8 else 'rax'

	677

	678 replacements = []

	679

	680 # Two upper bits of ModR/M byte (mod field) must be equal to 00, 01, or 10

	681 # This gives us range from 0x00 to 0xbf but we are going from the end to make

	682 # rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).

	683 if reg is None:

	684 # reg field is used as opcode extension

	685 byte_range = [byte

	686 for byte in range(0xbf, -1, -1)

	687 if (byte >> 3) & 0x7 == opcode_bits]

	688 else:

	689 byte_range = range(0xbf, -1, -1)

	690

	691 for modrm in byte_range:

	692 # If RM field != %rsp then there are no SIB byte

	693 if (modrm & 0x07) != validator.REG_RSP:

	694 bytes_text, rm_text, base_text = BaseOnlyMemoryOperand(modrm, base)

	695 replacement = [bytes_text]

	696 input, output = AppendOperandsReplacement(

	697 replacement, rm_text, reg, modrm, writes_to)

	698 if options.bitness == 64:

	699 replacement.append('any_nonspecial')

	700 # If writes_to is equal to 'reg' then output is register

	701 if writes_to == 'reg' and register_write == 'sandbox':

	702 replacement.append(output)

	703 else:

	704 # Note that instruction like xchg could write to another register:

	705 # it's "input"! But validator currently does not support this case

	706 # thus we are ignoring it here and writing "None" in this case, too.

	707 replacement.append(None)

	708 if InstructionIsDangerous(input, output, register_write, writes_to,

	709 memory_accessed, base_text):

	710 continue

	711 replacement.append(note)

	712 replacements.append(tuple(replacement))

	713 else:

	714 # If RM field == %rsp then we have SIB byte

	715 for sib in xrange(0x100):

	716 bytes_text, rm_text, base_text, index_text = SIBMemoryOperand(

	717 modrm, sib, base, index)

	718 replacement = [bytes_text]

	719 input, output = AppendOperandsReplacement(

	720 replacement, rm_text, reg, modrm, writes_to)

	721 if options.bitness == 64:

	722 if not memory_accessed or index_text == '%riz':

	723 replacement.append('any_nonspecial')

	724 else:

	725 if index_r8:

	726 # Convert %r8 to %r8d, %r9 to %r9d, etc

	727 replacement.append(index_text + 'd')

	728 else:

	729 # Convert %rax to %eax, %rsp to %esp, etc

	730 replacement.append('%e' + index_text[2:])

	731 # If writes_to is equal to 'reg' then output is register

	732 if writes_to == 'reg' and register_write == 'sandbox':

	733 replacement.append(output)

	734 else:

	735 # Note that instruction like xchg could write to another register:

	736 # it's "input"! But validator currently does not support this case

	737 # thus we are ignoring it here and writing "None" in this case, too.

	738 replacement.append(None)

	739 if InstructionIsDangerous(input, output, register_write, writes_to,

	740 memory_accessed, base_text, index_text):

	741 continue

	742 replacement.append(note)

	743 replacements.append(tuple(replacement))

	744 ModRMMemoryReplacements.replacements[output_key] = tuple(replacements)

	745 return ModRMMemoryReplacements.replacements[output_key]

	746 ModRMMemoryReplacements.replacements = {}

	747

	748

	749 # Map from "REX bit off" group of registers to "REX bit on" group of registers

	750 r8 = {

	751 'al': 'r8b',

	752 'ax': 'r8w',

	753 'eax': 'r8d',

	754 'rax': 'r8',

	755 'mm0': 'mmalt',

	756 'xmm0': 'xmm8',

	757 'ymm0': 'ymm8'

	758 }

	759

	760

	761 def RegisterKinds():

	762 """ Return list of register kinds we process with register compressors """

	763

	764 if options.bitness == 32:

	765 return ('al', 'ax', 'eax', 'mm0', 'xmm0', 'ymm0')

	766 else:

	767 return ('al', 'spl', 'ax', 'eax', 'rax', 'mm0', 'xmm0', 'ymm0',

	768 'r8b', 'r8w', 'r8d', 'r8', 'mmalt', 'xmm8', 'ymm8')

	769

	770

	771 def RegisterKindPairs():

	772 """ Return hand-picked pairs which we must consider in compressors """

	773

	774 if options.bitness == 32:

	775 return (

	776 ( 'al', 'al'),

	777 ( 'ax', 'al'),

	778 ( 'al', 'ax'),

	779 ( 'ax', 'ax'),

	780 ( 'eax', 'al'),

	781 ( 'al', 'eax'),

	782 ( 'eax', 'ax'),

	783 ( 'ax', 'eax'),

	784 ( 'eax', 'eax'),

	785 ( 'eax', 'mm0'),

	786 ( 'mm0', 'eax'),

	787 ( 'eax', 'xmm0'),

	788 ('xmm0', 'eax'),

	789 ( 'mm0', 'mm0'),

	790 ( 'mm0', 'xmm0'),

	791 ('xmm0', 'mm0'),

	792 ('xmm0', 'xmm0'),

	793 ('xmm0', 'ymm0'),

	794 ('ymm0', 'xmm0'),

	795 ('ymm0', 'ymm0')

	796 )

	797 else:

	798 return (

	799 ( 'al', 'al'),

	800 ( 'spl', 'spl'), ( 'spl', 'r8b'), ( 'r8b', 'spl'), ( 'r8b', 'r8b'),

	801 ( 'ax', 'al'),

	802 ( 'ax', 'spl'), ( 'ax', 'r8b'), ( 'r8w', 'spl'), ( 'r8w', 'r8b'),

	803 ( 'al', 'ax'),

	804 ( 'spl', 'ax'), ( 'spl', 'r8w'), ( 'r8b', 'ax'), ( 'r8b', 'r8w'),

	805 ( 'ax', 'ax'), ( 'ax', 'r8w'), ( 'r8w', 'ax'), ( 'r8w', 'r8w'),

	806 ( 'eax', 'al'),

	807 ( 'eax', 'spl'), ( 'eax', 'r8b'), ( 'r8d', 'spl'), ( 'r8d', 'r8b'),

	808 ( 'al', 'eax'),

	809 ( 'spl', 'eax'), ( 'spl', 'r8d'), ( 'r8b', 'eax'), ( 'r8b', 'r8d'),

	810 ( 'eax', 'ax'), ( 'eax', 'r8w'), ( 'r8d', 'ax'), ( 'r8d', 'r8w'),

	811 ( 'ax', 'eax'), ( 'ax', 'r8d'), ( 'r8w', 'eax'), ( 'r8w', 'r8d'),

	812 ( 'eax', 'eax'), ( 'eax', 'r8d'), ( 'r8d', 'eax'), ( 'r8d', 'r8d'),

	813 ( 'rax', 'al'),

	814 ( 'rax', 'spl'), ( 'rax', 'r8b'), ( 'r8', 'spl'), ( 'r8', 'r8b'),

	815 ( 'al', 'rax'),

	816 ( 'spl', 'rax'), ( 'spl', 'r8'), ( 'r8b', 'rax'), ( 'r8b', 'r8'),

	817 ( 'rax', 'ax'), ( 'rax', 'r8w'), ( 'r8', 'ax'), ( 'r8', 'r8w'),

	818 ( 'ax', 'rax'), ( 'ax', 'r8'), ( 'r8w', 'rax'), ( 'r8w', 'r8'),

	819 ( 'rax', 'eax'), ( 'rax', 'r8d'), ( 'r8', 'eax'), ( 'r8', 'r8d'),

	820 ( 'eax', 'rax'), ( 'eax', 'r8'), ( 'r8d', 'rax'), ( 'r8d', 'r8'),

	821 ( 'rax', 'rax'), ( 'rax', 'r8'), ( 'r8', 'rax'), ( 'r8', 'r8'),

	822 ( 'eax', 'mm0'), ( 'eax','mmalt'), ( 'r8d', 'mm0'), ( 'eax', 'mmalt'),

	823 ( 'rax', 'mm0'), ( 'rax','mmalt'), ( 'r8', 'mm0'), ( 'r8', 'mmalt'),

	824 ( 'mm0', 'eax'), ('mmalt', 'eax'), ( 'mm0', 'r8d'), ('mmalt', 'r8d'),

	825 ( 'mm0', 'rax'), ('mmalt', 'rax'), ( 'mm0', 'r8'), ('mmalt', 'r8'),

	826 ( 'eax', 'xmm0'), ( 'eax', 'xmm8'), ( 'r8d', 'xmm0'), ( 'r8d', 'xmm8'),

	827 ( 'rax', 'xmm0'), ( 'rax', 'xmm8'), ( 'r8', 'xmm0'), ( 'r8', 'xmm8'),

	828 ('xmm0', 'eax'), ('xmm0', 'r8d'), ('xmm8', 'eax'), ('xmm8', 'r8d'),

	829 ('xmm0', 'rax'), ('xmm0', 'r8'), ('xmm8', 'rax'), ('xmm8', 'r8'),

	830 ( 'mm0', 'mm0'), ('mmalt', 'mm0'), ( 'mm0','mmalt'), ('mmalt','mmalt'),

	831 ( 'mm0', 'xmm0'), ('mmalt','xmm0'), ( 'mm0', 'xmm8'), ('mmalt', 'xmm8'),

	832 ('xmm0', 'mm0'), ('xmm8', 'mm0'), ('xmm0','mmalt'), ('xmm8', 'mmalt'),

	833 ('xmm0', 'xmm0'), ('xmm0', 'xmm8'), ('xmm8', 'xmm0'), ('xmm8', 'xmm8'),

	834 ('xmm0', 'ymm0'), ('xmm0', 'ymm8'), ('xmm8', 'ymm0'), ('xmm8', 'ymm8'),

	835 ('ymm0', 'xmm0'), ('ymm0', 'xmm8'), ('ymm8', 'xmm0'), ('ymm8', 'xmm8'),

	836 ('ymm0', 'ymm0'), ('ymm0', 'ymm8'), ('ymm8', 'ymm0'), ('ymm8', 'ymm8')

	837 )

	838

	839

	840 def RegisterRegex(register_kind, opcode_bits = 0):

	841 """ Returns modrm regex for the first register """

	842

	843 regex_mod = '^.?({:02x})'.format(0xc0 + opcode_bits 8)

	844 regex_rm = '(%' + REGISTERS[register_kind][0] + ')'

	845 return regex_mod, regex_rm

	846

	847

	848 def MemoryRegex(base_r8, index_r8, opcode_bits = 0):

	849 """ Returns one regex for bytes and one for the textual representation. """

	850

	851 # We only need to process ModR/M+SIB '04 04' or '04 07' here. We only match

	852 # memory operands without offset, so we don't need to add zeros to the end of

	853 # regex_modrm and regex_modrm regular expressions.

	854 #

	855 # We pick version which is "always there" (in both 32bit and 64bit mode) and

	856 # which is not 0x00 (to avoid confusion with offsets and immediates).

	857 if base_r8:

	858 regex_modrm_sib = '^.?({:02x} 07)'.format(0x04 + opcode_bits 8)

	859 else:

	860 regex_modrm_sib = '^.?({:02x} 04)'.format(0x04 + opcode_bits 8)

	861 if options.bitness == 32:

	862 regex_mem = r'($%esp,%eax,1$)'

	863 elif base_r8:

	864 regex_mem = r'($%r15,%r8,1$)' if index_r8 else r'($%r15,%rax,1$)'

	865 else:

	866 regex_mem = r'($%rsp,%r8,1$)' if index_r8 else r'($%rsp,%rax,1$)'

	867 return regex_modrm_sib, regex_mem

	868

	869

	870 def RegToRmCompressors(rm, reg, writes_to, memory_accessed=True,

	871 register_write='ignore', index_r8=False,

	872 rm_compression='register', is_3Dnow=False, notes=''):

	873 """Returns a list of reg <-> rm compressors for a given set of parameters.

	874

	875 Args:

	876 rm: rm operand kind (see REGISTERS array)

	877 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	878 writes_to: three-state selector

	879 'reg' - instruction uses rm as source, reg as destination

	880 'rm' - instruction uses reg as source, rm as destination

	881 'both' - instruction writes to both reg and rm

	882 memory_accessed: True if instruction accesses memory

	883 register_write: three-state selector

	884 'sandbox' - instruction can be used to produce "restricted register"

	885 'protect' - instruction can damage output, protect "special registers"

	886 'ignore' - instruction does not affect it's operands (e.g. test) or

	887 is used with non-GP registers (X87, MMX, XMM, etc)

	888 index_r8: True if REX.X bit in the instruction set to 1

	889 rm_compression: three-state selector

	890 'register' - instruction supports register-only rm parameter

	891 'memory' - instruction supports memory-only rm parameter

	892 'register_or_memory' - instruction supports all kinds of rm parameters

	893 is_3DNow - True if instruction is 3DNow! style (opcode in immidiate)

	894 notes - Notes to add to the description

	895

	896 Returns:

	897 List of compressors

	898 """

	899

	900 start_reg = REGISTERS[reg][0]

	901 end_reg = REGISTERS[reg][-1]

	902 # Exclude r15 from the interval, if instruction can write to reg.

	903 if (end_reg.startswith('r15') and register_write != 'ignore' and

	904 writes_to in ('reg', 'both')):

	905 end_reg = REGISTERS[reg][-2]

	906 start_rm = REGISTERS[rm][0]

	907 end_rm = REGISTERS[rm][-1]

	908 # Exclude r15 from the interval, if instruction can write to rm.

	909 if (end_rm.startswith('r15') and register_write != 'ignore' and

	910 writes_to in ('rm', 'both')):

	911 end_rm = REGISTERS[rm][-2]

	912 # Prepare substitution texts

	913 subst_reg = '[%{}..%{}]'.format(start_reg, end_reg)

	914 if rm_compression == 'register':

	915 subst_rm = '[%{}..%{}]'.format(start_rm, end_rm)

	916 elif rm_compression == 'register_or_memory':

	917 subst_rm = '[%{}..%{} or memory]'.format(start_rm, end_rm)

	918 else:

	919 assert rm_compression == 'memory'

	920 subst_rm = '[memory]'

	921 base_r8 = rm in r8.values()

	922 regex, regex_reg = RegisterRegex(reg)

	923 if rm_compression == 'register':

	924 regex, regex_rm = RegisterRegex(rm)

	925 else: # memory and register_or_memory

	926 regex, regex_rm = MemoryRegex(base_r8, index_r8)

	927 if is_3Dnow:

	928 # Immediate byte is opcode extension with 3DNow! instructions.

	929 regex += ' [0-9a-fA-F][0-9a-fA-F]'

	930 else:

	931 # Accept immediate (which is always zeros in our case).

	932 regex += '(?: 00)*'

	933 # 2 spaces separate byte code from instruction name.

	934 regex += ' '

	935 # Optional "lock" prefix.

	936 regex += '(?:lock )?'

	937 # Instruction name.

	938 regex += r'\w* '

	939 # Immediate or implicit operand that can precede reg/rm pair.

	940 regex += r'(?:\$0x0,\|\$0x0,\$0x0,\|%cl,\|%xmm0,)?'

	941 regex_output_rr = None

	942 subst_output_rr = None

	943 if writes_to == 'reg':

	944 regex += regex_rm + ',' + regex_reg

	945 if register_write == 'sandbox':

	946 assert reg in ('eax', 'r8d')

	947 regex_output_rr = '%' + reg

	948 subst_output_rr = '[%eax..%edi]' if reg == 'eax' else '[%r8d..%r14d]'

	949 subst = ('XX', subst_rm, '[%{}..%{}]'.format(start_reg, end_reg))
	halyavin 2013/12/10 08:03:34 You created subst_reg which you can use here. You created subst_reg which you can use here. khim 2013/12/10 21:06:39 Done. Show quoted text On 2013/12/10 08:03:34, halyavin wrote: > You created subst_reg which you can use here. Done.
	950 else:

	951 regex += regex_reg + ',' + regex_rm

	952 if register_write == 'sandbox':

	953 assert rm in ('eax', 'r8d')

	954 # Instruction can be sandboxing one and yet produce output_rr == None

	955 # This looks strange, but it's perfectly logical because we can't ever

	956 # sandbox memory!

	957 if rm_compression == 'register':

	958 regex_output_rr = '%' + rm

	959 if rm_compression != 'memory':

	960 subst_output_rr = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'

	961 subst = ('XX', '[%{}..%{}]'.format(start_reg, end_reg), subst_rm)
	halyavin 2013/12/10 08:03:34 The same. The same. khim 2013/12/10 21:06:39 Done. Show quoted text On 2013/12/10 08:03:34, halyavin wrote: > The same. Done.
	962 # Skip implicit arguments (if any)

	963 regex += '.*'

	964 if options.bitness == 64:

	965 if memory_accessed and rm_compression != 'register':

	966 regex += '; input_rr=(%r8d)' if index_r8 else '; input_rr=(%eax)'

	967 subst_input_rr = '[%r8d..%r15d]' if index_r8 else '[%eax..%edi]'

	968 else:

	969 regex += '; input_rr=(any_nonspecial)'

	970 subst_input_rr = 'any_nonspecial'

	971 regex += '; output_rr=({})'.format(regex_output_rr)

	972 subst = subst + (subst_input_rr, subst_output_rr)

	973 # With "or memory" or "memory" compressors we always can see where is

	974 # reg and where is rm, but with reg to rm or rm to reg it's impossible.

	975 # Add the appropriate comment

	976 notes = () if notes == '' else (notes, )

	977 if rm_compression == 'register':

	978 notes += ('rm to reg' if writes_to == 'reg' else 'reg to rm', )

	979 notes = '; '.join(notes)

	980 if notes != '':

	981 notes = ' # ' + notes

	982 subst += (notes, )

	983 regex += '()$'

	984 replacement = ()

	985 if rm_compression != 'memory':

	986 replacement += ModRMRegisterReplacements(

	987 reg=reg, rm=rm, writes_to=writes_to, register_write=register_write)

	988 if rm_compression != 'register':

	989 replacement += ModRMMemoryReplacements(

	990 reg=reg, base_r8=base_r8, index_r8=index_r8, writes_to=writes_to,

	991 memory_accessed=memory_accessed, register_write=register_write)

	992 return Compressor(regex, subst, replacement)

	993

	994

	995 def AllRegToRmCompressors():

	996 """ Return list of all Reg to RM (and RM to Reg) compressors. """

	997

	998 compressors = []

	999

	1000 for reg, rm in RegisterKindPairs():

	1001 instruction_kinds = [

	1002 # Normal instructions with two operands (rm to reg)

	1003 {'writes_to': 'reg'},

	1004 # Normal instructions with two operands (reg to rm)

	1005 {'writes_to': 'rm'}

	1006 ]

	1007 # In 64 bit mode we have many different types of instructions depending

	1008 # on whether we are accessing memory or whether we are writing to registers.

	1009 if options.bitness == 64:

	1010 # Lea in 64 bit mode is truly unique instruction for now

	1011 if reg in ('ax', 'r8w', 'eax', 'r8d', 'rax', 'r8'):

	1012 register_write = 'sandbox' if reg in ('eax', 'r8d') else 'protect'

	1013 instruction_kinds.append(

	1014 {'writes_to': 'reg',

	1015 'memory_accessed': False,

	1016 'register_write': register_write,

	1017 'notes': 'lea'})

	1018 # There are few more forms in 64 bit case (rm to reg)

	1019 if reg in ('eax', 'r8d'):

	1020 # Zero-extending version.

	1021 instruction_kinds.append(

	1022 {'writes_to': 'reg',

	1023 'register_write': 'sandbox'})

	1024 # More forms in 64 bit case (reg to rm)

	1025 if rm in ('eax', 'r8d'):

	1026 # Zero-extending version.

	1027 instruction_kinds.append(

	1028 {'writes_to': 'rm',

	1029 'register_write': 'sandbox'})

	1030 # Zero-extending xchg/xadd

	1031 instruction_kinds.append(

	1032 {'writes_to': 'both',

	1033 'register_write': 'sandbox',

	1034 'notes': 'write to both'})

	1035 # Still more forms for 64 bit case (rm to reg).

	1036 if reg in ('al', 'spl', 'ax', 'eax', 'rax', 'r8b', 'r8w', 'r8d', 'r8'):

	1037 # Dangerous instructions (rm to reg)

	1038 instruction_kinds.append(

	1039 {'writes_to': 'reg',

	1040 'register_write': 'protect'})

	1041 # Still more forms for 64 bit case (reg to rm)

	1042 if rm in ('al', 'spl', 'ax', 'eax', 'rax', 'r8b', 'r8w', 'r8d', 'r8'):

	1043 # Dangerous instructions (reg to rm)

	1044 instruction_kinds.append(

	1045 {'writes_to': 'rm',

	1046 'register_write': 'protect'})

	1047 # Dangerous xchg/xadd

	1048 instruction_kinds.append(

	1049 {'writes_to': 'both',

	1050 'register_write': 'protect',

	1051 'notes': 'write to both'})

	1052 # 3DNow! instructions

	1053 if reg in ('mm0', 'mmalt') or rm in ('mm0', 'mmalt'):

	1054 instruction_kinds.append(

	1055 {'writes_to': 'reg',

	1056 'is_3Dnow': True})

	1057 for instruction_kind in instruction_kinds:

	1058 for rm_compression in ('register', 'memory', 'register_or_memory'):

	1059 if rm_compression == 'register':

	1060 # Register-to-register instructions can not access memory anyway

	1061 # which means special compressors are not needed.

	1062 if 'memory_accessed' in instruction_kind:

	1063 continue

	1064 elif options.bitness == 64:

	1065 # We generate 2 variants of compressors with memory operands to

	1066 # enumerate 2 possible values of index_r8 property. Register only

	1067 # instructions have only one version since they don't use index.

	1068 compressors.append(RegToRmCompressors(

	1069 reg=reg, rm=rm, index_r8=True,

	1070 rm_compression=rm_compression, **instruction_kind))

	1071 # Create "normal" compressor now

	1072 compressors.append(RegToRmCompressors(

	1073 reg=reg, rm=rm,

	1074 rm_compression=rm_compression, **instruction_kind))

	1075 return compressors

	1076

	1077

	1078 def RmCompressors(rm, opcode_bits, memory_accessed=True, index_r8=False,

	1079 rm_compression='register', register_write='ignore'):

	1080 """Returns a list of rm compressors for a given set of parameters.

	1081

	1082 Args:

	1083 rm: rm operand kind (see REGISTERS array)

	1084 memory_accessed: True if instruction accesses memory

	1085 index_r8: True if REX.X bit in the instruction set to 1

	1086 rm_compression: three-state selector

	1087 'register' - instruction supports register-only rm parameter

	1088 'memory' - instruction supports memory-only rm parameter

	1089 'register_or_memory' - instruction supports all kinds of rm parameters

	1090 register_write: three-state selector

	1091 'sandbox' - instruction can be used to produce "restricted register"

	1092 'protect' - instruction can damage output, protect "special registers"

	1093 'ignore' - instruction does not affect it's operands (e.g. test) or

	1094 is used with non-GP registers (X87, MMX, XMM, etc)

	1095

	1096 Returns:

	1097 List of compressors

	1098 """

	1099 start_rm = REGISTERS[rm][0]

	1100 end_rm = REGISTERS[rm][-1]

	1101 # Exclude r15 from the interval, if instruction can write to rm.

	1102 if end_rm.startswith('r15') and register_write != 'ignore':

	1103 end_rm = REGISTERS[rm][-2]

	1104 byte_mark = 'XX/' + str(opcode_bits)

	1105 # Prepare substitution texts

	1106 if rm_compression == 'register':

	1107 subst_rm = '[%{}..%{}]'.format(start_rm, end_rm)

	1108 elif rm_compression == 'register_or_memory':

	1109 subst_rm = '[%{}..%{} or memory]'.format(start_rm, end_rm)

	1110 else:

	1111 assert rm_compression == 'memory'

	1112 subst_rm = '[memory]'

	1113 base_r8 = rm in r8.values()

	1114 if rm_compression == 'register':

	1115 regex, regex_rm = RegisterRegex(rm, opcode_bits)

	1116 else: # memory and register_or_memory

	1117 regex, regex_rm = MemoryRegex(base_r8, index_r8, opcode_bits)

	1118 # Regex here matches everything AFTER the ModR/M (+ SIB) bytes.

	1119 regex += '(?: 00)*'

	1120 # 2 spaces separate byte code from instruction name.

	1121 regex += ' '

	1122 # Optional "lock" prefix.

	1123 regex += '(?:lock )?'

	1124 # Instruction name.

	1125 regex += r'\w* '

	1126 # Immediate or implicit operand that can precede rm argument.

	1127 regex += r'(?:\$0x0,\|%cl,)?'

	1128 # Register or memory access

	1129 regex += regex_rm

	1130 # Skip everything after that

	1131 regex += '.*'

	1132 regex_output_rr = None

	1133 subst_output_rr = None

	1134 subst = (byte_mark, subst_rm)

	1135 if options.bitness == 32:

	1136 subst += ('', )

	1137 else:

	1138 if register_write == 'sandbox':

	1139 assert rm in ('eax', 'r8d')

	1140 # Instruction can be sandboxing one and yet produce output_rr == None

	1141 # This looks strange, but it's perfectly logical because we can't ever

	1142 # sandbox memory!

	1143 if rm_compression == 'register':

	1144 regex_output_rr = '%' + rm

	1145 if rm_compression != 'memory':

	1146 subst_output_rr = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'

	1147 if memory_accessed and rm_compression != 'register':

	1148 regex += '; input_rr=(%r8d)' if index_r8 else '; input_rr=(%eax)'

	1149 subst_input_rr = '[%r8d..%r15d]' if index_r8 else '[%eax..%edi]'

	1150 else:

	1151 regex += '; input_rr=(any_nonspecial)'

	1152 subst_input_rr = 'any_nonspecial'

	1153 regex += '; output_rr=({})'.format(regex_output_rr)

	1154 subst = subst + (subst_input_rr, subst_output_rr, '')

	1155 regex += '()'

	1156 replacement = ()

	1157 if rm_compression != 'memory':

	1158 replacement += ModRMRegisterReplacements(

	1159 rm=rm, opcode_bits=opcode_bits, register_write=register_write)

	1160 if rm_compression != 'register':

	1161 replacement += ModRMMemoryReplacements(

	1162 base_r8=base_r8, index_r8=index_r8, opcode_bits=opcode_bits,

	1163 memory_accessed=memory_accessed, register_write=register_write)

	1164 return Compressor(regex, subst, replacement)

	1165

	1166

	1167 def AllRmCompressors():

	1168 """ Return list of all RM (with reg as opcode extension) compressors. """

	1169 compressors = []

	1170

	1171 for rm in RegisterKinds():

	1172 for opcode_bits in xrange(8):

	1173 instruction_kinds = [

	1174 # The most basic form

	1175 {}

	1176 ]

	1177 # In 64 bit mode we have many different types of instructions depending

	1178 # on whether we are accessing memory or whether we are writing to

	1179 # registers.

	1180 if options.bitness == 64:

	1181 # No memory access (e.g. prefetch)

	1182 instruction_kinds.append(

	1183 {'memory_accessed':False})

	1184 # More forms in 64 bit case.

	1185 if rm in ('eax', 'r8d'):

	1186 # Zero-extending version.

	1187 instruction_kinds.append(

	1188 {'register_write':'sandbox'})

	1189 # Still more forms for 64 bit case (reg to rm).

	1190 if rm in ('al', 'spl', 'ax', 'eax', 'rax',

	1191 'r8b', 'r8w', 'r8d', 'r8'):

	1192 # Dangerous instructions.

	1193 instruction_kinds.append(

	1194 {'register_write':'protect'})

	1195 for instruction_kind in instruction_kinds:

	1196 for rm_compression in ('register', 'memory', 'register_or_memory'):

	1197 if rm_compression == 'register':

	1198 # Register-to-register instructions can not access memory anyway

	1199 # which means special compressors are not needed.

	1200 if 'memory_accessed' in instruction_kind:

	1201 continue

	1202 elif options.bitness == 64:

	1203 # We generate 2 variants of compressors with memory operands to

	1204 # enumerate 2 possible values of index_r8 property. Register only

	1205 # instructions have only one version since they don't use index.

	1206 compressors.append(RmCompressors(

	1207 rm=rm, opcode_bits=opcode_bits, index_r8=True,

	1208 rm_compression=rm_compression, **instruction_kind))

	1209 # Create "normal" compressor now

	1210 compressors.append(RmCompressors(

	1211 rm=rm, opcode_bits=opcode_bits,

	1212 rm_compression=rm_compression, **instruction_kind))

	1213 return compressors

	1214

	1215

	1216 def AllOpcodeCompressors():

	1217 """ Return "register in opcode" compressors. """

	1218 compressors = []

	1219

	1220 for reg in RegisterKinds() + ('st(0)',):

	1221 for opcode in xrange(8):

	1222 for text1, text2, nibble in (

	1223 ('[0..7]', '[8..f]', xrange(8)),

	1224 ('[012367]', '[89abef]', (0, 1, 2, 3, 6, 7)),

	1225 ('[0..6]', '[8..e]', xrange(7))

	1226 ):

	1227 start_reg = REGISTERS[reg][0]

	1228 end_reg = REGISTERS[reg][-1 if 7 in nibble else -2]

	1229 subst_regs = '[%{}..%{}]'.format(start_reg, end_reg)

	1230 # Note that we use instruction with 1st register, not 0th register

	1231 # here to avoid ambiguity when opcode is 0x00

	1232 compressors.append(Compressor(

	1233 r'.?[0-9a-fA-F](1)(?: 00)'

	1234 r' \w* (?:\$0x0,\|%ax,\|%st,)?'

	1235 r'(%' + REGISTERS_RE[reg][1] + ').*',

	1236 (text1, subst_regs),

	1237 [('{:x}'.format(n), '%' + REGISTERS[reg][n])

	1238 for n in nibble]))

	1239 compressors.append(Compressor(

	1240 r'.?[0-9a-fA-F](8)(?: 00)'

	1241 r' \w* (?:\$0x0,\|%ax,\|%st,)?'

	1242 r'(%' + REGISTERS_RE[reg][0] + ').*',

	1243 (text2, subst_regs),

	1244 [('{:x}'.format(n + 8), '%' + REGISTERS[reg][n])

	1245 for n in nibble]))

	1246 # Another version for 64 bit case

	1247 if options.bitness == 64 and reg in ('eax', 'r8d'):

	1248 compressors.append(Compressor(

	1249 r'.?[0-9a-fA-F](1)(?: 00)'

	1250 r' \w* (?:\$0x0,\|%ax,\|%st,)?'

	1251 r'(%' + REGISTERS_RE[reg][1] + ').*'

	1252 r'output_rr=(%'+ REGISTERS_RE[reg][1] + ').*',

	1253 (text1, subst_regs, subst_regs),

	1254 [['{:x}'.format(n)] + ['%' + REGISTERS[reg][n]] * 2

	1255 for n in nibble]))

	1256 compressors.append(Compressor(

	1257 r'.?[0-9a-fA-F](8)(?: 00)'

	1258 r' \w* (?:\$0x0,\|%ax,\|%st,)?'

	1259 r'(%' + REGISTERS_RE[reg][0] + ').*'

	1260 r'output_rr=(%'+ REGISTERS_RE[reg][0] + ').*',

	1261 (text2, subst_regs, subst_regs),

	1262 [['{:x}'.format(n + 8)] + ['%' + REGISTERS[reg][n]] * 2

	1263 for n in nibble]))

	1264 return compressors

	1265

	1266

	1267 def AllMemoryNonMemoryCompressors():

	1268 """ Return memory/nonmemory compressors. """

	1269

	1270 compressors = []

	1271

	1272 if options.bitness == 32:

	1273 letters_and_registers = (('b', 'al', ''), ('w', 'ax', ''), ('l', 'eax', ''))

	1274 else:

	1275 letters_and_registers = (

	1276 ('b', 'al', 'eax'), ('b', 'spl', 'eax'), ('b', 'r8b', 'r8d'),

	1277 ('w', 'ax', 'eax'), ('w', 'r8w', 'r8d'),

	1278 ('l', 'eax', 'eax'), ('l', 'r8d', 'r8d'),

	1279 ('q', 'rax', 'eax'), ('q', 'r8', 'r8d')

	1280 )

	1281 for letter, reg, out_reg in letters_and_registers:

	1282 start_reg = REGISTERS[reg][0]

	1283 if reg[0:2] == 'r8':
	halyavin 2013/12/10 08:03:34 REGISTERS[reg][-1].startswith('r15') REGISTERS[reg][-1].startswith('r15') khim 2013/12/10 21:06:39 This code is removed compretely. Show quoted text On 2013/12/10 08:03:34, halyavin wrote: > REGISTERS[reg][-1].startswith('r15') This code is removed compretely.
	1284 end_regs = (REGISTERS[reg][-2], REGISTERS[reg][-1])

	1285 else:

	1286 end_regs = (REGISTERS[reg][-1], )

	1287 for end_reg in end_regs:

	1288 all_regs = '[%{}..%{}]'.format(start_reg, end_reg)

	1289 regs_mark = '[%{}..%{} or memory]'.format(start_reg, end_reg)

	1290 if options.bitness == 64:

	1291 start_out = REGISTERS[out_reg][0]

	1292 end_out = REGISTERS[out_reg][-1 if out_reg[0:2] != 'r8' else -2]
	halyavin 2013/12/10 08:03:34 Use end_out.startswith('r15') instead. Use end_out.startswith('r15') instead. khim 2013/12/10 21:06:39 This code is removed completely. Show quoted text On 2013/12/10 08:03:34, halyavin wrote: > Use end_out.startswith('r15') instead. This code is removed completely.
	1293 out_regs = '[%{}..%{}]'.format(start_out, end_out)

	1294 for notes in ('', ' # rm to reg', ' # reg to rm'):

	1295 compressors.append(Compressor(

	1296 r'.* \w(' + letter + r') .(\[memory]).*()',

	1297 ('[{}]?'.format(letter), regs_mark, ''),

	1298 ((letter, '[memory]', ''), ('', all_regs, notes))))

	1299 if options.bitness == 64:

	1300 for index_reg in ('eax', 'r8d'):

	1301 start_index = REGISTERS[index_reg][0]

	1302 end_index = REGISTERS[index_reg][-1]

	1303 index_regs = '[%{}..%{}]'.format(start_index, end_index)

	1304 for output_rrs in ((None, out_regs),

	1305 (out_regs, None),

	1306 (None, None)):

	1307 compressors.append(Compressor(

	1308 r'.* \w(' + letter + r') .(\[memory]).*; '

	1309 r'input_rr=(\[%[a-z0-9]..%[a-z0-9]\]); '

	1310 r'output_rr=(\[%[a-z0-9]..%[a-z0-9]\]\|None)()',

	1311 ('[{}]?'.format(letter), regs_mark, index_regs,

	1312 output_rrs[1] if output_rrs[0] is None else output_rrs[0],

	1313 ''),

	1314 ((letter, '[memory]', index_regs, output_rrs[0], ''),

	1315 ('', all_regs, 'any_nonspecial', output_rrs[1], notes))))

	1316 return compressors

	1317

	1318

	1319 def RexCompressor(rm, rmR15, reg, regR15, rexw, input_rr, output_rr, rm_to_reg):

	1320 """ Return REX compressor (or nothing) for a given set of paramenters.

	1321

	1322 Args:

	1323 rm: rm operand kind (see REGISTERS array) or None if rm is not used

	1324 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	1325 rmR15: True if R15 register is included

	1326 regR15: True if R15 register is included

	1327 rexw: True if REX.W should be set

	1328 input_rr: True if input_rr is used

	1329 output_rr: true if output_rr is used

	1330 rm_to_reg: True if direction is rm to reg

	1331 """

	1332

	1333 # reg and rm can be of three different possible intervals

	1334 # start_reg/rm to end_reg/rm (e.g. from %al to %bh)

	1335 # start_reg/rm to end_reg0/rm0 (e.g from %al to %dil)

	1336 # start_reg8/rm8 to end_reg8/rm8 (e.g. from %r8b to %r15b)

	1337 # First form can be observed if there are not REX, second form

	1338 # if REX.R/REX.B is not set and third form is where it's set.

	1339 if reg:

	1340 start_reg = REGISTERS[reg][0]

	1341 start_reg8 = REGISTERS[r8[reg]][0]

	1342 end_reg = REGISTERS[reg][-1]

	1343 end_reg0 = 'dil' if reg == 'al' else end_reg

	1344 end_reg8 = REGISTERS[r8[reg]][-1 if regR15 else -2]

	1345 if rexw:

	1346 regex_reg = r'\[(%' + start_reg + r'\.\.%' + end_reg0 + ')]'

	1347 else:

	1348 regex_reg = r'\[(%' + start_reg + r'\.\.%' + end_reg + ')]'

	1349 if rm:

	1350 start_rm = REGISTERS[rm][0]

	1351 start_rm8 = REGISTERS[r8[rm]][0]

	1352 end_rm = REGISTERS[rm][-1]

	1353 end_rm0 = 'dil' if rm == 'al' else end_rm

	1354 end_rm8 = REGISTERS[r8[rm]][-1 if rmR15 else -2]

	1355 if rexw:

	1356 regex_rm = (r'\[(%' + start_rm + r'\.\.%' + end_rm0 + ')'

	1357 r'(?: or memory)?]')

	1358 else:

	1359 regex_rm = (r'\[(%' + start_rm + r'\.\.%' + end_rm + ')'

	1360 r'(?: or memory)?]')

	1361

	1362 # Legacy prefixes

	1363 regex = '.:(?: 26\| 2e\| 36\| 3e\| 64\| 65\| 66\| 67\| f0\| f2\| f3)'

	1364 # REX

	1365 regex += '( 48).' if rexw else '( 40\|).'

	1366 # Replacement text

	1367 replacement_tuple = (' [REX:48..4f]' if rexw else ' [REX:40..47]?', )

	1368 if reg: replacement_regs = '%{}..%{}'.format(start_reg, end_reg8)

	1369 if rm: replacement_rms = '%{}..%{}'.format(start_rm, end_rm8)

	1370 # Instruction arguments

	1371 if not reg and not rm:

	1372 pass

	1373 elif not reg and rm:

	1374 regex += regex_rm + '.*'

	1375 replacement_tuple += (replacement_rms, )

	1376 elif reg and not rm:

	1377 regex += regex_reg + '.*'

	1378 replacement_tuple += (replacement_regs, )

	1379 elif rm_to_reg:

	1380 regex += regex_rm + ',' + regex_reg + '.*'

	1381 replacement_tuple += (replacement_rms, replacement_regs)

	1382 else:

	1383 regex += regex_reg + ',' + regex_rm + '.*'

	1384 replacement_tuple += (replacement_regs, replacement_rms)

	1385 # Input and output restricted registers

	1386 if input_rr:

	1387 regex += r'input_rr=\[(%eax\.\.%edi)].*'

	1388 replacement_tuple += ('%eax..%r15d', )

	1389 if output_rr:

	1390 regex += r'output_rr=\[(%eax\.\.%edi)].*'

	1391 replacement_tuple += ('%eax..%r14d', )

	1392 # Replacement cases

	1393 replacement_tuples = ()

	1394 for byte in (range(0x48, 0x50) if rexw else range(0x40, 0x48) + [0]):

	1395 replacement_case = (' {:02x}'.format(byte) if byte else '', )

	1396 if rm:

	1397 if byte & 0x1:

	1398 replacement_rms = '%{}..%{}'.format(start_rm8, end_rm8)

	1399 elif byte:

	1400 replacement_rms = '%{}..%{}'.format(start_rm, end_rm0)

	1401 else:

	1402 replacement_rms = '%{}..%{}'.format(start_rm, end_rm)

	1403 if byte & 0x2:

	1404 replacement_index = '%r8d..%r15d'

	1405 else:

	1406 replacement_index = '%eax..%edi'

	1407 if reg:

	1408 if byte & 0x4:

	1409 replacement_regs = '%{}..%{}'.format(start_reg8, end_reg8)

	1410 elif byte:

	1411 replacement_regs = '%{}..%{}'.format(start_reg, end_reg0)

	1412 else:

	1413 replacement_regs = '%{}..%{}'.format(start_reg, end_reg)

	1414 if not reg and not rm:

	1415 pass

	1416 elif not reg and rm:

	1417 replacement_case += (replacement_rms, )

	1418 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'

	1419 elif reg and not rm:

	1420 replacement_case += (replacement_regs, )

	1421 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'

	1422 elif rm_to_reg:

	1423 replacement_case += (replacement_rms, replacement_regs)

	1424 final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'

	1425 else:

	1426 replacement_case += (replacement_regs, replacement_rms)

	1427 final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'

	1428 if input_rr: replacement_case += (replacement_index, )

	1429 if output_rr: replacement_case += (final_rr, )

	1430 replacement_tuples += (replacement_case, )

	1431 return Compressor(regex, replacement_tuple, replacement_tuples)

	1432

	1433

	1434 def AllRexCompressors():

	1435 """ Return "REX" compressors which combine different REX prefixes. """

	1436

	1437 if options.bitness != 64:

	1438 return []

	1439

	1440 compressors = []

	1441

	1442 # First pretty complex set of compressors to combine versions of REX with

	1443 # three lowest bits in different states.

	1444 register_kind_pairs = (

	1445 ( None, None),

	1446 ( 'al', 'al'), ( 'al', None), (None, 'al'),

	1447 ( 'ax', 'al'), ( 'al', 'ax'),

	1448 ( 'ax', 'ax'), ( 'ax', None), (None, 'ax'),

	1449 ( 'eax', 'al'), ( 'al', 'eax'),

	1450 ( 'eax', 'ax'), ( 'ax', 'eax'),

	1451 ( 'eax', 'eax'), ( 'eax', None), (None, 'eax'),

	1452 ( 'rax', 'al'), ( 'al', 'rax'),

	1453 ( 'rax', 'ax'), ( 'ax', 'rax'),

	1454 ( 'rax', 'eax'), ( 'eax', 'rax'),

	1455 ( 'rax', 'rax'), ( 'rax', None), (None, 'rax'),

	1456 ( 'eax', 'mm0'), ( 'mm0', 'eax'),

	1457 ( 'rax', 'mm0'), ( 'mm0', 'rax'),

	1458 ( 'mm0', 'eax'), ( 'eax', 'mm0'),

	1459 ( 'mm0', 'rax'), ( 'rax', 'mm0'),

	1460 ( 'eax', 'xmm0'),

	1461 ( 'rax', 'xmm0'),

	1462 ('xmm0', 'eax'),

	1463 ('xmm0', 'rax'),

	1464 ( 'mm0', 'mm0'), ( 'mm0', None), (None, 'mm0'),

	1465 ( 'mm0', 'xmm0'),

	1466 ('xmm0', 'mm0'),

	1467 ('xmm0', 'xmm0'),

	1468 ('xmm0', 'ymm0'), ('xmm0', None), (None, 'xmm0'),

	1469 ('ymm0', 'xmm0'),

	1470 ('ymm0', 'ymm0'), ('ymm0', None), (None, 'ymm0'),

	1471 )

	1472 for reg, rm in register_kind_pairs:

	1473 for regR15 in (True, False):

	1474 for rmR15 in (True, False):

	1475 for rexw in (True, False):

	1476 for input_rr in (True, False):

	1477 for output_rr in (True, False):

	1478 for rm_to_reg in (True, False):

	1479 # These combinations will just produce useless duplicates

	1480 if not reg and not regR15: continue

	1481 if not rm and not rmR15: continue

	1482 if not reg and not rm and (output_rr or rm_to_reg): continue

	1483 compressors.append(RexCompressor(

	1484 rm=rm, rmR15=rmR15, reg=reg, regR15=regR15,

	1485 rexw=rexw, input_rr=input_rr, output_rr=output_rr,

	1486 rm_to_reg=rm_to_reg))

	1487

	1488 # This is pretty simple compressor to combine two lines with different REX.W

	1489 # bits (only if they are otherwise identical).

	1490 compressors.append(Compressor(

	1491 r'.(\[REX:40\.\.47]\?).', ('[REX:40..4f]?', ),

	1492 (('[REX:40..47]?', ), ('[REX:48..4f]', ))))

	1493 return compressors

	1494

	1495

	1496 def AllSpecialCompressors():

	1497 """ Return all "special" compressors. """

	1498

	1499 compressors = []

	1500

	1501 # Special compressors: will handle some cosmetic issues.

	1502 #

	1503 # SETxx ignores reg field and thus are described as many separate instructions

	1504 compressors.append(Compressor(

	1505 '.0f 9[0-9a-fA-F] XX(/[0-7]) set.', ('', ),

	1506 [('/' + str(i), ) for i in range(8)]))

	1507 # BSWAP is described with opcode "0f c8+r", not "0f /1" in manual

	1508 if options.bitness == 32:

	1509 compressors.append(Compressor(

	1510 '.(XX/1) bswap.ax.*', ('c[8..f]', ), (('XX/1', ), )))

	1511 else:

	1512 compressors.append(Compressor(

	1513 '.(XX/1) bswap.ax.*', ('c[89abef]', ), (('XX/1', ), )))

	1514 compressors.append(Compressor(

	1515 '.(XX/1) bswap.r8.*', ('c[8..e]', ), (('XX/1', ), )))

	1516 # Add mark '# write to both' to certain versions of CMPXCHG, XADD, and XCHG

	1517 if options.bitness == 64:

	1518 compressors.append(Compressor(

	1519 r'.* (?:cmpxchg\|xadd\|xchg).%al\.\.%bh[^#]()$',

	1520 (' # write to both', ),

	1521 (('', ), )))

	1522 # "and $0xe0,[%eax..%edi]" is treated specially which means that we list all

	1523 # versions of and "[$0x1..$0xff],[%eax..%edi]" separately here.

	1524 # Without this rule these ands comprise 2/3 of the whole output!

	1525 if options.bitness == 32:

	1526 compressors.append(Compressor(

	1527 r'.*83 (e0 01 and \$0x1,%eax)()',

	1528 ('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', ' # special and'),

	1529 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS['eax'][r]),

	1530 '')

	1531 for i in range(0x01, 0x100) for r in range(8)] +

	1532 [('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', '')]))

	1533 else:

	1534 for reg in ('eax', 'r8d'):

	1535 start_reg = REGISTERS[reg][0]

	1536 end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

	1537 for index_reg in ('eax', 'r8d'):

	1538 start_index = REGISTERS[index_reg][0]

	1539 end_index = REGISTERS[index_reg][-1]

	1540 compressors.append(Compressor(

	1541 r'.83 (e0 01 and \$0x1,%' + reg + ').'

	1542 'input_rr=(any_nonspecial); output_rr=(%' + reg + ')()',

	1543 ('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,

	1544 end_reg),

	1545 '[%{}..%{}]'.format(start_index, end_index),

	1546 '[%{}..%{}]'.format(start_reg, end_reg),

	1547 ' # special and'),

	1548 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS[reg][r]),

	1549 'any_nonspecial', '%' + REGISTERS[reg][r], '')

	1550 for i in range(0x01, 0x100) for r in range(7 + (reg == 'eax'))] +

	1551 [('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,

	1552 end_reg),

	1553 '[%{}..%{}]'.format(start_index, end_index),

	1554 '[%{}..%{}]'.format(start_reg, end_reg),

	1555 '')]))

	1556

	1557 # "and $e0" and similar are used to align %rsp. All negative values are

	1558 # accepted by validator and there are 127 of these.

	1559 # Consolidate them into one line.

	1560 if options.bitness == 64:

	1561 compressors.append(Compressor(

	1562 r'.(?:81\|83) (?:e4\|e5) (80) (?:00 00 00 \|) and \$0x(80),%r[bs]p.()',

	1563 ('[80..ff]', '[80..ff]', ' # alignment and'),

	1564 [('{:02x}'.format(i), '{:02x}'.format(i), '')

	1565 for i in range(0x80, 0x100)]))

	1566 return compressors

	1567

	1568

	1569 def PrepareCompressors():

	1570 """ Return list of all compressors sorted from bigger ones to smaller ones """

	1571

	1572 return tuple(sorted(

	1573 AllRegToRmCompressors() +

	1574 AllRmCompressors() +

	1575 AllOpcodeCompressors() +

	1576 AllMemoryNonMemoryCompressors() +

	1577 AllRexCompressors() +

	1578 AllSpecialCompressors(),

	1579 key=lambda compressor: -len(compressor.replacements)))

	1580

	1581

	1582 def ShowProgress(rule, instruction):

	1583 if rule not in ShowProgress.rules_shown:

	1584 first_print = True

	1585 ShowProgress.rules_shown[rule]=len(ShowProgress.rules_shown)

	1586 else:

	1587 first_print = False

	1588 print >> sys.stderr, '-------- Compressed --------'

	1589 print >> sys.stderr, 'Rule:', ShowProgress.rules_shown[rule]

	1590 print >> sys.stderr, '--------'

	1591 compressor = compressors[rule]

	1592 match = compressor.regex.match(instruction)

	1593 assert match

	1594 format_str = CompressionTemplate(instruction, match, '{{{}}}')

	1595 replacements = sorted(format_str.format(*replacement)

	1596 for replacement in compressor.replacements)

	1597 if len(compressor.replacements) <= 4 or first_print:

	1598 for replacement in replacements:

	1599 print >> sys.stderr, replacement

	1600 else:

	1601 print >> sys.stderr, replacements[0]

	1602 print >> sys.stderr, '...'

	1603 print >> sys.stderr, replacements[-1]

	1604 print >> sys.stderr, '--------'

	1605 print >> sys.stderr, 'Compressed', format_str.format(*compressor.subst)

	1606 ShowProgress.rules_shown = {}

	1607

	1608

	1609 def main():

	1610 # We are keeping these global to share state graph and compressors

	1611 # between workers spawned by multiprocess. Passing them every time is slow.

	1612 global options, xml_file

	1613 global dfa

	1614 global worker_validator

	1615 options, xml_file = ParseOptions()

	1616 dfa = dfa_parser.ParseXml(xml_file)

	1617 worker_validator = validator.Validator(

	1618 validator_dll=options.validator_dll,

	1619 decoder_dll=options.decoder_dll)

	1620 global compressors

	1621 compressors = PrepareCompressors()

	1622

	1623 assert dfa.initial_state.is_accepting

	1624 assert not dfa.initial_state.any_byte

	1625

	1626 print >> sys.stderr, len(dfa.states), 'states'

	1627

	1628 num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

	1629

	1630 # We can't just write 'num_suffixes[dfa.initial_state]' because

	1631 # initial state is accepting.

	1632 total_instructions = sum(

	1633 num_suffixes[t.to_state]

	1634 for t in dfa.initial_state.forward_transitions.values())

	1635 print >> sys.stderr, total_instructions, 'regular instructions total'

	1636

	1637 tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)

	1638 print >> sys.stderr, len(tasks), 'tasks'

	1639

	1640 pool = multiprocessing.Pool()

	1641

	1642 results = pool.imap(Worker, tasks)

	1643

	1644 total = 0

	1645 num_valid = 0

	1646 full_output = set()

	1647 for prefix, count, valid_count, output, trace in results:

	1648 print >> sys.stderr, 'Prefix:', ', '.join(map(hex, prefix))

	1649 total += count

	1650 num_valid += valid_count

	1651 full_output \|= output

	1652 for rule, instruction in trace:

	1653 ShowProgress(rule, instruction)

	1654 for instruction in sorted(Compressed(full_output,

	1655 compressors,

	1656 ShowProgress)):

	1657 print instruction

	1658

	1659 print >> sys.stderr, total, 'instructions were processed'

	1660 print >> sys.stderr, num_valid, 'valid instructions'

	1661

	1662

	1663 if __name__ == '__main__':

	1664 main()

OLD	NEW

« no previous file with comments | « no previous file | src/trusted/validator_ragel/testdata/32bit_regular.golden » ('j') | no next file with comments »