src/trusted/validator_ragel/compress_regular_instructions.py - Issue 49183002: Regular instructions golden file test.

Side by Side Diff: src/trusted/validator_ragel/compress_regular_instructions.py

Issue 49183002: Regular instructions golden file test. Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # Copyright (c) 2013 The Native Client Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 """

	6 Traverse the validator's DFA, collect all "normal" instructions and then

	7 compress output. Note: "anybyte fields" (immediates and displacements)

	8 are always filled with zeros. Otherwise processing of sextillions (sic!)

	9 of possibilities will take too long.

	10

	11 Each rule is applied only when all variants are accepted by validator.

	12 The following compression rules are present:

	13

	14 1. Compress ModR/M (+SIB & displacement).

	15 Instruction: 00 00 add %al,(%rax)

	16 ...

	17 Instruction: 00 ff add %bh,%bh

	18 becomes

	19 Instruction: 00 XX add [%al..%bh],[%al..%bh or memory]

	20

	21 1a. Compress ModR/M (+SIB & displacement) memory-only.

	22 Instruction: f0 01 00 lock add %eax,(%eax)

	23 ...

	24 Instruction: f0 01 bf 00 00 00 00 lock add %edi,0x0(%edi)

	25 becomes

	26 Instruction: f0 01 XX lock add [%eax..edi],[memory]

	27

	28 1b. Compress ModR/M register only.

	29 Instruction: 66 0f 50 c0 movmskpd %xmm0,%eax

	30 ...

	31 Instruction: 66 0f 50 ff movmskpd %xmm7,%edi

	32 becomes

	33 Instruction: 66 0f 50 XX movmskpd [%xmm0..%xmm7],[%eax..edi]

	34

	35 2. Compress ModR/M (+SIB & displacement) with opcode extension.

	36 Instruction: 0f 90 00 seto (%eax)

	37 ...

	38 Instruction: 0f 90 c7 seto %bh

	39 becomes

	40 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	41

	42 2a. Compress ModR/M (+SIB & displacement) memory-only with opcode extension.

	43 Instruction: f0 ff 00 lock incl (%eax)

	44 ...

	45 Instruction: f0 ff 84 ff 00 00 00 00 lock incl 0x0(%edi,%edi,8)

	46 becomes

	47 Instruction: f0 ff XX/1 lock decl [memory]

	48

	49 2b. Compress ModR/M register-only with opcode extension.

	50 Instruction: 0f 71 d0 00 psrlw $0x0,%mm0

	51 ...

	52 Instruction: 0f 71 d7 00 psrlw $0x0,%mm7

	53 becomes

	54 Instruction: 66 0f 71 XX/2 00 psrlw $0x0,[%mm0..%mm7]

	55

	56 3. Compress register-in-opcode.

	57 Instruction: d9 c0 fld %st(0)

	58 ...

	59 Instruction: d9 c7 fld %st(7)

	60 becomes

	61 Instruction: Instruction: d9 c[0..7] fld [%st(0)..%st(7)]

	62

	63 Only applies if all possible register accesses are accepted by validator.

	64

	65 4. Special compressor for "set" instruction.

	66 Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

	67 ...

	68 Instruction: 0f 90 XX/7 seto [%al..%bh or memory]

	69 becomes

	70 Instruction: 0f 90 XX seto [%al..%bh or memory]

	71 """

	72

	73 import itertools

	74 import multiprocessing

	75 import optparse

	76 import os

	77 import re

	78 import subprocess

	79 import sys

	80 import tempfile

	81 import traceback

	82

	83 import dfa_parser

	84 import dfa_traversal

	85 import validator

	86

	87

	88 # Register names in 'natual' order (as defined by IA32/x86-64 ABI)

	89 #

	90 # X86-64 ABI splits all registers in groups of 8 because it uses 3-bit field

	91 # in opcode, ModR/M, and/or SIB bytes to encode them.

	92 #

	93 # In most cases there are 16 registers of a given kind and two such groups,

	94 # but there are couple of exceptions:

	95 # 1. There are 20 8-bit registers and three groups (two of them overlap)

	96 # 2. There are eight X87 and MMX registers thus two groups are identical

	97 #

	98 # We use typical register from a group to name the whole group. Most groups

	99 # are named by the first register but 'spl' group is named by the fifth register

	100 # because it's first four registers are the same as 'al' group.

	101 # We use mnemonic name 'mmalt' to represent the "evil mirror" of the 'mm0'

	102 # group: it the same as 'mm0', but implies use of the appropriate REX bit.

	103 REGISTERS = {

	104 'al': [ 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh' ],

	105 'spl': [ 'al', 'cl', 'dl', 'bl', 'spl', 'bpl', 'sil', 'dil' ],

	106 'ax': [ 'ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di' ],

	107 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi' ],

	108 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi' ],

	109 'r8b': [ 'r{}b'.format(N) for N in range(8,16) ],

	110 'r8w': [ 'r{}w'.format(N) for N in range(8,16) ],

	111 'r8d': [ 'r{}d'.format(N) for N in range(8,16) ],

	112 'r8': [ 'r{}'.format(N) for N in range(8,16) ],

	113 'mm0': [ 'mm{}'.format(N) for N in range(8) ],

	114 'mmalt': [ 'mm{}'.format(N) for N in range(8) ],

	115 'st(0)': [ 'st({})'.format(N) for N in range(8) ],

	116 'xmm0': [ 'xmm{}'.format(N) for N in range(8) ],

	117 'xmm8': [ 'xmm{}'.format(N) for N in range(8,16) ],

	118 'ymm0': [ 'ymm{}'.format(N) for N in range(8) ],

	119 'ymm8': [ 'ymm{}'.format(N) for N in range(8,16) ]

	120 }

	121

	122

	123 NOP = 0x90

	124

	125

	126 def PadToBundleSize(bytes):

	127 assert len(bytes) <= validator.BUNDLE_SIZE

	128 return bytes + [NOP] * (validator.BUNDLE_SIZE - len(bytes))

	129

	130

	131 # In x86-64 mode we have so-called 'restricted register' which is used to

	132 # tie two groups together. Some instructions require particular value to

	133 # be stored in this variable, while some accept any non-special restricted

	134 # register (%ebp and %esp are special because they can only be accepted by

	135 # a few 'special' instructions).

	136 #

	137 # You can find more details in the "NaCl SFI model on x86-64 systems" manual.

	138 #

	139 # We try to feed all possible 'restricted registers' into validator and then

	140 # classify the instruction using this map. If set of acceptable 'restricted

	141 # registers' is not here, then it's an error in validator.

	142 ACCEPTABLE_X86_64_INPUTS = {

	143 0x00001: 'input_rr=%eax',

	144 0x00002: 'input_rr=%ecx',

	145 0x00004: 'input_rr=%edx',

	146 0x00008: 'input_rr=%ebx',

	147 0x00010: 'input_rr=%esp',

	148 0x00020: 'input_rr=%ebp',

	149 0x00040: 'input_rr=%esi',

	150 0x00080: 'input_rr=%edi',

	151 0x00100: 'input_rr=%r8d',

	152 0x00200: 'input_rr=%r9d',

	153 0x00400: 'input_rr=%r10d',

	154 0x00800: 'input_rr=%r11d',

	155 0x01000: 'input_rr=%r12d',

	156 0x02000: 'input_rr=%r13d',

	157 0x04000: 'input_rr=%r14d',

	158 0x08000: 'input_rr=%r15d',

	159 0x1ffcf: 'input_rr=any_nonspecial'

	160 }

	161

	162 # Any instruction must produce either None or one of fifteen registers as an

	163 # output 'restricted register' value. 'r15d' is NOT acceptable as an output.

	164 ACCEPTABLE_X86_64_OUTPUT_REGISTERS = tuple(

	165 '%' + reg for reg in (REGISTERS['eax'] + REGISTERS['r8d'])[0:-1])

	166

	167

	168 def ValidateInstruction(instruction, validator_inst):

	169 bundle = ''.join(map(chr, PadToBundleSize(instruction)))

	170 if options.bitness == 32:

	171 result = validator_inst.ValidateChunk(bundle, bitness=32)

	172 return result, []

	173 else:

	174 valid_inputs = 0

	175 known_final_rr = None

	176 output_rr = None

	177 # Note that iteration order is aligned with ACCEPTABLE_X86_64_INPUTS array

	178 # above.

	179 for bit, initial_rr in enumerate(validator.ALL_REGISTERS + [None]):

	180 valid, final_rr = validator_inst.ValidateAndGetFinalRestrictedRegister(

	181 bundle, len(instruction), initial_rr)

	182 if valid:

	183 # final_rr should not depend on input_rr

	184 assert valid_inputs == 0 or known_final_rr == final_rr

	185 valid_inputs \|= 1 << bit

	186 known_final_rr = final_rr

	187 # If nothing is accepted then instruction is not valid. Easy and simple.

	188 if valid_inputs == 0: return False, []

	189 # If returned value in unacceptable we'll get IndexError here and this

	190 # test will fail

	191 if known_final_rr is not None:

	192 output_rr = ACCEPTABLE_X86_64_OUTPUT_REGISTERS[known_final_rr]

	193 # If collected valid_inputs are unacceptable we'll get KeyError here and

	194 # this test will fail

	195 return True, [ACCEPTABLE_X86_64_INPUTS[valid_inputs],

	196 'output_rr={}'.format(output_rr)]

	197

	198

	199 class WorkerState(object):

	200 def __init__(self, prefix, validator):

	201 self.total_instructions = 0

	202 self.num_valid = 0

	203 self.validator = validator

	204 self.output = set()

	205 self.trace = []

	206

	207

	208 def ReceiveInstruction(self, bytes):

	209 self.total_instructions += 1

	210 result, notes = ValidateInstruction(bytes, self.validator)

	211 if result:

	212 self.num_valid += 1

	213 dis = self.validator.DisassembleChunk(

	214 ''.join(map(chr, bytes)),

	215 bitness=options.bitness)

	216 for line_nr in xrange(len(dis)):

	217 dis[line_nr] = str(dis[line_nr])

	218 assert dis[line_nr][0:17] == 'Instruction(0x' + str(line_nr) + ': '

	219 assert dis[line_nr][-1:] == ')'

	220 dis[line_nr] = dis[line_nr][17:-1]

	221 # If %rip is involved then comment will be different depending on the

	222 # instruction length. Eliminate it.

	223 if '(%rip)' in dis[0]:

	224 dis[0] = re.sub(' # 0x[ ][0-9a-fA-F]', '', dis[0])

	225 # Zero displacements are represented as 0x0 for all instructions except

	226 # jumps where they disassembled as non-zero due to %eip/%rip-relative

	227 # addressing. We replace this displacement with %eip/%rip to simplify

	228 # compression.

	229 if ' 0x' in dis[0] and ' 0x0' not in dis[0]:

	230 for bytes in xrange(1, 16):

	231 dis[0] = re.sub(

	232 '(' + '(?:[0-9a-fA-F][0-9a-fA-F] ){' + str(bytes) + '} .* )' +

	233 hex(bytes) + '(.*)',

	234 r'\1%eip\2' if options.bitness == 32 else r'\1%rip\2',

	235 dis[0]);

	236 dis[0] = 'Instruction: ' + dis[0]

	237 dis += notes

	238 self.output.add('; '.join(dis))

	239

	240

	241 def RecordTrace(self, compressor_nr, instruction):

	242 self.trace.append((compressor_nr, instruction))

	243

	244

	245 # Compressor has three slots: regex (which picks apart given instruction),

	246 # subst (which is used to create the compressed version) and replacements (which

	247 # are used to generate set of instructions from a given code).

	248 #

	249 # Example compressor:

	250 # regex = r'.?[0-9a-fA-F]([0-7]) \w (%e(?:[abcd]x\|[sb]p\|[sd]i)).*()'

	251 # subst = ('[0-7]', '[%eax..%edi]', ' # register in opcode')

	252 # replacements = ((0, '%eax', ''), (1, '%ecx', ''),

	253 # (2, '%edx', ''), (3, '%ebx', '')

	254 # (4, '%esp', ''), (5, '%ebp', ''),

	255 # (6, '%esi', ''), (7, '%edi', ''))

	256 #

	257 # When faced with instruction '40 inc %eax' it will capture the following

	258 # pieces of said instruction: '4{0} inc {%eax}{}'.

	259 #

	260 # Then it will replace groups to produce the following 8 instructions:

	261 # '40 inc %eax'

	262 # '41 inc %ecx'

	263 # '42 inc %edx'

	264 # '43 inc %ebx'

	265 # '44 inc %esp'

	266 # '45 inc %ebp'

	267 # '46 inc %esi'

	268 # '47 inc %edi'

	269 #

	270 # If all these instructions can be found in a set of instructions then

	271 # compressor will remove them from said set and will substitute them with

	272 # one "compressed instruction" '4[0-7] inc [%eax..%edi] # register in opcode'.

	273 class Compressor(object):

	274 __slots__ = [

	275 'regex',

	276 'subst',

	277 'replacements'

	278 ]

	279

	280 def __init__(self, regex, subst, replacements):

	281 self.regex = re.compile(regex)

	282 self.subst = subst

	283 self.replacements = replacements

	284

	285

	286 def CompressionTemplate(instruction, match, mark):

	287 """ Replace all match groups with the mark. """

	288 pos = 0

	289 format_str = ''

	290 for group in range(0, len(match.groups())):

	291 format_str += instruction[pos:match.start(group + 1)] + mark

	292 pos = match.end(group + 1)

	293 return format_str + instruction[pos:]

	294

	295

	296 def CompressOneMatch(instructions, instruction, match, compressor):

	297 format_str = CompressionTemplate(instruction, match, '{}')

	298 subset = set()

	299 for replacement in compressor.replacements:

	300 replacement_str = format_str.format(*replacement)

	301 if not replacement_str in instructions:

	302 return (False, instructions)

	303 subset.add(replacement_str)

	304 assert instruction in subset

	305 instructions -= subset

	306 instructions.add(format_str.format(*compressor.subst))

	307 return (True, instructions)

	308

	309

	310 def CompressOneInstruction(instructions, compressors, split, cache):

	311 sorted_instructions = (sorted(i for i in instructions if i > split) +

	312 sorted(i for i in instructions if i < split))

	313 for instruction in sorted_instructions:

	314 if instruction in cache:

	315 compressors_list = cache[instruction]

	316 for compressor_nr, match, compressor in compressors_list:

	317 result, instructions = CompressOneMatch(

	318 instructions, instruction, match, compressor)

	319 if result:

	320 return (instructions, compressor_nr, instruction)

	321 else:

	322 compressors_list = []

	323 for compressor_nr, compressor in enumerate(compressors):

	324 match = compressor.regex.match(instruction)

	325 if match:

	326 compressors_list.append((compressor_nr, match, compressor))

	327 result, instructions = CompressOneMatch(

	328 instructions, instruction, match, compressor)

	329 if result:

	330 return (instructions, compressor_nr, instruction)

	331 cache[instruction] = compressors_list

	332 return (instructions, False, False)

	333

	334

	335 def Compressed(instructions, compressors, show_progress):

	336 split = ''

	337 cache = {}

	338 while True:

	339 instructions, rule, split = CompressOneInstruction(

	340 instructions, compressors, split, cache)

	341 if rule is False: break

	342 show_progress(rule, split)

	343 return instructions

	344

	345

	346 def Worker((prefix, state_index)):

	347 worker_state = WorkerState(prefix, worker_validator)

	348

	349 try:

	350 dfa_traversal.TraverseTree(

	351 dfa.states[state_index],

	352 final_callback=worker_state.ReceiveInstruction,

	353 prefix=prefix,

	354 anyfield=0)

	355 if (prefix[0] != 0x0f or prefix[1] != 0x0f): # Skip 3DNow! instructions

	356 worker_state.output = Compressed(set(worker_state.output),

	357 compressors,

	358 worker_state.RecordTrace)

	359 except Exception as e:

	360 traceback.print_exc() # because multiprocessing imap swallows traceback

	361 raise

	362

	363 return (

	364 prefix,

	365 worker_state.total_instructions,

	366 worker_state.num_valid,

	367 worker_state.output,

	368 worker_state.trace)

	369

	370

	371 def ParseOptions():

	372 parser = optparse.OptionParser(usage='%prog [options] xmlfile')

	373

	374 parser.add_option('--bitness',

	375 choices=['32', '64'],

	376 help='The subarchitecture: 32 or 64')

	377 parser.add_option('--validator_dll',

	378 help='Path to librdfa_validator_dll')

	379 parser.add_option('--decoder_dll',

	380 help='Path to librdfa_decoder_dll')

	381

	382 options, args = parser.parse_args()

	383 options.bitness = int(options.bitness)

	384

	385 if len(args) != 1:

	386 parser.error('specify one xml file')

	387

	388 (xml_file, ) = args

	389

	390 return options, xml_file

	391

	392

	393 # Version suitable for use in regular expressions

	394 REGISTERS_RE = REGISTERS.copy()

	395 REGISTERS_RE['st(0)'] = [ r'st${}$'.format(N) for N in range(8) ]

	396

	397 # Index names in 'natual' order (as defined by IA32/x86-64 ABI)

	398 INDEXES = {

	399 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'eiz', 'ebp', 'esi', 'edi' ],

	400 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'riz', 'rbp', 'rsi', 'rdi' ],

	401 'r8': [ 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15' ]

	402 }

	403 # Register which can be used as base in 64-bit mode in all their incarnations.

	404 X86_64_BASE_REGISTERS = set([

	405 '%spl', '%bpl', '%r15b',

	406 '%sp', '%bp', '%r15w',

	407 '%esp', '%ebp', '%r15d',

	408 '%rsp', '%rbp', '%r15',

	409 '%rip'

	410 ])

	411

	412

	413 def InstructionIsDangerous(input, output, register_write,

	414 writes_to, memory_accessed=False,

	415 base_text='%riz', index_text='%riz'):

	416 """ Check if instruction with given replacements will be dangerous

	417

	418 Args:

	419 input: input argument

	420 output: output argument

	421 register_write: three-state selector

	422 'sandbox' - instruction can be used to produce "restricted register"

	423 'protect' - instruction can damage output, protect "special registers"

	424 'ignore' - instruction does not affect it's operands (e.g. test) or

	425 is used with non-GP registers (X87, MMX, XMM, etc)

	426 memory_accessed: True if instruction accesses memory

	427 base: base register (if memory is accessed)

	428 index: index register (if memory is accessed)

	429

	430 Returns:

	431 True if instruction should be rejected by validator

	432 """

	433 if memory_accessed:

	434 if base_text not in X86_64_BASE_REGISTERS:

	435 return True

	436 # Surprisingly enough %r15 is considered "valid index register" by our

	437 # validator: it can not be ever produced as "restricted register" which

	438 # means that such instruction can not ever occur in real program (it's

	439 # guaranteed because ACCEPTABLE_X86_64_OUTPUT_REGISTERS excludes it), but

	440 # when instruction is tested in isolation it's allowed.

	441 if index_text in X86_64_BASE_REGISTERS - set(['%r15']):

	442 return True

	443 if register_write == 'protect' and output in X86_64_BASE_REGISTERS:

	444 return True

	445 if register_write == 'sandbox' and output == '%r15d':

	446 return True

	447 if writes_to == 'both' and input in X86_64_BASE_REGISTERS:

	448 return True

	449 return False

	450

	451

	452 def AppendOperandsReplacement(replacement, rm_text, reg, modrm, writes_to):

	453 """ Appends replacement text to replacement list

	454

	455 Args:

	456 replacement: replacement list

	457 rm_text: replacement for rm field

	458 reg: register kind (or None if reg field is used as opcode extension)

	459 modrm: modrm byte

	460 writes_to: three-state selector

	461 'reg' - instruction uses rm as source, reg as destination

	462 'rm' - instruction uses reg as source, rm as destination

	463 'both' - instruction writes to both reg and rm

	464

	465 Returns:

	466 input: textual representation of input argument

	467 output: textual representation of output argument

	468

	469 Side-effect:

	470 output (if reg is None) or (input, output) tuple (if reg is not None)

	471 are added to replacement list.

	472 """

	473 if reg is None:

	474 assert writes_to == 'rm'

	475 input, output = None, rm_text

	476 replacement.append(output)

	477 else:

	478 reg_field = (modrm >> 3) & 0x07

	479 reg_text = '%' + REGISTERS[reg][reg_field]

	480 if writes_to == 'reg':

	481 input, output = rm_text, reg_text

	482 else: # rm, both

	483 input, output = reg_text, rm_text

	484 replacement.extend([input, output])

	485 return input, output

	486

	487

	488 def ModRMRegisterReplacements(rm, reg=None, writes_to='rm', opcode_bits=0,

	489 register_write='ignore', note=''):

	490 """Creates replacement tuples list for register-to-register instructions

	491

	492 Args:

	493 rm: rm operand kind (see REGISTERS array)

	494 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	495 writes_to: three-state selector

	496 'reg' - instruction uses rm as source, reg as destination

	497 'rm' - instruction uses reg as source, rm as destination

	498 'both' - instruction writes to both reg and rm

	499 opcode_bits: opcode extensions code (used when reg is None)

	500 register_write: three-state selector

	501 'sandbox' - instruction can be used to produce "restricted register"

	502 'protect' - instruction can damage output, protect "special registers"

	503 'ignore' - instruction does not affect it's operands (e.g. test) or

	504 is used with non-GP registers (X87, MMX, XMM, etc)

	505 note: note to include at the end of input instruction

	506 Returns:

	507 List of replacement tuples

	508 """

	509 # Reg field can be used either as reg or as opcode extension, but not both

	510 assert reg is None or opcode_bits == 0

	511

	512 output_key = (options.bitness, reg, rm, writes_to, opcode_bits,

	513 register_write)

	514 if output_key in ModRMRegisterReplacements.replacements:

	515 return ModRMRegisterReplacements.replacements[output_key]

	516

	517 replacements = []

	518

	519 # Two upper bits of ModR/M byte (mod field) must be equal to 11

	520 # This gives us range from 0xc0 to 0xff but we are going from the

	521 # end to make rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).

	522 if reg is None:

	523 # reg field is used as opcode extension

	524 byte_range = [byte

	525 for byte in range(0xff, 0xbf, -1)

	526 if (byte >> 3) & 0x7 == opcode_bits]

	527 else:

	528 byte_range = range(0xff, 0xbf, -1)

	529

	530 for modrm in byte_range:

	531 rm_field = (modrm & 0x07)

	532 rm_text = '%' + REGISTERS[rm][rm_field]

	533 byte_text = '{:02x}'.format(modrm)

	534 replacement = [byte_text]

	535 input, output = AppendOperandsReplacement(

	536 replacement, rm_text, reg, modrm, writes_to)

	537 if options.bitness == 64:

	538 replacement.append('any_nonspecial') # input_rr

	539 replacement.append(output if register_write == 'sandbox' else None)

	540 if InstructionIsDangerous(input, output, register_write, writes_to):

	541 continue

	542 replacement.append(note)

	543 replacements.append(tuple(replacement))

	544 ModRMRegisterReplacements.replacements[output_key] = tuple(replacements)

	545 return ModRMRegisterReplacements.replacements[output_key]

	546 ModRMRegisterReplacements.replacements = {}

	547

	548

	549 def BaseOnlyMemoryOperand(modrm, base):

	550 """Creates replacement tuples list for register-to-memory instructions

	551 (base only, no SIB)

	552

	553 Args:

	554 modrm: modrm byte

	555 base: register kind for base

	556 Returns:

	557 bytes_text: replacement for "bytes" group

	558 rm_text: textual representation of "rm" argument

	559 base_text: textual representation of "base" register

	560 """

	561 mod_field = (modrm >> 6) & 0x03

	562 rm_field = (modrm & 0x07)

	563 base_text = '%' + REGISTERS[base][rm_field]

	564 # If RM field == %rbp and MOD field is zero then it's absolute address

	565 # in 32-bit mode and %rip-based address in 64-bit mode

	566 if mod_field == 0 and rm_field == validator.REG_RBP:

	567 bytes_text = '{:02x} 00 00 00 00'.format(modrm)

	568 rm_text = '0x0' if options.bitness == 32 else '0x0(%rip)'

	569 base_text = '%eiz' if options.bitness == 32 else '%rip'

	570 # Memory access with just a base register

	571 elif mod_field == 0:

	572 bytes_text = '{:02x}'.format(modrm)

	573 rm_text = '({})'.format(base_text)

	574 # Memory access with base and 8bit offset

	575 elif mod_field == 1:

	576 bytes_text = '{:02x} 00'.format(modrm)

	577 rm_text = '0x0({})'.format(base_text)

	578 # Memory access with base and 32bit offset

	579 else: # mod_field == 2

	580 bytes_text = '{:02x} 00 00 00 00'.format(modrm)

	581 rm_text = '0x0({})'.format(base_text)

	582 return bytes_text, rm_text, base_text

	583

	584

	585 def SIBMemoryOperand(modrm, sib, base, index):

	586 """Creates replacement tuples list for register-to-memory instructions

	587 (base only, no SIB)

	588

	589 Args:

	590 modrm: modrm byte

	591 base: register kind for base

	592 Returns:

	593 bytes_text: replacement for "bytes" group

	594 rm_text: textual representation of "rm" argument

	595 base_text: textual representation of "base" register

	596 index_text: textual representation of "index" register

	597 """

	598 mod_field = (modrm >> 6) & 0x03

	599 scale_field = (sib >> 6) & 0x03

	600 index_field = (sib >> 3) & 0x07

	601 base_field = (sib & 0x07)

	602 index_text = '%' + INDEXES[index][index_field]

	603 base_text = '%' + REGISTERS[base][base_field]

	604 scale_text = str(1 << scale_field)

	605 # If BASE is %rbp and MOD == 0 then index with 32bit offset is used

	606 if mod_field == 0 and base_field == validator.REG_RBP:

	607 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	608 if (options.bitness == 64 and

	609 index_text == '%riz' and

	610 scale_text == '1'):

	611 rm_text = '0x0'

	612 else:

	613 rm_text = '0x0(,{},{})'.format(index_text, scale_text)

	614 # There are no base in this case

	615 base_text = '%eiz' if options.bitness == 32 else '%riz'

	616 # Memory access with base and index (no offset)

	617 elif mod_field == 0:

	618 bytes_text = '{:02x} {:02x}'.format(modrm, sib)

	619 rm_text = '({},{},{})'.format(base_text, index_text, scale_text)

	620 # Memory access with base, index and 8bit offset

	621 elif mod_field == 1:

	622 bytes_text = '{:02x} {:02x} 00'.format(modrm, sib)

	623 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	624 # Memory access with base, index and 32bit offset

	625 elif mod_field == 2:

	626 bytes_text = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

	627 rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

	628 # Pretty-printing of access via %rsp (or %r12)

	629 if (base_field == validator.REG_RSP and

	630 index_text in ('%eiz', '%riz') and

	631 scale_text == '1'):

	632 if mod_field == 0: # no offset

	633 rm_text = '({})'.format(base_text)

	634 else: # 8-bit or 32-bit offset

	635 rm_text = '0x0({})'.format(base_text)

	636 return bytes_text, rm_text, base_text, index_text

	637

	638

	639 def ModRMMemoryReplacements(reg=None, writes_to='rm', opcode_bits=0,

	640 memory_accessed=True, register_write='ignore',

	641 base_r8=False, index_r8=False, note=''):

	642 """Creates replacement tuples list for register-to-memory instructions

	643

	644 Args:

	645 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	646 writes_to: three-state selector

	647 'reg' - instruction uses rm as source, reg as destination

	648 'rm' - instruction uses reg as source, rm as destination

	649 'both' - instruction writes to both reg and rm

	650 opcode_bits: opcode extensions code (used when reg is None)

	651 memory_accessed: True if instruction accesses memory

	652 register_write: three-state selector

	653 'sandbox' - instruction can be used to produce "restricted register"

	654 'protect' - instruction can damage output, protect "special registers"

	655 'ignore' - instruction does not affect it's operands (e.g. test) or

	656 is used with non-GP registers (X87, MMX, XMM, etc)

	657 index_r8: True if REX.X bit in the instruction set to 1

	658 note: note to include at the end of input instruction

	659

	660 Returns:

	661 List of replacement tuples

	662 """

	663 # Reg field can be used either as reg or as opcode extension, but not both

	664 assert reg is None or opcode_bits == 0

	665

	666 output_key = (options.bitness, reg, writes_to, opcode_bits,

	667 base_r8, index_r8, memory_accessed, register_write)

	668 if output_key in ModRMMemoryReplacements.replacements:

	669 return ModRMMemoryReplacements.replacements[output_key]

	670

	671 if options.bitness == 32:

	672 base = 'eax'

	673 index = 'eax'

	674 else:

	675 base = 'r8' if base_r8 else 'rax'

	676 index = 'r8' if index_r8 else 'rax'

	677

	678 replacements = []

	679

	680 # Two upper bits of ModR/M byte (mod field) must be equal to 00, 01, or 10

	681 # This gives us range from 0x00 to 0xbf but we are going from the end to make

	682 # rejection faster (%r15 is equal to 0x7 and %rbp is 0x5).

	683 if reg is None:

	684 # reg field is used as opcode extension

	685 byte_range = [byte

	686 for byte in range(0xbf, -1, -1)

	687 if (byte >> 3) & 0x7 == opcode_bits]

	688 else:

	689 byte_range = range(0xbf, -1, -1)

	690

	691 for modrm in byte_range:

	692 # If RM field != %rsp then there are no SIB byte

	693 if (modrm & 0x07) != validator.REG_RSP:

	694 bytes_text, rm_text, base_text = BaseOnlyMemoryOperand(modrm, base)

	695 replacement = [bytes_text]

	696 input, output = AppendOperandsReplacement(

	697 replacement, rm_text, reg, modrm, writes_to)

	698 if options.bitness == 64:

	699 replacement.append('any_nonspecial')

	700 # If writes_to is equal to 'reg' then output is register

	701 if writes_to == 'reg' and register_write == 'sandbox':

	702 replacement.append(output)

	703 else:

	704 # Note that instruction like xchg could write to another register:

	705 # it's "input"! But validator currently does not support this case

	706 # thus we are ignoring it here and writing "None" in this case, too.

	707 replacement.append(None)

	708 if InstructionIsDangerous(input, output, register_write, writes_to,

	709 memory_accessed, base_text):

	710 continue

	711 replacement.append(note)

	712 replacements.append(tuple(replacement))

	713 else:

	714 # If RM field == %rsp then we have SIB byte

	715 for sib in xrange(0x100):

	716 bytes_text, rm_text, base_text, index_text = SIBMemoryOperand(

	717 modrm, sib, base, index)

	718 replacement = [bytes_text]

	719 input, output = AppendOperandsReplacement(

	720 replacement, rm_text, reg, modrm, writes_to)

	721 if options.bitness == 64:

	722 if not memory_accessed or index_text == '%riz':

	723 replacement.append('any_nonspecial')

	724 else:

	725 if index_r8:

	726 # Convert %r8 to %r8d, %r9 to %r9d, etc

	727 replacement.append(index_text + 'd')

	728 else:

	729 # Convert %rax to %eax, %rsp to %esp, etc

	730 replacement.append('%e' + index_text[2:])

	731 # If writes_to is equal to 'reg' then output is register

	732 if writes_to == 'reg' and register_write == 'sandbox':

	733 replacement.append(output)

	734 else:

	735 # Note that instruction like xchg could write to another register:

	736 # it's "input"! But validator currently does not support this case

	737 # thus we are ignoring it here and writing "None" in this case, too.

	738 replacement.append(None)

	739 if InstructionIsDangerous(input, output, register_write, writes_to,

	740 memory_accessed, base_text, index_text):

	741 continue

	742 replacement.append(note)

	743 replacements.append(tuple(replacement))

	744 ModRMMemoryReplacements.replacements[output_key] = tuple(replacements)

	745 return ModRMMemoryReplacements.replacements[output_key]

	746 ModRMMemoryReplacements.replacements = {}

	747

	748

	749 # Map from "REX bit off" group of registers to "REX bit on" group of registers

	750 r8 = {

	751 'al': 'r8b',

	752 'ax': 'r8w',

	753 'eax': 'r8d',

	754 'rax': 'r8',

	755 'mm0': 'mmalt',

	756 'xmm0': 'xmm8',

	757 'ymm0': 'ymm8'

	758 }

	759

	760

	761 def RegisterKinds():

	762 """ Return list of register kinds we process with register compressors """

	763

	764 if options.bitness == 32:

	765 return ('al', 'ax', 'eax', 'mm0', 'xmm0', 'ymm0')

	766 else:

	767 return ('al', 'spl', 'ax', 'eax', 'rax', 'mm0', 'xmm0', 'ymm0',

	768 'r8b', 'r8w', 'r8d', 'r8', 'mmalt', 'xmm8', 'ymm8')

	769

	770

	771 def RegisterKindPairs():

	772 """ Return hand-picked pairs which we must consider in compressors """

	773

	774 if options.bitness == 32:

	775 return (

	776 ( 'al', 'al'),

	777 ( 'ax', 'al'),

	778 ( 'al', 'ax'),

	779 ( 'ax', 'ax'),

	780 ( 'eax', 'al'),

	781 ( 'al', 'eax'),

	782 ( 'eax', 'ax'),

	783 ( 'ax', 'eax'),

	784 ( 'eax', 'eax'),

	785 ( 'eax', 'mm0'),

	786 ( 'mm0', 'eax'),

	787 ( 'eax', 'xmm0'),

	788 ('xmm0', 'eax'),

	789 ( 'mm0', 'mm0'),

	790 ( 'mm0', 'xmm0'),

	791 ('xmm0', 'mm0'),

	792 ('xmm0', 'xmm0'),

	793 ('xmm0', 'ymm0'),

	794 ('ymm0', 'xmm0'),

	795 ('ymm0', 'ymm0')

	796 )

	797 else:

	798 return (

	799 ( 'al', 'al'),

	800 ( 'spl', 'spl'), ( 'spl', 'r8b'), ( 'r8b', 'spl'), ( 'r8b', 'r8b'),

	801 ( 'ax', 'al'),

	802 ( 'ax', 'spl'), ( 'ax', 'r8b'), ( 'r8w', 'spl'), ( 'r8w', 'r8b'),

	803 ( 'al', 'ax'),

	804 ( 'spl', 'ax'), ( 'spl', 'r8w'), ( 'r8b', 'ax'), ( 'r8b', 'r8w'),

	805 ( 'ax', 'ax'), ( 'ax', 'r8w'), ( 'r8w', 'ax'), ( 'r8w', 'r8w'),

	806 ( 'eax', 'al'),

	807 ( 'eax', 'spl'), ( 'eax', 'r8b'), ( 'r8d', 'spl'), ( 'r8d', 'r8b'),

	808 ( 'al', 'eax'),

	809 ( 'spl', 'eax'), ( 'spl', 'r8d'), ( 'r8b', 'eax'), ( 'r8b', 'r8d'),

	810 ( 'eax', 'ax'), ( 'eax', 'r8w'), ( 'r8d', 'ax'), ( 'r8d', 'r8w'),

	811 ( 'ax', 'eax'), ( 'ax', 'r8d'), ( 'r8w', 'eax'), ( 'r8w', 'r8d'),

	812 ( 'eax', 'eax'), ( 'eax', 'r8d'), ( 'r8d', 'eax'), ( 'r8d', 'r8d'),

	813 ( 'rax', 'al'),

	814 ( 'rax', 'spl'), ( 'rax', 'r8b'), ( 'r8', 'spl'), ( 'r8', 'r8b'),

	815 ( 'al', 'rax'),

	816 ( 'spl', 'rax'), ( 'spl', 'r8'), ( 'r8b', 'rax'), ( 'r8b', 'r8'),

	817 ( 'rax', 'ax'), ( 'rax', 'r8w'), ( 'r8', 'ax'), ( 'r8', 'r8w'),

	818 ( 'ax', 'rax'), ( 'ax', 'r8'), ( 'r8w', 'rax'), ( 'r8w', 'r8'),

	819 ( 'rax', 'eax'), ( 'rax', 'r8d'), ( 'r8', 'eax'), ( 'r8', 'r8d'),

	820 ( 'eax', 'rax'), ( 'eax', 'r8'), ( 'r8d', 'rax'), ( 'r8d', 'r8'),

	821 ( 'rax', 'rax'), ( 'rax', 'r8'), ( 'r8', 'rax'), ( 'r8', 'r8'),

	822 ( 'eax', 'mm0'), ( 'eax','mmalt'), ( 'r8d', 'mm0'), ( 'eax', 'mmalt'),

	823 ( 'rax', 'mm0'), ( 'rax','mmalt'), ( 'r8', 'mm0'), ( 'r8', 'mmalt'),

	824 ( 'mm0', 'eax'), ('mmalt', 'eax'), ( 'mm0', 'r8d'), ('mmalt', 'r8d'),

	825 ( 'mm0', 'rax'), ('mmalt', 'rax'), ( 'mm0', 'r8'), ('mmalt', 'r8'),

	826 ( 'eax', 'xmm0'), ( 'eax', 'xmm8'), ( 'r8d', 'xmm0'), ( 'r8d', 'xmm8'),

	827 ( 'rax', 'xmm0'), ( 'rax', 'xmm8'), ( 'r8', 'xmm0'), ( 'r8', 'xmm8'),

	828 ('xmm0', 'eax'), ('xmm0', 'r8d'), ('xmm8', 'eax'), ('xmm8', 'r8d'),

	829 ('xmm0', 'rax'), ('xmm0', 'r8'), ('xmm8', 'rax'), ('xmm8', 'r8'),

	830 ( 'mm0', 'mm0'), ('mmalt', 'mm0'), ( 'mm0','mmalt'), ('mmalt','mmalt'),

	831 ( 'mm0', 'xmm0'), ('mmalt','xmm0'), ( 'mm0', 'xmm8'), ('mmalt', 'xmm8'),

	832 ('xmm0', 'mm0'), ('xmm8', 'mm0'), ('xmm0','mmalt'), ('xmm8', 'mmalt'),

	833 ('xmm0', 'xmm0'), ('xmm0', 'xmm8'), ('xmm8', 'xmm0'), ('xmm8', 'xmm8'),

	834 ('xmm0', 'ymm0'), ('xmm0', 'ymm8'), ('xmm8', 'ymm0'), ('xmm8', 'ymm8'),

	835 ('ymm0', 'xmm0'), ('ymm0', 'xmm8'), ('ymm8', 'xmm0'), ('ymm8', 'xmm8'),

	836 ('ymm0', 'ymm0'), ('ymm0', 'ymm8'), ('ymm8', 'ymm0'), ('ymm8', 'ymm8')

	837 )

	838

	839

	840 def ProtectIndexRegister(registers_interval):

	841 """ Remove index-forbidden regsiters from the registers_interval """

	842

	843 replacements = {

	844 '%al..%dil': '%al\|%cl\|%dl\|%bl\|%sil\|%dil',

	845 '%ax..%di': '%ax\|%cx\|%dx\|%bx\|%si\|%di',

	846 '%eax..%edi': '%eax\|%ecx\|%edx\|%ebx\|%esi\|%edi',

	847 '%rax..%rdi': '%rax\|%rcx\|%rdx\|%rbx\|%rsi\|%rdi',

	848 '%al..%r15b': '%al..%bl\|%ah..%bh\|%sil..%r15b',

	849 '%ax..%r15w': '%ax..%bx\|%si..%r15w',

	850 '%eax..%r15d': '%eax..%ebx\|%esi..%r15d',

	851 '%rax..%r15': '%rax..%rbx\|%rsi..%r15'

	852 }

	853 return replacements.get(registers_interval, registers_interval)

	854

	855

	856 def ProtectBaseRegister(registers_interval):

	857 """ Remove base-allowed from the registers_interval """

	858

	859 replacements = {

	860 '%al..%r15b': '%al..%bl\|%ah..%bh\|%sil..%r14b',

	861 '%ax..%r15w': '%ax..%bx\|%si..%r14w',

	862 '%eax..%r15d': '%eax..%ebx\|%esi..%r14d',

	863 '%rax..%r15': '%rax..%rbx\|%rsi..%r14',

	864 '%r8b..%r15b': '%r8b..%r14b',

	865 '%r8w..%r15w': '%r8w..%r14w',

	866 '%r8d..%r15d': '%r8d..%r14d',

	867 '%r8..%r15': '%r8..%r14'

	868 }

	869 if registers_interval in replacements:

	870 return replacements.get(registers_interval, registers_interval)

	871 else:

	872 return ProtectIndexRegister(registers_interval)

	873

	874

	875 def RegisterRegex(register_kind, opcode_bits = 0):

	876 """ Returns modrm regex for the first register """

	877

	878 regex_mod = '^.?({:02x})'.format(0xc0 + opcode_bits 8)

	879 regex_rm = '(%' + REGISTERS[register_kind][0] + ')'

	880 return regex_mod, regex_rm

	881

	882

	883 def MemoryRegex(base_r8, index_r8, opcode_bits = 0):

	884 """ Returns one regex for bytes and one for the textual representation. """

	885

	886 # We only need to process ModR/M+SIB '04 04' or '04 07' here. We only match

	887 # memory operands without offset, so we don't need to add zeros to the end of

	888 # regex_modrm and regex_modrm regular expressions.

	889 #

	890 # We pick version which is "always there" (in both 32bit and 64bit mode) and

	891 # which is not 0x00 (to avoid confusion with offsets and immediates).

	892 if base_r8:

	893 regex_modrm_sib = '^.?({:02x} 07)'.format(0x04 + opcode_bits 8)

	894 else:

	895 regex_modrm_sib = '^.?({:02x} 04)'.format(0x04 + opcode_bits 8)

	896 if options.bitness == 32:

	897 regex_mem = r'($%esp,%eax,1$)'

	898 elif base_r8:

	899 regex_mem = r'($%r15,%r8,1$)' if index_r8 else r'($%r15,%rax,1$)'

	900 else:

	901 regex_mem = r'($%rsp,%r8,1$)' if index_r8 else r'($%rsp,%rax,1$)'

	902 return regex_modrm_sib, regex_mem

	903

	904

	905 def RegToRmCompressor(rm, reg, writes_to, memory_accessed=True,

	906 register_write='ignore', index_r8=False,

	907 rm_compression='register', is_3Dnow=False, notes=''):

	908 """Returns a list of reg <-> rm compressors for a given set of parameters.

	909

	910 Args:

	911 rm: rm operand kind (see REGISTERS array)

	912 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	913 writes_to: three-state selector

	914 'reg' - instruction uses rm as source, reg as destination

	915 'rm' - instruction uses reg as source, rm as destination

	916 'both' - instruction writes to both reg and rm

	917 memory_accessed: True if instruction accesses memory

	918 register_write: three-state selector

	919 'sandbox' - instruction can be used to produce "restricted register"

	920 'protect' - instruction can damage output, protect "special registers"

	921 'ignore' - instruction does not affect it's operands (e.g. test) or

	922 is used with non-GP registers (X87, MMX, XMM, etc)

	923 index_r8: True if REX.X bit in the instruction set to 1

	924 rm_compression: three-state selector

	925 'register' - instruction supports register-only rm parameter

	926 'memory' - instruction supports memory-only rm parameter

	927 'register_or_memory' - instruction supports all kinds of rm parameters

	928 is_3DNow - True if instruction is 3DNow! style (opcode in immidiate)

	929 notes - Notes to add to the description

	930

	931 Returns:

	932 List of compressors

	933 """

	934

	935 start_reg = REGISTERS[reg][0]

	936 end_reg = REGISTERS[reg][-1]

	937 mark_reg = '%{}..%{}'.format(start_reg, end_reg)

	938 # Exclude rbp/rsp/r15 from the interval, if instruction can write to reg.

	939 if register_write != 'ignore' and writes_to in ('reg', 'both'):

	940 mark_reg = ProtectBaseRegister(mark_reg)

	941 start_rm = REGISTERS[rm][0]

	942 end_rm = REGISTERS[rm][-1]

	943 mark_rm = '%{}..%{}'.format(start_rm, end_rm)

	944 # Exclude rbp/rsp/r15 from the interval, if instruction can write to rm.

	945 if register_write != 'ignore' and writes_to in ('rm', 'both'):

	946 mark_rm = ProtectBaseRegister(mark_rm)

	947 # Prepare substitution texts

	948 subst_reg = '[{}]'.format(mark_reg)

	949 if rm_compression == 'register':

	950 subst_rm = '[{}]'.format(mark_rm)

	951 elif rm_compression == 'register_or_memory':

	952 subst_rm = '[{} or memory]'.format(mark_rm)

	953 else:

	954 assert rm_compression == 'memory'

	955 subst_rm = '[memory]'

	956 base_r8 = rm in r8.values()

	957 regex, regex_reg = RegisterRegex(reg)

	958 if rm_compression == 'register':

	959 regex, regex_rm = RegisterRegex(rm)

	960 else: # memory and register_or_memory

	961 regex, regex_rm = MemoryRegex(base_r8, index_r8)

	962 if is_3Dnow:

	963 # Immediate byte is opcode extension with 3DNow! instructions.

	964 regex += ' [0-9a-fA-F][0-9a-fA-F]'

	965 else:

	966 # Accept immediate (which is always zeros in our case).

	967 regex += '(?: 00)*'

	968 # 2 spaces separate byte code from instruction name.

	969 regex += ' '

	970 # Optional "lock" prefix.

	971 regex += '(?:lock )?'

	972 # Instruction name.

	973 regex += r'\w* '

	974 # Immediate or implicit operand that can precede reg/rm pair.

	975 regex += r'(?:\$0x0,\|\$0x0,\$0x0,\|%cl,\|%xmm0,)?'

	976 regex_output_rr = None

	977 subst_output_rr = None

	978 if writes_to == 'reg':

	979 regex += regex_rm + ',' + regex_reg

	980 if register_write == 'sandbox':

	981 assert reg in ('eax', 'r8d')

	982 regex_output_rr = '%' + reg

	983 subst_output_rr = subst_reg

	984 subst = ('XX', subst_rm, subst_reg)

	985 else:

	986 regex += regex_reg + ',' + regex_rm

	987 if register_write == 'sandbox':

	988 assert rm in ('eax', 'r8d')

	989 # Instruction can be sandboxing one and yet produce output_rr == None

	990 # This looks strange, but it's perfectly logical because we can't ever

	991 # sandbox memory!

	992 if rm_compression == 'register':

	993 regex_output_rr = '%' + rm

	994 if rm_compression != 'memory':

	995 subst_output_rr = '[{}]'.format(mark_rm)

	996 subst = ('XX', subst_reg, subst_rm)

	997 # Skip implicit arguments (if any)

	998 regex += '.*'

	999 if options.bitness == 64:

	1000 if memory_accessed and rm_compression != 'register':

	1001 regex += '; input_rr=(%r8d)' if index_r8 else '; input_rr=(%eax)'

	1002 subst_input_rr = '%r8d..%r15d' if index_r8 else '%eax..%edi'

	1003 subst_input_rr = '[{}]'.format(ProtectIndexRegister(subst_input_rr))

	1004 else:

	1005 regex += '; input_rr=(any_nonspecial)'

	1006 subst_input_rr = 'any_nonspecial'

	1007 regex += '; output_rr=({})'.format(regex_output_rr)

	1008 subst = subst + (subst_input_rr, subst_output_rr)

	1009 # With "or memory" or "memory" compressors we always can see where is

	1010 # reg and where is rm, but with reg to rm or rm to reg it's impossible.

	1011 # Add the appropriate comment

	1012 notes = () if notes == '' else (notes, )

	1013 if rm_compression == 'register':

	1014 notes += ('rm to reg' if writes_to == 'reg' else 'reg to rm', )

	1015 notes = '; '.join(notes)

	1016 if notes != '':

	1017 notes = ' # ' + notes

	1018 subst += (notes, )

	1019 regex += '()$'

	1020 replacement = ()

	1021 if rm_compression != 'memory':

	1022 replacement += ModRMRegisterReplacements(

	1023 reg=reg, rm=rm, writes_to=writes_to, register_write=register_write)

	1024 if rm_compression != 'register':

	1025 replacement += ModRMMemoryReplacements(

	1026 reg=reg, base_r8=base_r8, index_r8=index_r8, writes_to=writes_to,

	1027 memory_accessed=memory_accessed, register_write=register_write)

	1028 return Compressor(regex, subst, replacement)

	1029

	1030

	1031 def RegToRmCompressors():

	1032 """ Return list of all Reg to RM (and RM to Reg) compressors. """

	1033

	1034 compressors = []

	1035

	1036 for reg, rm in RegisterKindPairs():

	1037 instruction_kinds = [

	1038 # Normal instructions with two operands (rm to reg)

	1039 {'writes_to': 'reg'},

	1040 # Normal instructions with two operands (reg to rm)

	1041 {'writes_to': 'rm'}

	1042 ]

	1043 # In 64 bit mode we have many different types of instructions depending

	1044 # on whether we are accessing memory or whether we are writing to registers.

	1045 if options.bitness == 64:

	1046 # Lea in 64 bit mode is truly unique instruction for now

	1047 if reg in ('ax', 'r8w', 'eax', 'r8d', 'rax', 'r8'):

	1048 register_write = 'sandbox' if reg in ('eax', 'r8d') else 'protect'

	1049 instruction_kinds.append(

	1050 {'writes_to': 'reg',

	1051 'memory_accessed': False,

	1052 'register_write': register_write,

	1053 'notes': 'lea'})

	1054 # There are few more forms in 64 bit case (rm to reg)

	1055 if reg in ('eax', 'r8d'):

	1056 # Zero-extending version.

	1057 instruction_kinds.append(

	1058 {'writes_to': 'reg',

	1059 'register_write': 'sandbox'})

	1060 # More forms in 64 bit case (reg to rm)

	1061 if rm in ('eax', 'r8d'):

	1062 # Zero-extending version.

	1063 instruction_kinds.append(

	1064 {'writes_to': 'rm',

	1065 'register_write': 'sandbox'})

	1066 # Zero-extending xchg/xadd

	1067 instruction_kinds.append(

	1068 {'writes_to': 'both',

	1069 'register_write': 'sandbox',

	1070 'notes': 'write to both'})

	1071 # Still more forms for 64 bit case (rm to reg).

	1072 if reg in ('al', 'spl', 'ax', 'eax', 'rax', 'r8b', 'r8w', 'r8d', 'r8'):

	1073 # Dangerous instructions (rm to reg)

	1074 instruction_kinds.append(

	1075 {'writes_to': 'reg',

	1076 'register_write': 'protect'})

	1077 # Still more forms for 64 bit case (reg to rm)

	1078 if rm in ('al', 'spl', 'ax', 'eax', 'rax', 'r8b', 'r8w', 'r8d', 'r8'):

	1079 # Dangerous instructions (reg to rm)

	1080 instruction_kinds.append(

	1081 {'writes_to': 'rm',

	1082 'register_write': 'protect'})

	1083 # Dangerous xchg/xadd

	1084 instruction_kinds.append(

	1085 {'writes_to': 'both',

	1086 'register_write': 'protect',

	1087 'notes': 'write to both'})

	1088 # 3DNow! instructions

	1089 if reg in ('mm0', 'mmalt') or rm in ('mm0', 'mmalt'):

	1090 instruction_kinds.append(

	1091 {'writes_to': 'reg',

	1092 'is_3Dnow': True})

	1093 for instruction_kind in instruction_kinds:

	1094 for rm_compression in ('register', 'memory', 'register_or_memory'):

	1095 if rm_compression == 'register':

	1096 # Register-to-register instructions can not access memory anyway

	1097 # which means special compressors are not needed.

	1098 if 'memory_accessed' in instruction_kind:

	1099 continue

	1100 elif options.bitness == 64:

	1101 # We generate 2 variants of compressors with memory operands to

	1102 # enumerate 2 possible values of index_r8 property. Register only

	1103 # instructions have only one version since they don't use index.

	1104 compressors.append(RegToRmCompressor(

	1105 reg=reg, rm=rm, index_r8=True,

	1106 rm_compression=rm_compression, **instruction_kind))

	1107 # Create "normal" compressor now

	1108 compressors.append(RegToRmCompressor(

	1109 reg=reg, rm=rm,

	1110 rm_compression=rm_compression, **instruction_kind))

	1111 return compressors

	1112

	1113

	1114 def RmCompressor(rm, opcode_bits, memory_accessed=True, index_r8=False,

	1115 rm_compression='register', register_write='ignore'):

	1116 """Returns a list of rm compressors for a given set of parameters.

	1117

	1118 Args:

	1119 rm: rm operand kind (see REGISTERS array)

	1120 memory_accessed: True if instruction accesses memory

	1121 index_r8: True if REX.X bit in the instruction set to 1

	1122 rm_compression: three-state selector

	1123 'register' - instruction supports register-only rm parameter

	1124 'memory' - instruction supports memory-only rm parameter

	1125 'register_or_memory' - instruction supports all kinds of rm parameters

	1126 register_write: three-state selector

	1127 'sandbox' - instruction can be used to produce "restricted register"

	1128 'protect' - instruction can damage output, protect "special registers"

	1129 'ignore' - instruction does not affect it's operands (e.g. test) or

	1130 is used with non-GP registers (X87, MMX, XMM, etc)

	1131

	1132 Returns:

	1133 List of compressors

	1134 """

	1135 start_rm = REGISTERS[rm][0]

	1136 end_rm = REGISTERS[rm][-1]

	1137 mark_rm = '%{}..%{}'.format(start_rm, end_rm)

	1138 # Exclude rbp/rsp/r15 from the interval, if instruction can write to rm.

	1139 if register_write != 'ignore':

	1140 mark_rm = ProtectBaseRegister(mark_rm)

	1141 byte_mark = 'XX/' + str(opcode_bits)

	1142 # Prepare substitution texts

	1143 if rm_compression == 'register':

	1144 subst_rm = '[{}]'.format(mark_rm)

	1145 elif rm_compression == 'register_or_memory':

	1146 subst_rm = '[{} or memory]'.format(mark_rm)

	1147 else:

	1148 assert rm_compression == 'memory'

	1149 subst_rm = '[memory]'

	1150 base_r8 = rm in r8.values()

	1151 if rm_compression == 'register':

	1152 regex, regex_rm = RegisterRegex(rm, opcode_bits)

	1153 else: # memory and register_or_memory

	1154 regex, regex_rm = MemoryRegex(base_r8, index_r8, opcode_bits)

	1155 # Regex here matches everything AFTER the ModR/M (+ SIB) bytes.

	1156 regex += '(?: 00)*'

	1157 # 2 spaces separate byte code from instruction name.

	1158 regex += ' '

	1159 # Optional "lock" prefix.

	1160 regex += '(?:lock )?'

	1161 # Instruction name.

	1162 regex += r'\w* '

	1163 # Immediate or implicit operand that can precede rm argument.

	1164 regex += r'(?:\$0x0,\|%cl,)?'

	1165 # Register or memory access

	1166 regex += regex_rm

	1167 # Skip everything after that

	1168 regex += '.*'

	1169 regex_output_rr = None

	1170 subst_output_rr = None

	1171 subst = (byte_mark, subst_rm)

	1172 if options.bitness == 32:

	1173 subst += ('', )

	1174 else:

	1175 if register_write == 'sandbox':

	1176 assert rm in ('eax', 'r8d')

	1177 # Instruction can be sandboxing one and yet produce output_rr == None

	1178 # This looks strange, but it's perfectly logical because we can't ever

	1179 # sandbox memory!

	1180 if rm_compression == 'register':

	1181 regex_output_rr = '%' + rm

	1182 if rm_compression != 'memory':

	1183 subst_output_rr = '[{}]'.format(mark_rm)

	1184 if memory_accessed and rm_compression != 'register':

	1185 regex += '; input_rr=(%r8d)' if index_r8 else '; input_rr=(%eax)'

	1186 subst_input_rr = '%r8d..%r15d' if index_r8 else '%eax..%edi'

	1187 subst_input_rr = '[{}]'.format(ProtectIndexRegister(subst_input_rr))

	1188 else:

	1189 regex += '; input_rr=(any_nonspecial)'

	1190 subst_input_rr = 'any_nonspecial'

	1191 regex += '; output_rr=({})'.format(regex_output_rr)

	1192 subst = subst + (subst_input_rr, subst_output_rr, '')

	1193 regex += '()'

	1194 replacement = ()

	1195 if rm_compression != 'memory':

	1196 replacement += ModRMRegisterReplacements(

	1197 rm=rm, opcode_bits=opcode_bits, register_write=register_write)

	1198 if rm_compression != 'register':

	1199 replacement += ModRMMemoryReplacements(

	1200 base_r8=base_r8, index_r8=index_r8, opcode_bits=opcode_bits,

	1201 memory_accessed=memory_accessed, register_write=register_write)

	1202 return Compressor(regex, subst, replacement)

	1203

	1204

	1205 def RmCompressors():

	1206 """ Return list of all RM (with reg as opcode extension) compressors. """

	1207 compressors = []

	1208

	1209 for rm in RegisterKinds():

	1210 for opcode_bits in xrange(8):

	1211 instruction_kinds = [

	1212 # The most basic form

	1213 {}

	1214 ]

	1215 # In 64 bit mode we have many different types of instructions depending

	1216 # on whether we are accessing memory or whether we are writing to

	1217 # registers.

	1218 if options.bitness == 64:

	1219 # No memory access (e.g. prefetch)

	1220 instruction_kinds.append(

	1221 {'memory_accessed':False})

	1222 # More forms in 64 bit case.

	1223 if rm in ('eax', 'r8d'):

	1224 # Zero-extending version.

	1225 instruction_kinds.append(

	1226 {'register_write':'sandbox'})

	1227 # Still more forms for 64 bit case (reg to rm).

	1228 if rm in ('al', 'spl', 'ax', 'eax', 'rax',

	1229 'r8b', 'r8w', 'r8d', 'r8'):

	1230 # Dangerous instructions.

	1231 instruction_kinds.append(

	1232 {'register_write':'protect'})

	1233 for instruction_kind in instruction_kinds:

	1234 for rm_compression in ('register', 'memory', 'register_or_memory'):

	1235 if rm_compression == 'register':

	1236 # Register-to-register instructions can not access memory anyway

	1237 # which means special compressors are not needed.

	1238 if 'memory_accessed' in instruction_kind:

	1239 continue

	1240 elif options.bitness == 64:

	1241 # We generate 2 variants of compressors with memory operands to

	1242 # enumerate 2 possible values of index_r8 property. Register only

	1243 # instructions have only one version since they don't use index.

	1244 compressors.append(RmCompressor(

	1245 rm=rm, opcode_bits=opcode_bits, index_r8=True,

	1246 rm_compression=rm_compression, **instruction_kind))

	1247 # Create "normal" compressor now

	1248 compressors.append(RmCompressor(

	1249 rm=rm, opcode_bits=opcode_bits,

	1250 rm_compression=rm_compression, **instruction_kind))

	1251 return compressors

	1252

	1253

	1254 def OpcodeCompressors():

	1255 """ Return "register in opcode" compressors. """

	1256 compressors = []

	1257

	1258 for reg in RegisterKinds() + ('st(0)',):

	1259 for opcode in xrange(8):

	1260 for text1, text2, nibble in (

	1261 ('[0..7]', '[8..f]', xrange(8)),

	1262 ('[012367]', '[89abef]', (0, 1, 2, 3, 6, 7)),

	1263 ('[0..6]', '[8..e]', xrange(7))

	1264 ):

	1265 start_reg = REGISTERS[reg][0]

	1266 end_reg = REGISTERS[reg][-1]

	1267 mark_reg = '%{}..%{}'.format(start_reg, end_reg)

	1268 if 7 not in nibble:

	1269 if ProtectBaseRegister(mark_reg) != mark_reg:

	1270 mark_reg = ProtectBaseRegister(mark_reg)

	1271 else:

	1272 continue

	1273 subst_reg = '[{}]'.format(mark_reg)

	1274 # Note that we use instruction with 1st register, not 0th register

	1275 # here to avoid ambiguity when opcode is 0x00

	1276 compressors.append(Compressor(

	1277 r'.?[0-9a-fA-F](1)(?: 00)'

	1278 r' \w* (?:\$0x0,\|%ax,\|%st,)?'

	1279 r'(%' + REGISTERS_RE[reg][1] + ').*',

	1280 (text1, subst_reg),

	1281 [('{:x}'.format(n), '%' + REGISTERS[reg][n])

	1282 for n in nibble]))

	1283 compressors.append(Compressor(

	1284 r'.?[0-9a-fA-F](8)(?: 00)'

	1285 r' \w* (?:\$0x0,\|%ax,\|%st,)?'

	1286 r'(%' + REGISTERS_RE[reg][0] + ').*',

	1287 (text2, subst_reg),

	1288 [('{:x}'.format(n + 8), '%' + REGISTERS[reg][n])

	1289 for n in nibble]))

	1290 # Another version for 64 bit case

	1291 if options.bitness == 64 and reg in ('eax', 'r8d'):

	1292 compressors.append(Compressor(

	1293 r'.?[0-9a-fA-F](1)(?: 00)'

	1294 r' \w* (?:\$0x0,\|%ax,\|%st,)?'

	1295 r'(%' + REGISTERS_RE[reg][1] + ').*'

	1296 r'output_rr=(%'+ REGISTERS_RE[reg][1] + ').*',

	1297 (text1, subst_reg, subst_reg),

	1298 [['{:x}'.format(n)] + ['%' + REGISTERS[reg][n]] * 2

	1299 for n in nibble]))

	1300 compressors.append(Compressor(

	1301 r'.?[0-9a-fA-F](8)(?: 00)'

	1302 r' \w* (?:\$0x0,\|%ax,\|%st,)?'

	1303 r'(%' + REGISTERS_RE[reg][0] + ').*'

	1304 r'output_rr=(%'+ REGISTERS_RE[reg][0] + ').*',

	1305 (text2, subst_reg, subst_reg),

	1306 [['{:x}'.format(n + 8)] + ['%' + REGISTERS[reg][n]] * 2

	1307 for n in nibble]))

	1308 return compressors

	1309

	1310

	1311 def MemoryNonMemoryCompressors():

	1312 """ Return memory/nonmemory compressors. """

	1313

	1314 compressors = []

	1315

	1316 if options.bitness == 32:

	1317 letters_and_registers = (('b', 'al', ''), ('w', 'ax', ''), ('l', 'eax', ''))

	1318 else:

	1319 letters_and_registers = (

	1320 ('b', 'al', 'eax'), ('b', 'spl', 'eax'), ('b', 'r8b', 'r8d'),

	1321 ('w', 'ax', 'eax'), ('w', 'r8w', 'r8d'),

	1322 ('l', 'eax', 'eax'), ('l', 'r8d', 'r8d'),

	1323 ('q', 'rax', 'eax'), ('q', 'r8', 'r8d')

	1324 )

	1325 for letter, reg, out_reg in letters_and_registers:

	1326 start_reg = REGISTERS[reg][0]

	1327 end_reg = REGISTERS[reg][-1]

	1328 for rmR15 in (True, False):

	1329 rm_mark = '%{}..%{}'.format(start_reg, end_reg)

	1330 if not rmR15:

	1331 rm_mark = ProtectBaseRegister(rm_mark)

	1332 all_regs = '[{}]'.format(rm_mark)

	1333 regs_mark = '[{} or memory]'.format(rm_mark)

	1334 if options.bitness == 64:

	1335 start_out = REGISTERS[out_reg][0]

	1336 end_out = REGISTERS[out_reg][-1]

	1337 mark_out = '%{}..%{}'.format(start_out, end_out)

	1338 out_regs = '[{}]'.format(ProtectBaseRegister(mark_out))

	1339 for notes in ('', ' # rm to reg', ' # reg to rm'):

	1340 compressors.append(Compressor(

	1341 r'.* \w(' + letter + r') .(\[memory]).*()',

	1342 ('[{}]?'.format(letter), regs_mark, ''),

	1343 ((letter, '[memory]', ''), ('', all_regs, notes))))

	1344 if options.bitness == 64:

	1345 for index_reg in ('eax', 'r8d'):

	1346 start_index = REGISTERS[index_reg][0]

	1347 end_index = REGISTERS[index_reg][-1]

	1348 index_mark = '%{}..%{}'.format(start_index, end_index)

	1349 index_regs = '[{}]'.format(ProtectIndexRegister(index_mark))

	1350 for output_rrs in ((None, out_regs),

	1351 (out_regs, None),

	1352 (None, None)):

	1353 compressors.append(Compressor(

	1354 r'.* \w(' + letter + r') .(\[memory]).*; '

	1355 r'input_rr=(\[(?:%[a-z0-9](?:\.\.\|\\|))+%[a-z0-9]]); '

	1356 r'output_rr=(\[(?:%[a-z0-9](?:\.\.\|\\|))+%[a-z0-9]]\|None)()',

	1357 ('[{}]?'.format(letter), regs_mark, index_regs,

	1358 output_rrs[1] if output_rrs[0] is None else output_rrs[0],

	1359 ''),

	1360 ((letter, '[memory]', index_regs, output_rrs[0], ''),

	1361 ('', all_regs, 'any_nonspecial', output_rrs[1], notes))))

	1362 return compressors

	1363

	1364

	1365 def RexCompressor(rm, rmR15, reg, regR15, rexw, input_rr, output_rr, rm_to_reg):

	1366 """ Return REX compressor (or nothing) for a given set of paramenters.

	1367

	1368 Args:

	1369 rm: rm operand kind (see REGISTERS array) or None if rm is not used

	1370 reg: reg operand kind (see REGISTERS array) or None if reg is not used

	1371 rmR15: True if R15 register is included

	1372 regR15: True if R15 register is included

	1373 rexw: True if REX.W should be set

	1374 input_rr: True if input_rr is used

	1375 output_rr: true if output_rr is used

	1376 rm_to_reg: True if direction is rm to reg

	1377 """

	1378

	1379 def MakeRegex(string):

	1380 return string.replace('.', r'\.').replace('\|', r'\\|')

	1381

	1382 # reg and rm can be of three different possible intervals

	1383 # start_reg/rm to end_reg/rm (e.g. from %al to %bh)

	1384 # start_reg/rm to end_reg0/rm0 (e.g from %al to %dil)

	1385 # start_reg8/rm8 to end_reg8/rm8 (e.g. from %r8b to %r15b)

	1386 # First form can be observed if there are not REX, second form

	1387 # if REX.R/REX.B is not set and third form is where it's set.

	1388 if reg:

	1389 start_reg = REGISTERS[reg][0]

	1390 start_reg8 = REGISTERS[r8[reg]][0]

	1391 end_reg = REGISTERS[reg][-1]

	1392 end_reg0 = 'dil' if reg == 'al' else end_reg

	1393 end_reg8 = REGISTERS[r8[reg]][-1]

	1394 if rexw:

	1395 mark_reg = '%{}..%{}'.format(start_reg, end_reg0)

	1396 else:

	1397 mark_reg = '%{}..%{}'.format(start_reg, end_reg)

	1398 if not regR15:

	1399 mark_reg = ProtectBaseRegister(mark_reg)

	1400 regex_reg = r'\[({})]'.format(MakeRegex(mark_reg))

	1401 if rm:

	1402 start_rm = REGISTERS[rm][0]

	1403 start_rm8 = REGISTERS[r8[rm]][0]

	1404 end_rm = REGISTERS[rm][-1]

	1405 end_rm0 = 'dil' if rm == 'al' else end_rm

	1406 end_rm8 = REGISTERS[r8[rm]][-1]

	1407 if rexw:

	1408 mark_rm = '%{}..%{}'.format(start_rm, end_rm0)

	1409 else:

	1410 mark_rm = '%{}..%{}'.format(start_rm, end_rm)

	1411 if not rmR15:

	1412 mark_rm = ProtectBaseRegister(mark_rm)

	1413 regex_rm = r'\[({})(?: or memory)?]'.format(MakeRegex(mark_rm))

	1414

	1415 # Legacy prefixes

	1416 regex = '.:(?: 26\| 2e\| 36\| 3e\| 64\| 65\| 66\| 67\| f0\| f2\| f3)'

	1417 # REX

	1418 regex += '( 48).' if rexw else '( 40\|).'

	1419 # Replacement text

	1420 replacement_tuple = (' [REX:48..4f]' if rexw else ' [REX:40..47]?', )

	1421 if reg:

	1422 replacement_regs = '%{}..%{}'.format(start_reg, end_reg8)

	1423 if not regR15:

	1424 replacement_regs = ProtectBaseRegister(replacement_regs)

	1425 if rm:

	1426 replacement_rms = '%{}..%{}'.format(start_rm, end_rm8)

	1427 if not rmR15:

	1428 replacement_rms = ProtectBaseRegister(replacement_rms)

	1429 # Instruction arguments

	1430 if not reg and not rm:

	1431 pass

	1432 elif not reg and rm:

	1433 regex += regex_rm + '.*'

	1434 replacement_tuple += (replacement_rms, )

	1435 elif reg and not rm:

	1436 regex += regex_reg + '.*'

	1437 replacement_tuple += (replacement_regs, )

	1438 elif rm_to_reg:

	1439 regex += regex_rm + ',' + regex_reg + '.*'

	1440 replacement_tuple += (replacement_rms, replacement_regs)

	1441 else:

	1442 regex += regex_reg + ',' + regex_rm + '.*'

	1443 replacement_tuple += (replacement_regs, replacement_rms)

	1444 # Input and output restricted registers

	1445 if input_rr:

	1446 regex += r'input_rr=\[({})].*'.format(

	1447 MakeRegex(ProtectIndexRegister('%eax..%edi')))

	1448 replacement_tuple += (ProtectIndexRegister('%eax..%r15d'), )

	1449 if output_rr:

	1450 regex += r'output_rr=\[({})].*'.format(

	1451 MakeRegex(ProtectIndexRegister('%eax..%edi')))

	1452 replacement_tuple += (ProtectBaseRegister('%eax..%r15d'), )

	1453 # Replacement cases

	1454 replacement_tuples = ()

	1455 for byte in (range(0x48, 0x50) if rexw else range(0x40, 0x48) + [0]):

	1456 replacement_case = (' {:02x}'.format(byte) if byte else '', )

	1457 if rm:

	1458 if byte & 0x1:

	1459 replacement_rms = '%{}..%{}'.format(start_rm8, end_rm8)

	1460 elif byte:

	1461 replacement_rms = '%{}..%{}'.format(start_rm, end_rm0)

	1462 else:

	1463 replacement_rms = '%{}..%{}'.format(start_rm, end_rm)

	1464 if not rmR15:

	1465 replacement_rms = ProtectBaseRegister(replacement_rms)

	1466 if byte & 0x2:

	1467 replacement_index = '%r8d..%r15d'

	1468 else:

	1469 replacement_index = '%eax..%edi'

	1470 if reg:

	1471 if byte & 0x4:

	1472 replacement_regs = '%{}..%{}'.format(start_reg8, end_reg8)

	1473 elif byte:

	1474 replacement_regs = '%{}..%{}'.format(start_reg, end_reg0)

	1475 else:

	1476 replacement_regs = '%{}..%{}'.format(start_reg, end_reg)

	1477 if not regR15:

	1478 replacement_regs = ProtectBaseRegister(replacement_regs)

	1479 if not reg and not rm:

	1480 pass

	1481 elif not reg and rm:

	1482 replacement_case += (replacement_rms, )

	1483 final_rr = '%r8d..%r15d' if byte & 0x1 else '%eax..%edi'

	1484 elif reg and not rm:

	1485 replacement_case += (replacement_regs, )

	1486 final_rr = '%r8d..%r15d' if byte & 0x4 else '%eax..%edi'

	1487 elif rm_to_reg:

	1488 replacement_case += (replacement_rms, replacement_regs)

	1489 final_rr = '%r8d..%r15d' if byte & 0x4 else '%eax..%edi'

	1490 else:

	1491 replacement_case += (replacement_regs, replacement_rms)

	1492 final_rr = '%r8d..%r15d' if byte & 0x1 else '%eax..%edi'

	1493 if input_rr: replacement_case += (ProtectIndexRegister(replacement_index), )

	1494 if output_rr: replacement_case += (ProtectBaseRegister(final_rr), )

	1495 replacement_tuples += (replacement_case, )

	1496 return Compressor(regex, replacement_tuple, replacement_tuples)

	1497

	1498

	1499 def RexCompressors():

	1500 """ Return "REX" compressors which combine different REX prefixes. """

	1501

	1502 if options.bitness != 64:

	1503 return []

	1504

	1505 compressors = []

	1506

	1507 # First pretty complex set of compressors to combine versions of REX with

	1508 # three lowest bits in different states.

	1509 register_kind_pairs = (

	1510 ( None, None),

	1511 ( 'al', 'al'), ( 'al', None), (None, 'al'),

	1512 ( 'ax', 'al'), ( 'al', 'ax'),

	1513 ( 'ax', 'ax'), ( 'ax', None), (None, 'ax'),

	1514 ( 'eax', 'al'), ( 'al', 'eax'),

	1515 ( 'eax', 'ax'), ( 'ax', 'eax'),

	1516 ( 'eax', 'eax'), ( 'eax', None), (None, 'eax'),

	1517 ( 'rax', 'al'), ( 'al', 'rax'),

	1518 ( 'rax', 'ax'), ( 'ax', 'rax'),

	1519 ( 'rax', 'eax'), ( 'eax', 'rax'),

	1520 ( 'rax', 'rax'), ( 'rax', None), (None, 'rax'),

	1521 ( 'eax', 'mm0'), ( 'mm0', 'eax'),

	1522 ( 'rax', 'mm0'), ( 'mm0', 'rax'),

	1523 ( 'mm0', 'eax'), ( 'eax', 'mm0'),

	1524 ( 'mm0', 'rax'), ( 'rax', 'mm0'),

	1525 ( 'eax', 'xmm0'),

	1526 ( 'rax', 'xmm0'),

	1527 ('xmm0', 'eax'),

	1528 ('xmm0', 'rax'),

	1529 ( 'mm0', 'mm0'), ( 'mm0', None), (None, 'mm0'),

	1530 ( 'mm0', 'xmm0'),

	1531 ('xmm0', 'mm0'),

	1532 ('xmm0', 'xmm0'),

	1533 ('xmm0', 'ymm0'), ('xmm0', None), (None, 'xmm0'),

	1534 ('ymm0', 'xmm0'),

	1535 ('ymm0', 'ymm0'), ('ymm0', None), (None, 'ymm0'),

	1536 )

	1537 for reg, rm in register_kind_pairs:

	1538 for regR15 in (True, False):

	1539 for rmR15 in (True, False):

	1540 for rexw in (True, False):

	1541 for input_rr in (True, False):

	1542 for output_rr in (True, False):

	1543 for rm_to_reg in (True, False):

	1544 # These combinations will just produce useless duplicates

	1545 if not reg and not regR15: continue

	1546 if not rm and not rmR15: continue

	1547 if not reg and not rm and (output_rr or rm_to_reg): continue

	1548 compressors.append(RexCompressor(

	1549 rm=rm, rmR15=rmR15, reg=reg, regR15=regR15,

	1550 rexw=rexw, input_rr=input_rr, output_rr=output_rr,

	1551 rm_to_reg=rm_to_reg))

	1552

	1553 # This is pretty simple compressor to combine two lines with different REX.W

	1554 # bits (only if they are otherwise identical).

	1555 compressors.append(Compressor(

	1556 r'.(\[REX:40\.\.47]\?).', ('[REX:40..4f]?', ),

	1557 (('[REX:40..47]?', ), ('[REX:48..4f]', ))))

	1558 return compressors

	1559

	1560

	1561 def SpecialCompressors():

	1562 """ Return all "special" compressors. """

	1563

	1564 compressors = []

	1565

	1566 # Special compressors: will handle some cosmetic issues.

	1567 #

	1568 # SETxx ignores reg field and thus are described as many separate instructions

	1569 compressors.append(Compressor(

	1570 '.0f 9[0-9a-fA-F] XX(/[0-7]) set.', ('', ),

	1571 [('/' + str(i), ) for i in range(8)]))

	1572 # BSWAP is described with opcode "0f c8+r", not "0f /1" in manual

	1573 if options.bitness == 32:

	1574 compressors.append(Compressor(

	1575 '.(XX/1) bswap.ax.*', ('c[8..f]', ), (('XX/1', ), )))

	1576 else:

	1577 compressors.append(Compressor(

	1578 '.(XX/1) bswap.ax.*', ('c[89abef]', ), (('XX/1', ), )))

	1579 compressors.append(Compressor(

	1580 '.(XX/1) bswap.r8.*', ('c[8..e]', ), (('XX/1', ), )))

	1581 # Add mark '# write to both' to certain versions of CMPXCHG, XADD, and XCHG

	1582 if options.bitness == 64:

	1583 compressors.append(Compressor(

	1584 r'.* (?:cmpxchg\|xadd\|xchg).%al\.\.%bh[^#]()$',

	1585 (' # write to both', ),

	1586 (('', ), )))

	1587 # "and $0xe0,[%eax..%edi]" is treated specially which means that we list all

	1588 # versions of and "[$0x1..$0xff],[%eax..%edi]" separately here.

	1589 # Without this rule these ands comprise 2/3 of the whole output!

	1590 if options.bitness == 32:

	1591 compressors.append(Compressor(

	1592 r'.*83 (e0 01 and \$0x1,%eax)()',

	1593 ('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]',

	1594 ' # special and'),

	1595 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS['eax'][r]),

	1596 '')

	1597 for i in range(0x01, 0x100) for r in range(8)] +

	1598 [('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]',

	1599 '')]))

	1600 else:

	1601 for reg in ('eax', 'r8d'):

	1602 start_reg = REGISTERS[reg][0]

	1603 end_reg = REGISTERS[reg][-1]

	1604 mark_reg = '%{}..%{}'.format(start_reg, end_reg)

	1605 mark_reg = ProtectBaseRegister(mark_reg)

	1606 for index_reg in ('eax', 'r8d'):

	1607 start_index = REGISTERS[index_reg][0]

	1608 end_index = REGISTERS[index_reg][-1]

	1609 mark_index = '%{}..%{}'.format(start_index, end_index)

	1610 mark_index = ProtectIndexRegister(mark_index)

	1611 compressors.append(Compressor(

	1612 r'.83 (e0 01 and \$0x1,%' + reg + ').'

	1613 'input_rr=(any_nonspecial); output_rr=(%' + reg + ')()',

	1614 ('XX/4 00 and[l]? $0x0,[{} or memory]'.format(mark_reg),

	1615 '[{}]'.format(mark_index),

	1616 '[{}]'.format(mark_reg),

	1617 ' # special and'),

	1618 [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS[reg][r]),

	1619 'any_nonspecial', '%' + REGISTERS[reg][r], '')

	1620 for i in range(0x01, 0x100) for r in range(7 + (reg == 'eax'))] +

	1621 [('XX/4 00 and[l]? $0x0,[{} or memory]'.format(mark_reg),

	1622 '[{}]'.format(mark_index),

	1623 '[{}]'.format(mark_reg),

	1624 '')]))

	1625

	1626 # "and $e0" and similar are used to align %rsp. All negative values are

	1627 # accepted by validator and there are 127 of these.

	1628 # Consolidate them into one line.

	1629 if options.bitness == 64:

	1630 compressors.append(Compressor(

	1631 r'.(?:81\|83) (?:e4\|e5) (80) (?:00 00 00 \|) and \$0x(80),%r[bs]p.()',

	1632 ('[80..ff]', '[80..ff]', ' # alignment and'),

	1633 [('{:02x}'.format(i), '{:02x}'.format(i), '')

	1634 for i in range(0x80, 0x100)]))

	1635 return compressors

	1636

	1637

	1638 def PrepareCompressors():

	1639 """ Return list of all compressors sorted from bigger ones to smaller ones """

	1640

	1641 return tuple(sorted(

	1642 RegToRmCompressors() +

	1643 RmCompressors() +

	1644 OpcodeCompressors() +

	1645 MemoryNonMemoryCompressors() +

	1646 RexCompressors() +

	1647 SpecialCompressors(),

	1648 key=lambda compressor: -len(compressor.replacements)))

	1649

	1650

	1651 def ShowProgress(rule, instruction):

	1652 if rule not in ShowProgress.rules_shown:

	1653 first_print = True

	1654 ShowProgress.rules_shown[rule]=len(ShowProgress.rules_shown)

	1655 else:

	1656 first_print = False

	1657 print >> sys.stderr, '-------- Compressed --------'

	1658 print >> sys.stderr, 'Rule:', ShowProgress.rules_shown[rule]

	1659 print >> sys.stderr, '--------'

	1660 compressor = compressors[rule]

	1661 match = compressor.regex.match(instruction)

	1662 assert match

	1663 format_str = CompressionTemplate(instruction, match, '{{{}}}')

	1664 replacements = sorted(format_str.format(*replacement)

	1665 for replacement in compressor.replacements)

	1666 if len(compressor.replacements) <= 4 or first_print:

	1667 for replacement in replacements:

	1668 print >> sys.stderr, replacement

	1669 else:

	1670 print >> sys.stderr, replacements[0]

	1671 print >> sys.stderr, '...'

	1672 print >> sys.stderr, replacements[-1]

	1673 print >> sys.stderr, '--------'

	1674 print >> sys.stderr, 'Compressed', format_str.format(*compressor.subst)

	1675 ShowProgress.rules_shown = {}

	1676

	1677

	1678 def main():

	1679 # We are keeping these global to share state graph and compressors

	1680 # between workers spawned by multiprocess. Passing them every time is slow.

	1681 global options, xml_file

	1682 global dfa

	1683 global worker_validator

	1684 options, xml_file = ParseOptions()

	1685 dfa = dfa_parser.ParseXml(xml_file)

	1686 worker_validator = validator.Validator(

	1687 validator_dll=options.validator_dll,

	1688 decoder_dll=options.decoder_dll)

	1689 global compressors

	1690 compressors = PrepareCompressors()

	1691

	1692 assert dfa.initial_state.is_accepting

	1693 assert not dfa.initial_state.any_byte

	1694

	1695 print >> sys.stderr, len(dfa.states), 'states'

	1696

	1697 num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

	1698

	1699 # We can't just write 'num_suffixes[dfa.initial_state]' because

	1700 # initial state is accepting.

	1701 total_instructions = sum(

	1702 num_suffixes[t.to_state]

	1703 for t in dfa.initial_state.forward_transitions.values())

	1704 print >> sys.stderr, total_instructions, 'regular instructions total'

	1705

	1706 tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)

	1707 print >> sys.stderr, len(tasks), 'tasks'

	1708

	1709 pool = multiprocessing.Pool()

	1710

	1711 results = pool.imap(Worker, tasks)

	1712

	1713 total = 0

	1714 num_valid = 0

	1715 full_output = set()

	1716 for prefix, count, valid_count, output, trace in results:

	1717 print >> sys.stderr, 'Prefix:', ', '.join(map(hex, prefix))

	1718 total += count

	1719 num_valid += valid_count

	1720 full_output \|= output

	1721 for rule, instruction in trace:

	1722 ShowProgress(rule, instruction)

	1723 for instruction in sorted(Compressed(full_output,

	1724 compressors,

	1725 ShowProgress)):

	1726 print instruction

	1727

	1728 print >> sys.stderr, total, 'instructions were processed'

	1729 print >> sys.stderr, num_valid, 'valid instructions'

	1730

	1731

	1732 if __name__ == '__main__':

	1733 main()

OLD	NEW

« no previous file with comments | « src/trusted/validator_ragel/build.scons ('k') | src/trusted/validator_ragel/testdata/32bit_regular.golden » ('j') | no next file with comments »