src/trusted/validator_ragel/compress_regular_instructions.py - Issue 49183002: Regular instructions golden file test.

Unified Diff: src/trusted/validator_ragel/compress_regular_instructions.py

Issue 49183002: Regular instructions golden file test. Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client/

Patch Set: Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/trusted/validator_ragel/compress_regular_instructions.py

===================================================================

--- src/trusted/validator_ragel/compress_regular_instructions.py (revision 0)

+++ src/trusted/validator_ragel/compress_regular_instructions.py (revision 0)

@@ -0,0 +1,998 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""

+Traverse the validator's DFA, collect all "normal" instructions and then

+compress output. Note: "anybyte fields" (immediates and displacements)

+are always filled with zeros. Otherwise processing of sextillions (sic!)

+of possibilities will take too long.

+The following compression rules are present:

+1. Compress ModR/M (+SIB & displacement).

+ Instruction: 00 00 add %al,(%rax)

halyavin 2013/11/01 08:17:49 add %al,(%rax) or may be even add %al,(%eax).

khim 2013/11/05 15:04:36 Done.

+ ...

+ Instruction: 00 ff add %bh,%bh

+ becomes

+ Instruction: 00 XX add [%al..%bh],[%al..%bh or memory]

+ Only applies if all possibilities are accepted by validator.

+1a. Compress ModR/M (+SIB & displacement) memory-only.

+ Instruction: f0 01 00 lock add %eax,(%eax)

+ ...

+ Instruction: f0 01 bf 00 00 00 00 lock add %edi,0x0(%edi)

+ becomes

+ Instruction: f0 01 XX lock add [%eax..%edi],[memory]

+ Only applies if all possible memory accesses are accepted by validator.

halyavin 2013/11/01 08:17:49 possible

khim 2013/11/05 15:04:36 Done.

+1b. Compress ModR/M (+SIB & displacement) register only

+ Instruction: 66 0f 50 c0 movmskpd %xmm0,%eax

+ ...

+ Instruction: 66 0f 50 ff movmskpd %xmm7,%edi

+ becomes

+ Instruction: 66 0f 50 XX movmskpd [%xmm0..%xmm7],[%eax..%edi]

+ Only applies if all possible register accesses are accepted by validator.

+2. Compress ModR/M (+SIB & displacement) with opcode extension.

+ Instruction: 0f 90 00 seto (%eax)

+ ...

+ Instruction: 0f 90 c7 seto %bh

+ becomes

+ Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

+ Only applies if all possibilities are accepted by validator.

+2a. Compress ModR/M (+SIB & displacement) memory-only with opcode extension.

+ Instruction: f0 ff 00 lock incl (%eax)

+ ...

+ Instruction: f0 ff 84 ff 00 00 00 00 lock incl 0x0(%edi,%edi,8)

+ becomes

+ Instruction: f0 ff XX/0 lock incl [memory]

+ Only applies if all possible memory accesses are accepted by validator.

+2b. Compress ModR/M (+SIB & displacement) register-only with opcode extension.

+ Instruction: 0f 71 d0 00 psrlw $0x0,%mm0

+ ...

+ Instruction: 0f 71 d7 00 psrlw $0x0,%mm7

+ becomes

+ Instruction: 0f 71 XX/2 00 psrlw $0x0,[%mm0..%mm7]

halyavin 2013/11/01 08:17:49 Shouldn't it be %mm0..%mm7?

khim 2013/11/05 15:04:36 Done.

+ Only applies if all possible register accesses are accepted by validator.

+3. Compress register-in-opcode.

+ Instruction: d9 c0 fld %st(0)

+ ...

+ Instruction: d9 c7 fld %st(7)

+ becomes

+ Instruction: d9 c[0..7] fld [%st(0)..%st(7)]

+ Only applies if all possible register accesses are accepted by validator.

+4. Special compressor for "set" instruction.

+ Instruction: 0f 90 XX/0 seto [%al..%bh or memory]

+ ...

+ Instruction: 0f 90 XX/7 seto [%al..%bh or memory]

halyavin 2013/11/01 08:17:49 I don't understand what is the difference between

khim 2013/11/05 15:04:36 There are no difference. "set" ignores "reg" field

+ becomes

+ Instruction: 0f 90 XX seto [%al..%bh or memory]

+"""

+import itertools

+import lockfile

bsy 2013/10/31 22:51:54 unused imports. style nits.

khim 2013/11/05 15:04:36 Done.

+import multiprocessing

+import optparse

+import os

+import re

+import subprocess

+import sys

+import tempfile

+import traceback

+import dfa_parser

+import dfa_traversal

+import objdump_parser

+import validator

+import spec

+# Register names in 'natural' order (as defined by IA32/x86-64 ABI)

bsy 2013/10/31 22:51:54 are these equivalence classes? used for... compre

khimg 2013/10/31 23:08:49 These are just names of registers. They are used..

+REGISTERS = {

+ 'al': [ 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh' ],

+ 'spl': [ 'al', 'cl', 'dl', 'bl', 'spl', 'bpl', 'sil', 'dil' ],

+ 'ax': [ 'ax', 'cx', 'dx', 'bx', 'sp', 'bp', 'si', 'di' ],

+ 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi' ],

+ 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi' ],

+ 'r8b': [ 'r{}b'.format(N) for N in range(8,16) ],

+ 'r8w': [ 'r{}w'.format(N) for N in range(8,16) ],

+ 'r8d': [ 'r{}d'.format(N) for N in range(8,16) ],

+ 'r8': [ 'r{}'.format(N) for N in range(8,16) ],

+ 'mm0': [ 'mm{}'.format(N) for N in range(8) ],

+ 'st(0)': [ 'st({})'.format(N) for N in range(8) ],

+ 'xmm0': [ 'xmm{}'.format(N) for N in range(8) ],

+ 'xmm8': [ 'xmm{}'.format(N) for N in range(8,16) ],

+ 'ymm0': [ 'ymm{}'.format(N) for N in range(8) ],

+ 'ymm8': [ 'ymm{}'.format(N) for N in range(8,16) ]

+}

+NOP = 0x90

+def PadToBundleSize(bytes):  # Returns |bytes| followed by enough NOP entries to reach validator.BUNDLE_SIZE items.

+ assert len(bytes) <= validator.BUNDLE_SIZE  # Input longer than one bundle cannot be padded.

+ return bytes + [NOP] * (validator.BUNDLE_SIZE - len(bytes))  # Builds a new list; |bytes| itself is left untouched.

+ACCEPTABLE_X86_64_INPUTS = {

+ 0x00001: 'input_rr=%eax',

+ 0x00002: 'input_rr=%ecx',

+ 0x00004: 'input_rr=%edx',

+ 0x00008: 'input_rr=%ebx',

+ 0x00010: 'input_rr=%esp',

+ 0x00020: 'input_rr=%ebp',

+ 0x00040: 'input_rr=%esi',

+ 0x00080: 'input_rr=%edi',

+ 0x00100: 'input_rr=%r8d',

+ 0x00200: 'input_rr=%r9d',

+ 0x00400: 'input_rr=%r10d',

+ 0x00800: 'input_rr=%r11d',

+ 0x01000: 'input_rr=%r12d',

+ 0x02000: 'input_rr=%r13d',

+ 0x04000: 'input_rr=%r14d',

+ 0x08000: 'input_rr=%r15d',

+ 0x1ffcf: 'input_rr=any_nonspecial'

+}

+def ValidateInstruction(instruction, validator_inst):  # Validates one instruction (list of byte values) padded to a bundle; see return sites for the result shape.

+ bundle = ''.join(map(chr, PadToBundleSize(instruction)))  # Byte values -> str, after NOP padding.

+ if options.bitness == 32:  # |options| is a module-level name set elsewhere in the file.

+ result = validator_inst.ValidateChunk(bundle, bitness=32)

+ return result  # 32-bit: the ValidateChunk result is returned unchanged.

+ else:

+ valid_inputs = 0  # Bit mask: one bit per initial restricted register that was accepted.

+ known_final_rr = False

+ bit_position = 1

+ for bit, initial_rr in enumerate(validator.ALL_REGISTERS + [None]):  # NOTE: |bit| is unused; the trailing None is the "no restricted register" case.

bsy 2013/10/31 22:51:54 bit is not used, since bit_position = 1 << bit is

khim 2013/11/05 15:04:36 Because I forgot to remove "bit" when I've switche

+ valid, final_rr = validator_inst.ValidateAndGetFinalRestrictedRegister(

+ bundle, len(instruction), initial_rr)

+ if valid:

+ valid_inputs |= bit_position

+ # "None" here means there is no restricted register, "False" means we

+ # have not seen anything yet.

+ assert known_final_rr is False or known_final_rr == final_rr  # Every accepted input must yield the same final restricted register.

halyavin 2013/11/01 08:17:49 We can use valid_inputs == 0 instead of known_fina

khim 2013/11/05 15:04:36 Done.

+ known_final_rr = final_rr

+ bit_position += bit_position  # Doubling moves the mask to the next bit.

+ # If nothing is accepted then instruction is not valid. Easy and simple.

+ if valid_inputs == 0: return False

+ # Format output register

+ if known_final_rr is None:

halyavin 2013/11/01 08:17:49 Possible improvement: we can extract converting re

khim 2013/11/05 15:04:36 I'm not sure two-line function used just once will

+ output_rr = 'output_rr=None'

+ elif known_final_rr < validator.REG_R8:  # Index below REG_R8: name comes from the 'eax' register list.

+ output_rr = 'output_rr=%' + REGISTERS['eax'][known_final_rr]

bsy 2013/10/31 22:51:54 why not a single 16 element array?

khimg 2013/10/31 23:08:49 Because this information will eventually go into t

+ else:

+ output_rr = 'output_rr=%' + REGISTERS['r8d'][known_final_rr - 8]  # Otherwise the name comes from the 'r8d' list, offset by 8.

bsy 2013/10/31 22:51:54 Since 2nd half of returned tuple from ValidateAndG

khimg 2013/10/31 23:08:49 Uhm... You are looking on said post-condition, rig

+ return [ACCEPTABLE_X86_64_INPUTS[valid_inputs], output_rr]  # 64-bit: [input note, output note]; a mask missing from the table raises KeyError.

+class WorkerState(object):  # Collects counters and the set of accepted-instruction description strings for one worker.

+ def __init__(self, prefix, validator):  # NOTE: |prefix| is accepted but neither stored nor used here.

+ self.total_instructions = 0  # Number of instructions received so far.

+ self.num_valid = 0  # Number of those for which ValidateInstruction did not return False.

+ self.validator = validator

+ self.output = set()  # One '; '-joined description string per accepted instruction.

+ def ReceiveInstruction(self, bytes):  # Callback: validates |bytes| (byte values) and records its disassembly if accepted.

+ self.total_instructions += 1

+ result = ValidateInstruction(bytes, self.validator)

+ if result is not False:  # Anything other than False is treated as "accepted".

+ self.num_valid += 1

+ dis = self.validator.DisassembleChunk(

+ ''.join(map(chr, bytes)),

+ bitness=options.bitness)

+ for line_nr in xrange(len(dis)):

+ dis[line_nr] = str(dis[line_nr])

+ assert dis[line_nr][0:17] == 'Instruction(0x' + str(line_nr) + ': '  # Expected 17-character prefix of each stringified entry.

+ assert dis[line_nr][-1:] == ')'

+ dis[line_nr] = dis[line_nr][17:-1]  # Keep only the text between that prefix and the closing ')'.

+ if '(%rip)' in dis[0]:

+ dis[0] = re.sub(' # 0x[ 0-9]*', '', dis[0])  # Removes the ' # 0x...' annotation from %rip-relative lines.

+ dis[0] = 'Instruction: ' + dis[0]

+ if result is not True:

+ dis += result  # Non-True results (the note list built by ValidateInstruction) become extra fields.

+ self.output.add('; '.join(dis))

+# Compressor has three slots: regex (which picks apart given instruction),

+# subst (which is used to denote compressed version) and replacements (which

+# are used to generate set of instructions from a given code).

+# Example compressor:

+# regex = '.*?[0-9a-fA-F]([0-7]) \\w* (%e(?:[abcd]x|[sb]p|[sd]i)).*()'

halyavin 2013/11/01 08:34:29 ".*?" I don't understand to what expression "?" ap

khim 2013/11/05 15:04:36 *? is non-greedy *, look it up in python's manual

+# subst = ('[0-7]', '[%eax..%edi]', ' # register in opcode')

bsy 2013/10/31 22:51:54 7 is %edi, so this example is confusing.

khimg 2013/10/31 23:08:49 It's a typo :-( Sorry for confusion.

+# replacements = ((0, '%eax'), (1, '%ecx'), (2, '%edx'), (3, '%ebx'),

+# (4, '%esp'), (5, '%ebp'), (6, '%esi'), (7, '%edi'))

halyavin 2013/11/01 08:34:29 Add square brackets: replacements = [(0, '%eax'),.

khim 2013/11/05 15:04:36 It's a tuple, why would I need square brackets her

+# When faced with instruction '40 inc %eax' it will capture the following

+# pieces of said instruction: '4[0] inc [%eax]'.

+# Then it will produce the following eight instructions:

halyavin 2013/11/01 08:17:49 eight

khim 2013/11/05 15:04:36 Done.

+# '40 inc %eax'

+# '41 inc %ecx'

+# '42 inc %edx'

+# '43 inc %ebx'

+# '44 inc %esp'

+# '45 inc %ebp'

+# '46 inc %esi'

+# '47 inc %edi'

+# If all these instructions can be found in a set of instructions then

+# compressor will remove them from said set and will insert one replacement

+# "compressed instruction" '4[0-7] inc [%eax..%edi] # register in opcode'.

bsy 2013/10/31 22:51:54 why isn't this [%eax..%edi]?

khimg 2013/10/31 23:08:49 Copy-paste of a typo :-( Will fix that.

+# Note that last group is only used in the replacement. It's used to grab marks

+# added by previous compressors and to replace them with a new mark.

+class Compressor(object):  # Plain record with three fields: regex, subst and replacements.

+ __slots__ = [

+ 'regex',

+ 'subst',

+ 'replacements'

+ ]

+ def __init__(self, regex, subst, replacements=None):  # |replacements| defaults to a fresh empty list per instance.

+ self.regex = regex  # Used via .match() in Compressed(), so it is expected to be a compiled pattern.

+ self.subst = subst  # Values formatted into the template to produce the compressed line.

+ self.replacements = [] if replacements is None else replacements  # Each item fills the template to produce one expanded line.

+def Compressed(instructions):  # Repeatedly collapses groups of lines in the set |instructions| (modified in place) and returns it.

+ for instruction in sorted(instructions):  # Iterates over a sorted copy, so the set may be changed below.

+ for compressor in compressors:  # |compressors| is a module-level list defined elsewhere in the file.

+ match = compressor.regex.match(instruction)

+ if match:

+ pos = 0

+ format = ''  # Template: the instruction text with captured groups replaced by '{}'.

halyavin 2013/11/01 08:34:29 Rename to format_str to avoid confusion with the f

khim 2013/11/05 15:04:36 Done, although I'm not sure how can there be any c

+ for group in range(1, len(match.groups())):  # All groups except the last one.

+ format += instruction[pos:match.start(group)] + '{}'

+ pos = match.end(group)

+ format += instruction[pos:match.start(len(match.groups()))]  # Text from the last group's start onward is dropped.

+ subset = set()

+ for replacement in compressor.replacements:

+ subset.add(format.format(*replacement))  # One expanded line per replacement tuple.

+ if subset <= instructions:  # Compress only if every expanded line is present.

+ instructions -= subset

+ instructions.add((format + '{}').format(*compressor.subst))  # The extra '{}' receives the final subst element.

+ return Compressed(instructions)  # Restart from scratch on the modified set (recursion).

+ return instructions  # Reached when no compressor applied to any line.

+def Worker((prefix, state_index)):  # Takes one (prefix, state_index) tuple (Python 2 tuple parameter); returns per-prefix statistics and output.

+ worker_state = WorkerState(prefix, worker_validator)  # |worker_validator| is a module-level name set elsewhere in the file.

+ try:

+ dfa_traversal.TraverseTree(

+ dfa.states[state_index],

+ final_callback=worker_state.ReceiveInstruction,

+ prefix=prefix,

+ anyfield=0)

+ if (prefix[0] != 0x0f or prefix[1] != 0x0f): # Skip 3DNow! instructions (prefix bytes 0f 0f are left uncompressed)

+ worker_state.output = Compressed(set(worker_state.output))

+ except Exception as e:  # NOTE: |e| is unused; the exception is re-raised below.

+ traceback.print_exc() # because multiprocessing imap swallows traceback

+ raise

+ return (

+ prefix,

+ worker_state.total_instructions,

+ worker_state.num_valid,

+ worker_state.output)

+def ParseOptions():  # Parses the command line; returns (options, xml_file).

+ parser = optparse.OptionParser(usage='%prog [options] xmlfile')

+ parser.add_option('--bitness',

+ type=int,

+ help='The subarchitecture: 32 or 64')

+ parser.add_option('--validator_dll',

+ help='Path to librdfa_validator_dll')

+ parser.add_option('--decoder_dll',

+ help='Path to librdfa_decoder_dll')

+ options, args = parser.parse_args()  # With no arguments given, optparse reads the process command line.

+ if options.bitness not in [32, 64]:  # Also true when --bitness is omitted (value is then None).

+ parser.error('specify --bitness 32 or --bitness 64')  # optparse's error() prints the message and exits.

+ if len(args) != 1:

+ parser.error('specify one xml file')

+ (xml_file,) = args  # Exactly one positional argument is guaranteed by the check above.

+ return options, xml_file

+# Version suitable for use in regular expressions

+REGISTERS_RE = REGISTERS.copy()

+REGISTERS_RE['st(0)'] = [ 'st\\({}\\)'.format(N) for N in range(8) ]

+REGISTERS_RE['st\\(0\\)'] = REGISTERS_RE['st(0)']

+# Index names in 'natural' order (as defined by IA32/x86-64 ABI)

+INDEXES = {

+ 'eax': [ 'eax', 'ecx', 'edx', 'ebx', 'eiz', 'ebp', 'esi', 'edi' ],

+ 'rax': [ 'rax', 'rcx', 'rdx', 'rbx', 'riz', 'rbp', 'rsi', 'rdi' ],

+ 'r8': [ 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15' ]

+}

+# Registers which can be used as base in 64-bit mode (in all incarnations)

+X86_64_BASE_REGISTERS = set([

+ '%spl', '%bpl', '%r15b', '%sp', '%bp', '%r15w',

+ '%esp', '%ebp', '%r15d', '%rsp', '%rbp', '%r15',

+ '%rip'

+])

+def AddModRM_Compressor(regex, subst, subst_register, subst_memory,

+ reg=None, rm=None, rm_to_reg=False, start_byte=0,

+ index_r8=False, input_rr=True, output_rr=False):

+ """Adds three compressors to the list of compressors:

+ main_compressors (register <-> register or memory instructions)

+ register_compressors (register <-> register instructions)

+ memory_compressors (register <-> memory instructions)

+ Args:

+ regex: regular expressions for the compressor

+ subst: replacement for register <-> register or memory instructions

+ subst_register: replacement for register <-> register instructions

+ subst_memory: replacement for register <-> memory instructions

+ reg: reg operand kind (see REGISTERS array) or None

+ rm: rm operand kind (see REGISTERS array)

+ rm_to_reg: three-state selector

+ True - instruction uses rm as source, reg as destination

halyavin 2013/11/01 14:08:57 "rm_to_reg", "reg_to_rm", "xchg".

khim 2013/11/06 14:25:15 Done.

+ False - instruction uses reg as source, rm as destination

+ None - instruction uses both operands symmetrically (e.g. test or xchg)

+ start_byte: first valid ModR/M byte (used when reg is None)

+ input_rr: True if instruction accesses memory

+ output_rr: three-state selector

+ True - instruction can be used to produce "restricted register"

halyavin 2013/11/01 14:08:57 "sandboxing writes", "no GP register writes", "non

khim 2013/11/06 14:25:15 Done.

+ False - instruction does not affect its operands (e.g. test)

+ None - instruction can damage output but can not be used to restrict it

+ Internal:

+ index_r8: must be called in False position (used to create two compressors

+ in 64-bit mode with index == %rax..%rdi or index == %r8..%r14)

+ Returns:

+ None

+ """

+ if options.bitness == 32:

+ base = 'eax'

+ index = 'eax'

+ expanded_regex = re.sub('{RR_NOTES}', '', regex)

+ else:

+ base = 'r8' if rm[0:2] == 'r8' else 'rax'

+ index = 'r8' if index_r8 else 'rax'

+ input = 'r8d' if index_r8 else 'eax'

+ if output_rr:

+ output_regs = reg if rm_to_reg else rm

+ assert output_regs in ('eax', 'r8d')

+ expanded_regex = re.sub('{RR_NOTES}', '; input_rr=((?:%{'+ input +

+ '}|any_nonspecial)); output_rr=(%{' + output_regs + '}|None)', regex)

+ else:

+ expanded_regex = re.sub('{RR_NOTES}', '; input_rr=((?:%{' + input +

+ '}|any_nonspecial)); output_rr=(None)', regex)

+ if 'RM_BYTE' in regex:

+ address_regex = '(?:0x0|(?:0x0)?\\((?:%{' + base + '})?\\))'

+ else:

+ address_regex = (

+ '(?:0x0|(?:0x0)?\\((?:%{' + base + '})?(?:,(?:%{' + index + '}))?'

+ '(?:,(?:1|2|4|8))?\\))')

+ # We need to process either modrm or reg

+ assert rm is not None or reg is not None

+ # If reg is given then the whole ModR/M byte range is used and start_byte must be 0

+ assert reg is None or start_byte == 0

+ # Replace RM_BYTE placeholders.

+ # Handle only cases without displacement.

+ expanded_regex = re.sub('{RM_BYTE}', '[0-9a-fA-F][0-9a-fA-F]', expanded_regex)

+ expanded_regex = re.sub('{RM_BYTE/0}', '[048cC][0-7]', expanded_regex)

+ expanded_regex = re.sub('{RM_BYTE/1}', '[048cC][89a-fA-F]', expanded_regex)

+ expanded_regex = re.sub('{RM_BYTE/2}', '[159dD][0-7]', expanded_regex)

+ expanded_regex = re.sub('{RM_BYTE/3}', '[159dD][89a-fA-F]', expanded_regex)

+ expanded_regex = re.sub('{RM_BYTE/4}', '[26aAeE][0-7]', expanded_regex)

+ expanded_regex = re.sub('{RM_BYTE/5}', '[26aAeE][89a-fA-F]', expanded_regex)

+ expanded_regex = re.sub('{RM_BYTE/6}', '[37bBfF][0-7]', expanded_regex)

+ expanded_regex = re.sub('{RM_BYTE/7}', '[37bBfF][89a-fA-F]', expanded_regex)

+ register_regex = expanded_regex

+ # Replace RM_SIB_BYTES placeholders.

+ # Handle only cases without displacement.

+ expanded_regex = re.sub(

+ '{RM_SIB_BYTES}', '[0-b][4c] [0-9a-fA-F][0-9a-fA-F]', expanded_regex)

+ expanded_regex = re.sub(

+ '{RM_SIB_BYTES/0}', '[048]4 [0-9a-fA-F][0-9a-fA-F]', expanded_regex)

+ expanded_regex = re.sub(

+ '{RM_SIB_BYTES/1}', '[048][cC] [0-9a-fA-F][0-9a-fA-F]', expanded_regex)

+ expanded_regex = re.sub(

+ '{RM_SIB_BYTES/2}', '[159]4 [0-9a-fA-F][0-9a-fA-F]', expanded_regex)

+ expanded_regex = re.sub(

+ '{RM_SIB_BYTES/3}', '[159][cC] [0-9a-fA-F][0-9a-fA-F]', expanded_regex)

+ expanded_regex = re.sub(

+ '{RM_SIB_BYTES/4}', '[26aA]4 [0-9a-fA-F][0-9a-fA-F]', expanded_regex)

+ expanded_regex = re.sub(

+ '{RM_SIB_BYTES/5}', '[26aA][cC] [0-9a-fA-F][0-9a-fA-F]', expanded_regex)

+ expanded_regex = re.sub(

+ '{RM_SIB_BYTES/6}', '[37bB]4 [0-9a-fA-F][0-9a-fA-F]', expanded_regex)

+ expanded_regex = re.sub(

+ '{RM_SIB_BYTES/7}', '[37bB][cC] [0-9a-fA-F][0-9a-fA-F]', expanded_regex)

+ register_regex = re.sub(

+ '{RM_SIB_BYTES}', '[c-fC-F][0-9a-fA-F]', register_regex)

+ register_regex = re.sub('{RM_SIB_BYTES/0}', '[cC][0-7]', register_regex)

+ register_regex = re.sub('{RM_SIB_BYTES/1}', '[cC][8-9a-fA-F]', register_regex)

+ register_regex = re.sub('{RM_SIB_BYTES/2}', '[dD][0-7]', register_regex)

+ register_regex = re.sub('{RM_SIB_BYTES/3}', '[dD][8-9a-fA-F]', register_regex)

+ register_regex = re.sub('{RM_SIB_BYTES/4}', '[eE][0-7]', register_regex)

+ register_regex = re.sub('{RM_SIB_BYTES/5}', '[eE][8-9a-fA-F]', register_regex)

+ register_regex = re.sub('{RM_SIB_BYTES/6}', '[fF][0-7]', register_regex)

+ register_regex = re.sub('{RM_SIB_BYTES/7}', '[fF][8-9a-fA-F]', register_regex)

+ # Replace register placeholders

+ for register, value in REGISTERS_RE.iteritems():

+ expanded_regex = re.sub('{%' + register + '}',

+ '(?:%' + '|%'.join(value) + '|' + address_regex +')', expanded_regex)

+ register_regex = re.sub('{%' + register + '}',

+ '(?:%' + '|%'.join(value) +')', register_regex)

+ for register, value in REGISTERS_RE.iteritems():

+ expanded_regex = re.sub('{' + register + '}',

+ '(?:' + '|'.join(value) + ')', expanded_regex)

+ register_regex = re.sub('{' + register + '}',

+ '(?:' + '|'.join(value) + ')', register_regex)

+ expanded_regex = re.compile(expanded_regex)

+ register_regex = re.compile(register_regex)

+ # Add input_rr and output_rr fields if we are dealing with 64-bit case

+ if options.bitness == 32:

+ subst_fixed = subst

+ subst_register_fixed = subst_register

+ subst_memory_fixed = subst_memory

+ else:

+ if input_rr:

+ input_note = '[%eax..%edi]' if index == 'rax' else '[%r8d..%r15d]'

+ else:

+ input_note = 'any_nonspecial'

+ if output_rr:

+ output_note = '[%eax..%edi]' if output_regs == 'eax' else '[%r8d..%r14d]'

+ else:

+ output_note = None

+ subst_fixed = subst[0:-1] + (input_note, output_note) + subst[-1:]

+ subst_register_fixed = (

+ subst_register[0:-1] + (input_note, output_note) + subst_register[-1:])

+ subst_memory_fixed = (

+ subst_memory[0:-1] + (input_note, output_note) + subst_memory[-1:])

+ # If we already have replacements in cache then we just reuse them.

+ output_key = (reg, rm, rm_to_reg, start_byte, index_r8, input_rr, output_rr)

+ if output_key in AddModRM_Compressor.replacements:

+ replacements = AddModRM_Compressor.replacements[output_key]

+ main_compressors.append(

+ Compressor(expanded_regex, subst_fixed, replacements[0]))

+ register_compressors.append(

+ Compressor(register_regex, subst_register_fixed, replacements[1]))

+ memory_compressors.append(

+ Compressor(expanded_regex, subst_memory_fixed, replacements[2]))

+ if options.bitness == 64 and not index_r8:

+ AddModRM_Compressor(

+ regex, subst, subst_register, subst_memory,

+ reg=reg, rm=rm, rm_to_reg=rm_to_reg, start_byte=start_byte,

+ index_r8=True, input_rr=input_rr, output_rr=output_rr)

+ return

+ # It can be memory only instruction, register only one or both

+ main_compressor = Compressor(expanded_regex, subst_fixed)

+ register_compressor = Compressor(register_regex, subst_register_fixed)

+ memory_compressor = Compressor(expanded_regex, subst_memory_fixed)

+ # Generation time!

+ if reg is None:

+ # reg field is used as opcode extension

+ byte_range = [byte for byte in xrange(256) if byte & 0x38 == start_byte]

+ else:

+ byte_range = xrange(256)

+ for modrm in byte_range:

+ # Parse ModRM

+ mod_field = (modrm & 0xc0) >> 6

+ reg_field = (modrm & 0x38) >> 3

+ rm_field = (modrm & 0x07)

+ if reg is not None:

+ reg_text = '%' + REGISTERS[reg][reg_field]

+ # If mod == 3 then it's register-to-register instruction

+ if mod_field == 3:

+ bytes = '{:02x}'.format(modrm)

+ rm_text = '%' + REGISTERS[rm][rm_field]

+ replacement = [bytes]

+ if reg is None:

+ replacement.append(rm_text)

+ else:

+ replacement.append(rm_text if rm_to_reg else reg_text)

+ replacement.append(reg_text if rm_to_reg else rm_text)

+ if options.bitness == 64:

+ replacement.append('any_nonspecial')

+ output = reg_text if rm_to_reg else rm_text

+ if output_rr:

+ replacement.append(output)

+ else:

+ replacement.append(None)

+ if output_rr is None and output in X86_64_BASE_REGISTERS: continue

+ if output_rr is True and output == '%r15d': continue

+ if rm_to_reg is None and reg_text in X86_64_BASE_REGISTERS: continue

+ replacement = tuple(replacement)

+ main_compressor.replacements.append(replacement)

+ register_compressor.replacements.append(replacement)

+ # If mod != 3 then it's register-to-memory instruction

+ else:

+ # If RM field != %rsp then there is no index

+ if rm_field != validator.REG_RSP:

+ base_text = '%' + REGISTERS[base][rm_field]

+ # If RM field == %rbp and MOD field is zero then it's absolute address

+ if mod_field == 0 and rm_field == validator.REG_RBP:

+ bytes = '{:02x} 00 00 00 00'.format(modrm)

+ rm_text = '0x0' if options.bitness == 32 else '0x0(%rip)'

+ base_text = '%rip'

+ # Memory access with just a base register

+ elif mod_field == 0:

+ bytes = '{:02x}'.format(modrm)

+ rm_text = '({})'.format(base_text)

+ # Memory access with base and 8bit offset

+ elif mod_field == 1:

+ bytes = '{:02x} 00'.format(modrm)

+ rm_text = '0x0({})'.format(base_text)

+ # Memory access with base and 32bit offset

+ else: # mod_field == 2

+ bytes = '{:02x} 00 00 00 00'.format(modrm)

+ rm_text = '0x0({})'.format(base_text)

+ replacement = [bytes]

+ if reg is None:

+ replacement.append(rm_text)

+ else:

+ replacement.append(rm_text if rm_to_reg else reg_text)

+ replacement.append(reg_text if rm_to_reg else rm_text)

+ if options.bitness == 64:

+ replacement.append('any_nonspecial')

+ output = reg_text if rm_to_reg else None

+ if output_rr:

+ replacement.append(output)

+ else:

+ replacement.append(None)

+ if input_rr and base_text not in X86_64_BASE_REGISTERS: continue

+ if output_rr is None and output in X86_64_BASE_REGISTERS: continue

+ if output_rr is True and output == '%r15d': continue

+ if rm_to_reg is None and reg_text in X86_64_BASE_REGISTERS: continue

+ replacement = tuple(replacement)

+ main_compressor.replacements.append(replacement)

+ memory_compressor.replacements.append(replacement)

+ else:

+ # If RM field == %rsp then we have SIB byte

+ for sib in xrange(256):

+ scale_field = (sib & 0xc0) >> 6

+ index_field = (sib & 0x38) >> 3

+ base_field = (sib & 0x07)

+ index_text = '%' + INDEXES[index][index_field]

+ base_text = '%' + REGISTERS[base][base_field]

+ scale_text = pow(2, scale_field)

+ # If BASE is %rbp and MOD == 0 then index with 32bit offset is used

+ if mod_field == 0 and base_field == validator.REG_RBP:

+ bytes = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

+ if (options.bitness == 32 or

+ index_field != validator.REG_RSP or

+ scale_field != 0):

+ rm_text = '0x0(,{},{})'.format(index_text, scale_text)

+ else:

+ rm_text = '0x0'

+ base_text = ''

+ # Memory access with base and index (no offset)

+ elif mod_field == 0:

+ bytes = '{:02x} {:02x}'.format(modrm, sib)

+ rm_text = '({},{},{})'.format(base_text, index_text, scale_text)

+ # Memory access with base, index and 8bit offset

+ elif mod_field == 1:

+ bytes = '{:02x} {:02x} 00'.format(modrm, sib)

+ rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

+ # Memory access with base, index and 32bit offset

+ elif mod_field == 2:

+ bytes = '{:02x} {:02x} 00 00 00 00'.format(modrm, sib)

+ rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

+ # Pretty-printing of access via %rsp

+ if (scale_field == 0 and index != 'r8' and

+ base_field == validator.REG_RSP and

+ index_field == validator.REG_RSP):

+ #index_text = 'any_nonspecial'

+ rm_text = ('0x0({})' if mod_field else '({})').format(base_text)

+ if index_text == "%riz":

+ index_text = 'any_nonspecial'

+ replacement = [bytes]

+ if reg is None:

+ replacement.append(rm_text)

+ else:

+ replacement.append(rm_text if rm_to_reg else reg_text)

+ replacement.append(reg_text if rm_to_reg else rm_text)

+ if options.bitness == 64:

+ if not input_rr or index_text == 'any_nonspecial':

+ replacement.append('any_nonspecial')

+ else:

+ replacement.append('%' + REGISTERS[input][index_field])

+ output = reg_text if rm_to_reg else None

+ replacement.append(output if output_rr else None)

+ if input_rr:

+ if base_text not in X86_64_BASE_REGISTERS: continue

+ if index_text in X86_64_BASE_REGISTERS - set(['%r15']): continue

+ if output_rr is None and output in X86_64_BASE_REGISTERS: continue

+ if output_rr is True and output == '%r15d': continue

+ if rm_to_reg is None and reg_text in X86_64_BASE_REGISTERS: continue

+ replacement = tuple(replacement)

+ main_compressor.replacements.append(replacement)

+ memory_compressor.replacements.append(replacement)

+ assert len(main_compressor.replacements) > 1

+ assert len(register_compressor.replacements) > 1

+ assert len(memory_compressor.replacements) > 1

+ main_compressor.replacements = tuple(main_compressor.replacements)

+ register_compressor.replacements = tuple(register_compressor.replacements)

+ memory_compressor.replacements = tuple(memory_compressor.replacements)

+ main_compressors.append(main_compressor)

+ register_compressors.append(register_compressor)

+ memory_compressors.append(memory_compressor)

+ AddModRM_Compressor.replacements[output_key] = (

+ main_compressor.replacements,

+ register_compressor.replacements,

+ memory_compressor.replacements

+ )

+ if options.bitness == 64 and not index_r8:

+ AddModRM_Compressor(

+ regex, subst, subst_register, subst_memory,

+ reg=reg, rm=rm, rm_to_reg=rm_to_reg, start_byte=start_byte,

+ index_r8=True, input_rr=input_rr, output_rr=output_rr)

+# Replacements cache.

+AddModRM_Compressor.replacements = {}

+def PrepareCompressors():

+ global compressors

+ global main_compressors

+ global register_compressors

+ global memory_compressors

+ # "Larger" compressors should be tried first, then "smaller" ones.

+ main_compressors = []

+ register_compressors = []

+ memory_compressors = []

+ extra_compressors = []

+ if options.bitness == 32:

+ register_kinds = ('al', 'ax', 'eax', 'mm0', 'xmm0', 'ymm0')

+ register_kind_pairs = (

+ ( 'al', 'al'),

+ ( 'ax', 'al'),

+ ( 'ax', 'ax'),

+ ( 'eax', 'al'),

+ ( 'eax', 'ax'),

+ ( 'eax', 'eax'),

+ ( 'eax', 'mm0'),

+ ( 'mm0', 'eax'),

+ ( 'eax', 'xmm0'),

+ ('xmm0', 'eax'),

+ ( 'mm0', 'mm0'),

+ ( 'mm0', 'xmm0'),

+ ('xmm0', 'mm0'),

+ ('xmm0', 'xmm0'),

+ ('xmm0', 'ymm0'),

+ ('ymm0', 'xmm0'),

+ ('ymm0', 'ymm0')

+ )

+ else:

+ register_kinds = ('al', 'spl', 'ax', 'eax', 'mm0', 'xmm0', 'ymm0',

+ 'r8b', 'r8w', 'r8d', 'r8', 'xmm8', 'ymm8')

+ register_kind_pairs = (

+ ( 'al', 'al'),

+ ( 'spl', 'spl'), ( 'spl', 'r8b'), ( 'r8b', 'spl'), ( 'r8b', 'r8b'),

+ ( 'ax', 'al'),

+ ( 'ax', 'spl'), ( 'ax', 'r8b'), ( 'r8w', 'spl'), ( 'r8w', 'r8b'),

+ ( 'ax', 'ax'), ( 'ax', 'r8w'), ( 'r8w', 'ax'), ( 'r8w', 'r8w'),

+ ( 'eax', 'al'),

+ ( 'eax', 'spl'), ( 'eax', 'r8b'), ( 'r8d', 'spl'), ( 'r8d', 'r8b'),

+ ( 'eax', 'ax'), ( 'eax', 'r8w'), ( 'r8d', 'ax'), ( 'r8d', 'r8w'),

+ ( 'eax', 'eax'), ( 'eax', 'r8d'), ( 'r8d', 'eax'), ( 'r8d', 'r8d'),

+ ( 'rax', 'al'),

+ ( 'rax', 'spl'), ( 'rax', 'r8b'), ( 'r8', 'spl'), ( 'r8', 'r8b'),

+ ( 'rax', 'rax'), ( 'rax', 'r8'), ( 'r8', 'rax'), ( 'r8', 'r8'),

+ ( 'eax', 'mm0'), ( 'r8d', 'mm0'),

+ ( 'rax', 'mm0'), ( 'r8', 'mm0'),

+ ( 'mm0', 'eax'), ( 'mm0', 'r8d'),

+ ( 'mm0', 'rax'), ( 'mm0', 'r8'),

+ ( 'eax', 'xmm0'), ( 'eax', 'xmm8'), ( 'r8d', 'xmm0'), ( 'r8d', 'xmm8'),

+ ( 'rax', 'xmm0'), ( 'rax', 'xmm8'), ( 'r8', 'xmm0'), ( 'r8', 'xmm8'),

+ ('xmm0', 'eax'), ('xmm0', 'r8d'), ('xmm8', 'eax'), ('xmm8', 'r8d'),

+ ('xmm0', 'rax'), ('xmm0', 'r8'), ('xmm8', 'rax'), ('xmm8', 'r8'),

+ ( 'mm0', 'mm0'),

+ ( 'mm0', 'xmm0'), ( 'mm0', 'xmm8'),

+ ('xmm0', 'mm0'), ('xmm8', 'mm0'),

+ ('xmm0', 'xmm0'), ('xmm0', 'xmm8'), ('xmm8', 'xmm0'), ('xmm8', 'xmm8'),

+ ('xmm0', 'ymm0'), ('xmm0', 'ymm8'), ('xmm8', 'ymm0'), ('xmm8', 'ymm8'),

+ ('ymm0', 'xmm0'), ('ymm0', 'xmm8'), ('ymm8', 'xmm0'), ('ymm8', 'xmm8'),

+ ('ymm0', 'ymm0'), ('ymm0', 'ymm8'), ('ymm8', 'ymm0'), ('ymm8', 'ymm8')

+ )

+ # Largest compressors: both reg and rm fields are used

+ for reg, rm in register_kind_pairs:

+ start_reg = REGISTERS[reg][0]

+ end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

+ start_rm = REGISTERS[rm][0]

+ end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]

+ # First instruction uses just ModR/M byte in 32bit mode but both

+ # ModR/M and SIB bytes in 64bit mode. Both approaches will work in both cases,

+ # this is just an optimization to avoid needless work.

+ if options.bitness == 32:

+ bytes = '({RM_BYTE})'

+ else:

+ bytes = '({RM_SIB_BYTES})'

+ for extra_bytes in ('', ' 00', ' 00 00', ' 00 00 00 00'):

+ # Normal instructions with two operands (reg to rm).

+ if options.bitness == 64 and rm in ('eax', 'r8d'):

+ # Zero-extending version first

+ AddModRM_Compressor(

+ '.*?' + bytes + extra_bytes +

+ ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'

+ '(%{' + reg + '}),({%' + rm + '}).*{RR_NOTES}()',

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg),

+ '[%{}..%{} or memory]'.format(start_rm, end_rm), ''),

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg),

+ '[%{}..%{}]'.format(start_rm, end_rm), ' # reg to rm'),

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg), '[memory]', ''),

+ reg=reg, rm=rm, rm_to_reg=False, output_rr=True)

+ # Zero-extending xchg/xadd

+ AddModRM_Compressor(

+ '.*?' + bytes + extra_bytes +

+ ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'

+ '(%{' + reg + '}),({%' + rm + '}).*{RR_NOTES}()',

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg),

+ '[%{}..%{} or memory]'.format(start_rm, end_rm),

+ ' # write to both'),

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg),

+ '[%{}..%{}]'.format(start_rm, end_rm),

+ ' # reg to rm; write to both'),

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg), '[memory]',

+ ' # write to both'),

+ reg=reg, rm=rm, rm_to_reg=None, output_rr=True)

+ if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',

+ 'r8b', 'r8w', 'r8d', 'r8'):

+ # Dangerous next

+ AddModRM_Compressor(

+ '.*?' + bytes + extra_bytes +

+ ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'

+ '(%{' + reg + '}),({%' + rm + '}).*{RR_NOTES}()',

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg),

+ '[%{}..%{} or memory]'.format(start_rm, end_rm), ''),

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg),

+ '[%{}..%{}]'.format(start_rm, end_rm), ' # reg to rm'),

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg), '[memory]', ''),

+ reg=reg, rm=rm, rm_to_reg=False, output_rr=None)

+ # Dangerous xchg/xadd

+ AddModRM_Compressor(

+ '.*?' + bytes + extra_bytes +

+ ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'

+ '(%{' + reg + '}),({%' + rm + '}).*{RR_NOTES}()',

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg),

+ '[%{}..%{} or memory]'.format(start_rm, end_rm),

+ ' # write to both'),

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg),

+ '[%{}..%{}]'.format(start_rm, end_rm),

+ ' # reg to rm; write to both'),

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg), '[memory]',

+ ' # write to both'),

+ reg=reg, rm=rm, rm_to_reg=None, output_rr=None)

+ # Now normal version

+ AddModRM_Compressor(

+ '.*?' + bytes + extra_bytes +

+ ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'

+ '(%{' + reg + '}),({%' + rm + '}).*{RR_NOTES}()',

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg),

+ '[%{}..%{} or memory]'.format(start_rm, end_rm), ''),

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg),

+ '[%{}..%{}]'.format(start_rm, end_rm), ' # reg to rm'),

+ ('XX', '[%{}..%{}]'.format(start_reg, end_reg), '[memory]', ''),

+ reg=reg, rm=rm, rm_to_reg=False)

+ # Normal instructions with two operands (rm to reg).

+ if options.bitness == 64 and reg in ('eax', 'r8d'):

+ # Zero-extending version first

+ AddModRM_Compressor(

+ '.*?' + bytes + extra_bytes +

+ ' (?:lock )?\\w* (?:\\$0x0,|\\$0x0,\\$0x0,|%cl,|%xmm0,)?'

+ '({%' + rm + '}),(%{' + reg + '}).*{RR_NOTES}()',

+ ('XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),

+ '[%{}..%{}]'.format(start_reg, end_reg), ''),

+ ('XX', '[%{}..%{}]'.format(start_rm, end_rm),

+ '[%{}..%{}]'.format(start_reg, end_reg), ' # rm to reg'),

+ ('XX', '[memory]', '[%{}..%{}]'.format(start_reg, end_reg), ''),

+ reg=reg, rm=rm, rm_to_reg=True, output_rr=True)

+ if options.bitness == 64 and reg in ('al', 'spl', 'ax', 'eax', 'rax',

+ 'r8b', 'r8w', 'r8d', 'r8'):

+ # Dangerous next

+ AddModRM_Compressor(

+ '.*?' + bytes + extra_bytes +

+ ' (?:lock )?\\w* (?:\\$0x0,|\\$0x0,\\$0x0,|%cl,|%xmm0,)?'

+ '({%' + rm + '}),(%{' + reg + '}).*{RR_NOTES}()',

+ ('XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),

+ '[%{}..%{}]'.format(start_reg, end_reg), ''),

+ ('XX', '[%{}..%{}]'.format(start_rm, end_rm),

+ '[%{}..%{}]'.format(start_reg, end_reg), ' # rm to reg'),

+ ('XX', '[memory]', '[%{}..%{}]'.format(start_reg, end_reg), ''),

+ reg=reg, rm=rm, rm_to_reg=True, output_rr=None)

+ # Now normal version

+ AddModRM_Compressor(

+ '.*?' + bytes + extra_bytes +

+ ' (?:lock )?\\w* (?:\\$0x0,|\\$0x0,\\$0x0,|%cl,|%xmm0,)?'

+ '({%' + rm + '}),(%{' + reg + '}).*{RR_NOTES}()',

+ ('XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),

+ '[%{}..%{}]'.format(start_reg, end_reg), ''),

+ ('XX', '[%{}..%{}]'.format(start_rm, end_rm),

+ '[%{}..%{}]'.format(start_reg, end_reg), ' # rm to reg'),

+ ('XX', '[memory]', '[%{}..%{}]'.format(start_reg, end_reg), ''),

+ reg=reg, rm=rm, rm_to_reg=True)

+ # 3DNow! instructions. Additional byte is opcode extension.

+ AddModRM_Compressor(

+ '.*?' + bytes + ' [0-9a-fA-F][0-9a-fA-F] \\w* '

+ '({%' + rm + '}),(%{' + reg + '}).*{RR_NOTES}()',

+ ('XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),

+ '[%{}..%{}]'.format(start_reg, end_reg), ''),

+ ('XX', '[%{}..%{}]'.format(start_rm, end_rm),

+ '[%{}..%{}]'.format(start_reg, end_reg), ' # reg to rm'),

+ ('XX', '[memory]', '[%{}..%{}]'.format(start_reg, end_reg), ''),

+ reg=reg, rm=rm, rm_to_reg=True)

+ # Smaller compressors: only rm field is used.

+ for rm in register_kinds:

+ start_rm = REGISTERS[rm][0]

+ end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]

+ for opcode in range(8):

+ # First instruction uses just ModR/M byte in 32bit mode but both

+ # ModR/M in 64bit mode. Both approaches will work in both cases,

+ # this is just an optimization to avoid needless work.

+ if options.bitness == 32:

+ bytes = '({RM_BYTE/' + str(opcode) + '})'

+ else:

+ bytes = '({RM_SIB_BYTES/' + str(opcode) + '})'

+ if options.bitness == 64:

+ # No memory access (e.g. prefetch)

+ AddModRM_Compressor(

+ '.*?' + bytes + ' ?\\w* (?:\\$0x0,|%cl,)?({%' + rm + '}).*'

+ '{RR_NOTES}()',

+ ('XX/' + str(opcode),

+ '[%{}..%{} or memory]'.format(start_rm, end_rm), ''),

+ ('XX/' + str(opcode), '[%{}..%{}]'.format(start_rm, end_rm), ''),

+ ('XX/' + str(opcode), '[memory]', ''),

+ reg=None, rm=rm, input_rr=False, start_byte=opcode*8)

+ for extra_bytes in ('', ' 00', ' 00 00', ' 00 00 00 00'):

+ # Part of opcode is encoded in ModR/M

+ AddModRM_Compressor(

+ '.*?' + bytes + extra_bytes +

+ ' (?:lock )?\\w* (?:\\$0x0,|%cl,)?'

+ '({%' + rm + '}).*{RR_NOTES}()',

+ ('XX/' + str(opcode),

+ '[%{}..%{} or memory]'.format(start_rm, end_rm), ''),

+ ('XX/' + str(opcode), '[%{}..%{}]'.format(start_rm, end_rm), ''),

+ ('XX/' + str(opcode), '[memory]', ''),

+ reg=None, rm=rm, start_byte=opcode*8)

+ # Even smaller compressors: only low 3 bits of opcode are used.

+ for reg in register_kinds + ('st(0)',):

+ start_reg = REGISTERS[reg][0]

+ end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

+ for opcode in range(8):

+ for extra_bytes in ('', ' 00', ' 00 00', ' 00 00 00 00'):

+ # Operand is encoded in opcode

+ extra_compressors.append(Compressor(re.compile(

+ '.*?[0-9a-fA-F]([0-7])' + extra_bytes +

+ ' \\w* (?:\\$0x0,|%ax,|%st,)?'

+ '(%(?:' + '|'.join(REGISTERS_RE[reg]) + ')).*()'),

+ ('[0..7]', '[%{}..%{}]'.format(start_reg, end_reg), ''),

+ [('0', '%' + REGISTERS[reg][0]),

+ ('1', '%' + REGISTERS[reg][1]),

+ ('2', '%' + REGISTERS[reg][2]),

+ ('3', '%' + REGISTERS[reg][3]),

+ ('4', '%' + REGISTERS[reg][4]),

+ ('5', '%' + REGISTERS[reg][5]),

+ ('6', '%' + REGISTERS[reg][6]),

+ ('7', '%' + REGISTERS[reg][7])]))

+ extra_compressors.append(Compressor(re.compile(

+ '.*?[0-9a-fA-F]([89a-fA-F])' + extra_bytes +

+ ' \\w* (?:\\$0x0,|%ax,|%st,)?'

+ '(%(?:' + '|'.join(REGISTERS_RE[reg]) + ')).*()'),

+ ('[8..f]', '[%{}..%{}]'.format(start_reg, end_reg), ''),

+ [('8', '%' + REGISTERS[reg][0]),

+ ('9', '%' + REGISTERS[reg][1]),

+ ('a', '%' + REGISTERS[reg][2]),

+ ('b', '%' + REGISTERS[reg][3]),

+ ('c', '%' + REGISTERS[reg][4]),

+ ('d', '%' + REGISTERS[reg][5]),

+ ('e', '%' + REGISTERS[reg][6]),

+ ('f', '%' + REGISTERS[reg][7])]))

+ compressors = (main_compressors + memory_compressors + register_compressors +

+ extra_compressors)

+ # Special compressors: will handle some cosmetic issues.

+ #

+ # SETxx ignores reg field and thus are described as many separate instructions

+ compressors.append(Compressor(

+ re.compile('.*0f 9[0-9a-fA-F] XX(/[0-7]) set.*()'), ('', ''),

+ [('/' + str(i),) for i in range(8)]))

+ # BSWAP is described with opcode "0f c8+r", not "0f /1" in manual

+ compressors.append(Compressor(

+ re.compile('.*0f (XX/1) bswap.*()'), ('c[9-f]', ''), [('XX/1',)]))

+ # "and $0xe0,[%eax..%edi]" is treated specially which means that we list all

+ # versions of and "[$0x1..$0xff],[%eax..%edi]" separately here.

+ # Without this rule these ands comprise 2/3 of the whole output!

+ compressors.append(Compressor(

+ re.compile('.*(83 e0 01 and \\$0x1,%eax)()'),

+ ('83 XX/0 00 add[l]? $0x0,[%eax..%edi or memory]', ' # special and'),

+ [('83 e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS['eax'][r]),)

+ for i in range(1, 256) for r in range(8)] +

+ [('83 XX/0 00 add[l]? $0x0,[%eax..%edi or memory]',)]))

+ # Merge memory and non-memory access

+ for letter, reg in (('b', 'al'), ('w', 'ax'), ('l', 'eax')):

+ start_reg = REGISTERS[reg][0]

+ end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]

+ for notes in ('', ' # rm to reg', ' # reg to rm'):

+ compressors.append(Compressor(re.compile(

+ '.* \w*(' + letter + ') .*(\\[memory]).*()()'),

+ ('[{}]?'.format(letter),

+ '[%{}..%{} or memory]'.format(start_reg, end_reg), '', ''),

+ [(letter, '[memory]', ''),

+ ('', '[%{}..%{}]'.format(start_reg, end_reg), notes)]))

def main():
  """Traverse the validator DFA and print the compressed instruction list.

  Reads the command line with ParseOptions(), loads the DFA from the XML
  file, creates the validator and prepares the compressors.  Every traversal
  task is then handed to Worker through a multiprocessing pool; the
  per-task output sets are merged, passed through Compressed() and printed
  to stdout in sorted order.  Progress and statistics go to stderr.
  """
  # We are keeping these global to share state graph and compressors
  # between workers spawned by multiprocessing. Passing them every time is
  # slow.
  global options, xml_file
  global dfa
  global worker_validator

  def log(*parts):
    # Same text as "print >> sys.stderr, a, b": the parts are converted with
    # str(), joined by single spaces and terminated by a newline.  Spelled as
    # a write() call so that it parses under both Python 2 and Python 3.
    sys.stderr.write(' '.join(str(part) for part in parts) + '\n')

  options, xml_file = ParseOptions()
  dfa = dfa_parser.ParseXml(xml_file)
  worker_validator = validator.Validator(
      validator_dll=options.validator_dll,
      decoder_dll=options.decoder_dll)
  PrepareCompressors()

  # Sanity checks on the parsed automaton.
  assert dfa.initial_state.is_accepting, 'initial state must be accepting'
  assert not dfa.initial_state.any_byte, 'initial state must not be any_byte'

  log(len(dfa.states), 'states')

  num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

  # We can't just write 'num_suffixes[dfa.initial_state]' because
  # initial state is accepting.
  total_instructions = sum(
      num_suffixes[t.to_state]
      for t in dfa.initial_state.forward_transitions.values())
  log(total_instructions, 'regular instructions total')

  tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)
  log(len(tasks), 'tasks')

  total = 0
  num_valid = 0
  full_output = set()

  pool = multiprocessing.Pool(processes=1)
  try:
    # imap yields the results in task order, one
    # (prefix, count, valid_count, output) tuple per task.
    for prefix, count, valid_count, output in pool.imap(Worker, tasks):
      log('Prefix:', ', '.join(map(hex, prefix)))
      total += count
      num_valid += valid_count
      full_output |= output
  finally:
    # Always reap the worker process, even when a task raised.  On the
    # success path every result has already been consumed, so terminate()
    # does not discard any work.
    pool.terminate()
    pool.join()

  for instruction in sorted(Compressed(full_output)):
    sys.stdout.write('{}\n'.format(instruction))

  log(total, 'instructions were processed')
  log(num_valid, 'valid instructions')


if __name__ == '__main__':
  main()

« no previous file with comments | « no previous file | src/trusted/validator_ragel/testdata/32bit_regular.golden » ('j') | no next file with comments »