Chromium Code Reviews| Index: src/trusted/validator_ragel/compress_regular_instructions.py |
| =================================================================== |
| --- src/trusted/validator_ragel/compress_regular_instructions.py (revision 0) |
| +++ src/trusted/validator_ragel/compress_regular_instructions.py (revision 0) |
| @@ -0,0 +1,1463 @@ |
| +# Copyright (c) 2013 The Native Client Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +""" |
| +Traverse the validator's DFA, collect all "normal" instructions and then |
| +compress output. Note: "anybyte fields" (immediates and displacements) |
| +are always filled with zeros. Otherwise processing of sextillions (sic!) |
| +of possibilities will take too long. |
| + |
| +Each rule is applied only when all variants are accepted by validator. |
| +The following compression rules are present: |
| + |
| +1. Compress ModR/M (+SIB & displacement). |
| + Instruction: 00 00 add %al,(%rax) |
| + ... |
| + Instruction: 00 ff add %bh,%bh |
| + becomes |
| + Instruction: 00 XX add [%al..%bh],[%al..%bh or memory] |
| + |
| +1a. Compress ModR/M (+SIB & displacement) memory-only. |
| + Instruction: f0 01 00 lock add %eax,(%eax) |
| + ... |
| + Instruction: f0 01 bf 00 00 00 00 lock add %edi,0x0(%edi) |
| + becomes |
| + Instruction: f0 01 XX lock add [%eax..%edi],[memory] |
| + |
| +1b. Compress ModR/M register only. |
| + Instruction: 66 0f 50 c0 movmskpd %xmm0,%eax |
| + ... |
| + Instruction: 66 0f 50 ff movmskpd %xmm7,%edi |
| + becomes |
| + Instruction: 66 0f 50 XX movmskpd [%xmm0..%xmm7],[%eax..%edi] |
| + |
| +2. Compress ModR/M (+SIB & displacement) with opcode extension. |
| + Instruction: 0f 90 00 seto (%eax) |
| + ... |
| + Instruction: 0f 90 c7 seto %bh |
| + becomes |
| + Instruction: 0f 90 XX/0 seto [%al..%bh or memory] |
| + |
| +2a. Compress ModR/M (+SIB & displacement) memory-only with opcode extension. |
| + Instruction: f0 ff 00 lock incl (%eax) |
| + ... |
| + Instruction: f0 ff 84 ff 00 00 00 00 lock incl 0x0(%edi,%edi,8) |
| + becomes |
| + Instruction: f0 ff XX/0 lock incl [memory] |
| + |
| +2b. Compress ModR/M register-only with opcode extension. |
| + Instruction: 0f 71 d0 00 psrlw $0x0,%mm0 |
| + ... |
| + Instruction: 0f 71 d7 00 psrlw $0x0,%mm7 |
| + becomes |
| + Instruction: 0f 71 XX/2 00 psrlw $0x0,[%mm0..%mm7] |
| + |
| +3. Compress register-in-opcode. |
| + Instruction: d9 c0 fld %st(0) |
| + ... |
| + Instruction: d9 c7 fld %st(7) |
| + becomes |
| + Instruction: d9 c[0..7] fld [%st(0)..%st(7)] |
| + |
| + Only applies if all possible register accesses are accepted by validator. |
| + |
| +4. Special compressor for "set" instruction. |
| + Instruction: 0f 90 XX/0 seto [%al..%bh or memory] |
| + ... |
| + Instruction: 0f 90 XX/7 seto [%al..%bh or memory] |
| + becomes |
| + Instruction: 0f 90 XX seto [%al..%bh or memory] |
| +""" |
| + |
| +import itertools |
| +import multiprocessing |
| +import optparse |
| +import os |
| +import re |
| +import subprocess |
| +import sys |
| +import tempfile |
| +import traceback |
| + |
| +import dfa_parser |
| +import dfa_traversal |
| +import validator |
| + |
| + |
| +# Register names in 'natural' order (as defined by IA32/x86-64 ABI) |
| +# |
| +# X86-64 ABI splits all registers in groups of 8 because it uses 3-bit field |
| +# in opcode, ModR/M, and/or SIB bytes to encode them. |
| +# |
| +# In most cases there are 16 registers of a given kind and two such groups, |
| +# but there are a couple of exceptions: |
| +# 1. There are 20 8-bit registers and three groups (two of them overlap) |
| +# 2. There are eight X87 and MMX registers thus two groups are identical |
| +# |
| +# We use typical register from a group to name the whole group. Most groups |
| +# use first register, but 'spl' group uses fifth register because its first |
| +# four registers are the same as 'al' group. We use mnemonic name 'mmalt' |
| +# to represent the "evil mirror" of the 'mm0' group. |
# Maps the name of each 8-register group to the registers in that group,
# ordered by their 3-bit encoding (index 0 is encoding 0, index 7 is 7).
REGISTERS = {
  'al': 'al cl dl bl ah ch dh bh'.split(),
  'spl': 'al cl dl bl spl bpl sil dil'.split(),
  'ax': 'ax cx dx bx sp bp si di'.split(),
  'eax': 'eax ecx edx ebx esp ebp esi edi'.split(),
  'rax': 'rax rcx rdx rbx rsp rbp rsi rdi'.split(),
  'r8b': ['r{}b'.format(N) for N in range(8, 16)],
  'r8w': ['r{}w'.format(N) for N in range(8, 16)],
  'r8d': ['r{}d'.format(N) for N in range(8, 16)],
  'r8': ['r{}'.format(N) for N in range(8, 16)],
  # 'mmalt' deliberately lists the same names as 'mm0'.
  'mm0': ['mm{}'.format(N) for N in range(0, 8)],
  'mmalt': ['mm{}'.format(N) for N in range(0, 8)],
  'st(0)': ['st({})'.format(N) for N in range(0, 8)],
  'xmm0': ['xmm{}'.format(N) for N in range(0, 8)],
  'xmm8': ['xmm{}'.format(N) for N in range(8, 16)],
  'ymm0': ['ymm{}'.format(N) for N in range(0, 8)],
  'ymm8': ['ymm{}'.format(N) for N in range(8, 16)]
}
| + |
| + |
# Byte value used as filler when padding an instruction to a full bundle.
NOP = 0x90


def PadToBundleSize(bytes):
  """Return `bytes` followed by enough NOPs to fill one bundle.

  Args:
    bytes: list of byte values, at most validator.BUNDLE_SIZE long
  Returns:
    New list of exactly validator.BUNDLE_SIZE byte values.
  """
  padding_length = validator.BUNDLE_SIZE - len(bytes)
  assert padding_length >= 0
  return bytes + [NOP] * padding_length
| + |
| + |
| +# In x86-64 mode we have so-called 'restricted register' which is used to |
| +# tie two groups together. Some instructions require particular value to |
| +# be stored in this variable, while some accept any non-special restricted |
| +# register (%ebp and %esp are special because they can only be accepted by |
| +# a few 'special' instructions). |
| +# |
| +# You can find more details in the "NaCl SFI model on x86-64 systems" manual. |
| +# |
| +# We try to feed all possible 'restricted registers' into validator and then |
| +# classify the instruction using this map. If set of acceptable 'restricted |
| +# registers' is not here, then it's an error in validator. |
# Key: bit mask of the initial 'restricted register' values that the validator
# accepted (bit N set means the N-th probed value was accepted, see
# ValidateInstruction).  Value: the note attached to the instruction.
ACCEPTABLE_X86_64_INPUTS = {
  1 << 0: 'input_rr=%eax',
  1 << 1: 'input_rr=%ecx',
  1 << 2: 'input_rr=%edx',
  1 << 3: 'input_rr=%ebx',
  1 << 4: 'input_rr=%esp',
  1 << 5: 'input_rr=%ebp',
  1 << 6: 'input_rr=%esi',
  1 << 7: 'input_rr=%edi',
  1 << 8: 'input_rr=%r8d',
  1 << 9: 'input_rr=%r9d',
  1 << 10: 'input_rr=%r10d',
  1 << 11: 'input_rr=%r11d',
  1 << 12: 'input_rr=%r12d',
  1 << 13: 'input_rr=%r13d',
  1 << 14: 'input_rr=%r14d',
  1 << 15: 'input_rr=%r15d',
  # Bits 0-3 and 6-16: everything except bit 4 (%esp) and bit 5 (%ebp).
  0x1ffcf: 'input_rr=any_nonspecial'
}
| + |
# An instruction must leave either no 'restricted register' (None) or one of
# these fifteen registers.  The sixteenth, 'r15d', is deliberately left out,
# so using its index to look up this tuple raises IndexError.
ACCEPTABLE_X86_64_OUTPUT_REGISTERS = tuple(
    '%' + name for name in REGISTERS['eax'] + REGISTERS['r8d'][:-1])
| + |
| + |
def ValidateInstruction(instruction, validator_inst):
  """Run one instruction through the validator and classify the outcome.

  Args:
    instruction: list of byte values making up the instruction
    validator_inst: validator object; must provide ValidateChunk (32-bit) or
        ValidateAndGetFinalRestrictedRegister (64-bit)
  Returns:
    (valid, notes) pair.  In 32-bit mode notes is always empty.  In 64-bit
    mode notes is [] for a rejected instruction and
    ['input_rr=...', 'output_rr=...'] for an accepted one.
  Raises:
    KeyError: the set of accepted initial restricted registers is not listed
        in ACCEPTABLE_X86_64_INPUTS.
    IndexError: the final restricted register is not listed in
        ACCEPTABLE_X86_64_OUTPUT_REGISTERS.
  """
  bundle = ''.join(chr(byte) for byte in PadToBundleSize(instruction))
  if options.bitness == 32:
    return validator_inst.ValidateChunk(bundle, bitness=32), []

  accepted_mask = 0
  known_final_rr = None
  # Probing order defines the bit positions used as keys of
  # ACCEPTABLE_X86_64_INPUTS; the trailing None probes "no restricted
  # register".
  for bit, initial_rr in enumerate(validator.ALL_REGISTERS + [None]):
    valid, final_rr = validator_inst.ValidateAndGetFinalRestrictedRegister(
        bundle, len(instruction), initial_rr)
    if not valid:
      continue
    # The resulting restricted register must not depend on the initial one.
    assert accepted_mask == 0 or known_final_rr == final_rr
    accepted_mask |= 1 << bit
    known_final_rr = final_rr

  # Nothing accepted: the instruction is simply invalid.
  if accepted_mask == 0:
    return False, []

  # An unacceptable final register surfaces as IndexError here ...
  if known_final_rr is None:
    output_rr = None
  else:
    output_rr = ACCEPTABLE_X86_64_OUTPUT_REGISTERS[known_final_rr]
  # ... and an unacceptable combination of inputs as KeyError here.
  return True, [ACCEPTABLE_X86_64_INPUTS[accepted_mask],
                'output_rr={}'.format(output_rr)]
| + |
| + |
class WorkerState(object):
  """Accumulates the validated instructions seen by one worker.

  Attributes:
    total_instructions: number of instructions received so far
    num_valid: how many of them the validator accepted
    validator: validator object used for validation and disassembly
    output: set of textual descriptions of the accepted instructions
    trace: list of (compressor_nr, instruction) pairs from RecordTrace
  """

  def __init__(self, prefix, validator):
    # `prefix` is part of the signature but is not stored.
    self.total_instructions = 0
    self.num_valid = 0
    self.validator = validator
    self.output = set()
    self.trace = []

  def ReceiveInstruction(self, bytes):
    """Validate one instruction and, if accepted, record its description.

    Args:
      bytes: list of byte values making up the instruction
    """
    self.total_instructions += 1
    result, notes = ValidateInstruction(bytes, self.validator)
    if not result:
      return
    self.num_valid += 1

    dis = self.validator.DisassembleChunk(
        ''.join(map(chr, bytes)),
        bitness=options.bitness)
    # Strip the 'Instruction(0x<N>: ' ... ')' wrapper from every line.
    for line_nr, raw_line in enumerate(dis):
      line = str(raw_line)
      assert line[0:17] == 'Instruction(0x' + str(line_nr) + ': '
      assert line[-1:] == ')'
      dis[line_nr] = line[17:-1]

    first = dis[0]
    # If %rip is involved then the trailing comment depends on the
    # instruction length.  Eliminate it.
    if '(%rip)' in first:
      first = re.sub(' # 0x[ ]*[0-9a-fA-F]*', '', first)
    # A zero displacement is shown as 0x0 everywhere except in jumps, where
    # %eip/%rip-relative addressing turns it into the instruction length.
    # Replace that value with the register name to simplify compression.
    if ' 0x' in first and ' 0x0' not in first:
      register = '%eip' if options.bitness == 32 else '%rip'
      substitution = '\\1' + register + '\\2'
      for length in range(1, 16):
        first = re.sub(
            '(' + '(?:[0-9a-fA-F][0-9a-fA-F] ){' + str(length) + '} .* )' +
            hex(length) + '(.*)',
            substitution,
            first)
    dis[0] = 'Instruction: ' + first
    dis += notes
    self.output.add('; '.join(dis))

  def RecordTrace(self, compressor_nr, instruction):
    """Remember that compressor `compressor_nr` fired on `instruction`."""
    self.trace.append((compressor_nr, instruction))
| + |
| + |
| +# Compressor has three slots: regex (which picks apart given instruction), |
| +# subst (which is used to denote compressed version) and replacements (which |
| +# are used to generate set of instructions from a given code). |
| +# |
| +# Example compressor: |
| +# regex = '.*?[0-9a-fA-F]([0-7]) \\w* (%e(?:[abcd]x|[sb]p|[sd]i)).*()' |
| +# subst = ('[0-7]', '[%eax..%edi]', ' # register in opcode') |
| +# replacements = ((0, '%eax'), (1, '%ecx'), (2, '%edx'), (3, '%ebx'), |
| +# (4, '%esp'), (5, '%ebp'), (6, '%esi'), (7, '%edi')) |
| +# |
| +# When faced with instruction '40 inc %eax' it will capture the following |
| +# pieces of said instruction: '4[0] inc [%eax]'. |
| +# |
| +# Then it will produce the following eight instructions: |
| +# '40 inc %eax' |
| +# '41 inc %ecx' |
| +# '42 inc %edx' |
| +# '43 inc %ebx' |
| +# '44 inc %esp' |
| +# '45 inc %ebp' |
| +# '46 inc %esi' |
| +# '47 inc %edi' |
| +# |
| +# If all these instructions can be found in a set of instructions then |
| +# compressor will remove them from said set and will insert one replacement |
| +# "compressed instruction" '4[0-7] inc [%eax..%edi] # register in opcode'. |
| +# |
| +# Note that last group is only used in the replacement. It's used to grab marks |
| +# added by previous compressors and to replace them with a new mark. |
class Compressor(object):
  """One compression rule.

  Attributes:
    regex: compiled pattern which picks an instruction apart
    subst: values substituted into the compressed form of the instruction
    replacements: tuples substituted into the instruction template to
        enumerate every instruction the compressed form stands for
  """

  __slots__ = [
    'regex',
    'subst',
    'replacements'
  ]

  def __init__(self, regex, subst, replacements=None):
    self.regex = re.compile(regex)
    self.subst = subst
    if replacements is None:
      # Give every instance its own empty list.
      replacements = []
    self.replacements = replacements
| + |
| + |
def CompressionTemplate(instruction, match, mark):
  """Replace every match group except the last one with `mark`.

  The text from the start of the last group onwards is dropped.

  Args:
    instruction: the string `match` was produced from
    match: regex match object with at least one group
    mark: text inserted in place of each replaced group
  Returns:
    The resulting template string.
  """
  last_group = len(match.groups())
  pieces = []
  cursor = 0
  for group in range(1, last_group):
    pieces.append(instruction[cursor:match.start(group)])
    pieces.append(mark)
    cursor = match.end(group)
  pieces.append(instruction[cursor:match.start(last_group)])
  return ''.join(pieces)
| + |
| + |
def CompressOneMatch(instructions, instruction, match, compressor):
  """Try to apply one compressor to one matched instruction.

  The compression only happens when every instruction generated from
  compressor.replacements is present in `instructions`.

  Args:
    instructions: set of instruction strings; modified in place on success
    instruction: the instruction string that was matched
    match: match object produced by compressor.regex on `instruction`
    compressor: the Compressor being applied
  Returns:
    (True, instructions) if the expanded instructions were replaced with the
    compressed one, (False, instructions) otherwise.
  """
  template = CompressionTemplate(instruction, match, '{}')
  expanded = set()
  for replacement in compressor.replacements:
    candidate = template.format(*replacement)
    if candidate not in instructions:
      return (False, instructions)
    expanded.add(candidate)
  instructions -= expanded
  # The extra '{}' receives the mark which replaces the last match group.
  compressed = (template + '{}').format(*compressor.subst)
  instructions.add(compressed)
  return (True, instructions)
| + |
| + |
def CompressOneInstruction(instructions, compressors, split, cache):
  """Perform the first compression that succeeds, if there is one.

  Instructions are tried in sorted order starting just after `split` and
  wrapping around; an instruction equal to `split` is not tried.

  Args:
    instructions: set of instruction strings
    compressors: list of Compressor objects, tried in order
    split: instruction string at which the scan starts
    cache: dict from instruction to the list of
        (compressor_nr, match, compressor) tuples whose regex matched it;
        filled in for instructions which could not be compressed
  Returns:
    (instructions, compressor_nr, instruction) after a successful
    compression, (instructions, False, False) if nothing could be compressed.
  """
  after_split = sorted(i for i in instructions if i > split)
  before_split = sorted(i for i in instructions if i < split)
  for instruction in after_split + before_split:
    candidates = cache.get(instruction)
    if candidates is None:
      # First encounter: find the matching compressors while trying them.
      candidates = []
      for compressor_nr, compressor in enumerate(compressors):
        match = compressor.regex.match(instruction)
        if not match:
          continue
        candidates.append((compressor_nr, match, compressor))
        result, instructions = CompressOneMatch(
            instructions, instruction, match, compressor)
        if result:
          return (instructions, compressor_nr, instruction)
      cache[instruction] = candidates
    else:
      for compressor_nr, match, compressor in candidates:
        result, instructions = CompressOneMatch(
            instructions, instruction, match, compressor)
        if result:
          return (instructions, compressor_nr, instruction)
  return (instructions, False, False)
| + |
| + |
def Compressed(instructions, compressors, show_progress):
  """Apply compressors repeatedly until none of them can be applied.

  Args:
    instructions: set of instruction strings
    compressors: list of Compressor objects
    show_progress: callable invoked as show_progress(compressor_nr,
        instruction) after every successful compression
  Returns:
    The compressed set of instructions.
  """
  cache = {}
  instructions, rule, split = CompressOneInstruction(
      instructions, compressors, '', cache)
  # Compare with `is not False`: compressor number 0 is a valid result.
  while rule is not False:
    show_progress(rule, split)
    instructions, rule, split = CompressOneInstruction(
        instructions, compressors, split, cache)
  return instructions
| + |
| + |
def Worker(prefix_and_state_index):
  """Collect and compress all instructions reachable from one DFA state.

  Takes a single tuple argument so that it can be used directly as a
  map()-style callback.  Relies on the module-level names `dfa`,
  `worker_validator` and `compressors` being set before it is called.

  Args:
    prefix_and_state_index: (prefix, state_index) pair, where prefix is the
        list of bytes leading to the state and state_index is the index of
        that state in dfa.states
  Returns:
    (prefix, total_instructions, num_valid, output, trace) tuple with the
    values accumulated in the WorkerState.
  """
  # Unpacked here rather than in the signature: tuple parameters were
  # removed from the language by PEP 3113.
  prefix, state_index = prefix_and_state_index
  worker_state = WorkerState(prefix, worker_validator)

  try:
    dfa_traversal.TraverseTree(
        dfa.states[state_index],
        final_callback=worker_state.ReceiveInstruction,
        prefix=prefix,
        anyfield=0)
    if prefix[0] != 0x0f or prefix[1] != 0x0f:  # Skip 3DNow! instructions
      worker_state.output = Compressed(set(worker_state.output),
                                       compressors,
                                       worker_state.RecordTrace)
  except Exception:
    traceback.print_exc()  # because multiprocessing imap swallows traceback
    raise

  return (
      prefix,
      worker_state.total_instructions,
      worker_state.num_valid,
      worker_state.output,
      worker_state.trace)
| + |
| + |
def ParseOptions():
  """Parse the command line.

  Exits with a usage error if --bitness is missing or if the number of
  positional arguments is not exactly one.

  Returns:
    (options, xml_file) pair; options.bitness is converted to int.
  """
  parser = optparse.OptionParser(usage='%prog [options] xmlfile')

  parser.add_option('--bitness',
                    choices=['32', '64'],
                    help='The subarchitecture: 32 or 64')
  parser.add_option('--validator_dll',
                    help='Path to librdfa_validator_dll')
  parser.add_option('--decoder_dll',
                    help='Path to librdfa_decoder_dll')

  options, args = parser.parse_args()

  # Without this check a missing --bitness ends in int(None) and a bare
  # TypeError traceback instead of a usage message.
  if options.bitness is None:
    parser.error('specify --bitness (32 or 64)')
  options.bitness = int(options.bitness)

  if len(args) != 1:
    parser.error('specify one xml file')

  (xml_file, ) = args

  return options, xml_file
| + |
| + |
# Variant of REGISTERS for use inside regular expressions: the parentheses
# in the x87 register names are escaped, and the group can be looked up
# under both its plain and its escaped name.
REGISTERS_RE = dict(REGISTERS)
REGISTERS_RE['st\\(0\\)'] = REGISTERS_RE['st(0)'] = [
    'st\\({}\\)'.format(N) for N in range(8)]
| + |
# Index register names in 'natural' order (as defined by IA32/x86-64 ABI).
# Position 4 of the non-REX groups holds the pseudo-register '%eiz'/'%riz'
# where REGISTERS has '%esp'/'%rsp'.
INDEXES = {
  'eax': 'eax ecx edx ebx eiz ebp esi edi'.split(),
  'rax': 'rax rcx rdx rbx riz rbp rsi rdi'.split(),
  'r8': 'r8 r9 r10 r11 r12 r13 r14 r15'.split()
}
# Special registers of 64-bit mode, in all their incarnations.
# InstructionIsDangerous accepts only these as the base of a memory access
# and rejects 'protect' writes to any of them.
X86_64_BASE_REGISTERS = {
  '%spl', '%bpl', '%r15b',
  '%sp', '%bp', '%r15w',
  '%esp', '%ebp', '%r15d',
  '%rsp', '%rbp', '%r15',
  '%rip'
}
| + |
| + |
def InstructionIsDangerous(input, output, register_write,
                           writes_to, memory_accessed=False,
                           base_text='%riz', index_text='%riz'):
  """ Check if instruction with given replacements will be dangerous

  Args:
    input: textual representation of the input argument (may be None)
    output: textual representation of the output argument
    register_write: three-state selector
        'sandbox' - instruction can be used to produce "restricted register"
        'protect' - instruction can damage output, protect "special registers"
        'ignore' - instruction does not affect its operands (e.g. test) or
                   is used with non-GP registers (X87, MMX, XMM, etc)
    writes_to: 'reg', 'rm' or 'both'; with 'both' the input argument is
        written as well
    memory_accessed: True if instruction accesses memory
    base_text: base register (only checked if memory is accessed)
    index_text: index register (only checked if memory is accessed)

  Returns:
    True if instruction should be rejected by validator
  """
  if memory_accessed:
    # Only the special registers may serve as a base ...
    if base_text not in X86_64_BASE_REGISTERS:
      return True
    # ... and none of them, apart from %r15, may serve as an index.
    if index_text != '%r15' and index_text in X86_64_BASE_REGISTERS:
      return True
  damages_output = (register_write == 'protect' and
                    output in X86_64_BASE_REGISTERS)
  sandboxes_r15 = register_write == 'sandbox' and output == '%r15d'
  damages_input = writes_to == 'both' and input in X86_64_BASE_REGISTERS
  return damages_output or sandboxes_r15 or damages_input
| + |
| + |
def AppendOperandsReplacement(replacement, rm_text, reg, modrm, writes_to):
  """ Appends operand texts to replacement list

  Args:
    replacement: replacement list (modified in place)
    rm_text: replacement for rm field
    reg: register kind (or None if reg field is used as opcode extension)
    modrm: modrm byte
    writes_to: three-state selector
        'reg' - instruction uses rm as source, reg as destination
        'rm' - instruction uses reg as source, rm as destination
        'both' - instruction writes to both reg and rm

  Returns:
    (input, output) pair with textual representations of the two arguments;
    input is None if reg is None.

  Side-effect:
    output (if reg is None) or input followed by output (if reg is not None)
    are appended to replacement list.
  """
  if reg is None:
    # Single-operand form: rm is the only, and therefore the output, operand.
    assert writes_to == 'rm'
    replacement.append(rm_text)
    return None, rm_text

  reg_text = '%' + REGISTERS[reg][(modrm >> 3) & 0x07]
  if writes_to == 'reg':
    operands = (rm_text, reg_text)
  else:  # rm, both
    operands = (reg_text, rm_text)
  replacement.extend(operands)
  return operands
| + |
| + |
def ModRMRegisterReplacements(rm, reg=None, writes_to='rm', opcode_bits=0,
                              register_write='ignore'):
  """Creates replacement tuples list for register-to-register instructions

  Results are memoized in ModRMRegisterReplacements.replacements.

  Args:
    rm: rm operand kind (see REGISTERS array)
    reg: reg operand kind (see REGISTERS array) or None if reg is not used
    writes_to: three-state selector
        'reg' - instruction uses rm as source, reg as destination
        'rm' - instruction uses reg as source, rm as destination
        'both' - instruction writes to both reg and rm
    opcode_bits: opcode extensions code (used when reg is None)
    register_write: three-state selector
        'sandbox' - instruction can be used to produce "restricted register"
        'protect' - instruction can damage output, protect "special registers"
        'ignore' - instruction does not affect its operands (e.g. test) or
                   is used with non-GP registers (X87, MMX, XMM, etc)
  Returns:
    Tuple of replacement tuples
  """
  # Reg field can be used either as reg or as opcode extension, but not both
  assert reg is None or opcode_bits == 0

  memo = ModRMRegisterReplacements.replacements
  output_key = (options.bitness, reg, rm, writes_to, opcode_bits,
                register_write)
  if output_key in memo:
    return memo[output_key]

  # Register-to-register forms have mod == 11, i.e. ModR/M bytes 0xc0..0xff.
  # Walk them from the end to make rejection faster (%r15 is equal to 0x7
  # and %rbp is 0x5).
  modrm_bytes = range(0xff, 0xbf, -1)
  if reg is None:
    # reg field is used as opcode extension
    modrm_bytes = [byte
                   for byte in modrm_bytes
                   if (byte >> 3) & 0x7 == opcode_bits]

  replacements = []
  for modrm in modrm_bytes:
    rm_text = '%' + REGISTERS[rm][modrm & 0x07]
    replacement = ['{:02x}'.format(modrm)]
    source, destination = AppendOperandsReplacement(
        replacement, rm_text, reg, modrm, writes_to)
    if options.bitness == 64:
      if InstructionIsDangerous(source, destination, register_write,
                                writes_to):
        continue
      replacement += [
          'any_nonspecial',  # input_rr
          destination if register_write == 'sandbox' else None]  # output_rr
    replacements.append(tuple(replacement))

  memo[output_key] = tuple(replacements)
  return memo[output_key]
ModRMRegisterReplacements.replacements = {}
| + |
| + |
def BaseOnlyMemoryOperand(modrm, base):
  """Describes the memory operand of a ModR/M byte which has no SIB byte

  Displacement bytes, if any, are always zero.

  Args:
    modrm: modrm byte
    base: register kind for base
  Returns:
    bytes_text: replacement for "bytes" group
    rm_text: textual representation of "rm" argument
    base_text: textual representation of "base" register
  """
  mod_field = (modrm >> 6) & 0x03
  rm_field = modrm & 0x07
  base_text = '%' + REGISTERS[base][rm_field]
  modrm_hex = '{:02x}'.format(modrm)

  if mod_field == 0:
    if rm_field == validator.REG_RBP:
      # mod == 0 with rm == %rbp has no base register at all: it is an
      # absolute address in 32-bit mode and a %rip-based one in 64-bit mode.
      if options.bitness == 32:
        return modrm_hex + ' 00 00 00 00', '0x0', '%eiz'
      return modrm_hex + ' 00 00 00 00', '0x0(%rip)', '%rip'
    # Memory access with just a base register
    return modrm_hex, '({})'.format(base_text), base_text

  if mod_field == 1:
    # Memory access with base and 8bit offset
    displacement = ' 00'
  else:  # mod_field == 2
    # Memory access with base and 32bit offset
    displacement = ' 00 00 00 00'
  return modrm_hex + displacement, '0x0({})'.format(base_text), base_text
| + |
| + |
def SIBMemoryOperand(modrm, sib, base, index):
  """Describes the memory operand of a ModR/M byte followed by a SIB byte

  Displacement bytes, if any, are always zero.

  Args:
    modrm: modrm byte (mod field must be 0, 1 or 2)
    sib: sib byte
    base: register kind for base (see REGISTERS array)
    index: register kind for index (see INDEXES array)
  Returns:
    bytes_text: replacement for "bytes" group
    rm_text: textual representation of "rm" argument
    base_text: textual representation of "base" register
    index_text: textual representation of "index" register
  """
  mod_field = (modrm >> 6) & 0x03
  base_field = sib & 0x07
  index_text = '%' + INDEXES[index][(sib >> 3) & 0x07]
  base_text = '%' + REGISTERS[base][base_field]
  scale_text = str(1 << ((sib >> 6) & 0x03))
  encoded = '{:02x} {:02x}'.format(modrm, sib)

  if mod_field == 0 and base_field == validator.REG_RBP:
    # If BASE is %rbp and MOD == 0 then index with 32bit offset is used
    bytes_text = encoded + ' 00 00 00 00'
    # In 64-bit mode the form without a real index is shown as a plain
    # absolute address.
    shown_as_absolute = (options.bitness == 64 and
                         index_text == '%riz' and
                         scale_text == '1')
    if shown_as_absolute:
      rm_text = '0x0'
    else:
      rm_text = '0x0(,{},{})'.format(index_text, scale_text)
    # There is no base in this case
    base_text = '%eiz' if options.bitness == 32 else '%riz'
  elif mod_field == 0:
    # Memory access with base and index (no offset)
    bytes_text = encoded
    rm_text = '({},{},{})'.format(base_text, index_text, scale_text)
  elif mod_field == 1:
    # Memory access with base, index and 8bit offset
    bytes_text = encoded + ' 00'
    rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)
  elif mod_field == 2:
    # Memory access with base, index and 32bit offset
    bytes_text = encoded + ' 00 00 00 00'
    rm_text = '0x0({},{},{})'.format(base_text, index_text, scale_text)

  # Pretty-printing of access via %rsp (or %r12): without a real index the
  # operand is shown exactly like a base-only one.
  if (base_field == validator.REG_RSP and
      index_text in ('%eiz', '%riz') and
      scale_text == '1'):
    # '(base)' when there is no offset, '0x0(base)' for 8-bit/32-bit offset
    rm_template = '({})' if mod_field == 0 else '0x0({})'
    rm_text = rm_template.format(base_text)
  return bytes_text, rm_text, base_text, index_text
| + |
| + |
def ModRMMemoryReplacements(reg=None, writes_to='rm', opcode_bits=0,
                            memory_accessed=True, register_write='ignore',
                            base_r8=False, index_r8=False):
  """Creates replacement tuples list for register-to-memory instructions

  Results are memoized in ModRMMemoryReplacements.replacements.

  Args:
    reg: reg operand kind (see REGISTERS array) or None if reg is not used
    writes_to: three-state selector
        'reg' - instruction uses rm as source, reg as destination
        'rm' - instruction uses reg as source, rm as destination
        'both' - instruction writes to both reg and rm
    opcode_bits: opcode extensions code (used when reg is None)
    memory_accessed: True if instruction accesses memory
    register_write: three-state selector
        'sandbox' - instruction can be used to produce "restricted register"
        'protect' - instruction can damage output, protect "special registers"
        'ignore' - instruction does not affect its operands (e.g. test) or
                   is used with non-GP registers (X87, MMX, XMM, etc)
    base_r8: True to take the base register from the 'r8' group instead of
        the 'rax' group (64-bit mode only)
    index_r8: True if REX.X bit in the instruction set to 1, i.e. the index
        register comes from the 'r8' group (64-bit mode only)

  Returns:
    Tuple of replacement tuples
  """
  # Reg field can be used either as reg or as opcode extension, but not both
  assert reg is None or opcode_bits == 0

  memo = ModRMMemoryReplacements.replacements
  output_key = (options.bitness, reg, writes_to, opcode_bits,
                base_r8, index_r8, memory_accessed, register_write)
  if output_key in memo:
    return memo[output_key]

  if options.bitness == 32:
    base = index = 'eax'
  else:
    base = 'r8' if base_r8 else 'rax'
    index = 'r8' if index_r8 else 'rax'

  # Memory forms have mod equal to 00, 01, or 10, i.e. ModR/M bytes
  # 0x00..0xbf.  Walk them from the end to make rejection faster (%r15 is
  # equal to 0x7 and %rbp is 0x5).
  modrm_bytes = range(0xbf, -1, -1)
  if reg is None:
    # reg field is used as opcode extension
    modrm_bytes = [byte
                   for byte in modrm_bytes
                   if (byte >> 3) & 0x7 == opcode_bits]

  # writes_to == 'reg' means that the output operand is a register.  Only an
  # instruction which explicitly writes to its reg operand can sandbox it;
  # xchg with memory, for one, can not.
  sandboxes_output = writes_to == 'reg' and register_write == 'sandbox'

  replacements = []
  for modrm in modrm_bytes:
    if (modrm & 0x07) != validator.REG_RSP:
      # If RM field != %rsp then there is no SIB byte
      bytes_text, rm_text, base_text = BaseOnlyMemoryOperand(modrm, base)
      replacement = [bytes_text]
      source, destination = AppendOperandsReplacement(
          replacement, rm_text, reg, modrm, writes_to)
      if options.bitness == 64:
        if InstructionIsDangerous(source, destination, register_write,
                                  writes_to, memory_accessed, base_text):
          continue
        replacement += ['any_nonspecial',
                        destination if sandboxes_output else None]
      replacements.append(tuple(replacement))
      continue

    # If RM field == %rsp then we have SIB byte
    for sib in range(0x100):
      bytes_text, rm_text, base_text, index_text = SIBMemoryOperand(
          modrm, sib, base, index)
      replacement = [bytes_text]
      source, destination = AppendOperandsReplacement(
          replacement, rm_text, reg, modrm, writes_to)
      if options.bitness == 64:
        if InstructionIsDangerous(source, destination, register_write,
                                  writes_to, memory_accessed, base_text,
                                  index_text):
          continue
        if not memory_accessed or index_text == '%riz':
          input_rr = 'any_nonspecial'
        elif index_r8:
          # Convert %r8 to %r8d, %r9 to %r9d, etc
          input_rr = index_text + 'd'
        else:
          # Convert %rax to %eax, %rsp to %esp, etc
          input_rr = '%e' + index_text[2:]
        replacement += [input_rr, destination if sandboxes_output else None]
      replacements.append(tuple(replacement))

  memo[output_key] = tuple(replacements)
  return memo[output_key]
ModRMMemoryReplacements.replacements = {}
| + |
| + |
def PrepareCompressors():
  """Build the ordered list of compression rules for the selected bitness.

  Fills the module-level lists main_compressors, register_compressors and
  memory_compressors, and stores the complete rule list in the module-level
  'compressors' (main rules first, then memory-only, then register-only, then
  the opcode-low-bits rules, then the special/cosmetic, "merge memory and
  register" and REX rules appended afterwards).

  Reads options.bitness (32 or 64) and the module-level REGISTERS and
  REGISTERS_RE tables.  Every rule is a Compressor built from a regex, a
  substitution tuple and the list of replacement cases it stands for.
  """
  global compressors
  global main_compressors
  global register_compressors
  global memory_compressors

  # "Larger" compressors should be tried first, then "smaller" ones.
  main_compressors = []
  register_compressors = []
  memory_compressors = []
  extra_compressors = []

  # Map from "REX bit off" group of registers to "REX bit on" group of registers
  r8 = {
    'al': 'r8b',
    'ax': 'r8w',
    'eax': 'r8d',
    'rax': 'r8',
    'mm0': 'mmalt',
    'xmm0': 'xmm8',
    'ymm0': 'ymm8'
  }

  # We only need to process ModR/M+SIB '04 04' or '04 07' here.  The pattern
  # depends only on bitness, so it is computed once: both the "reg and rm" loop
  # and the "rm only" loop below use it.
  if options.bitness == 32:
    regex_mem = '\\(%esp,%eax,1\\)'
  else:
    regex_mem = '\\((?:%rsp|%r15),(?:%rax|%r8),1\\)'

  if options.bitness == 32:
    register_kinds = ('al', 'ax', 'eax', 'mm0', 'xmm0', 'ymm0')
    register_kind_pairs = (
      (  'al',   'al'),
      (  'ax',   'al'),
      (  'ax',   'ax'),
      ( 'eax',   'al'),
      ( 'eax',   'ax'),
      ( 'eax',  'eax'),
      ( 'eax',  'mm0'),
      ( 'mm0',  'eax'),
      ( 'eax', 'xmm0'),
      ('xmm0',  'eax'),
      ( 'mm0',  'mm0'),
      ( 'mm0', 'xmm0'),
      ('xmm0',  'mm0'),
      ('xmm0', 'xmm0'),
      ('xmm0', 'ymm0'),
      ('ymm0', 'xmm0'),
      ('ymm0', 'ymm0')
    )
  else:
    register_kinds = ('al', 'spl', 'ax', 'eax', 'rax', 'mm0', 'xmm0', 'ymm0',
                      'r8b', 'r8w', 'r8d', 'r8', 'mmalt', 'xmm8', 'ymm8')
    # Each row lists one (reg, rm) kind in all of its REX on/off combinations.
    register_kind_pairs = (
      ('al', 'al'),
      ('spl', 'spl'), ('spl', 'r8b'), ('r8b', 'spl'), ('r8b', 'r8b'),
      ('ax', 'al'),
      ('ax', 'spl'), ('ax', 'r8b'), ('r8w', 'spl'), ('r8w', 'r8b'),
      ('ax', 'ax'), ('ax', 'r8w'), ('r8w', 'ax'), ('r8w', 'r8w'),
      ('eax', 'al'),
      ('eax', 'spl'), ('eax', 'r8b'), ('r8d', 'spl'), ('r8d', 'r8b'),
      ('eax', 'ax'), ('eax', 'r8w'), ('r8d', 'ax'), ('r8d', 'r8w'),
      ('eax', 'eax'), ('eax', 'r8d'), ('r8d', 'eax'), ('r8d', 'r8d'),
      ('rax', 'al'),
      ('rax', 'spl'), ('rax', 'r8b'), ('r8', 'spl'), ('r8', 'r8b'),
      ('rax', 'ax'), ('rax', 'r8w'), ('r8', 'ax'), ('r8', 'r8w'),
      ('rax', 'eax'), ('rax', 'r8d'), ('r8', 'eax'), ('r8', 'r8d'),
      ('rax', 'rax'), ('rax', 'r8'), ('r8', 'rax'), ('r8', 'r8'),
      # The last pair of this row must be ('r8d', 'mmalt'): a second
      # ('eax', 'mmalt') would leave the r8d/mmalt combination uncovered.
      ('eax', 'mm0'), ('eax', 'mmalt'), ('r8d', 'mm0'), ('r8d', 'mmalt'),
      ('rax', 'mm0'), ('rax', 'mmalt'), ('r8', 'mm0'), ('r8', 'mmalt'),
      ('mm0', 'eax'), ('mmalt', 'eax'), ('mm0', 'r8d'), ('mmalt', 'r8d'),
      ('mm0', 'rax'), ('mmalt', 'rax'), ('mm0', 'r8'), ('mmalt', 'r8'),
      ('eax', 'xmm0'), ('eax', 'xmm8'), ('r8d', 'xmm0'), ('r8d', 'xmm8'),
      ('rax', 'xmm0'), ('rax', 'xmm8'), ('r8', 'xmm0'), ('r8', 'xmm8'),
      ('xmm0', 'eax'), ('xmm0', 'r8d'), ('xmm8', 'eax'), ('xmm8', 'r8d'),
      ('xmm0', 'rax'), ('xmm0', 'r8'), ('xmm8', 'rax'), ('xmm8', 'r8'),
      ('mm0', 'mm0'), ('mmalt', 'mm0'), ('mm0', 'mmalt'), ('mmalt', 'mmalt'),
      ('mm0', 'xmm0'), ('mmalt', 'xmm0'), ('mm0', 'xmm8'), ('mmalt', 'xmm8'),
      ('xmm0', 'mm0'), ('xmm8', 'mm0'), ('xmm0', 'mmalt'), ('xmm8', 'mmalt'),
      ('xmm0', 'xmm0'), ('xmm0', 'xmm8'), ('xmm8', 'xmm0'), ('xmm8', 'xmm8'),
      ('xmm0', 'ymm0'), ('xmm0', 'ymm8'), ('xmm8', 'ymm0'), ('xmm8', 'ymm8'),
      ('ymm0', 'xmm0'), ('ymm0', 'xmm8'), ('ymm8', 'xmm0'), ('ymm8', 'xmm8'),
      ('ymm0', 'ymm0'), ('ymm0', 'ymm8'), ('ymm8', 'ymm0'), ('ymm8', 'ymm8')
    )

  # Largest compressors: both reg and rm fields are used
  for reg, rm in register_kind_pairs:
    start_reg = REGISTERS[reg][0]
    # For the 'r8*' kinds the last listed register is left out of the range.
    end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]
    start_rm = REGISTERS[rm][0]
    end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]
    # Each kind is (keyword args for the replacement generators,
    #               note for the combined rule,
    #               note for the register-only rule,
    #               note for the memory-only rule).
    instruction_kinds = [
      # Normal instructions with two operands (rm to reg)
      ({'writes_to':'reg'}, '', ' # rm to reg', ''),
      # Normal instructions with two operands (reg to rm)
      ({'writes_to':'rm'}, '', ' # reg to rm', '')
    ]
    # Lea in 64 bit mode is truly unique instruction for now
    if options.bitness == 64 and reg in ('eax', 'r8d', 'rax', 'r8'):
      instruction_kinds = [
        ({'writes_to':'reg', 'memory_accessed':False,
          'register_write':'sandbox' if reg in ('eax', 'r8d') else 'protect'},
         ' # lea', ' # rm to reg; lea', ' # lea')] + instruction_kinds
    # There are few more forms in 64 bit case (rm to reg)
    if options.bitness == 64 and reg in ('eax', 'r8d'):
      # Zero-extending version.
      instruction_kinds.append(
        ({'writes_to':'reg', 'register_write':'sandbox'},
         '', ' # rm to reg', ''))
    # More forms in 64 bit case (reg to rm)
    if options.bitness == 64 and rm in ('eax', 'r8d'):
      # Zero-extending version.
      instruction_kinds.append(
        ({'writes_to':'rm', 'register_write':'sandbox'},
         '', ' # reg to rm', ''))
      # Zero-extending xchg/xadd
      instruction_kinds.append(
        ({'writes_to':'both', 'register_write':'sandbox'},
         ' # write to both',
         ' # reg to rm; write to both',
         ' # write to both'))
    # Still more forms for 64 bit case (rm to reg).
    if options.bitness == 64 and reg in ('al', 'spl', 'ax', 'eax', 'rax',
                                         'r8b', 'r8w', 'r8d', 'r8'):
      # Dangerous instructions (rm to reg)
      instruction_kinds.append(
        ({'writes_to':'reg', 'register_write':'protect'},
         '', ' # rm to reg', ''))
    # Still more forms for 64 bit case (reg to rm)
    if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',
                                        'r8b', 'r8w', 'r8d', 'r8'):
      # Dangerous instructions (reg to rm)
      instruction_kinds.append(
        ({'writes_to':'rm', 'register_write':'protect'},
         '', ' # reg to rm', ''))
      # Dangerous xchg/xadd
      instruction_kinds.append(
        ({'writes_to':'both', 'register_write':'protect'},
         ' # write to both',
         ' # reg to rm; write to both',
         ' # write to both'))
    # 3DNow! instructions
    instruction_kinds.append(
      ({'writes_to':'reg', '3dnow':'yes'}, '', ' # rm to reg', ''))
    for args, notes, notes_register, notes_memory in instruction_kinds:
      regex = '(?: 00)*'
      # Additional byte is opcode extension with 3DNow! instructions.
      if '3dnow' in args:
        regex = ' [0-9a-fA-F][0-9a-fA-F]'
        # '3dnow' is only a marker for this loop; it must not be forwarded to
        # the replacement generators via **args.
        args.pop('3dnow')
      regex += ' (?:lock )?\\w* (?:\\$0x0,|\\$0x0,\\$0x0,|%cl,|%xmm0,)?'
      output = None
      output_note = None
      if args['writes_to'] == 'reg':
        regex += '(%' + REGISTERS[rm][0] + '|' + regex_mem + ')'
        regex += ',(%' + REGISTERS[reg][0] + ')'
        if 'register_write' in args and args['register_write'] == 'sandbox':
          assert reg in ('eax', 'r8d')
          output = '%' + reg + '|None'
          output_note = '[%eax..%edi]' if reg == 'eax' else '[%r8d..%r14d]'
        subst = (
          'XX', '[%{}..%{} or memory]'.format(start_rm, end_rm),
          '[%{}..%{}]'.format(start_reg, end_reg), notes)
        subst_register = (
          'XX', '[%{}..%{}]'.format(start_rm, end_rm),
          '[%{}..%{}]'.format(start_reg, end_reg), notes_register)
        subst_memory = (
          'XX', '[memory]',
          '[%{}..%{}]'.format(start_reg, end_reg), notes_memory)
      else:
        # writes_to is 'rm' or 'both': operands are printed as "reg,rm".
        regex += '(%' + REGISTERS[reg][0] + ')'
        regex += ',(%' + REGISTERS[rm][0] + '|' + regex_mem + ')'
        if 'register_write' in args and args['register_write'] == 'sandbox':
          assert rm in ('eax', 'r8d')
          output = '%' + rm + '|None'
          output_note = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'
        subst = (
          'XX', '[%{}..%{}]'.format(start_reg, end_reg),
          '[%{}..%{} or memory]'.format(start_rm, end_rm), notes)
        subst_register = (
          'XX', '[%{}..%{}]'.format(start_reg, end_reg),
          '[%{}..%{}]'.format(start_rm, end_rm), notes_register)
        subst_memory = (
          'XX', '[%{}..%{}]'.format(start_reg, end_reg),
          '[memory]', notes_memory)
      regex += '.*'
      if options.bitness == 64:
        regex += '; input_rr=(%eax|%r8d|any_nonspecial)'
        regex += '; output_rr=({})'.format(output)
        if 'memory_accessed' in args:
          input_note = 'any_nonspecial'
          input_note_r8 = 'any_nonspecial'
        else:
          input_note = '[%eax..%edi]'
          input_note_r8 = '[%r8d..%r15d]'
        # Insert the input_rr/output_rr notes just before the trailing note.
        # The *_r8 variants must be derived before 'subst'/'subst_memory' are
        # rebound below.
        subst_r8 = subst[0:-1] + (input_note_r8, output_note) + subst[-1:]
        subst = subst[0:-1] + (input_note, output_note) + subst[-1:]
        subst_memory_r8 = subst_memory[0:-1] + (
          input_note_r8, output_note) + subst_memory[-1:]
        subst_memory = subst_memory[0:-1] + (
          input_note, output_note) + subst_memory[-1:]
        subst_register = subst_register[0:-1] + (
          'any_nonspecial', output_note) + subst_register[-1:]
      # Empty trailing group: the place where the note is substituted.
      regex += '()'
      base_r8 = rm in r8.values()
      memory_replacement = ModRMMemoryReplacements(
        reg=reg, base_r8=base_r8, **args)
      memory_compressors.append(Compressor(
        '.*?(04 0[47])' + regex, subst_memory, memory_replacement))
      if options.bitness == 64:
        memory_replacement_r8 = ModRMMemoryReplacements(
          reg=reg, base_r8=base_r8, index_r8=True, **args)
        memory_compressors.append(Compressor(
          '.*?(04 0[47])' + regex, subst_memory_r8, memory_replacement_r8))
      # Instructions with no memory access are instructions which are doing
      # something with memory address (e.g. lea) and as such they don't have
      # non-memory forms.
      if not 'memory_accessed' in args:
        register_replacement = ModRMRegisterReplacements(rm=rm, reg=reg, **args)
        register_compressors.append(Compressor(
          '.*?(c0)' + regex, subst_register, register_replacement))
        main_replacement = register_replacement + memory_replacement
        main_compressors.append(Compressor(
          '.*?(04 0[47])' + regex, subst, main_replacement))
        if options.bitness == 64:
          main_replacement_r8 = register_replacement + memory_replacement_r8
          main_compressors.append(Compressor(
            '.*?(04 0[47])' + regex, subst_r8, main_replacement_r8))

  # Smaller compressors: only rm field is used.
  for rm in register_kinds:
    start_rm = REGISTERS[rm][0]
    end_rm = REGISTERS[rm][-1 if rm[0:2] != 'r8' else -2]
    for opcode_bits in xrange(8):
      XX_byte_mark = 'XX/' + str(opcode_bits)
      instruction_kinds = [
        # The most basic form
        ({}, '', '', '')
      ]
      if options.bitness == 64:
        # No memory access (e.g. prefetch)
        instruction_kinds = [
          ({'memory_accessed':False}, '', '', '')] + instruction_kinds
      # More forms in 64 bit case.
      if options.bitness == 64 and rm in ('eax', 'r8d'):
        # Zero-extending version.
        instruction_kinds.append(
          ({'register_write':'sandbox'}, '', '', ''))
      # Still more forms for 64 bit case (reg to rm).
      if options.bitness == 64 and rm in ('al', 'spl', 'ax', 'eax', 'rax',
                                          'r8b', 'r8w', 'r8d', 'r8'):
        # Dangerous instructions.
        instruction_kinds.append(
          ({'register_write':'protect'}, '', '', ''))
      for args, notes, notes_register, notes_memory in instruction_kinds:
        subst = (XX_byte_mark, '[%{}..%{} or memory]'.format(start_rm, end_rm),
                 notes)
        subst_register = (XX_byte_mark, '[%{}..%{}]'.format(start_rm, end_rm),
                          notes_register)
        subst_memory = (XX_byte_mark, '[memory]',
                        notes_memory)
        regex = ('(?: 00)* (?:lock )?\\w* (?:\\$0x0,|%cl,)?'
                 '(%' + REGISTERS[rm][0] + '|' + regex_mem + ').*')
        output = None
        output_note = None
        if options.bitness == 64:
          if 'register_write' in args and args['register_write'] == 'sandbox':
            assert rm in ('eax', 'r8d')
            output = '%' + rm + '|None'
            output_note = '[%eax..%edi]' if rm == 'eax' else '[%r8d..%r14d]'
          regex += '; input_rr=(%eax|%r8d|any_nonspecial)'
          regex += '; output_rr=({})'.format(output)
          if 'memory_accessed' in args:
            input_note = 'any_nonspecial'
            input_note_r8 = 'any_nonspecial'
          else:
            input_note = '[%eax..%edi]'
            input_note_r8 = '[%r8d..%r15d]'
          subst_r8 = subst[0:-1] + (input_note_r8, output_note) + subst[-1:]
          subst = subst[0:-1] + (input_note, output_note) + subst[-1:]
          subst_memory_r8 = subst_memory[0:-1] + (
            input_note_r8, output_note) + subst_memory[-1:]
          subst_memory = subst_memory[0:-1] + (
            input_note, output_note) + subst_memory[-1:]
          subst_register = subst_register[0:-1] + (
            'any_nonspecial', output_note) + subst_register[-1:]
        regex += '()'
        base_r8 = rm in r8.values()
        memory_replacement = ModRMMemoryReplacements(
          reg=None, base_r8=base_r8, opcode_bits=opcode_bits, **args)
        memory_compressors.append(Compressor(
          '.*?({:02x} 0[47])'.format(0x04 + opcode_bits * 8) + regex,
          subst_memory, memory_replacement))
        if options.bitness == 64:
          memory_replacement_r8 = ModRMMemoryReplacements(
            reg=None, base_r8=base_r8, index_r8=True, opcode_bits=opcode_bits,
            **args)
          memory_compressors.append(Compressor(
            '.*?({:02x} 0[47])'.format(0x04 + opcode_bits * 8) + regex,
            subst_memory_r8, memory_replacement_r8))
        # Instructions with no memory access are instructions which are doing
        # something with memory address (e.g. prefetch) and as such they don't
        # have non-memory forms.
        if not 'memory_accessed' in args:
          register_replacement = ModRMRegisterReplacements(
            reg=None, rm=rm, opcode_bits=opcode_bits, **args)
          register_compressors.append(Compressor(
            '.*?({:02x})'.format(0xc0 + opcode_bits * 8) + regex,
            subst_register, register_replacement))
          main_replacement = register_replacement + memory_replacement
          main_compressors.append(Compressor(
            '.*?({:02x} 0[47])'.format(0x04 + opcode_bits * 8) + regex,
            subst, main_replacement))
          if options.bitness == 64:
            main_replacement_r8 = register_replacement + memory_replacement_r8
            main_compressors.append(Compressor(
              '.*?({:02x} 0[47])'.format(0x04 + opcode_bits * 8) + regex,
              subst_r8, main_replacement_r8))

  # Even smaller compressors: only low 3 bits of opcode are used.
  for reg in register_kinds + ('st(0)',):
    start_reg = REGISTERS[reg][0]
    end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]
    # NOTE(review): 'opcode' is not used in the loop body, so every rule below
    # is appended eight times.  Kept as is to preserve the rule order/indices.
    for opcode in xrange(8):
      for text1, text2, nibble in (
          ('[0..7]', '[8..f]', xrange(8)),
          ('[012367]', '[89abef]', (0, 1, 2, 3, 6, 7)),
          ('[0..6]', '[8..e]', xrange(7))
        ):
        # Note that we use 2nd line here to avoid ambiguity when opcode is 0x00
        extra_compressors.append(Compressor(
          '.*?[0-9a-fA-F](1)(?: 00)*'
          ' \\w* (?:\\$0x0,|%ax,|%st,)?'
          '(%(?:' + REGISTERS_RE[reg][1] + ')).*()',
          (text1, '[%{}..%{}]'.format(start_reg, end_reg), ''),
          tuple(('{:x}'.format(n), '%' + REGISTERS[reg][n])
                for n in nibble)))
        extra_compressors.append(Compressor(
          '.*?[0-9a-fA-F](8)(?: 00)*'
          ' \\w* (?:\\$0x0,|%ax,|%st,)?'
          '(%(?:' + REGISTERS_RE[reg][0] + ')).*()',
          (text2, '[%{}..%{}]'.format(start_reg, end_reg), ''),
          tuple(('{:x}'.format(n + 8), '%' + REGISTERS[reg][n])
                for n in nibble)))
        # Another version for 64 bit case
        if options.bitness == 64 and reg in ('eax', 'r8d'):
          extra_compressors.append(Compressor(
            '.*?[0-9a-fA-F](1)(?: 00)*'
            ' \\w* (?:\\$0x0,|%ax,|%st,)?'
            '(%(?:' + REGISTERS_RE[reg][1] + ')).*'
            'output_rr=(%(?:'+ REGISTERS_RE[reg][1] + ')).*()',
            tuple([text1] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +
                  ['']),
            tuple(['{:x}'.format(n)] + ['%' + REGISTERS[reg][n]] * 2
                  for n in nibble)))
          extra_compressors.append(Compressor(
            '.*?[0-9a-fA-F](8)(?: 00)*'
            ' \\w* (?:\\$0x0,|%ax,|%st,)?'
            '(%(?:' + REGISTERS_RE[reg][0] + ')).*'
            'output_rr=(%(?:'+ REGISTERS_RE[reg][0] + ')).*()',
            tuple([text2] + ['[%{}..%{}]'.format(start_reg, end_reg)] * 2 +
                  ['']),
            tuple(['{:x}'.format(n + 8)] + ['%' + REGISTERS[reg][n]] * 2
                  for n in nibble)))
  compressors = (main_compressors + memory_compressors + register_compressors +
                 extra_compressors)

  # Special compressors: will handle some cosmetic issues.
  #
  # SETxx ignores reg field and thus are described as many separate instructions
  compressors.append(Compressor(
    '.*0f 9[0-9a-fA-F] XX(/[0-7]) set.*()', ('', ''),
    [('/' + str(i), ) for i in range(8)]))
  # BSWAP is described with opcode "0f c8+r", not "0f /1" in manual
  if options.bitness == 32:
    compressors.append(Compressor(
      '.*(XX/1) bswap.*ax.*()', ('c[8..f]', ''), [('XX/1', )]))
  else:
    compressors.append(Compressor(
      '.*(XX/1) bswap.*ax.*()', ('c[89abef]', ''), [('XX/1', )]))
    compressors.append(Compressor(
      '.*(XX/1) bswap.*r8.*()', ('c[8..e]', ''), [('XX/1', )]))
  # Add mark '# write to both' to certain versions of CMPXCHG, XADD, and XCHG
  if options.bitness == 64:
    compressors.append(Compressor(
      '.* (?:cmpxchg|xadd|xchg).*%al\\.\\.%bh[^#]*()$',
      (' # write to both', ), ((), )))
  # "and $0xe0,[%eax..%edi]" is treated specially which means that we list all
  # versions of and "[$0x1..$0xff],[%eax..%edi]" separately here.
  # Without this rule these ands comprise 2/3 of the whole output!
  if options.bitness == 32:
    compressors.append(Compressor(
      '.*83 (e0 01 and \\$0x1,%eax)()',
      ('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', ' # special and'),
      [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS['eax'][r]), )
       for i in range(0x01, 0x100) for r in range(8)] +
      [('XX/4 00 and[l]? $0x0,[%eax..%edi or memory]', )]))
  else:
    for reg in ('eax', 'r8d'):
      start_reg = REGISTERS[reg][0]
      end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]
      for index_reg in ('eax', 'r8d'):
        start_index = REGISTERS[index_reg][0]
        end_index = REGISTERS[index_reg][-1]
        compressors.append(Compressor(
          '.*83 (e0 01 and \\$0x1,%' + reg + ').*'
          'input_rr=(any_nonspecial); output_rr=(%' + reg + ')()',
          ('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,
             end_reg), '[%{}..%{}]'.format(start_index, end_index),
           '[%{}..%{}]'.format(start_reg, end_reg),
           ' # special and'),
          # range(7 + (reg == 'eax')): eight registers for 'eax', seven for
          # 'r8d' (the last one of that kind is left out).
          [('e{} {:02x} and $0x{:x},%{}'.format(r, i, i, REGISTERS[reg][r]),
            'any_nonspecial', '%' + REGISTERS[reg][r])
           for i in range(0x01, 0x100) for r in range(7 + (reg == 'eax'))] +
          [('XX/4 00 and[l]? $0x0,[%{}..%{} or memory]'.format(start_reg,
              end_reg), '[%{}..%{}]'.format(start_index, end_index),
            '[%{}..%{}]'.format(start_reg, end_reg))]))

  # "and $e0" and similar are used to align %rsp. All negative values are
  # accepted by validator and there are 127 of these.
  # Consolidate them into one line.
  if options.bitness == 64:
    compressors.append(Compressor(
      '.*(?:81|83) (?:e4|e5) (80) (?:00 00 00 |) and \\$0x(80),%r[bs]p.*()',
      ('[80..ff]', '[80..ff]', ' # alignment and'),
      [('{:02x}'.format(i), '{:02x}'.format(i)) for i in range(0x80, 0x100)]))

  # Merge memory and non-memory access
  if options.bitness == 32:
    letters_and_registers = (('b', 'al', ''), ('w', 'ax', ''), ('l', 'eax', ''))
  else:
    letters_and_registers = (
      ('b', 'al', 'eax'), ('b', 'spl', 'eax'), ('b', 'r8b', 'r8d'),
      ('w', 'ax', 'eax'), ('w', 'r8w', 'r8d'),
      ('l', 'eax', 'eax'), ('l', 'r8d', 'r8d'),
      ('q', 'rax', 'eax'), ('q', 'r8', 'r8d')
    )
  for letter, reg, out_reg in letters_and_registers:
    start_reg = REGISTERS[reg][0]
    end_reg = REGISTERS[reg][-1 if reg[0:2] != 'r8' else -2]
    all_regs = '[%{}..%{}]'.format(start_reg, end_reg)
    regs_mark = '[%{}..%{} or memory]'.format(start_reg, end_reg)
    if options.bitness == 64:
      # out_reg is '' in 32-bit mode, so it is looked up only here.
      start_out = REGISTERS[out_reg][0]
      end_out = REGISTERS[out_reg][-1 if out_reg[0:2] != 'r8' else -2]
      out_regs = '[%{}..%{}]'.format(start_out, end_out)
    for notes in ('', ' # rm to reg', ' # reg to rm'):
      compressors.append(Compressor(
        '.* \\w*(' + letter + ') .*(\\[memory]).*()()',
        ('[{}]?'.format(letter), regs_mark, '', ''),
        ((letter, '[memory]', ''), ('', all_regs, notes))))
      if options.bitness == 64:
        for index_reg in ('eax', 'r8d'):
          start_index = REGISTERS[index_reg][0]
          end_index = REGISTERS[index_reg][-1]
          index_regs = '[%{}..%{}]'.format(start_index, end_index)
          for output_rrs in ((None, out_regs), (out_regs, None), (None, None)):
            compressors.append(Compressor(
              '.* \\w*(' + letter + ') .*(\\[memory]).*; '
              'input_rr=(\\[%[a-z0-9]*..%[a-z0-9]*\\]); '
              'output_rr=(\\[%[a-z0-9]*..%[a-z0-9]*\\]|None)()()',
              ('[{}]?'.format(letter), regs_mark, index_regs,
               output_rrs[0] if output_rrs[0] is not None else output_rrs[1],
               '', ''),
              ((letter, '[memory]', index_regs, output_rrs[0], ''),
               ('', all_regs, 'any_nonspecial', output_rrs[1], notes))))

  # REX compressors
  if options.bitness == 64:
    # First pretty complex set of compressors to combine versions of REX with
    # three lowest bits in different states.
    # NOTE(review): the four GPR/MMX pairs are listed twice below; the
    # duplicates are kept to preserve the existing rule order.
    register_kind_pairs = (
      (None, None),
      ('al', 'al'), ('al', None), (None, 'al'),
      ('ax', 'al'), ('al', 'ax'),
      ('ax', 'ax'), ('ax', None), (None, 'ax'),
      ('eax', 'al'), ('al', 'eax'),
      ('eax', 'ax'), ('ax', 'eax'),
      ('eax', 'eax'), ('eax', None), (None, 'eax'),
      ('rax', 'al'), ('al', 'rax'),
      ('rax', 'ax'), ('ax', 'rax'),
      ('rax', 'eax'), ('eax', 'rax'),
      ('rax', 'rax'), ('rax', None), (None, 'rax'),
      ('eax', 'mm0'), ('mm0', 'eax'),
      ('rax', 'mm0'), ('mm0', 'rax'),
      ('mm0', 'eax'), ('eax', 'mm0'),
      ('mm0', 'rax'), ('rax', 'mm0'),
      ('eax', 'xmm0'),
      ('rax', 'xmm0'),
      ('xmm0', 'eax'),
      ('xmm0', 'rax'),
      ('mm0', 'mm0'), ('mm0', None), (None, 'mm0'),
      ('mm0', 'xmm0'),
      ('xmm0', 'mm0'),
      ('xmm0', 'xmm0'),
      ('xmm0', 'ymm0'), ('xmm0', None), (None, 'xmm0'),
      ('ymm0', 'xmm0'),
      ('ymm0', 'ymm0'), ('ymm0', None), (None, 'ymm0'),
    )
    for reg, rm in register_kind_pairs:
      # last_reg/last_rm pick where the "REX bit on" range ends: at the last
      # register of the group (-1) or at the one before it (-2).
      for last_reg, last_rm in ((-1, -1), (-1, -2), (-2, -1), (-2, -2)):
        if reg:
          start_reg = REGISTERS[reg][0]
          start_reg8 = REGISTERS[r8[reg]][0]
          end_reg = REGISTERS[reg][-1]
          end_reg0 = 'dil' if reg == 'al' else end_reg
          end_reg8 = REGISTERS[r8[reg]][last_reg]
          reg_regex = '\\[(%' + start_reg + '\\.\\.%' + end_reg + ')]'
          reg_regex0 = '\\[(%' + start_reg + '\\.\\.%' + end_reg0 + ')]'
        elif last_reg == -2:
          # Without a reg operand the -2 variant would only repeat the -1 one.
          continue
        if rm:
          start_rm = REGISTERS[rm][0]
          start_rm8 = REGISTERS[r8[rm]][0]
          end_rm = REGISTERS[rm][-1]
          end_rm0 = 'dil' if rm == 'al' else end_rm
          end_rm8 = REGISTERS[r8[rm]][last_rm]
          rm_regex = ('\\[(%' + start_rm + '\\.\\.%' + end_rm + ')'
                      '(?: or memory)?]')
          rm_regex0 = ('\\[(%' + start_rm + '\\.\\.%' + end_rm0 + ')'
                       '(?: or memory)?]')
        elif last_rm == -2:
          continue
        for rexw in (True, False):
          for input_rr in (True, False):
            for output_rr in (True, False) if reg or rm else (None, ):
              for rm_to_reg in (True, False) if reg and rm else (None, ):
                # Legacy prefixes
                regex = '.*:(?: 26| 2e| 36| 3e| 64| 65| 66| 67| f0| f2| f3)*'
                # REX
                regex += '( 48).*' if rexw else '( 40|).*'
                # Replacement text
                replacement_tuple = (
                  ' [REX:48..4f]' if rexw else ' [REX:40..47]?', )
                if reg:
                  replacement_regs = '%{}..%{}'.format(start_reg, end_reg8)
                if rm:
                  replacement_rms = '%{}..%{}'.format(start_rm, end_rm8)
                # Instruction arguments
                if not reg and not rm:
                  pass
                elif not reg and rm:
                  if rexw:
                    regex += rm_regex0 + '.*'
                  else:
                    regex += rm_regex + '.*'
                  replacement_tuple += (replacement_rms, )
                elif reg and not rm:
                  if rexw:
                    regex += reg_regex0 + '.*'
                  else:
                    regex += reg_regex + '.*'
                  replacement_tuple += (replacement_regs, )
                elif rm_to_reg:
                  if rexw:
                    regex += rm_regex0 + ',' + reg_regex0 + '.*'
                  else:
                    regex += rm_regex + ',' + reg_regex + '.*'
                  replacement_tuple += (replacement_rms, replacement_regs)
                else:
                  if rexw:
                    regex += reg_regex0 + ',' + rm_regex0 + '.*'
                  else:
                    regex += reg_regex + ',' + rm_regex + '.*'
                  replacement_tuple += (replacement_regs, replacement_rms)
                # Input and output restricted registers
                if input_rr:
                  regex += 'input_rr=\\[(%eax\\.\\.%edi)].*'
                  replacement_tuple += ('%eax..%r15d', )
                if output_rr:
                  regex += 'output_rr=\\[(%eax\\.\\.%edi)].*'
                  replacement_tuple += ('%eax..%r14d', )
                regex += '()'
                replacement_tuple += ('', )
                # Replacement cases
                replacement_tuples = ()
                # '' stands for "no REX prefix at all" (REX.W off only).
                for byte in (range(0x48, 0x50)
                             if rexw
                             else list(range(0x40, 0x48)) + ['']):
                  replacement_case = (
                    ' {:02x}'.format(byte) if byte else byte, )
                  if byte:
                    # REX.B (bit 0) switches the rm group.
                    if rm:
                      if byte & 0x1:
                        replacement_rms = '%{}..%{}'.format(start_rm8, end_rm8)
                      else:
                        replacement_rms = '%{}..%{}'.format(start_rm, end_rm0)
                    # REX.X (bit 1) switches the index (input_rr) group.  It is
                    # set independently of rm: input_rr is also emitted for
                    # pairs without an rm operand.
                    if byte & 0x2:
                      replacement_index = '%r8d..%r15d'
                    else:
                      replacement_index = '%eax..%edi'
                    # REX.R (bit 2) switches the reg group.
                    if reg:
                      if byte & 0x4:
                        replacement_regs = '%{}..%{}'.format(start_reg8,
                                                             end_reg8)
                      else:
                        replacement_regs = '%{}..%{}'.format(start_reg,
                                                             end_reg0)
                  else:
                    if rm:
                      replacement_rms = '%{}..%{}'.format(start_rm, end_rm)
                    replacement_index = '%eax..%edi'
                    if reg:
                      replacement_regs = '%{}..%{}'.format(start_reg, end_reg)
                  # final_rr follows the operand that is written: rm (REX.B)
                  # when only rm is present or for "reg to rm", reg (REX.R)
                  # otherwise.
                  if not reg and not rm:
                    pass
                  elif not reg and rm:
                    replacement_case += (replacement_rms, )
                    if byte:
                      final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'
                    else:
                      final_rr = '%eax..%edi'
                  elif reg and not rm:
                    replacement_case += (replacement_regs, )
                    if byte:
                      final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'
                    else:
                      final_rr = '%eax..%edi'
                  elif rm_to_reg:
                    replacement_case += (replacement_rms, replacement_regs)
                    if byte:
                      final_rr = '%r8d..%r14d' if byte & 0x4 else '%eax..%edi'
                    else:
                      final_rr = '%eax..%edi'
                  else:
                    replacement_case += (replacement_regs, replacement_rms)
                    if byte:
                      final_rr = '%r8d..%r14d' if byte & 0x1 else '%eax..%edi'
                    else:
                      final_rr = '%eax..%edi'
                  if input_rr: replacement_case += (replacement_index, )
                  if output_rr: replacement_case += (final_rr, )
                  replacement_tuples += (replacement_case, )
                compressors.append(Compressor(
                  regex, replacement_tuple, replacement_tuples))
    # This is pretty simple compressor to combine two lines with different REX.W
    # bits (only if they are otherwise identical).
    compressors.append(Compressor(
      '.*(\\[REX:40\\.\\.47]\\?).*()', ('[REX:40..4f]?', ''),
      (('[REX:40..47]?', ), ('[REX:48..4f]', ))))
| + |
| + |
def ShowProgress(rule, instruction):
  """Report on stderr that compressor number 'rule' produced 'instruction'.

  Args:
    rule: index into the module-level 'compressors' list.
    instruction: compressed line; it must match the regex of that compressor.

  Rules are renumbered in the order they are first reported (the mapping is
  kept in ShowProgress.rules_shown).  The first report for a rule, and any
  report for a rule with at most four replacement cases, lists every expanded
  line; later reports show only the first and the last one.
  """
  numbering = ShowProgress.rules_shown
  first_print = rule not in numbering
  if first_print:
    numbering[rule] = len(numbering)

  def Log(text):
    sys.stderr.write(str(text) + '\n')

  Log('-------- Compressed --------')
  Log('Rule: {}'.format(numbering[rule]))
  Log('--------')

  compressor = compressors[rule]
  match = compressor.regex.match(instruction)
  assert match
  # Template with '{}' placeholders where the captured groups were.
  format_str = CompressionTemplate(instruction, match, '{{{}}}')
  expanded = sorted(
    format_str.format(*case) for case in compressor.replacements)

  if first_print or len(compressor.replacements) <= 4:
    to_show = expanded
  else:
    to_show = [expanded[0], '...', expanded[-1]]
  for line in to_show:
    Log(line)

  Log('--------')
  Log('Compressed ' + (format_str + '{{{}}}').format(*compressor.subst))
ShowProgress.rules_shown = {}
| + |
| + |
def main():
  """Traverse the validator DFA in parallel and print the compressed output.

  Parses the options, loads the DFA from the XML file, creates the validator
  and prepares the compressors; then splits the traversal into tasks which are
  handed to a multiprocessing pool.  Progress and statistics go to stderr,
  the sorted compressed instruction list goes to stdout.
  """
  # We are keeping these global to share state graph and compressors
  # between workers spawned by multiprocess. Passing them every time is slow.
  global options, xml_file
  global dfa
  global worker_validator

  def Log(*fields):
    sys.stderr.write(' '.join(str(field) for field in fields) + '\n')

  options, xml_file = ParseOptions()
  dfa = dfa_parser.ParseXml(xml_file)
  worker_validator = validator.Validator(
    validator_dll=options.validator_dll,
    decoder_dll=options.decoder_dll)
  PrepareCompressors()

  initial_state = dfa.initial_state
  assert initial_state.is_accepting
  assert not initial_state.any_byte

  Log(len(dfa.states), 'states')

  num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

  # We can't just write 'num_suffixes[dfa.initial_state]' because
  # initial state is accepting.
  total_instructions = 0
  for transition in dfa.initial_state.forward_transitions.values():
    total_instructions += num_suffixes[transition.to_state]
  Log(total_instructions, 'regular instructions total')

  tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)
  Log(len(tasks), 'tasks')

  pool = multiprocessing.Pool()

  processed = 0
  accepted = 0
  full_output = set()
  # imap yields the per-task results in task order.
  for prefix, count, valid_count, output, trace in pool.imap(Worker, tasks):
    Log('Prefix:', ', '.join(hex(byte) for byte in prefix))
    processed += count
    accepted += valid_count
    full_output |= output
    # Replay the compressions done by the worker.
    for rule, instruction in trace:
      ShowProgress(rule, instruction)

  # Final compression pass over the union of all workers' outputs.
  final_lines = sorted(Compressed(full_output, compressors, ShowProgress))
  for instruction in final_lines:
    sys.stdout.write(str(instruction) + '\n')

  Log(processed, 'instructions were processed')
  Log(accepted, 'valid instructions')


if __name__ == '__main__':
  main()