| Index: src/trusted/validator_ragel/verify_regular_instructions_old.py
|
| diff --git a/src/trusted/validator_ragel/verify_regular_instructions_old.py b/src/trusted/validator_ragel/verify_regular_instructions_old.py
|
| deleted file mode 100644
|
| index bc4cc420f630ca49c91c9f19d15914e62c7986af..0000000000000000000000000000000000000000
|
| --- a/src/trusted/validator_ragel/verify_regular_instructions_old.py
|
| +++ /dev/null
|
| @@ -1,480 +0,0 @@
|
| -#!/usr/bin/python
|
| -# Copyright (c) 2013 The Native Client Authors. All rights reserved.
|
| -# Use of this source code is governed by a BSD-style license that can be
|
| -# found in the LICENSE file.
|
| -
|
| -"""
|
| -Generate all acceptable regular instructions by traversing validator DFA
|
| -and run objdump, new and old validator on them.
|
| -"""
|
| -# TODO(shcherbina): get rid of this test once text-based specification is
|
| -# complete (https://code.google.com/p/nativeclient/issues/detail?id=3453).
|
| -
|
| -import itertools
|
| -import multiprocessing
|
| -import optparse
|
| -import os
|
| -import re
|
| -import subprocess
|
| -import sys
|
| -import tempfile
|
| -import traceback
|
| -
|
| -import dfa_parser
|
| -import dfa_traversal
|
| -import objdump_parser
|
| -import validator
|
| -
|
| -
|
| -FWAIT = 0x9b
|
| -NOP = 0x90
|
| -
|
| -
|
| -def IsRexPrefix(byte):
|
| - return 0x40 <= byte < 0x50
|
| -
|
| -
|
| -def Cached(f):
|
| - cache = {}
|
| - def CachedF(*args):
|
| - args = tuple(args)
|
| - if args not in cache:
|
| - cache[args] = f(*args)
|
| - return cache[args]
|
| - return CachedF
|
| -
|
| -
|
| -class AssemblerError(Exception):
|
| - pass
|
| -
|
| -
|
| -@Cached
|
| -def Assemble(bitness, asm):
|
| - # Instead of parsing object files properly, I put two distinct sequences,
|
| - # begin_mark and end_mark, around code of interest.
|
| - # I neglect possibility that they occur somewhere else in the file.
|
| - begin_mark = 'begin mark>>>'
|
| - end_mark = '<<<end mark'
|
| -
|
| - try:
|
| - obj_file = tempfile.NamedTemporaryFile(
|
| - mode='w+b',
|
| - suffix='.o',
|
| - delete=False)
|
| -
|
| - proc = subprocess.Popen(
|
| - [options.gas,
|
| - '--%s' % bitness,
|
| - '-o', obj_file.name],
|
| - stdin=subprocess.PIPE)
|
| -
|
| - asm_content = ''
|
| - for c in begin_mark:
|
| - asm_content += '.byte %d\n' % ord(c)
|
| - asm_content += '%s\n' % asm
|
| - for c in end_mark:
|
| - asm_content += '.byte %d\n' % ord(c)
|
| -
|
| - proc.communicate(asm_content)
|
| - return_code = proc.wait()
|
| - if return_code != 0:
|
| - raise AssemblerError("Can't assemble '%s'" % asm)
|
| -
|
| - data = obj_file.read()
|
| - obj_file.close()
|
| -
|
| - # Extract the data between begin_mark and end_mark.
|
| - begin = data.find(begin_mark)
|
| - assert begin != -1, 'begin_mark is missing'
|
| - begin += len(begin_mark)
|
| - end = data.find(end_mark, begin)
|
| - assert end != -1, 'end_mark is missing'
|
| - return map(ord, data[begin:end])
|
| -
|
| - finally:
|
| - os.remove(obj_file.name)
|
| -
|
| -
|
| -class OldValidator(object):
|
| - def __init__(self):
|
| - self._bundles = []
|
| - self._errors = []
|
| - pass
|
| -
|
| - def Validate(self, bundle, comment):
|
| - self._bundles.append((bundle, comment))
|
| -
|
| - if len(self._bundles) == 40:
|
| - self._Process()
|
| -
|
| - def _Process(self):
|
| - bytes = sum((instr for instr, _ in self._bundles), [])
|
| - hex_content = ' '.join('%02x' % byte for byte in bytes).replace('0x', '')
|
| -
|
| - assert len(hex_content) < 4096
|
| -
|
| - ncval = {32: options.ncval32, 64: options.ncval64}[options.bitness]
|
| - proc = subprocess.Popen(
|
| - [ncval, '--hex_text=-', '--max_errors=-1'],
|
| - stdin=subprocess.PIPE,
|
| - stdout=subprocess.PIPE)
|
| -
|
| - stdout, stderr = proc.communicate(hex_content)
|
| - return_code = proc.wait()
|
| - assert return_code == 0, (stdout, stderr)
|
| -
|
| - if '*** <input> is safe ***' in stdout:
|
| - self._bundles = []
|
| - return
|
| -
|
| - assert '*** <input> IS UNSAFE ***' in stdout
|
| -
|
| - rejected_bundles = set()
|
| - for line in stdout.split('\n'):
|
| - line = line.strip()
|
| - if line == '':
|
| - continue
|
| - if line == '*** <input> IS UNSAFE ***':
|
| - continue
|
| - if line == 'Some instructions were replaced with HLTs.':
|
| - continue
|
| - if line.startswith(
|
| - 'VALIDATOR: Checking block alignment and jump targets'):
|
| - continue
|
| - m = re.match(r'VALIDATOR: ([0-9a-f]+): (.*)$', line, re.IGNORECASE)
|
| - assert m is not None, (line, hex_content)
|
| - error_offset = int(m.group(1), 16)
|
| - rejected_bundles.add(error_offset // validator.BUNDLE_SIZE)
|
| -
|
| - assert len(rejected_bundles) != 0
|
| - for b in sorted(rejected_bundles):
|
| - _, comment = self._bundles[b]
|
| - self._errors.append(comment)
|
| -
|
| - self._bundles = []
|
| -
|
| - def GetErrors(self):
|
| - if len(self._bundles) > 0:
|
| - self._Process()
|
| - return self._errors
|
| -
|
| -
|
| -def CheckFinalRestrictedRegister(
|
| - sandboxing,
|
| - instruction,
|
| - disassembly,
|
| - old_validator):
|
| - bundle = sandboxing + instruction
|
| - assert len(bundle) <= validator.BUNDLE_SIZE
|
| - bundle += [NOP] * (validator.BUNDLE_SIZE - len(bundle))
|
| -
|
| - final_restricted_register = [None]
|
| -
|
| - def Callback(begin, end, info):
|
| - if begin == len(sandboxing):
|
| - assert end == len(sandboxing) + len(instruction)
|
| - final_restricted_register[0] = (
|
| - (info & validator.RESTRICTED_REGISTER_MASK) >>
|
| - validator.RESTRICTED_REGISTER_SHIFT)
|
| - elif begin > len(sandboxing):
|
| - assert bundle[begin:end] == [NOP]
|
| -
|
| - result = validator_inst.ValidateChunk(
|
| - ''.join(map(chr, bundle)),
|
| - bitness=options.bitness,
|
| - callback=Callback,
|
| - on_each_instruction=True)
|
| - assert result, (disassembly, map(hex, bundle))
|
| -
|
| - (final_restricted_register,) = final_restricted_register
|
| - if final_restricted_register == validator.NO_REG:
|
| - final_restricted_register = None
|
| -
|
| - assert final_restricted_register != validator.REG_R15, (
|
| - 'restricted register can not be r15')
|
| -
|
| - if final_restricted_register is not None:
|
| - register_name = validator.REGISTER_NAMES[final_restricted_register]
|
| - memory_reference = 'mov (%%r15, %s), %%al' % register_name
|
| - bundle = sandboxing + instruction + Assemble(64, memory_reference)
|
| - assert len(bundle) <= validator.BUNDLE_SIZE
|
| - bundle += [NOP] * (validator.BUNDLE_SIZE - len(bundle))
|
| -
|
| - assert validator_inst.ValidateChunk(
|
| - ''.join(map(chr, bundle)),
|
| - bitness=options.bitness), (bundle, disassembly, memory_reference)
|
| -
|
| - old_validator.Validate(
|
| - bundle,
|
| - (disassembly + '; ' + memory_reference, instruction))
|
| -
|
| -
|
| -def ValidateInstruction(
|
| - instruction,
|
| - disassembly,
|
| - old_validator):
|
| - assert len(instruction) <= validator.BUNDLE_SIZE
|
| - bundle = instruction + [NOP] * (validator.BUNDLE_SIZE - len(instruction))
|
| -
|
| - if options.bitness == 32:
|
| - result = validator_inst.ValidateChunk(
|
| - ''.join(map(chr, bundle)),
|
| - bitness=options.bitness)
|
| -
|
| - if result:
|
| - old_validator.Validate(bundle, (disassembly, instruction))
|
| -
|
| - return result
|
| -
|
| - else:
|
| - result = validator_inst.ValidateChunk(
|
| - ''.join(map(chr, bundle)),
|
| - bitness=options.bitness)
|
| - if result:
|
| - old_validator.Validate(bundle, (disassembly, instruction))
|
| - CheckFinalRestrictedRegister([], instruction, disassembly, old_validator)
|
| -
|
| - # Additionally, we try to restrict all possible
|
| - # registers and check whether instruction would be accepted.
|
| - for register, register_name in validator.REGISTER_NAMES.items():
|
| - if register == validator.REG_R15:
|
| - continue
|
| - if validator_inst.ValidateChunk(
|
| - ''.join(map(chr, bundle)),
|
| - bitness=options.bitness,
|
| - restricted_register=register):
|
| -
|
| - # %r8 -> %r8d
|
| - # %rax -> %eax
|
| - if re.match(r'%r\d+$', register_name):
|
| - register_name += 'd'
|
| - else:
|
| - assert register_name.startswith('%r')
|
| - register_name = '%e' + register_name[2:]
|
| -
|
| - sandboxing = 'mov %%eax, %s' % register_name
|
| - CheckFinalRestrictedRegister(
|
| - Assemble(64, sandboxing),
|
| - instruction,
|
| - sandboxing + '; ' + disassembly,
|
| - old_validator)
|
| - result = True
|
| -
|
| - return result
|
| -
|
| -
|
| -class WorkerState(object):
|
| - def __init__(self, prefix):
|
| - self.total_instructions = 0
|
| - self.num_valid = 0
|
| - self._file_prefix = 'check_validator_%s_' % '_'.join(map(hex, prefix))
|
| - self._instructions = []
|
| - self.errors = []
|
| -
|
| - def ReceiveInstruction(self, bytes):
|
| - self._instructions.append(bytes)
|
| -
|
| - # Objdump prints crazy stuff when x87 instructions are prefixed with
|
| - # fwait (especially when REX prefixes are involved). To avoid that,
|
| - # we insert nops after each fwait.
|
| - if (bytes == [FWAIT] or
|
| - len(bytes) == 2 and IsRexPrefix(bytes[0]) and bytes[1] == FWAIT):
|
| - self._instructions.append([NOP])
|
| -
|
| - if len(self._instructions) >= 1000000:
|
| - self.CheckReceivedInstructions()
|
| - self._instructions = []
|
| -
|
| - def CheckReceivedInstructions(self):
|
| - # Check instructions accumulated so far and clear the list.
|
| - if len(self._instructions) == 0:
|
| - return
|
| - try:
|
| - raw_file = tempfile.NamedTemporaryFile(
|
| - mode='wb',
|
| - prefix=self._file_prefix,
|
| - suffix='.o',
|
| - delete=False)
|
| - for instr in self._instructions:
|
| - raw_file.write(''.join(map(chr, instr)))
|
| - raw_file.close()
|
| -
|
| - objdump_proc = subprocess.Popen(
|
| - [options.objdump,
|
| - '--disassemble-all', '--disassemble-zeroes',
|
| - '-b', 'binary',
|
| - '-m', 'i386'] +
|
| - {32: [], 64: ['-M', 'x86-64']}[options.bitness] +
|
| - ['--insn-width', '15',
|
| - raw_file.name],
|
| - stdout=subprocess.PIPE)
|
| -
|
| - objdump_iter = iter(objdump_parser.SkipHeader(objdump_proc.stdout))
|
| -
|
| - old_validator = OldValidator()
|
| - for instr in self._instructions:
|
| - # Objdump prints fwait with REX prefix in this ridiculous way:
|
| - # 0: 41 fwait
|
| - # 1: 9b fwait
|
| - # So in such cases we expect two lines from objdump.
|
| - # TODO(shcherbina): get rid of this special handling once
|
| - # https://code.google.com/p/nativeclient/issues/detail?id=3496 is fixed.
|
| - if len(instr) == 2 and IsRexPrefix(instr[0]) and instr[1] == FWAIT:
|
| - expected_lines = 2
|
| - else:
|
| - expected_lines = 1
|
| -
|
| - bytes = []
|
| - for _ in range(expected_lines):
|
| - line = next(objdump_iter)
|
| - # Parse tab-separated line of the form
|
| - # 0: f2 40 0f 10 00 rex movsd (%rax),%xmm0
|
| - addr, more_bytes, disassembly = line.strip().split('\t')
|
| - more_bytes = [int(b, 16) for b in more_bytes.split()]
|
| - bytes += more_bytes
|
| -
|
| - assert bytes == instr, (map(hex, bytes), map(hex, instr))
|
| - self.total_instructions += 1
|
| -
|
| - self.num_valid += ValidateInstruction(instr, disassembly, old_validator)
|
| -
|
| - # Make sure we read objdump output to the end.
|
| - end = next(objdump_iter, None)
|
| - assert end is None, end
|
| -
|
| - return_code = objdump_proc.wait()
|
| - assert return_code == 0
|
| -
|
| - finally:
|
| - os.remove(raw_file.name)
|
| -
|
| - errors = old_validator.GetErrors()
|
| - for error in errors:
|
| - print error
|
| - self.errors += errors
|
| -
|
| -
|
| -def Worker((prefix, state_index)):
|
| - worker_state = WorkerState(prefix)
|
| -
|
| - try:
|
| - dfa_traversal.TraverseTree(
|
| - dfa.states[state_index],
|
| - final_callback=worker_state.ReceiveInstruction,
|
| - prefix=prefix,
|
| - anyfield=0)
|
| - worker_state.CheckReceivedInstructions()
|
| - except Exception as e:
|
| - traceback.print_exc() # because multiprocessing imap swallows traceback
|
| - raise
|
| -
|
| - return (
|
| - prefix,
|
| - worker_state.total_instructions,
|
| - worker_state.num_valid,
|
| - worker_state.errors)
|
| -
|
| -
|
| -def ParseOptions():
|
| - parser = optparse.OptionParser(usage='%prog [options] xmlfile')
|
| -
|
| - parser.add_option('--bitness',
|
| - type=int,
|
| - help='The subarchitecture: 32 or 64')
|
| - parser.add_option('--gas',
|
| - help='Path to GNU AS executable')
|
| - parser.add_option('--objdump',
|
| - help='Path to objdump executable')
|
| - parser.add_option('--validator_dll',
|
| - help='Path to librdfa_validator_dll')
|
| - parser.add_option('--ncval32',
|
| - help='Path to old 32-bit ncval')
|
| - parser.add_option('--ncval64',
|
| - help='Path to old 64-bit ncval')
|
| - parser.add_option('--errors',
|
| - help='Where to save errors')
|
| -
|
| - options, args = parser.parse_args()
|
| -
|
| - if options.bitness not in [32, 64]:
|
| - parser.error('specify -b 32 or -b 64')
|
| -
|
| - if not (options.gas and options.objdump and options.validator_dll):
|
| - parser.error('specify path to gas, objdump, and validator_dll')
|
| -
|
| - if not (options.ncval32 and options.ncval64):
|
| - parser.error('specify path to old validator (32-bit and 64-bit versions)')
|
| -
|
| - if not options.errors:
|
| - parser.errors('specify file to save errors to')
|
| -
|
| - if not os.path.exists(options.ncval32):
|
| - print options.ncval32, 'not found (try ./scons ncval platform=x86-32)'
|
| - sys.exit(1)
|
| - if not os.path.exists(options.ncval64):
|
| - print options.ncval64, 'not found (try ./scons ncval platform=x86-64)'
|
| - sys.exit(1)
|
| -
|
| - if len(args) != 1:
|
| - parser.error('specify one xml file')
|
| -
|
| - (xml_file,) = args
|
| -
|
| - return options, xml_file
|
| -
|
| -
|
| -options, xml_file = ParseOptions()
|
| -# We are doing it here to share state graph between workers spawned by
|
| -# multiprocess. Passing it every time is slow.
|
| -dfa = dfa_parser.ParseXml(xml_file)
|
| -
|
| -validator_inst = validator.Validator(validator_dll=options.validator_dll)
|
| -
|
| -
|
| -def main():
|
| - assert dfa.initial_state.is_accepting
|
| - assert not dfa.initial_state.any_byte
|
| -
|
| - print len(dfa.states), 'states'
|
| -
|
| - num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)
|
| -
|
| - # We can't just write 'num_suffixes[dfa.initial_state]' because
|
| - # initial state is accepting.
|
| - total_instructions = sum(
|
| - num_suffixes[t.to_state]
|
| - for t in dfa.initial_state.forward_transitions.values())
|
| - print total_instructions, 'regular instructions total'
|
| -
|
| - tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)
|
| - print len(tasks), 'tasks'
|
| -
|
| - pool = multiprocessing.Pool()
|
| -
|
| - results = pool.imap(Worker, tasks)
|
| -
|
| - total = 0
|
| - num_valid = 0
|
| - errors = []
|
| - for prefix, count, valid_count, more_errors in results:
|
| - print ', '.join(map(hex, prefix))
|
| - total += count
|
| - num_valid += valid_count
|
| - errors += more_errors
|
| -
|
| - print total, 'instructions were processed'
|
| - print num_valid, 'valid instructions'
|
| -
|
| - print len(errors), 'errors'
|
| -
|
| - errors.sort()
|
| - with open(options.errors, 'w') as errors_file:
|
| - errors_file.write(
|
| - 'Instructions accepted by new validator but rejected by old one:\n')
|
| - for disassembly, bytes in errors:
|
| - hex_bytes = ' '.join('%02x' % byte for byte in bytes).replace('0x', '')
|
| - errors_file.write('%-50s %s\n' % (disassembly, hex_bytes))
|
| -
|
| -
|
| -if __name__ == '__main__':
|
| - main()
|
|
|