| Index: src/trusted/validator_ragel/unreviewed/run_ncval_tests.py
|
| diff --git a/src/trusted/validator_ragel/unreviewed/run_ncval_tests.py b/src/trusted/validator_ragel/unreviewed/run_ncval_tests.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..1b28bf0e8ddcb75267d686a86f06952fb8e9bf34
|
| --- /dev/null
|
| +++ b/src/trusted/validator_ragel/unreviewed/run_ncval_tests.py
|
| @@ -0,0 +1,456 @@
|
| +#!/usr/bin/python
|
| +# Copyright (c) 2012 The Native Client Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +"""Runs in-tree NaCl x86 validator tests against the DFA-based validator.
|
| +
|
| +Takes *.hex files as input bytes. Each test output is a union of all errors
|
| +occurred when running the input bytes through the DFA-based validator. The
|
| +latter can only detect one error per bundle without making mistakes about
|
| +offending instruction. After each run the invalid instruction is replaced with
|
| +a sequence of NOPs of the same length until the code passes the validator.
|
| +
|
| +The output from each test is then compared to golden/ files that parse_hex.py
|
| +produced.
|
| +"""
|
| +
|
| +import optparse
|
| +import os
|
| +import re
|
| +import string
|
| +import subprocess
|
| +import sys
|
| +
|
| +
|
| +def WriteFile(filename, data):
|
| + fh = open(filename, "w")
|
| + try:
|
| + fh.write(data)
|
| + finally:
|
| + fh.close()
|
| +
|
| +
|
| +def ReadFile(filename):
|
| + try:
|
| + file = open(filename, 'r')
|
| + except IOError, e:
|
| + print >> sys.stderr, ('Error reading file %s: %s' %
|
| + (filename, e.strerror))
|
| + return None
|
| + contents = file.read()
|
| + file.close()
|
| + return contents
|
| +
|
| +
|
| +def PrintError(msg):
|
| + print >> sys.stderr, 'error: %s' % msg
|
| +
|
| +
|
| +class InstByteSequence:
|
| + """Parses a sequence of instructions, generates code pieces out of them.
|
| +
|
| + Each instruction comes as a sequence of bytes in the input. It is required
|
| + that the input source has the information of instruction boundaries in the
|
| + byte stream.
|
| +
|
| + """
|
| +
|
| + def __init__(self):
|
| + self.inst_bytes = []
|
| + self.offsets = {}
|
| +
|
| + def Parse(self, hexfile):
|
| + """Read instruction bytes.
|
| +
|
| + Args:
|
| + hexfile: Name of file with instruction descriptions. Each line is a
|
| + a sequence of hex-encoded bytes separated by spaces or a comment.
|
| + """
|
| + off = 0
|
| + inst_begin = 0
|
| + for line in open(hexfile, 'r').readlines():
|
| + inst_begin = off
|
| + if line.startswith('#'):
|
| + continue
|
| + for word in line.rstrip().split(' '):
|
| + if re.match(r'^\s*$', word):
|
| + continue
|
| + assert(re.match(r'[0-9a-zA-Z][0-9a-zA-Z]', word))
|
| + self.inst_bytes.append(word)
|
| + off += 1
|
| + self.offsets[inst_begin] = off
|
| +
|
| + def HasOffset(self, offset):
|
| + """Tells if the given offset contains the first byte of some instruction."""
|
| + return offset in self.offsets
|
| +
|
| + def InstInBundle(self, inst_offset, bundle_start):
|
| + assert((bundle_start + inst_offset) in self.offsets)
|
| + if bundle_start + 32 >= self.offsets[bundle_start + inst_offset]:
|
| + return True
|
| + return False
|
| +
|
| + def OffsetBelongsToInst(self, offset, inst_start):
|
| + """Detects whether the byte at given offset is a part of an instruction.
|
| +
|
| + Args:
|
| + offset: An integer offset, the address of the given byte.
|
| + inst_start: An integer offset of the beginning of the instruction.
|
| + """
|
| + assert(inst_start in self.offsets)
|
| + if offset == inst_start:
|
| + return True
|
| + for i in xrange(inst_start, len(self.inst_bytes)):
|
| + if self.HasOffset(i):
|
| + return False
|
| + if i == offset:
|
| + return True
|
| + return False
|
| +
|
| + def StuboutInst(self, offset):
|
| + """Fill the instruction at offset with NOP bytes."""
|
| + assert(offset in self.offsets)
|
| + for off in xrange(offset, self.offsets[offset]):
|
| + self.inst_bytes[off] = '90'
|
| +
|
| + def GenAsmBundle(self, start_offset):
|
| + """Generates 32 bytes of the original instructions suitable for assembler.
|
| +
|
| + May start from arbitrary offsets, which is useful when we have replaced a
|
| + bundle-crossing instruction with NOPs. Append enough NOPs to form 32 bytes
|
| + if there are not enough instructions.
|
| +
|
| + Args:
|
| + start_offset: the offset of the first byte to output
|
| + Returns:
|
| + A pair of (asm, has_next), where:
|
| + asm: text representing code for the bundle suitable as assembler input
|
| + has_next: boolean value indicating presence of instruction bytes after
|
| + the bundle
|
| + """
|
| + off = start_offset
|
| + asm = '.text\n'
|
| + bytes_written = 0
|
| +
|
| + # Allow to start from offset that does not start an instruction.
|
| + sep = '.byte 0x'
|
| + while off < len(self.inst_bytes):
|
| + if off in self.offsets:
|
| + break
|
| + asm += sep + self.inst_bytes[off]
|
| + sep = ', 0x'
|
| + bytes_written += 1
|
| + off += 1
|
| + if bytes_written > 0:
|
| + asm += '\n'
|
| +
|
| + # Write the bytes from our source.
|
| + while bytes_written != 32 and off != len(self.inst_bytes):
|
| + sep = '.byte 0x'
|
| + inst_fully_written = True
|
| + for i in xrange(off, self.offsets[off]):
|
| + asm += sep + self.inst_bytes[i]
|
| + bytes_written += 1
|
| + sep = ', 0x'
|
| + if bytes_written == 32:
|
| + inst_fully_written = False
|
| + break
|
| + asm += '\n'
|
| + if inst_fully_written:
|
| + off = self.offsets[off]
|
| +
|
| + has_next = True
|
| + if off == len(self.inst_bytes):
|
| + has_next = False
|
| +
|
| + # Write NOPs if we did not get generate enough bytes yet.
|
| + for i in xrange((32 - (bytes_written % 32)) % 32):
|
| + asm += 'nop\n'
|
| + assert(asm)
|
| + return (asm, has_next)
|
| +
|
| + def GenAsm(self):
|
| + """Generates text for all instructions suitable for assembler."""
|
| + asm = '.text\n'
|
| + off = 0
|
| + while True:
|
| + sep = '.byte 0x'
|
| + for i in xrange(off, self.offsets[off]):
|
| + asm += sep + self.inst_bytes[i]
|
| + sep = ', 0x'
|
| + off = self.offsets[off]
|
| + asm += '\n'
|
| + if off == len(self.inst_bytes):
|
| + break
|
| + return asm
|
| +
|
| +
|
| +class TestRunner:
|
| + """Knows about naming tests, files, placement of golden files, etc."""
|
| +
|
| + def __init__(self, tmpdir, gas, decoder, validator):
|
| + self.tmp = tmpdir
|
| + self.gas = gas
|
| + self.decoder = decoder
|
| + self.validator = validator
|
| +
|
| + def CheckDecoder(self, asm, hexfile):
|
| + """Test if we are decoding correctly.
|
| +
|
| + Generate binary code from given text, disassembly it with the DFA-based
|
| + decoder, check correctness.
|
| +
|
| + Args:
|
| + asm: the code to feed into assembler
|
| + hexfile: the original file name, where asm was extracted from, useful
|
| + for grouping all artifacts from each test under the same name
|
| + prefix.
|
| + Returns:
|
| + True iff the test passes.
|
| + """
|
| + basename = os.path.basename(hexfile[:-4])
|
| + asmfile = os.path.join(self.tmp, basename + '.all.s')
|
| + objfile = os.path.join(self.tmp, basename + '.o')
|
| + WriteFile(asmfile, asm)
|
| + gas_cmd = [self.gas, asmfile, '-o', objfile]
|
| + if subprocess.call(gas_cmd) != 0:
|
| + PrintError('assembler failed to execute command: %s' % gas_cmd)
|
| + return False
|
| + decoder_process = subprocess.Popen([self.decoder, objfile],
|
| + stdout=subprocess.PIPE)
|
| + (decode_out, decode_err) = decoder_process.communicate()
|
| + WriteFile(os.path.join(self.tmp, basename + '.all.decode.out'), decode_out)
|
| + # TODO(pasko): Compare output with objdump or a golden file.
|
| + return True
|
| +
|
| + def CheckAsm(self, asm, hexfile, run_id):
|
| + """Extract the first error offset from the validator on given code.
|
| +
|
| + Args:
|
| + asm: The code to feed into assembler and then the tested validator.
|
| + hexfile: Original input file name, where the code was extracted from.
|
| + run_id: An integer identifier of the certain testing run, must be
|
| + distinct from one invocation to another.
|
| +
|
| + Returns:
|
| + A pair of (non_fatal, error_offset), where:
|
| + non_fatal: True iff testing steps did not reveal any fatal errors.
|
| + error_offset: The offset of the first instruction that the validator
|
| + rejected.
|
| + """
|
| + asmfile = os.path.basename(hexfile[:-4]) + ('_part%03d.s' % run_id)
|
| + asmfile = os.path.join(self.tmp, asmfile)
|
| + WriteFile(asmfile, asm)
|
| + basename = asmfile[:-2]
|
| + objfile = basename + '.o'
|
| + if subprocess.call([self.gas, asmfile, '-o', objfile]) != 0:
|
| + return (False, None)
|
| + validator_process = subprocess.Popen([self.validator, objfile],
|
| + stdout=subprocess.PIPE)
|
| + (val_out, val_err) = validator_process.communicate()
|
| + offsets = []
|
| + for line in string.split(val_out, '\n'):
|
| + re_match = re.match(r'offset ([^:]+):.+', line)
|
| + if not re_match:
|
| + continue
|
| + offsets.append(int(re_match.group(1), 16))
|
| + assert(len(offsets) < 2)
|
| + if len(offsets) == 0:
|
| + return (True, None)
|
| + return (True, offsets[0])
|
| +
|
| + def CompareOffsets(self, off_info, hexfile):
|
| + """Check for correctness the knowledge from analysing a single test.
|
| +
|
| + Args:
|
| + off_info: A dict mapping an integer offset to a list of string errors
|
| + encountered for this offset. The order of errors is important.
|
| + hexfile: Original input file name, where the code was extracted from.
|
| + Returns:
|
| + True iff the comparison with the golden file succeeds.
|
| + """
|
| + output = ''
|
| + for off, msg_list in sorted(off_info.iteritems()):
|
| + for msg in msg_list:
|
| + output += 'offset 0x%x: %s\n' % (off, msg)
|
| + basename = os.path.basename(hexfile[:-4])
|
| + output_file = os.path.join(self.tmp , basename + '.val.out')
|
| + WriteFile(output_file, output)
|
| + golden_file = os.path.join('golden', basename + '.val.ref')
|
| + golden = ReadFile(golden_file)
|
| + if output == golden:
|
| + return True
|
| + PrintError('files differ: %s %s' % (golden_file, output_file))
|
| + return False
|
| +
|
| + def RunTest(self, test):
|
| + """Runs the test by name. Checks the decoder and the validator.
|
| +
|
| + Each test contains a sequence of instructions described as individual hex
|
| + bytes. Checks the decoder by feeding it with the whole code sequence of the
|
| + test.
|
| +
|
| + Checks the validator by separating the input code into 32-byte chunks,
|
| + asking the validator to try validate every piece, compare the answers
|
| + against the golden output.
|
| +
|
| + Args:
|
| + test: the name of the test, used only to construct the names of the .hex
|
| + and the golden file.
|
| + Returns:
|
| + True iff the test passes.
|
| + """
|
| + hexfile = 'testdata/64/%s.hex' % test
|
| + if not os.path.exists(hexfile):
|
| + PrintError('%s: no such file' % hexfile)
|
| + return False
|
| +
|
| + # Check disassembling of the whole input.
|
| + hex_instructions = InstByteSequence()
|
| + hex_instructions.Parse(hexfile)
|
| + if not self.CheckDecoder(hex_instructions.GenAsm(), hexfile):
|
| + return False
|
| +
|
| + # Cut the input instruction sequence in bundles and run a test for each
|
| + # bundle. For instructions that cross a bundle run an additional
|
| + # test that starts from this instruction.
|
| + start_pos = 0
|
| + runs = 0
|
| + top_errors = {} # Mapping of offset to a list of error strings.
|
| + has_next = True
|
| + while has_next:
|
| + (asm, has_next) = hex_instructions.GenAsmBundle(start_pos)
|
| + # Collect validation reject offsets, stub them out, repeat until no error.
|
| + while True:
|
| + (status, err_in_bundle) = self.CheckAsm(asm, hexfile, runs)
|
| + runs += 1
|
| + if not status:
|
| + return False
|
| + if err_in_bundle == None:
|
| + break
|
| + err_offset = start_pos + err_in_bundle
|
| + if not hex_instructions.HasOffset(err_offset):
|
| + PrintError('validator returned error on offset that is not a ' +
|
| + 'start of an instruction: 0x%x' % err_offset)
|
| + return False
|
| + if hex_instructions.InstInBundle(err_in_bundle, start_pos):
|
| + top_errors[err_offset] = ['validation error']
|
| + hex_instructions.StuboutInst(err_offset)
|
| + (asm, _) = hex_instructions.GenAsmBundle(start_pos)
|
| + else:
|
| + # If the instruction crosses the bundle boundary, we check if it gets
|
| + # validated as placed at address 0mod32, then go processing the next
|
| + # bundle. Stubout the instruction if necessary.
|
| + top_errors[err_offset] = ['crosses boundary']
|
| + (asm, _) = hex_instructions.GenAsmBundle(err_offset)
|
| + (status, cross_err_off) = self.CheckAsm(asm, hexfile, runs)
|
| + runs += 1
|
| + if not status:
|
| + return False
|
| + if cross_err_off != None:
|
| + if hex_instructions.OffsetBelongsToInst(err_offset + cross_err_off,
|
| + err_offset):
|
| + top_errors[err_offset].append('validation error')
|
| + hex_instructions.StuboutInst(err_offset)
|
| + break
|
| + start_pos += 32
|
| +
|
| + # Compare the collected offsets with the golden file.
|
| + if not self.CompareOffsets(top_errors, hexfile):
|
| + return False
|
| + return True
|
| +
|
| +
|
| +def Main():
|
| + parser = optparse.OptionParser()
|
| + parser.add_option(
|
| + '-t', '--tests', dest='tests',
|
| +# new validator allows unaligned calls:
|
| +# default='call_not_aligned',
|
| +# default='call_not_aligned_16',
|
| +# reports error on instruction that follows the xchg esp, ebp, replacing it does
|
| +# not help causing an infinite loop
|
| +# default='stack_regs',
|
| +# default='mov-lea-rbp-bad-1',
|
| +# default='mov-lea-rbp-bad-2',
|
| +# default='mov-lea-rbp-bad-3',
|
| +# default='mov-lea-rbp-bad-4',
|
| +# default='mv_ebp_alone',
|
| +# the @ expansion is not yet parsed:
|
| +# default='call0',
|
| +# default='call1',
|
| +# default='call_long',
|
| +# default='call_short',
|
| +# default='jmp0',
|
| +# default='jump_not_atomic',
|
| +# default='jump_not_atomic_1',
|
| +# default='jump_overflow',
|
| +# default='jump_underflow',
|
| +# default='mv_ebp_add_crossing',
|
| +# default='return',
|
| +# default='segment_aligned',
|
| +# default='segment_not_aligned',
|
| +# default='update-rsp',
|
| +# needs a tiny fix in old validator input file:
|
| +# default='legacy',
|
| +# http://code.google.com/p/nativeclient/issues/detail?id=2529
|
| +# default='maskmov_test',
|
| +# http://code.google.com/p/nativeclient/issues/detail?id=2603
|
| +# default='bsf-mask',
|
| +# default='bsr-mask',
|
| +# http://code.google.com/p/nativeclient/issues/detail?id=2606
|
| +# default='extensions',
|
| +# http://code.google.com/p/nativeclient/issues/detail?id=2607
|
| +# default='indirect_jmp_masked',
|
| +# default='jump_atomic',
|
| +# super-instruction crosses boundary, small instruction does not:
|
| +# default='fpu',
|
| +# have .hex, but not .rval:
|
| +# default='data66prefix,rdmsr,stubseq,test_alias,test_insts,wrmsr',
|
| +# need more investigation:
|
| +# default='jump_outside,mmx,movs_test,prefix-2,prefix-single,strings,sse',
|
| +# these tests pass:
|
| + default='3DNow,add_cs_gs_prefix,add_mult_prefix,addrex,AhNotSubRsp,bt,call_aligned,call-ex,cmpxchg,cpuid,dup-prefix,hlt,incno67,indirect_jmp_not_masked,invalid_base,invalid_base_store,invalid_width_index,jmp-16,lea,lea-add-rsp,lea-rsp,mov-esi-nop-use,mov_esp_add_rsp_r15,mov-lea-rbp,mov-lea-rsp,movlps-ex,mov_rbp_2_rsp,movsbw,mv_ebp_add_rbp_r15,nops,pop-rbp,prefix-3,push-memoff,rbp67,read_const_ptr,rep_tests,rex_invalid,rex_not_last,rip-relative,segment_assign,stosd,stosd67,stosd-bad,stosdno67,sub-add-rsp,sub-rsp,ud2,valid_and_store,valid_base_only,valid_lea_store,x87,add_rsp_r15,addrex2,ambig-segment,bad66,fs_use,inc67,mov-lea-rbp-bad-5,nacl_illegal,rip67,segment_store,change-subregs,ambig-segment',
|
| + help='a comma-separated list of tests')
|
| + parser.add_option(
|
| + '-a', '--gas', dest='gas',
|
| + default=None,
|
| + help='path to assembler')
|
| + parser.add_option(
|
| + '-d', '--decoder', dest='decoder',
|
| + default=None,
|
| + help='path to decoder')
|
| + parser.add_option(
|
| + '-v', '--validator', dest='validator',
|
| + default=None,
|
| + help='path to validator')
|
| + parser.add_option(
|
| + '-p', '--tmp', dest='tmp',
|
| + default=None,
|
| + help='a directory for storing temporary files')
|
| + opt, args = parser.parse_args()
|
| + if (args or
|
| + not opt.tmp or
|
| + not opt.gas or
|
| + not opt.decoder or
|
| + not opt.validator):
|
| + parser.error('invalid arguments')
|
| + no_failures = True
|
| + tester = TestRunner(opt.tmp, opt.gas, opt.decoder, opt.validator)
|
| + for tst in string.split(opt.tests, ','):
|
| + if tester.RunTest(tst):
|
| + print '%s: PASS' % tst
|
| + else:
|
| + print '%s: FAIL' % tst
|
| + no_failures = False
|
| + if no_failures:
|
| + print 'All tests PASSed'
|
| + else:
|
| + print 'Some tests FAILed'
|
| + return 1
|
| + return 0
|
| +
|
| +
|
| +if __name__ == '__main__':
|
| + sys.exit(Main())
|
|
|