src/trusted/validator_ragel/unreviewed/run_ncval_tests.py - Issue 9423045: validator_ragel: Add ncval tests from the old validator

Unified Diff: src/trusted/validator_ragel/unreviewed/run_ncval_tests.py

Issue 9423045: validator_ragel: Add ncval tests from the old validator (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client

Patch Set: some more test classification Created 8 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/trusted/validator_ragel/unreviewed/run_ncval_tests.py

diff --git a/src/trusted/validator_ragel/unreviewed/run_ncval_tests.py b/src/trusted/validator_ragel/unreviewed/run_ncval_tests.py

new file mode 100644

index 0000000000000000000000000000000000000000..1b28bf0e8ddcb75267d686a86f06952fb8e9bf34

--- /dev/null

+++ b/src/trusted/validator_ragel/unreviewed/run_ncval_tests.py

@@ -0,0 +1,456 @@

+#!/usr/bin/python

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""Runs in-tree NaCl x86 validator tests against the DFA-based validator.

+Takes *.hex files as input bytes. Each test output is a union of all errors

+that occurred when running the input bytes through the DFA-based validator. The

+latter can only detect one error per bundle without making mistakes about the

+offending instruction. After each run the invalid instruction is replaced with

+a sequence of NOPs of the same length until the code passes the validator.

+The output from each test is then compared to golden/ files that parse_hex.py

+produced.

+"""

+import optparse

+import os

+import re

+import string

+import subprocess

+import sys

def WriteFile(filename, data):
    """Write `data` to `filename`, replacing any previous contents.

    Args:
      filename: Path of the file to create or truncate.
      data: Text to write. A bytes object (for example raw subprocess output
          under Python 3) is written unchanged in binary mode.
    """
    # On Python 2 `bytes` is `str`, so only genuine Python 3 bytes objects take
    # the binary path; ordinary text keeps the original text-mode behaviour.
    is_binary = isinstance(data, bytes) and not isinstance(data, str)
    with open(filename, 'wb' if is_binary else 'w') as fh:
        fh.write(data)

def ReadFile(filename):
    """Return the full contents of `filename`, or None if it cannot be opened.

    A failure to open the file is reported on stderr instead of being raised,
    so callers can treat an unreadable file as "no contents".

    Args:
      filename: Path of the file to read (opened in text mode).

    Returns:
      The file contents as a string, or None when opening the file failed.
    """
    try:
        fh = open(filename, 'r')
    except IOError as e:
        sys.stderr.write('Error reading file %s: %s\n' % (filename, e.strerror))
        return None
    # The context manager closes the handle even if read() itself fails.
    with fh:
        return fh.read()

def PrintError(msg):
    """Report an error message on stderr, prefixed with 'error: '.

    Args:
      msg: The message to print; any object is formatted with str().
    """
    # Wrap msg in a 1-tuple so that a tuple argument is printed as-is instead
    # of being unpacked by the % operator (which would raise TypeError).
    sys.stderr.write('error: %s\n' % (msg,))

class InstByteSequence(object):
    """Parses a sequence of instructions, generates code pieces out of them.

    Each instruction comes as a sequence of bytes in the input. It is required
    that the input source has the information of instruction boundaries in the
    byte stream.
    """

    # Size in bytes of one bundle handed to the validator.
    BUNDLE_SIZE = 32

    # A single byte written as exactly two hexadecimal digits.
    _HEX_BYTE_RE = re.compile(r'[0-9a-fA-F]{2}\Z')

    def __init__(self):
        # Flat list of two-character hex strings, one entry per code byte.
        self.inst_bytes = []
        # Maps the offset of the first byte of each instruction to the offset
        # just past its last byte.
        self.offsets = {}

    @staticmethod
    def _ByteDirective(hex_bytes):
        """Render hex byte strings as one '.byte 0x.., 0x..' assembler line."""
        return '.byte ' + ', '.join('0x' + b for b in hex_bytes) + '\n'

    def Parse(self, hexfile):
        """Read instruction bytes.

        Args:
          hexfile: Name of file with instruction descriptions. Each line is
              either a comment starting with '#' or a sequence of hex-encoded
              bytes separated by whitespace that forms one instruction. Blank
              lines are ignored.

        Raises:
          ValueError: if a word is not exactly two hexadecimal digits.
        """
        off = len(self.inst_bytes)
        with open(hexfile, 'r') as hex_lines:
            for line in hex_lines:
                stripped = line.strip()
                # Lines without bytes must not create zero-length instructions.
                if not stripped or stripped.startswith('#'):
                    continue
                inst_begin = off
                for word in stripped.split():
                    if not self._HEX_BYTE_RE.match(word):
                        raise ValueError('%s: not a hex-encoded byte: %r' %
                                         (hexfile, word))
                    self.inst_bytes.append(word)
                    off += 1
                self.offsets[inst_begin] = off

    def HasOffset(self, offset):
        """Tells if the given offset contains the first byte of some instruction."""
        return offset in self.offsets

    def InstInBundle(self, inst_offset, bundle_start):
        """Tells if an instruction ends within the bundle it starts in.

        Args:
          inst_offset: Offset of the instruction relative to bundle_start.
          bundle_start: Absolute offset of the first byte of the bundle.

        Returns:
          True iff the instruction does not extend past the end of the bundle.
        """
        inst_start = bundle_start + inst_offset
        assert inst_start in self.offsets
        return self.offsets[inst_start] <= bundle_start + self.BUNDLE_SIZE

    def OffsetBelongsToInst(self, offset, inst_start):
        """Detects whether the byte at given offset is a part of an instruction.

        Args:
          offset: An integer offset, the address of the given byte.
          inst_start: An integer offset of the beginning of the instruction.

        Returns:
          True iff offset lies between the first and the last byte of the
          instruction that starts at inst_start (both inclusive).
        """
        assert inst_start in self.offsets
        return inst_start <= offset < self.offsets[inst_start]

    def StuboutInst(self, offset):
        """Fill the instruction at offset with NOP bytes."""
        assert offset in self.offsets
        for off in range(offset, self.offsets[offset]):
            self.inst_bytes[off] = '90'

    def GenAsmBundle(self, start_offset):
        """Generates 32 bytes of the original instructions suitable for assembler.

        May start from arbitrary offsets, which is useful when we have replaced
        a bundle-crossing instruction with NOPs. Append enough NOPs to form 32
        bytes if there are not enough instructions.

        Args:
          start_offset: the offset of the first byte to output

        Returns:
          A pair of (asm, has_next), where:
            asm: text representing code for the bundle suitable as assembler
                input
            has_next: boolean value indicating presence of instruction bytes
                after the bundle
        """
        total = len(self.inst_bytes)
        off = start_offset
        pieces = ['.text\n']
        bytes_written = 0

        # Allow to start from offset that does not start an instruction: the
        # remaining bytes of that instruction go out as one raw directive.
        tail = []
        while off < total and off not in self.offsets:
            tail.append(self.inst_bytes[off])
            off += 1
        if tail:
            pieces.append(self._ByteDirective(tail))
            bytes_written = len(tail)

        # Write the bytes from our source, one directive per instruction.
        while bytes_written < self.BUNDLE_SIZE and off < total:
            inst_end = self.offsets[off]
            chunk_end = min(inst_end, off + self.BUNDLE_SIZE - bytes_written)
            chunk = self.inst_bytes[off:chunk_end]
            pieces.append(self._ByteDirective(chunk))
            bytes_written += len(chunk)
            if chunk_end != inst_end:
                # The instruction crosses the bundle boundary; keep `off` at
                # its start so the remaining bytes count as "next".
                break
            off = inst_end

        # Write NOPs if we did not generate enough bytes yet.
        padding = (self.BUNDLE_SIZE -
                   bytes_written % self.BUNDLE_SIZE) % self.BUNDLE_SIZE
        pieces.append('nop\n' * padding)
        return (''.join(pieces), off < total)

    def GenAsm(self):
        """Generates text for all instructions suitable for assembler."""
        pieces = ['.text\n']
        total = len(self.inst_bytes)
        off = 0
        while off < total:
            inst_end = self.offsets[off]
            pieces.append(self._ByteDirective(self.inst_bytes[off:inst_end]))
            off = inst_end
        return ''.join(pieces)

class TestRunner(object):
    """Knows about naming tests, files, placement of golden files, etc."""

    def __init__(self, tmpdir, gas, decoder, validator):
        # Directory that receives all intermediate and output files.
        self.tmp = tmpdir
        # Paths of the external tools that are invoked as subprocesses.
        self.gas = gas
        self.decoder = decoder
        self.validator = validator

    @staticmethod
    def _TestName(hexfile):
        """Return the file name of hexfile without directory and extension."""
        return os.path.splitext(os.path.basename(hexfile))[0]

    @staticmethod
    def _ParseErrorOffsets(val_out):
        """Extract the hexadecimal offsets from 'offset <hex>: ...' lines."""
        offsets = []
        for line in val_out.split('\n'):
            re_match = re.match(r'offset ([^:]+):.+', line)
            if re_match:
                offsets.append(int(re_match.group(1), 16))
        return offsets

    @staticmethod
    def _FormatErrors(off_info):
        """Render the offset -> messages mapping as golden-file text."""
        return ''.join('offset 0x%x: %s\n' % (off, msg)
                       for off, msg_list in sorted(off_info.items())
                       for msg in msg_list)

    def CheckDecoder(self, asm, hexfile):
        """Test if we are decoding correctly.

        Generate binary code from given text, disassemble it with the DFA-based
        decoder, check correctness.

        Args:
          asm: the code to feed into assembler
          hexfile: the original file name, where asm was extracted from, useful
              for grouping all artifacts from each test under the same name
              prefix.

        Returns:
          True iff the test passes.
        """
        basename = self._TestName(hexfile)
        asmfile = os.path.join(self.tmp, basename + '.all.s')
        objfile = os.path.join(self.tmp, basename + '.o')
        WriteFile(asmfile, asm)
        gas_cmd = [self.gas, asmfile, '-o', objfile]
        if subprocess.call(gas_cmd) != 0:
            PrintError('assembler failed to execute command: %s' % gas_cmd)
            return False
        # universal_newlines makes the captured output text on both Python 2
        # and Python 3.
        decoder_process = subprocess.Popen([self.decoder, objfile],
                                           stdout=subprocess.PIPE,
                                           universal_newlines=True)
        decode_out = decoder_process.communicate()[0]
        WriteFile(os.path.join(self.tmp, basename + '.all.decode.out'),
                  decode_out)
        # TODO(pasko): Compare output with objdump or a golden file.
        return True

    def CheckAsm(self, asm, hexfile, run_id):
        """Extract the first error offset from the validator on given code.

        Args:
          asm: The code to feed into assembler and then the tested validator.
          hexfile: Original input file name, where the code was extracted from.
          run_id: An integer identifier of the certain testing run, must be
              distinct from one invocation to another.

        Returns:
          A pair of (non_fatal, error_offset), where:
            non_fatal: True iff testing steps did not reveal any fatal errors.
            error_offset: The offset of the first instruction that the
                validator rejected, or None if it reported no error.
        """
        basename = os.path.join(
            self.tmp, self._TestName(hexfile) + ('_part%03d' % run_id))
        asmfile = basename + '.s'
        objfile = basename + '.o'
        WriteFile(asmfile, asm)
        gas_cmd = [self.gas, asmfile, '-o', objfile]
        if subprocess.call(gas_cmd) != 0:
            PrintError('assembler failed to execute command: %s' % gas_cmd)
            return (False, None)
        validator_process = subprocess.Popen([self.validator, objfile],
                                             stdout=subprocess.PIPE,
                                             universal_newlines=True)
        val_out = validator_process.communicate()[0]
        offsets = self._ParseErrorOffsets(val_out)
        # The whole scheme relies on at most one reported error per run; treat
        # anything else as a fatal testing error rather than an assertion.
        if len(offsets) > 1:
            PrintError('validator reported %d errors in one run: %s' %
                       (len(offsets), objfile))
            return (False, None)
        if not offsets:
            return (True, None)
        return (True, offsets[0])

    def CompareOffsets(self, off_info, hexfile):
        """Check for correctness the knowledge from analysing a single test.

        Args:
          off_info: A dict mapping an integer offset to a list of string errors
              encountered for this offset. The order of errors is important.
          hexfile: Original input file name, where the code was extracted from.

        Returns:
          True iff the comparison with the golden file succeeds.
        """
        output = self._FormatErrors(off_info)
        basename = self._TestName(hexfile)
        output_file = os.path.join(self.tmp, basename + '.val.out')
        WriteFile(output_file, output)
        golden_file = os.path.join('golden', basename + '.val.ref')
        # A golden file that cannot be read yields None and never matches.
        golden = ReadFile(golden_file)
        if output == golden:
            return True
        PrintError('files differ: %s %s' % (golden_file, output_file))
        return False

    def RunTest(self, test):
        """Runs the test by name. Checks the decoder and the validator.

        Each test contains a sequence of instructions described as individual
        hex bytes. Checks the decoder by feeding it with the whole code
        sequence of the test.

        Checks the validator by separating the input code into 32-byte chunks,
        asking the validator to try validate every piece, compare the answers
        against the golden output.

        Args:
          test: the name of the test, used only to construct the names of the
              .hex and the golden file.

        Returns:
          True iff the test passes.
        """
        hexfile = 'testdata/64/%s.hex' % test
        if not os.path.exists(hexfile):
            PrintError('%s: no such file' % hexfile)
            return False

        # Check disassembling of the whole input.
        hex_instructions = InstByteSequence()
        hex_instructions.Parse(hexfile)
        if not self.CheckDecoder(hex_instructions.GenAsm(), hexfile):
            return False

        # Cut the input instruction sequence in bundles and run a test for each
        # bundle. For instructions that cross a bundle run an additional
        # test that starts from this instruction.
        start_pos = 0
        runs = 0
        top_errors = {}  # Mapping of offset to a list of error strings.
        has_next = True
        while has_next:
            (asm, has_next) = hex_instructions.GenAsmBundle(start_pos)
            # Collect validation reject offsets, stub them out, repeat until no
            # error.
            while True:
                (status, err_in_bundle) = self.CheckAsm(asm, hexfile, runs)
                runs += 1
                if not status:
                    return False
                if err_in_bundle is None:
                    break
                err_offset = start_pos + err_in_bundle
                if not hex_instructions.HasOffset(err_offset):
                    PrintError('validator returned error on offset that is '
                               'not a start of an instruction: 0x%x' %
                               err_offset)
                    return False
                if err_offset in top_errors:
                    # The offset was already recorded and handled, so handling
                    # it again cannot make progress; bail out instead of
                    # looping forever.
                    PrintError('validator keeps rejecting offset 0x%x after '
                               'it was stubbed out' % err_offset)
                    return False
                if hex_instructions.InstInBundle(err_in_bundle, start_pos):
                    top_errors[err_offset] = ['validation error']
                    hex_instructions.StuboutInst(err_offset)
                    (asm, _) = hex_instructions.GenAsmBundle(start_pos)
                else:
                    # If the instruction crosses the bundle boundary, we check
                    # if it gets validated as placed at address 0mod32, then go
                    # processing the next bundle. Stubout the instruction if
                    # necessary.
                    top_errors[err_offset] = ['crosses boundary']
                    (asm, _) = hex_instructions.GenAsmBundle(err_offset)
                    (status, cross_err_off) = self.CheckAsm(asm, hexfile, runs)
                    runs += 1
                    if not status:
                        return False
                    if (cross_err_off is not None and
                            hex_instructions.OffsetBelongsToInst(
                                err_offset + cross_err_off, err_offset)):
                        top_errors[err_offset].append('validation error')
                        hex_instructions.StuboutInst(err_offset)
                    break
            start_pos += 32

        # Compare the collected offsets with the golden file.
        return self.CompareOffsets(top_errors, hexfile)

def Main():
    """Parse the command line, run the selected tests and print the results.

    Options --gas, --decoder, --validator and --tmp are mandatory; positional
    arguments are rejected. Prints one PASS/FAIL line per test followed by a
    summary line.

    Returns:
      0 if every test passed, 1 otherwise (suitable as a process exit status).
    """
    # Tests excluded from the default list, grouped by reason:
    #   new validator allows unaligned calls:
    #     call_not_aligned, call_not_aligned_16
    #   reports error on instruction that follows the xchg esp, ebp, replacing
    #   it does not help causing an infinite loop:
    #     stack_regs, mov-lea-rbp-bad-1, mov-lea-rbp-bad-2, mov-lea-rbp-bad-3,
    #     mov-lea-rbp-bad-4, mv_ebp_alone
    #   the @ expansion is not yet parsed:
    #     call0, call1, call_long, call_short, jmp0, jump_not_atomic,
    #     jump_not_atomic_1, jump_overflow, jump_underflow,
    #     mv_ebp_add_crossing, return, segment_aligned, segment_not_aligned,
    #     update-rsp
    #   needs a tiny fix in old validator input file:
    #     legacy
    #   http://code.google.com/p/nativeclient/issues/detail?id=2529
    #     maskmov_test
    #   http://code.google.com/p/nativeclient/issues/detail?id=2603
    #     bsf-mask, bsr-mask
    #   http://code.google.com/p/nativeclient/issues/detail?id=2606
    #     extensions
    #   http://code.google.com/p/nativeclient/issues/detail?id=2607
    #     indirect_jmp_masked, jump_atomic
    #   super-instruction crosses boundary, small instruction does not:
    #     fpu
    #   have .hex, but not .rval:
    #     data66prefix, rdmsr, stubseq, test_alias, test_insts, wrmsr
    #   need more investigation:
    #     jump_outside, mmx, movs_test, prefix-2, prefix-single, strings, sse
    #
    # These tests pass ('ambig-segment' used to be listed twice and therefore
    # ran twice; it is listed once here):
    passing_tests = [
        '3DNow', 'add_cs_gs_prefix', 'add_mult_prefix', 'addrex',
        'AhNotSubRsp', 'bt', 'call_aligned', 'call-ex', 'cmpxchg', 'cpuid',
        'dup-prefix', 'hlt', 'incno67', 'indirect_jmp_not_masked',
        'invalid_base', 'invalid_base_store', 'invalid_width_index', 'jmp-16',
        'lea', 'lea-add-rsp', 'lea-rsp', 'mov-esi-nop-use',
        'mov_esp_add_rsp_r15', 'mov-lea-rbp', 'mov-lea-rsp', 'movlps-ex',
        'mov_rbp_2_rsp', 'movsbw', 'mv_ebp_add_rbp_r15', 'nops', 'pop-rbp',
        'prefix-3', 'push-memoff', 'rbp67', 'read_const_ptr', 'rep_tests',
        'rex_invalid', 'rex_not_last', 'rip-relative', 'segment_assign',
        'stosd', 'stosd67', 'stosd-bad', 'stosdno67', 'sub-add-rsp',
        'sub-rsp', 'ud2', 'valid_and_store', 'valid_base_only',
        'valid_lea_store', 'x87', 'add_rsp_r15', 'addrex2', 'ambig-segment',
        'bad66', 'fs_use', 'inc67', 'mov-lea-rbp-bad-5', 'nacl_illegal',
        'rip67', 'segment_store', 'change-subregs',
    ]

    parser = optparse.OptionParser()
    parser.add_option(
        '-t', '--tests', dest='tests',
        default=','.join(passing_tests),
        help='a comma-separated list of tests')
    parser.add_option(
        '-a', '--gas', dest='gas',
        default=None,
        help='path to assembler')
    parser.add_option(
        '-d', '--decoder', dest='decoder',
        default=None,
        help='path to decoder')
    parser.add_option(
        '-v', '--validator', dest='validator',
        default=None,
        help='path to validator')
    parser.add_option(
        '-p', '--tmp', dest='tmp',
        default=None,
        help='a directory for storing temporary files')
    opt, args = parser.parse_args()
    if (args or
            not opt.tmp or
            not opt.gas or
            not opt.decoder or
            not opt.validator):
        # parser.error() prints the usage text and exits with status 2.
        parser.error('invalid arguments')

    no_failures = True
    tester = TestRunner(opt.tmp, opt.gas, opt.decoder, opt.validator)
    for tst in opt.tests.split(','):
        # print(...) with one parenthesised argument behaves the same as the
        # print statement on Python 2 and as the function on Python 3.
        if tester.RunTest(tst):
            print('%s: PASS' % tst)
        else:
            print('%s: FAIL' % tst)
            no_failures = False
    if no_failures:
        print('All tests PASSed')
        return 0
    print('Some tests FAILed')
    return 1

if __name__ == '__main__':
    # Hand Main()'s return value to the shell as the process exit status.
    exit_status = Main()
    sys.exit(exit_status)

« no previous file with comments | « src/trusted/validator_ragel/unreviewed/parse_hex.py ('k') | no next file » | no next file with comments »