src/trusted/validator_ragel/unreviewed/run_ncval_tests.py - Issue 9423045: validator_ragel: Add ncval tests from the old validator

Side by Side Diff: src/trusted/validator_ragel/unreviewed/run_ncval_tests.py

Issue 9423045: validator_ragel: Add ncval tests from the old validator (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client

Patch Set: some more test classification Created 8 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 #!/usr/bin/python

	2 # Copyright (c) 2012 The Native Client Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 """Runs in-tree NaCl x86 validator tests against the DFA-based validator.

	7

	8 Takes *.hex files as input bytes. Each test output is the union of all errors

	9 that occurred when running the input bytes through the DFA-based validator.

	10 The latter can only detect one error per bundle without making mistakes about

	11 the offending instruction. After each run the invalid instruction is replaced

	12 with a sequence of NOPs of the same length until the code passes the validator.

	13

	14 The output from each test is then compared to golden/ files that parse_hex.py

	15 produced.

	16 """

	17

	18 import optparse

	19 import os

	20 import re

	21 import string

	22 import subprocess

	23 import sys

	24

	25

	26 def WriteFile(filename, data):

	27 fh = open(filename, "w")

	28 try:

	29 fh.write(data)

	30 finally:

	31 fh.close()

	32

	33

	34 def ReadFile(filename):

	35 try:

	36 file = open(filename, 'r')

	37 except IOError, e:

	38 print >> sys.stderr, ('Error reading file %s: %s' %

	39 (filename, e.strerror))

	40 return None

	41 contents = file.read()

	42 file.close()

	43 return contents

	44

	45

	46 def PrintError(msg):

	47 print >> sys.stderr, 'error: %s' % msg

	48

	49

	50 class InstByteSequence:

	51 """Parses a sequence of instructions, generates code pieces out of them.

	52

	53 Each instruction comes as a sequence of bytes in the input. It is required

	54 that the input source has the information of instruction boundaries in the

	55 byte stream.

	56

	57 """

	58

	59 def __init__(self):

	60 self.inst_bytes = []

	61 self.offsets = {}

	62

	63 def Parse(self, hexfile):

	64 """Read instruction bytes.

	65

	66 Args:

	67 hexfile: Name of file with instruction descriptions. Each line is a

	68 a sequence of hex-encoded bytes separated by spaces or a comment.

	69 """

	70 off = 0

	71 inst_begin = 0

	72 for line in open(hexfile, 'r').readlines():

	73 inst_begin = off

	74 if line.startswith('#'):

	75 continue

	76 for word in line.rstrip().split(' '):

	77 if re.match(r'^\s*$', word):

	78 continue

	79 assert(re.match(r'[0-9a-zA-Z][0-9a-zA-Z]', word))

	80 self.inst_bytes.append(word)

	81 off += 1

	82 self.offsets[inst_begin] = off

	83

	84 def HasOffset(self, offset):

	85 """Tells if the given offset contains the first byte of some instruction."""

	86 return offset in self.offsets

	87

	88 def InstInBundle(self, inst_offset, bundle_start):

	89 assert((bundle_start + inst_offset) in self.offsets)

	90 if bundle_start + 32 >= self.offsets[bundle_start + inst_offset]:

	91 return True

	92 return False

	93

	94 def OffsetBelongsToInst(self, offset, inst_start):

	95 """Detects whether the byte at given offset is a part of an instruction.

	96

	97 Args:

	98 offset: An integer offset, the address of the given byte.

	99 inst_start: An integer offset of the beginning of the instruction.

	100 """

	101 assert(inst_start in self.offsets)

	102 if offset == inst_start:

	103 return True

	104 for i in xrange(inst_start, len(self.inst_bytes)):

	105 if self.HasOffset(i):

	106 return False

	107 if i == offset:

	108 return True

	109 return False

	110

	111 def StuboutInst(self, offset):

	112 """Fill the instruction at offset with NOP bytes."""

	113 assert(offset in self.offsets)

	114 for off in xrange(offset, self.offsets[offset]):

	115 self.inst_bytes[off] = '90'

	116

	117 def GenAsmBundle(self, start_offset):

	118 """Generates 32 bytes of the original instructions suitable for assembler.

	119

	120 May start from arbitrary offsets, which is useful when we have replaced a

	121 bundle-crossing instruction with NOPs. Append enough NOPs to form 32 bytes

	122 if there are not enough instructions.

	123

	124 Args:

	125 start_offset: the offset of the first byte to output

	126 Returns:

	127 A pair of (asm, has_next), where:

	128 asm: text representing code for the bundle suitable as assembler input

	129 has_next: boolean value indicating presence of instruction bytes after

	130 the bundle

	131 """

	132 off = start_offset

	133 asm = '.text\n'

	134 bytes_written = 0

	135

	136 # Allow to start from offset that does not start an instruction.

	137 sep = '.byte 0x'

	138 while off < len(self.inst_bytes):

	139 if off in self.offsets:

	140 break

	141 asm += sep + self.inst_bytes[off]

	142 sep = ', 0x'

	143 bytes_written += 1

	144 off += 1

	145 if bytes_written > 0:

	146 asm += '\n'

	147

	148 # Write the bytes from our source.

	149 while bytes_written != 32 and off != len(self.inst_bytes):

	150 sep = '.byte 0x'

	151 inst_fully_written = True

	152 for i in xrange(off, self.offsets[off]):

	153 asm += sep + self.inst_bytes[i]

	154 bytes_written += 1

	155 sep = ', 0x'

	156 if bytes_written == 32:

	157 inst_fully_written = False

	158 break

	159 asm += '\n'

	160 if inst_fully_written:

	161 off = self.offsets[off]

	162

	163 has_next = True

	164 if off == len(self.inst_bytes):

	165 has_next = False

	166

	167 # Write NOPs if we did not get generate enough bytes yet.

	168 for i in xrange((32 - (bytes_written % 32)) % 32):

	169 asm += 'nop\n'

	170 assert(asm)

	171 return (asm, has_next)

	172

	173 def GenAsm(self):

	174 """Generates text for all instructions suitable for assembler."""

	175 asm = '.text\n'

	176 off = 0

	177 while True:

	178 sep = '.byte 0x'

	179 for i in xrange(off, self.offsets[off]):

	180 asm += sep + self.inst_bytes[i]

	181 sep = ', 0x'

	182 off = self.offsets[off]

	183 asm += '\n'

	184 if off == len(self.inst_bytes):

	185 break

	186 return asm

	187

	188

	189 class TestRunner:

	190 """Knows about naming tests, files, placement of golden files, etc."""

	191

	192 def __init__(self, tmpdir, gas, decoder, validator):

	193 self.tmp = tmpdir

	194 self.gas = gas

	195 self.decoder = decoder

	196 self.validator = validator

	197

	198 def CheckDecoder(self, asm, hexfile):

	199 """Test if we are decoding correctly.

	200

	201 Generate binary code from given text, disassembly it with the DFA-based

	202 decoder, check correctness.

	203

	204 Args:

	205 asm: the code to feed into assembler

	206 hexfile: the original file name, where asm was extracted from, useful

	207 for grouping all artifacts from each test under the same name

	208 prefix.

	209 Returns:

	210 True iff the test passes.

	211 """

	212 basename = os.path.basename(hexfile[:-4])

	213 asmfile = os.path.join(self.tmp, basename + '.all.s')

	214 objfile = os.path.join(self.tmp, basename + '.o')

	215 WriteFile(asmfile, asm)

	216 gas_cmd = [self.gas, asmfile, '-o', objfile]

	217 if subprocess.call(gas_cmd) != 0:

	218 PrintError('assembler failed to execute command: %s' % gas_cmd)

	219 return False

	220 decoder_process = subprocess.Popen([self.decoder, objfile],

	221 stdout=subprocess.PIPE)

	222 (decode_out, decode_err) = decoder_process.communicate()

	223 WriteFile(os.path.join(self.tmp, basename + '.all.decode.out'), decode_out)

	224 # TODO(pasko): Compare output with objdump or a golden file.

	225 return True

	226

	227 def CheckAsm(self, asm, hexfile, run_id):

	228 """Extract the first error offset from the validator on given code.

	229

	230 Args:

	231 asm: The code to feed into assembler and then the tested validator.

	232 hexfile: Original input file name, where the code was extracted from.

	233 run_id: An integer identifier of the certain testing run, must be

	234 distinct from one invocation to another.

	235

	236 Returns:

	237 A pair of (non_fatal, error_offset), where:

	238 non_fatal: True iff testing steps did not reveal any fatal errors.

	239 error_offset: The offset of the first instruction that the validator

	240 rejected.

	241 """

	242 asmfile = os.path.basename(hexfile[:-4]) + ('_part%03d.s' % run_id)

	243 asmfile = os.path.join(self.tmp, asmfile)

	244 WriteFile(asmfile, asm)

	245 basename = asmfile[:-2]

	246 objfile = basename + '.o'

	247 if subprocess.call([self.gas, asmfile, '-o', objfile]) != 0:

	248 return (False, None)

	249 validator_process = subprocess.Popen([self.validator, objfile],

	250 stdout=subprocess.PIPE)

	251 (val_out, val_err) = validator_process.communicate()

	252 offsets = []

	253 for line in string.split(val_out, '\n'):

	254 re_match = re.match(r'offset ([^:]+):.+', line)

	255 if not re_match:

	256 continue

	257 offsets.append(int(re_match.group(1), 16))

	258 assert(len(offsets) < 2)

	259 if len(offsets) == 0:

	260 return (True, None)

	261 return (True, offsets[0])

	262

	263 def CompareOffsets(self, off_info, hexfile):

	264 """Check for correctness the knowledge from analysing a single test.

	265

	266 Args:

	267 off_info: A dict mapping an integer offset to a list of string errors

	268 encountered for this offset. The order of errors is important.

	269 hexfile: Original input file name, where the code was extracted from.

	270 Returns:

	271 True iff the comparison with the golden file succeeds.

	272 """

	273 output = ''

	274 for off, msg_list in sorted(off_info.iteritems()):

	275 for msg in msg_list:

	276 output += 'offset 0x%x: %s\n' % (off, msg)

	277 basename = os.path.basename(hexfile[:-4])

	278 output_file = os.path.join(self.tmp , basename + '.val.out')

	279 WriteFile(output_file, output)

	280 golden_file = os.path.join('golden', basename + '.val.ref')

	281 golden = ReadFile(golden_file)

	282 if output == golden:

	283 return True

	284 PrintError('files differ: %s %s' % (golden_file, output_file))

	285 return False

	286

	287 def RunTest(self, test):

	288 """Runs the test by name. Checks the decoder and the validator.

	289

	290 Each test contains a sequence of instructions described as individual hex

	291 bytes. Checks the decoder by feeding it with the whole code sequence of the

	292 test.

	293

	294 Checks the validator by separating the input code into 32-byte chunks,

	295 asking the validator to try validate every piece, compare the answers

	296 against the golden output.

	297

	298 Args:

	299 test: the name of the test, used only to construct the names of the .hex

	300 and the golden file.

	301 Returns:

	302 True iff the test passes.

	303 """

	304 hexfile = 'testdata/64/%s.hex' % test

	305 if not os.path.exists(hexfile):

	306 PrintError('%s: no such file' % hexfile)

	307 return False

	308

	309 # Check disassembling of the whole input.

	310 hex_instructions = InstByteSequence()

	311 hex_instructions.Parse(hexfile)

	312 if not self.CheckDecoder(hex_instructions.GenAsm(), hexfile):

	313 return False

	314

	315 # Cut the input instruction sequence in bundles and run a test for each

	316 # bundle. For instructions that cross a bundle run an additional

	317 # test that starts from this instruction.

	318 start_pos = 0

	319 runs = 0

	320 top_errors = {} # Mapping of offset to a list of error strings.

	321 has_next = True

	322 while has_next:

	323 (asm, has_next) = hex_instructions.GenAsmBundle(start_pos)

	324 # Collect validation reject offsets, stub them out, repeat until no error.

	325 while True:

	326 (status, err_in_bundle) = self.CheckAsm(asm, hexfile, runs)

	327 runs += 1

	328 if not status:

	329 return False

	330 if err_in_bundle == None:

	331 break

	332 err_offset = start_pos + err_in_bundle

	333 if not hex_instructions.HasOffset(err_offset):

	334 PrintError('validator returned error on offset that is not a ' +

	335 'start of an instruction: 0x%x' % err_offset)

	336 return False

	337 if hex_instructions.InstInBundle(err_in_bundle, start_pos):

	338 top_errors[err_offset] = ['validation error']

	339 hex_instructions.StuboutInst(err_offset)

	340 (asm, _) = hex_instructions.GenAsmBundle(start_pos)

	341 else:

	342 # If the instruction crosses the bundle boundary, we check if it gets

	343 # validated as placed at address 0mod32, then go processing the next

	344 # bundle. Stubout the instruction if necessary.

	345 top_errors[err_offset] = ['crosses boundary']

	346 (asm, _) = hex_instructions.GenAsmBundle(err_offset)

	347 (status, cross_err_off) = self.CheckAsm(asm, hexfile, runs)

	348 runs += 1

	349 if not status:

	350 return False

	351 if cross_err_off != None:

	352 if hex_instructions.OffsetBelongsToInst(err_offset + cross_err_off,

	353 err_offset):

	354 top_errors[err_offset].append('validation error')

	355 hex_instructions.StuboutInst(err_offset)

	356 break

	357 start_pos += 32

	358

	359 # Compare the collected offsets with the golden file.

	360 if not self.CompareOffsets(top_errors, hexfile):

	361 return False

	362 return True

	363

	364

	365 def Main():

	366 parser = optparse.OptionParser()

	367 parser.add_option(

	368 '-t', '--tests', dest='tests',

	369 # new validator allows unaligned calls:

	370 # default='call_not_aligned',

	371 # default='call_not_aligned_16',

	372 # reports error on instruction that follows the xchg esp, ebp, replacing it does

	373 # not help causing an infinite loop

	374 # default='stack_regs',

	375 # default='mov-lea-rbp-bad-1',

	376 # default='mov-lea-rbp-bad-2',

	377 # default='mov-lea-rbp-bad-3',

	378 # default='mov-lea-rbp-bad-4',

	379 # default='mv_ebp_alone',

	380 # the @ expansion is not yet parsed:

	381 # default='call0',

	382 # default='call1',

	383 # default='call_long',

	384 # default='call_short',

	385 # default='jmp0',

	386 # default='jump_not_atomic',

	387 # default='jump_not_atomic_1',

	388 # default='jump_overflow',

	389 # default='jump_underflow',

	390 # default='mv_ebp_add_crossing',

	391 # default='return',

	392 # default='segment_aligned',

	393 # default='segment_not_aligned',

	394 # default='update-rsp',

	395 # needs a tiny fix in old validator input file:

	396 # default='legacy',

	397 # http://code.google.com/p/nativeclient/issues/detail?id=2529

	398 # default='maskmov_test',

	399 # http://code.google.com/p/nativeclient/issues/detail?id=2603

	400 # default='bsf-mask',

	401 # default='bsr-mask',

	402 # http://code.google.com/p/nativeclient/issues/detail?id=2606

	403 # default='extensions',

	404 # http://code.google.com/p/nativeclient/issues/detail?id=2607

	405 # default='indirect_jmp_masked',

	406 # default='jump_atomic',

	407 # super-instruction crosses boundary, small instruction does not:

	408 # default='fpu',

	409 # have .hex, but not .rval:

	410 # default='data66prefix,rdmsr,stubseq,test_alias,test_insts,wrmsr',

	411 # need more investigation:

	412 # default='jump_outside,mmx,movs_test,prefix-2,prefix-single,strings,sse',

	413 # these tests pass:

	414 default='3DNow,add_cs_gs_prefix,add_mult_prefix,addrex,AhNotSubRsp,bt,call _aligned,call-ex,cmpxchg,cpuid,dup-prefix,hlt,incno67,indirect_jmp_not_masked,in valid_base,invalid_base_store,invalid_width_index,jmp-16,lea,lea-add-rsp,lea-rsp ,mov-esi-nop-use,mov_esp_add_rsp_r15,mov-lea-rbp,mov-lea-rsp,movlps-ex,mov_rbp_2 _rsp,movsbw,mv_ebp_add_rbp_r15,nops,pop-rbp,prefix-3,push-memoff,rbp67,read_cons t_ptr,rep_tests,rex_invalid,rex_not_last,rip-relative,segment_assign,stosd,stosd 67,stosd-bad,stosdno67,sub-add-rsp,sub-rsp,ud2,valid_and_store,valid_base_only,v alid_lea_store,x87,add_rsp_r15,addrex2,ambig-segment,bad66,fs_use,inc67,mov-lea- rbp-bad-5,nacl_illegal,rip67,segment_store,change-subregs,ambig-segment',

	415 help='a comma-separated list of tests')

	416 parser.add_option(

	417 '-a', '--gas', dest='gas',

	418 default=None,

	419 help='path to assembler')

	420 parser.add_option(

	421 '-d', '--decoder', dest='decoder',

	422 default=None,

	423 help='path to decoder')

	424 parser.add_option(

	425 '-v', '--validator', dest='validator',

	426 default=None,

	427 help='path to validator')

	428 parser.add_option(

	429 '-p', '--tmp', dest='tmp',

	430 default=None,

	431 help='a directory for storing temporary files')

	432 opt, args = parser.parse_args()

	433 if (args or

	434 not opt.tmp or

	435 not opt.gas or

	436 not opt.decoder or

	437 not opt.validator):

	438 parser.error('invalid arguments')

	439 no_failures = True

	440 tester = TestRunner(opt.tmp, opt.gas, opt.decoder, opt.validator)

	441 for tst in string.split(opt.tests, ','):

	442 if tester.RunTest(tst):

	443 print '%s: PASS' % tst

	444 else:

	445 print '%s: FAIL' % tst

	446 no_failures = False

	447 if no_failures:

	448 print 'All tests PASSed'

	449 else:

	450 print 'Some tests FAILed'

	451 return 1

	452 return 0

	453

	454

# Run the tests only when executed as a script; the value returned by Main()
# becomes the exit status of the process.
if __name__ == '__main__':
  sys.exit(Main())

OLD	NEW

« no previous file with comments | « src/trusted/validator_ragel/unreviewed/parse_hex.py ('k') | no next file » | no next file with comments »