OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 # Copyright (c) 2012 The Native Client Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Runs in-tree NaCl x86 validator tests against the DFA-based validator. |
| 7 |
| 8 Takes *.hex files as input bytes. Each test output is the union of all errors |
| 9 that occurred when running the input bytes through the DFA-based validator. |
| 10 The latter can only detect one error per bundle without making mistakes about |
| 11 the offending instruction. After each run the invalid instruction is replaced |
| 12 with a sequence of NOPs of the same length until the code passes the validator. |
| 13 |
| 14 The output from each test is then compared to golden/ files that parse_hex.py |
| 15 produced. |
| 16 """ |
| 17 |
| 18 import optparse |
| 19 import os |
| 20 import re |
| 21 import string |
| 22 import subprocess |
| 23 import sys |
| 24 |
| 25 |
def WriteFile(filename, data):
    """Creates (or truncates) the text file `filename` and stores `data` in it.

    Args:
      filename: Path of the file to write.
      data: String to write.
    """
    # The context manager closes the handle even when write() raises.
    with open(filename, "w") as out:
        out.write(data)
| 32 |
| 33 |
def ReadFile(filename):
    """Returns the whole contents of a text file.

    Args:
      filename: Path of the file to read.
    Returns:
      The contents as a string, or None if the file could not be opened; in
      that case a diagnostic is written to stderr.
    """
    # Only a failure to open is turned into None; errors while reading still
    # propagate to the caller, as before.
    try:
        fh = open(filename, 'r')
    except IOError as e:
        sys.stderr.write('Error reading file %s: %s\n' %
                         (filename, e.strerror))
        return None
    # Close the handle even if read() raises.
    with fh:
        return fh.read()
| 44 |
| 45 |
def PrintError(msg):
    """Writes `msg` to stderr as a single line prefixed with 'error: '."""
    sys.stderr.write('error: %s\n' % msg)
| 48 |
| 49 |
class InstByteSequence(object):
    """Parses a sequence of instructions, generates code pieces out of them.

    Each instruction comes as a sequence of bytes in the input. It is
    required that the input source has the information of instruction
    boundaries in the byte stream: every non-comment line of the .hex file is
    taken as one instruction.

    Data kept by an instance:
      inst_bytes: list of two-character hex strings, one per code byte.
      offsets: dict mapping the offset of the first byte of each instruction
          to the offset one past its last byte.
    """

    # Size in bytes of one bundle of generated code.
    _BUNDLE_SIZE = 32
    # A single code byte written as exactly two hex digits.
    _HEX_BYTE_RE = re.compile(r'^[0-9a-fA-F]{2}$')

    def __init__(self):
        self.inst_bytes = []
        self.offsets = {}

    @staticmethod
    def _ByteDirective(hex_bytes):
        """Formats hex byte strings as one assembler '.byte' line (no EOL)."""
        return '.byte ' + ', '.join('0x' + b for b in hex_bytes)

    def Parse(self, hexfile):
        """Reads instruction bytes.

        Args:
          hexfile: Name of file with instruction descriptions. Each line is
              either a comment starting with '#' or a sequence of hex-encoded
              bytes separated by whitespace that forms one instruction.
        Raises:
          ValueError: a token is not a two-digit hex byte.
        """
        # Continue after already parsed bytes so that offsets stay consistent
        # with inst_bytes (0 for a fresh object).
        off = len(self.inst_bytes)
        with open(hexfile, 'r') as hex_lines:
            for line_number, line in enumerate(hex_lines, 1):
                if line.startswith('#'):
                    continue
                inst_begin = off
                for word in line.split():
                    if not self._HEX_BYTE_RE.match(word):
                        raise ValueError(
                            '%s:%d: expected a two-digit hex byte, got %r' %
                            (hexfile, line_number, word))
                    self.inst_bytes.append(word)
                    off += 1
                # Blank lines carry no instruction; recording them would
                # create zero-length entries that can never be stubbed out.
                if off > inst_begin:
                    self.offsets[inst_begin] = off

    def HasOffset(self, offset):
        """Tells if the given offset holds the first byte of an instruction."""
        return offset in self.offsets

    def InstInBundle(self, inst_offset, bundle_start):
        """Tells if an instruction ends within the bundle it starts in.

        Args:
          inst_offset: Offset of the instruction relative to bundle_start.
          bundle_start: Offset of the first byte of the bundle.
        Returns:
          True iff the instruction does not extend past
          bundle_start + bundle size.
        """
        inst_start = bundle_start + inst_offset
        assert inst_start in self.offsets
        return self.offsets[inst_start] <= bundle_start + self._BUNDLE_SIZE

    def OffsetBelongsToInst(self, offset, inst_start):
        """Detects whether the byte at given offset is part of an instruction.

        Args:
          offset: An integer offset, the address of the given byte.
          inst_start: An integer offset of the beginning of the instruction.
        Returns:
          True iff offset lies in [inst_start, end of that instruction).
        """
        assert inst_start in self.offsets
        # The previous scan started at inst_start itself, which is always an
        # instruction start, so it rejected every byte except the first one.
        return inst_start <= offset < self.offsets[inst_start]

    def StuboutInst(self, offset):
        """Fills the instruction at offset with NOP (0x90) bytes."""
        assert offset in self.offsets
        end = self.offsets[offset]
        self.inst_bytes[offset:end] = ['90'] * (end - offset)

    def GenAsmBundle(self, start_offset):
        """Generates one bundle of the instructions as assembler input.

        May start from arbitrary offsets, which is useful when we have
        replaced a bundle-crossing instruction with NOPs. Appends enough NOPs
        to fill the bundle if there are not enough instruction bytes.

        Args:
          start_offset: the offset of the first byte to output
        Returns:
          A pair of (asm, has_next), where:
            asm: text representing code for the bundle suitable as assembler
                input
            has_next: boolean value indicating presence of instruction bytes
                that were not completely emitted by this bundle
        """
        total = len(self.inst_bytes)
        bundle = self._BUNDLE_SIZE
        lines = ['.text']
        off = start_offset
        written = 0

        # Allow starting from an offset that does not start an instruction:
        # emit the remaining bytes of that instruction verbatim.
        leading = []
        while off < total and off not in self.offsets:
            leading.append(self.inst_bytes[off])
            off += 1
        if leading:
            lines.append(self._ByteDirective(leading))
            written = len(leading)

        # Emit whole instructions, one '.byte' line each, until the bundle
        # is full or the input is exhausted.
        while written < bundle and off < total:
            end = self.offsets[off]
            take = min(end - off, bundle - written)
            lines.append(self._ByteDirective(self.inst_bytes[off:off + take]))
            written += take
            if off + take < end:
                # The instruction crosses the bundle boundary. Keep `off` at
                # its start so that has_next reports the unfinished bytes.
                break
            # An instruction ending exactly on the boundary is complete.
            off = end

        has_next = off < total

        # Pad with NOPs if we did not generate enough bytes yet.
        lines.extend(['nop'] * ((bundle - written % bundle) % bundle))
        return ('\n'.join(lines) + '\n', has_next)

    def GenAsm(self):
        """Generates text for all instructions suitable for assembler.

        Returns:
          A '.text' section with one '.byte' line per instruction; just the
          section header when there are no instructions.
        """
        lines = ['.text']
        off = 0
        total = len(self.inst_bytes)
        while off < total:
            end = self.offsets[off]
            lines.append(self._ByteDirective(self.inst_bytes[off:end]))
            off = end
        return '\n'.join(lines) + '\n'
| 187 |
| 188 |
| 189 class TestRunner: |
| 190 """Knows about naming tests, files, placement of golden files, etc.""" |
| 191 |
| 192 def __init__(self, tmpdir, gas, decoder, validator): |
| 193 self.tmp = tmpdir |
| 194 self.gas = gas |
| 195 self.decoder = decoder |
| 196 self.validator = validator |
| 197 |
| 198 def CheckDecoder(self, asm, hexfile): |
| 199 """Test if we are decoding correctly. |
| 200 |
| 201 Generate binary code from given text, disassembly it with the DFA-based |
| 202 decoder, check correctness. |
| 203 |
| 204 Args: |
| 205 asm: the code to feed into assembler |
| 206 hexfile: the original file name, where asm was extracted from, useful |
| 207 for grouping all artifacts from each test under the same name |
| 208 prefix. |
| 209 Returns: |
| 210 True iff the test passes. |
| 211 """ |
| 212 basename = os.path.basename(hexfile[:-4]) |
| 213 asmfile = os.path.join(self.tmp, basename + '.all.s') |
| 214 objfile = os.path.join(self.tmp, basename + '.o') |
| 215 WriteFile(asmfile, asm) |
| 216 gas_cmd = [self.gas, asmfile, '-o', objfile] |
| 217 if subprocess.call(gas_cmd) != 0: |
| 218 PrintError('assembler failed to execute command: %s' % gas_cmd) |
| 219 return False |
| 220 decoder_process = subprocess.Popen([self.decoder, objfile], |
| 221 stdout=subprocess.PIPE) |
| 222 (decode_out, decode_err) = decoder_process.communicate() |
| 223 WriteFile(os.path.join(self.tmp, basename + '.all.decode.out'), decode_out) |
| 224 # TODO(pasko): Compare output with objdump or a golden file. |
| 225 return True |
| 226 |
| 227 def CheckAsm(self, asm, hexfile, run_id): |
| 228 """Extract the first error offset from the validator on given code. |
| 229 |
| 230 Args: |
| 231 asm: The code to feed into assembler and then the tested validator. |
| 232 hexfile: Original input file name, where the code was extracted from. |
| 233 run_id: An integer identifier of the certain testing run, must be |
| 234 distinct from one invocation to another. |
| 235 |
| 236 Returns: |
| 237 A pair of (non_fatal, error_offset), where: |
| 238 non_fatal: True iff testing steps did not reveal any fatal errors. |
| 239 error_offset: The offset of the first instruction that the validator |
| 240 rejected. |
| 241 """ |
| 242 asmfile = os.path.basename(hexfile[:-4]) + ('_part%03d.s' % run_id) |
| 243 asmfile = os.path.join(self.tmp, asmfile) |
| 244 WriteFile(asmfile, asm) |
| 245 basename = asmfile[:-2] |
| 246 objfile = basename + '.o' |
| 247 if subprocess.call([self.gas, asmfile, '-o', objfile]) != 0: |
| 248 return (False, None) |
| 249 validator_process = subprocess.Popen([self.validator, objfile], |
| 250 stdout=subprocess.PIPE) |
| 251 (val_out, val_err) = validator_process.communicate() |
| 252 offsets = [] |
| 253 for line in string.split(val_out, '\n'): |
| 254 re_match = re.match(r'offset ([^:]+):.+', line) |
| 255 if not re_match: |
| 256 continue |
| 257 offsets.append(int(re_match.group(1), 16)) |
| 258 assert(len(offsets) < 2) |
| 259 if len(offsets) == 0: |
| 260 return (True, None) |
| 261 return (True, offsets[0]) |
| 262 |
| 263 def CompareOffsets(self, off_info, hexfile): |
| 264 """Check for correctness the knowledge from analysing a single test. |
| 265 |
| 266 Args: |
| 267 off_info: A dict mapping an integer offset to a list of string errors |
| 268 encountered for this offset. The order of errors is important. |
| 269 hexfile: Original input file name, where the code was extracted from. |
| 270 Returns: |
| 271 True iff the comparison with the golden file succeeds. |
| 272 """ |
| 273 output = '' |
| 274 for off, msg_list in sorted(off_info.iteritems()): |
| 275 for msg in msg_list: |
| 276 output += 'offset 0x%x: %s\n' % (off, msg) |
| 277 basename = os.path.basename(hexfile[:-4]) |
| 278 output_file = os.path.join(self.tmp , basename + '.val.out') |
| 279 WriteFile(output_file, output) |
| 280 golden_file = os.path.join('golden', basename + '.val.ref') |
| 281 golden = ReadFile(golden_file) |
| 282 if output == golden: |
| 283 return True |
| 284 PrintError('files differ: %s %s' % (golden_file, output_file)) |
| 285 return False |
| 286 |
| 287 def RunTest(self, test): |
| 288 """Runs the test by name. Checks the decoder and the validator. |
| 289 |
| 290 Each test contains a sequence of instructions described as individual hex |
| 291 bytes. Checks the decoder by feeding it with the whole code sequence of the |
| 292 test. |
| 293 |
| 294 Checks the validator by separating the input code into 32-byte chunks, |
| 295 asking the validator to try validate every piece, compare the answers |
| 296 against the golden output. |
| 297 |
| 298 Args: |
| 299 test: the name of the test, used only to construct the names of the .hex |
| 300 and the golden file. |
| 301 Returns: |
| 302 True iff the test passes. |
| 303 """ |
| 304 hexfile = 'testdata/64/%s.hex' % test |
| 305 if not os.path.exists(hexfile): |
| 306 PrintError('%s: no such file' % hexfile) |
| 307 return False |
| 308 |
| 309 # Check disassembling of the whole input. |
| 310 hex_instructions = InstByteSequence() |
| 311 hex_instructions.Parse(hexfile) |
| 312 if not self.CheckDecoder(hex_instructions.GenAsm(), hexfile): |
| 313 return False |
| 314 |
| 315 # Cut the input instruction sequence in bundles and run a test for each |
| 316 # bundle. For instructions that cross a bundle run an additional |
| 317 # test that starts from this instruction. |
| 318 start_pos = 0 |
| 319 runs = 0 |
| 320 top_errors = {} # Mapping of offset to a list of error strings. |
| 321 has_next = True |
| 322 while has_next: |
| 323 (asm, has_next) = hex_instructions.GenAsmBundle(start_pos) |
| 324 # Collect validation reject offsets, stub them out, repeat until no error. |
| 325 while True: |
| 326 (status, err_in_bundle) = self.CheckAsm(asm, hexfile, runs) |
| 327 runs += 1 |
| 328 if not status: |
| 329 return False |
| 330 if err_in_bundle == None: |
| 331 break |
| 332 err_offset = start_pos + err_in_bundle |
| 333 if not hex_instructions.HasOffset(err_offset): |
| 334 PrintError('validator returned error on offset that is not a ' + |
| 335 'start of an instruction: 0x%x' % err_offset) |
| 336 return False |
| 337 if hex_instructions.InstInBundle(err_in_bundle, start_pos): |
| 338 top_errors[err_offset] = ['validation error'] |
| 339 hex_instructions.StuboutInst(err_offset) |
| 340 (asm, _) = hex_instructions.GenAsmBundle(start_pos) |
| 341 else: |
| 342 # If the instruction crosses the bundle boundary, we check if it gets |
| 343 # validated as placed at address 0mod32, then go processing the next |
| 344 # bundle. Stubout the instruction if necessary. |
| 345 top_errors[err_offset] = ['crosses boundary'] |
| 346 (asm, _) = hex_instructions.GenAsmBundle(err_offset) |
| 347 (status, cross_err_off) = self.CheckAsm(asm, hexfile, runs) |
| 348 runs += 1 |
| 349 if not status: |
| 350 return False |
| 351 if cross_err_off != None: |
| 352 if hex_instructions.OffsetBelongsToInst(err_offset + cross_err_off, |
| 353 err_offset): |
| 354 top_errors[err_offset].append('validation error') |
| 355 hex_instructions.StuboutInst(err_offset) |
| 356 break |
| 357 start_pos += 32 |
| 358 |
| 359 # Compare the collected offsets with the golden file. |
| 360 if not self.CompareOffsets(top_errors, hexfile): |
| 361 return False |
| 362 return True |
| 363 |
| 364 |
| 365 def Main(): |
| 366 parser = optparse.OptionParser() |
| 367 parser.add_option( |
| 368 '-t', '--tests', dest='tests', |
| 369 # new validator allows unaligned calls: |
| 370 # default='call_not_aligned', |
| 371 # default='call_not_aligned_16', |
| 372 # reports error on instruction that follows the xchg esp, ebp, replacing it does |
| 373 # not help causing an infinite loop |
| 374 # default='stack_regs', |
| 375 # default='mov-lea-rbp-bad-1', |
| 376 # default='mov-lea-rbp-bad-2', |
| 377 # default='mov-lea-rbp-bad-3', |
| 378 # default='mov-lea-rbp-bad-4', |
| 379 # default='mv_ebp_alone', |
| 380 # the @ expansion is not yet parsed: |
| 381 # default='call0', |
| 382 # default='call1', |
| 383 # default='call_long', |
| 384 # default='call_short', |
| 385 # default='jmp0', |
| 386 # default='jump_not_atomic', |
| 387 # default='jump_not_atomic_1', |
| 388 # default='jump_overflow', |
| 389 # default='jump_underflow', |
| 390 # default='mv_ebp_add_crossing', |
| 391 # default='return', |
| 392 # default='segment_aligned', |
| 393 # default='segment_not_aligned', |
| 394 # default='update-rsp', |
| 395 # needs a tiny fix in old validator input file: |
| 396 # default='legacy', |
| 397 # http://code.google.com/p/nativeclient/issues/detail?id=2529 |
| 398 # default='maskmov_test', |
| 399 # http://code.google.com/p/nativeclient/issues/detail?id=2603 |
| 400 # default='bsf-mask', |
| 401 # default='bsr-mask', |
| 402 # http://code.google.com/p/nativeclient/issues/detail?id=2606 |
| 403 # default='extensions', |
| 404 # http://code.google.com/p/nativeclient/issues/detail?id=2607 |
| 405 # default='indirect_jmp_masked', |
| 406 # default='jump_atomic', |
| 407 # super-instruction crosses boundary, small instruction does not: |
| 408 # default='fpu', |
| 409 # have .hex, but not .rval: |
| 410 # default='data66prefix,rdmsr,stubseq,test_alias,test_insts,wrmsr', |
| 411 # need more investigation: |
| 412 # default='jump_outside,mmx,movs_test,prefix-2,prefix-single,strings,sse', |
| 413 # these tests pass: |
| 414 default='3DNow,add_cs_gs_prefix,add_mult_prefix,addrex,AhNotSubRsp,bt,call
_aligned,call-ex,cmpxchg,cpuid,dup-prefix,hlt,incno67,indirect_jmp_not_masked,in
valid_base,invalid_base_store,invalid_width_index,jmp-16,lea,lea-add-rsp,lea-rsp
,mov-esi-nop-use,mov_esp_add_rsp_r15,mov-lea-rbp,mov-lea-rsp,movlps-ex,mov_rbp_2
_rsp,movsbw,mv_ebp_add_rbp_r15,nops,pop-rbp,prefix-3,push-memoff,rbp67,read_cons
t_ptr,rep_tests,rex_invalid,rex_not_last,rip-relative,segment_assign,stosd,stosd
67,stosd-bad,stosdno67,sub-add-rsp,sub-rsp,ud2,valid_and_store,valid_base_only,v
alid_lea_store,x87,add_rsp_r15,addrex2,ambig-segment,bad66,fs_use,inc67,mov-lea-
rbp-bad-5,nacl_illegal,rip67,segment_store,change-subregs,ambig-segment', |
| 415 help='a comma-separated list of tests') |
| 416 parser.add_option( |
| 417 '-a', '--gas', dest='gas', |
| 418 default=None, |
| 419 help='path to assembler') |
| 420 parser.add_option( |
| 421 '-d', '--decoder', dest='decoder', |
| 422 default=None, |
| 423 help='path to decoder') |
| 424 parser.add_option( |
| 425 '-v', '--validator', dest='validator', |
| 426 default=None, |
| 427 help='path to validator') |
| 428 parser.add_option( |
| 429 '-p', '--tmp', dest='tmp', |
| 430 default=None, |
| 431 help='a directory for storing temporary files') |
| 432 opt, args = parser.parse_args() |
| 433 if (args or |
| 434 not opt.tmp or |
| 435 not opt.gas or |
| 436 not opt.decoder or |
| 437 not opt.validator): |
| 438 parser.error('invalid arguments') |
| 439 no_failures = True |
| 440 tester = TestRunner(opt.tmp, opt.gas, opt.decoder, opt.validator) |
| 441 for tst in string.split(opt.tests, ','): |
| 442 if tester.RunTest(tst): |
| 443 print '%s: PASS' % tst |
| 444 else: |
| 445 print '%s: FAIL' % tst |
| 446 no_failures = False |
| 447 if no_failures: |
| 448 print 'All tests PASSed' |
| 449 else: |
| 450 print 'Some tests FAILed' |
| 451 return 1 |
| 452 return 0 |
| 453 |
| 454 |
# Script entry point: the value returned by Main() becomes the exit status.
if __name__ == '__main__':
    raise SystemExit(Main())
OLD | NEW |