| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/python | |
| 2 # Copyright (c) 2013 The Native Client Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """ | |
| 7 Generate all acceptable regular instructions by traversing validator DFA | |
| 8 and run objdump, new and old validator on them. | |
| 9 """ | |
| 10 # TODO(shcherbina): get rid of this test once text-based specification is | |
| 11 # complete (https://code.google.com/p/nativeclient/issues/detail?id=3453). | |
| 12 | |
| 13 import itertools | |
| 14 import multiprocessing | |
| 15 import optparse | |
| 16 import os | |
| 17 import re | |
| 18 import subprocess | |
| 19 import sys | |
| 20 import tempfile | |
| 21 import traceback | |
| 22 | |
| 23 import dfa_parser | |
| 24 import dfa_traversal | |
| 25 import objdump_parser | |
| 26 import validator | |
| 27 | |
| 28 | |
# Single-byte x86 opcodes that get special treatment in this script.
# FWAIT: a NOP is inserted after a bare (or REX-prefixed) fwait before the
# instruction stream is handed to objdump.
FWAIT = 0x9b
# NOP: used to pad instruction sequences up to validator.BUNDLE_SIZE.
NOP = 0x90
| 31 | |
| 32 | |
def IsRexPrefix(byte):
  """Tell whether byte falls into the 0x40..0x4f range used by REX prefixes."""
  first_rex = 0x40
  past_last_rex = 0x50
  return byte >= first_rex and byte < past_last_rex
| 35 | |
| 36 | |
def Cached(f):
  """Return a memoizing wrapper around f.

  Results are remembered per tuple of positional arguments, so every argument
  has to be hashable. The wrapper does not accept keyword arguments.
  """
  results = {}

  def CachedF(*args):
    try:
      return results[args]
    except KeyError:
      pass
    # f is called outside of the 'try' so that a KeyError raised by f itself
    # is never mistaken for a cache miss.
    value = f(*args)
    results[args] = value
    return value

  return CachedF
| 45 | |
| 46 | |
class AssemblerError(Exception):
  """Raised when the assembler exits with a non-zero status."""
| 49 | |
| 50 | |
@Cached
def Assemble(bitness, asm):
  """Assemble a snippet with the assembler given by options.gas.

  Args:
    bitness: passed to the assembler as '--<bitness>' (e.g. 64).
    asm: assembly source text.

  Returns:
    List of byte values (ints) found in the object file between the two
    marker sequences, i.e. the machine code produced for asm.

  Raises:
    AssemblerError: if the assembler exits with a non-zero status.
  """
  # Instead of parsing object files properly, two distinct sequences,
  # begin_mark and end_mark, are put around the code of interest.
  # The possibility that they occur somewhere else in the file is neglected.
  begin_mark = 'begin mark>>>'
  end_mark = '<<<end mark'

  # The file is created before entering 'try': if creation itself fails there
  # is nothing to clean up, and 'finally' must not touch an unbound name.
  obj_file = tempfile.NamedTemporaryFile(
      mode='w+b',
      suffix='.o',
      delete=False)
  try:
    proc = subprocess.Popen(
        [options.gas,
         '--%s' % bitness,
         '-o', obj_file.name],
        stdin=subprocess.PIPE)

    asm_lines = ['.byte %d' % ord(c) for c in begin_mark]
    asm_lines.append('%s' % asm)
    asm_lines.extend('.byte %d' % ord(c) for c in end_mark)
    asm_content = '\n'.join(asm_lines) + '\n'

    proc.communicate(asm_content)
    return_code = proc.wait()
    if return_code != 0:
      raise AssemblerError("Can't assemble '%s'" % asm)

    data = obj_file.read()

    # Extract the data between begin_mark and end_mark.
    begin = data.find(begin_mark)
    assert begin != -1, 'begin_mark is missing'
    begin += len(begin_mark)
    end = data.find(end_mark, begin)
    assert end != -1, 'end_mark is missing'
    return map(ord, data[begin:end])

  finally:
    # Close the handle on every path (including failures) before removing
    # the file.
    obj_file.close()
    os.remove(obj_file.name)
| 96 | |
| 97 | |
class OldValidator(object):
  """Runs bundles through the old validator (ncval) in batches.

  Bundles are queued by Validate() together with a comment. Once 40 bundles
  are queued (or when GetErrors() is called) they are fed to the ncval binary
  selected by options.bitness, and the comments of the bundles in which ncval
  reported an error are collected.
  """

  def __init__(self):
    self._bundles = []
    self._errors = []

  def Validate(self, bundle, comment):
    """Queue a bundle (list of byte values); flush when 40 are queued."""
    self._bundles.append((bundle, comment))

    if len(self._bundles) == 40:
      self._Process()

  def _Process(self):
    """Run ncval on all queued bundles and record comments of rejected ones."""
    code = list(itertools.chain.from_iterable(
        bundle for bundle, _ in self._bundles))
    hex_content = ' '.join('%02x' % byte for byte in code)

    assert len(hex_content) < 4096

    ncval = {32: options.ncval32, 64: options.ncval64}[options.bitness]
    proc = subprocess.Popen(
        [ncval, '--hex_text=-', '--max_errors=-1'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE)

    # stderr is not redirected, so the second element is always None.
    stdout, stderr = proc.communicate(hex_content)
    return_code = proc.wait()
    assert return_code == 0, (stdout, stderr)

    if '*** <input> is safe ***' in stdout:
      self._bundles = []
      return

    assert '*** <input> IS UNSAFE ***' in stdout

    # Output lines that carry no error location.
    uninteresting = (
        '',
        '*** <input> IS UNSAFE ***',
        'Some instructions were replaced with HLTs.')

    rejected_bundles = set()
    for raw_line in stdout.split('\n'):
      line = raw_line.strip()
      if line in uninteresting:
        continue
      if line.startswith(
          'VALIDATOR: Checking block alignment and jump targets'):
        continue
      m = re.match(r'VALIDATOR: ([0-9a-f]+): (.*)$', line, re.IGNORECASE)
      assert m is not None, (line, hex_content)
      error_offset = int(m.group(1), 16)
      # Map the byte offset of the error back to the index of its bundle.
      rejected_bundles.add(error_offset // validator.BUNDLE_SIZE)

    assert len(rejected_bundles) != 0
    for index in sorted(rejected_bundles):
      comment = self._bundles[index][1]
      self._errors.append(comment)

    self._bundles = []

  def GetErrors(self):
    """Flush the queue if needed and return comments of rejected bundles."""
    if len(self._bundles) > 0:
      self._Process()
    return self._errors
| 160 | |
| 161 | |
def CheckFinalRestrictedRegister(
    sandboxing,
    instruction,
    disassembly,
    old_validator):
  """Check the restricted register an instruction leaves behind.

  The bundle 'sandboxing + instruction' (padded with NOPs) is validated with
  the new validator, and the restricted register reported for the instruction
  is extracted. If there is one, a memory access based on that register is
  appended; the new validator must accept the result, and the bundle is queued
  for the old validator.

  Args:
    sandboxing: list of byte values placed before the instruction
        (may be empty).
    instruction: list of byte values of the instruction under test.
    disassembly: text describing the bundle, used in diagnostics.
    old_validator: object whose Validate(bundle, comment) receives the
        extended bundle.
  """
  sandboxing_size = len(sandboxing)

  def PadToBundle(code):
    # Returns a new list of exactly validator.BUNDLE_SIZE bytes.
    assert len(code) <= validator.BUNDLE_SIZE
    return code + [NOP] * (validator.BUNDLE_SIZE - len(code))

  first_bundle = PadToBundle(sandboxing + instruction)

  # Mutable holder, because the callback has to hand a value back to us.
  state = {'register': None}

  def Callback(begin, end, info):
    if begin == sandboxing_size:
      assert end == sandboxing_size + len(instruction)
      state['register'] = (
          (info & validator.RESTRICTED_REGISTER_MASK) >>
          validator.RESTRICTED_REGISTER_SHIFT)
    elif begin > sandboxing_size:
      # Everything after the instruction must be single-byte padding.
      assert first_bundle[begin:end] == [NOP]

  result = validator_inst.ValidateChunk(
      ''.join(map(chr, first_bundle)),
      bitness=options.bitness,
      callback=Callback,
      on_each_instruction=True)
  assert result, (disassembly, map(hex, first_bundle))

  restricted = state['register']
  if restricted == validator.NO_REG:
    restricted = None

  assert restricted != validator.REG_R15, (
      'restricted register can not be r15')

  if restricted is None:
    return

  register_name = validator.REGISTER_NAMES[restricted]
  memory_reference = 'mov (%%r15, %s), %%al' % register_name
  second_bundle = PadToBundle(
      sandboxing + instruction + Assemble(64, memory_reference))

  assert validator_inst.ValidateChunk(
      ''.join(map(chr, second_bundle)),
      bitness=options.bitness), (second_bundle, disassembly, memory_reference)

  old_validator.Validate(
      second_bundle,
      (disassembly + '; ' + memory_reference, instruction))
| 210 | |
| 211 | |
def ValidateInstruction(
    instruction,
    disassembly,
    old_validator):
  """Validate one instruction with the new validator and queue it for the old.

  Args:
    instruction: list of byte values of the instruction.
    disassembly: text describing the instruction, used in diagnostics.
    old_validator: object whose Validate(bundle, comment) receives every
        bundle accepted by the new validator.

  Returns:
    A true value if the new validator accepts the instruction, either as is
    or (64-bit only) with some register other than r15 restricted.
  """
  assert len(instruction) <= validator.BUNDLE_SIZE
  padding = [NOP] * (validator.BUNDLE_SIZE - len(instruction))
  bundle = instruction + padding
  chunk = ''.join(map(chr, bundle))

  accepted = validator_inst.ValidateChunk(chunk, bitness=options.bitness)
  if accepted:
    old_validator.Validate(bundle, (disassembly, instruction))

  if options.bitness == 32:
    return accepted

  if accepted:
    CheckFinalRestrictedRegister([], instruction, disassembly, old_validator)

  # Additionally, try to restrict every possible register and check whether
  # the instruction would be accepted.
  for register, register_name in validator.REGISTER_NAMES.items():
    if register == validator.REG_R15:
      continue
    if not validator_inst.ValidateChunk(
        chunk,
        bitness=options.bitness,
        restricted_register=register):
      continue

    # Switch to the 32-bit name of the register:
    #   %r8 -> %r8d
    #   %rax -> %eax
    if re.match(r'%r\d+$', register_name):
      register_name += 'd'
    else:
      assert register_name.startswith('%r')
      register_name = '%e' + register_name[2:]

    sandboxing = 'mov %%eax, %s' % register_name
    CheckFinalRestrictedRegister(
        Assemble(64, sandboxing),
        instruction,
        sandboxing + '; ' + disassembly,
        old_validator)
    accepted = True

  return accepted
| 264 | |
| 265 | |
class WorkerState(object):
  """Accumulates instructions produced by DFA traversal and checks them.

  Attributes:
    total_instructions: number of instructions checked so far.
    num_valid: running sum of the values returned by ValidateInstruction.
    errors: comments of bundles rejected by the old validator.
  """

  def __init__(self, prefix):
    self.total_instructions = 0
    self.num_valid = 0
    self._file_prefix = 'check_validator_%s_' % '_'.join(map(hex, prefix))
    self._instructions = []
    self.errors = []

  def ReceiveInstruction(self, bytes):
    """Queue an instruction (list of byte values) for checking."""
    self._instructions.append(bytes)

    # Objdump prints crazy stuff when x87 instructions are prefixed with
    # fwait (especially when REX prefixes are involved). To avoid that,
    # we insert nops after each fwait.
    if (bytes == [FWAIT] or
        len(bytes) == 2 and IsRexPrefix(bytes[0]) and bytes[1] == FWAIT):
      self._instructions.append([NOP])

    if len(self._instructions) >= 1000000:
      self.CheckReceivedInstructions()

  def CheckReceivedInstructions(self):
    """Check instructions accumulated so far and clear the list.

    The instructions are written to a temporary file and disassembled with
    objdump; objdump's bytes must match ours line by line. Each instruction is
    then passed to ValidateInstruction, and errors reported by the old
    validator are printed and appended to self.errors.
    """
    if not self._instructions:
      return

    # Created before 'try' so that the cleanup in 'finally' never refers to
    # an unbound name if the file can not be created.
    raw_file = tempfile.NamedTemporaryFile(
        mode='wb',
        prefix=self._file_prefix,
        suffix='.o',
        delete=False)
    try:
      for instr in self._instructions:
        raw_file.write(''.join(map(chr, instr)))
      raw_file.close()

      objdump_proc = subprocess.Popen(
          [options.objdump,
           '--disassemble-all', '--disassemble-zeroes',
           '-b', 'binary',
           '-m', 'i386'] +
          {32: [], 64: ['-M', 'x86-64']}[options.bitness] +
          ['--insn-width', '15',
           raw_file.name],
          stdout=subprocess.PIPE)

      objdump_iter = iter(objdump_parser.SkipHeader(objdump_proc.stdout))

      old_validator = OldValidator()
      for instr in self._instructions:
        # Objdump prints fwait with REX prefix in this ridiculous way:
        #   0: 41 fwait
        #   1: 9b fwait
        # So in such cases we expect two lines from objdump.
        # TODO(shcherbina): get rid of this special handling once
        # https://code.google.com/p/nativeclient/issues/detail?id=3496 is fixed.
        if len(instr) == 2 and IsRexPrefix(instr[0]) and instr[1] == FWAIT:
          expected_lines = 2
        else:
          expected_lines = 1

        dumped_bytes = []
        for _ in range(expected_lines):
          line = next(objdump_iter)
          # Parse tab-separated line of the form
          # 0: f2 40 0f 10 00 rex movsd (%rax),%xmm0
          addr, more_bytes, disassembly = line.strip().split('\t')
          dumped_bytes += [int(b, 16) for b in more_bytes.split()]

        assert dumped_bytes == instr, (map(hex, dumped_bytes), map(hex, instr))
        self.total_instructions += 1

        self.num_valid += ValidateInstruction(instr, disassembly, old_validator)

      # Make sure we read objdump output to the end.
      end = next(objdump_iter, None)
      assert end is None, end

      return_code = objdump_proc.wait()
      assert return_code == 0

    finally:
      # Closing an already closed file is a no-op; this only matters when
      # writing the instructions failed half-way.
      raw_file.close()
      os.remove(raw_file.name)

    errors = old_validator.GetErrors()
    for error in errors:
      print(error)
    self.errors += errors

    # Everything queued has been checked.
    self._instructions = []
| 355 | |
| 356 | |
def Worker(task):
  """Traverse one DFA subtree and check all instructions found in it.

  Args:
    task: (prefix, state_index) pair; prefix is the sequence of bytes leading
        to the state dfa.states[state_index] from which traversal starts.

  Returns:
    Tuple (prefix, total_instructions, num_valid, errors) taken from the
    WorkerState that accumulated the results.
  """
  # Unpacked here rather than in the signature: tuple parameters are not
  # valid syntax in Python 3 (PEP 3113), and callers pass a single tuple
  # either way.
  prefix, state_index = task
  worker_state = WorkerState(prefix)

  try:
    dfa_traversal.TraverseTree(
        dfa.states[state_index],
        final_callback=worker_state.ReceiveInstruction,
        prefix=prefix,
        anyfield=0)
    worker_state.CheckReceivedInstructions()
  except Exception:
    traceback.print_exc()  # because multiprocessing imap swallows traceback
    raise

  return (
      prefix,
      worker_state.total_instructions,
      worker_state.num_valid,
      worker_state.errors)
| 376 | |
| 377 | |
def ParseOptions():
  """Parse and check the command line.

  Returns:
    Pair (options, xml_file): the optparse options object and the single
    positional argument.

  Exits the process through parser.error() if an option is missing or
  malformed, and through sys.exit(1) if one of the ncval binaries does not
  exist.
  """
  parser = optparse.OptionParser(usage='%prog [options] xmlfile')

  parser.add_option('--bitness',
                    type=int,
                    help='The subarchitecture: 32 or 64')
  parser.add_option('--gas',
                    help='Path to GNU AS executable')
  parser.add_option('--objdump',
                    help='Path to objdump executable')
  parser.add_option('--validator_dll',
                    help='Path to librdfa_validator_dll')
  parser.add_option('--ncval32',
                    help='Path to old 32-bit ncval')
  parser.add_option('--ncval64',
                    help='Path to old 64-bit ncval')
  parser.add_option('--errors',
                    help='Where to save errors')

  options, args = parser.parse_args()

  if options.bitness not in [32, 64]:
    # The message names the option that is actually defined above.
    parser.error('specify --bitness 32 or --bitness 64')

  if not (options.gas and options.objdump and options.validator_dll):
    parser.error('specify path to gas, objdump, and validator_dll')

  if not (options.ncval32 and options.ncval64):
    parser.error('specify path to old validator (32-bit and 64-bit versions)')

  if not options.errors:
    parser.error('specify file to save errors to')

  if not os.path.exists(options.ncval32):
    print('%s not found (try ./scons ncval platform=x86-32)' % options.ncval32)
    sys.exit(1)
  if not os.path.exists(options.ncval64):
    print('%s not found (try ./scons ncval platform=x86-64)' % options.ncval64)
    sys.exit(1)

  if len(args) != 1:
    parser.error('specify one xml file')

  (xml_file,) = args

  return options, xml_file
| 424 | |
| 425 | |
# Options are parsed at import time and kept as module globals; the functions
# above read 'options', 'dfa' and 'validator_inst' directly.
options, xml_file = ParseOptions()
# The DFA is parsed here, at module level, to share the state graph between
# workers spawned by multiprocessing. Passing it with every task is slow.
dfa = dfa_parser.ParseXml(xml_file)

# Validator instance backed by the library given with --validator_dll.
validator_inst = validator.Validator(validator_dll=options.validator_dll)
| 432 | |
| 433 | |
def main():
  """Check every regular instruction of the DFA and write the error report.

  The DFA is split into traversal tasks which are processed by a pool of
  worker processes (see Worker). Progress and totals are printed, and the
  instructions accepted by the new validator but rejected by the old one are
  written, sorted, to the file given by options.errors.
  """
  assert dfa.initial_state.is_accepting
  assert not dfa.initial_state.any_byte

  print('%d states' % len(dfa.states))

  num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

  # We can't just write 'num_suffixes[dfa.initial_state]' because
  # initial state is accepting.
  total_instructions = sum(
      num_suffixes[t.to_state]
      for t in dfa.initial_state.forward_transitions.values())
  print('%s regular instructions total' % total_instructions)

  tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)
  print('%d tasks' % len(tasks))

  total = 0
  num_valid = 0
  errors = []

  pool = multiprocessing.Pool()
  try:
    for prefix, count, valid_count, more_errors in pool.imap(Worker, tasks):
      print(', '.join(map(hex, prefix)))
      total += count
      num_valid += valid_count
      errors += more_errors
  finally:
    # On success all tasks are finished by now; on failure there is no point
    # in letting the remaining tasks run. Either way, do not leave worker
    # processes behind.
    pool.terminate()
    pool.join()

  print('%s instructions were processed' % total)
  print('%s valid instructions' % num_valid)

  print('%d errors' % len(errors))

  errors.sort()
  with open(options.errors, 'w') as errors_file:
    errors_file.write(
        'Instructions accepted by new validator but rejected by old one:\n')
    for disassembly, instr_bytes in errors:
      hex_bytes = ' '.join('%02x' % byte for byte in instr_bytes)
      errors_file.write('%-50s %s\n' % (disassembly, hex_bytes))
| 476 errors_file.write('%-50s %s\n' % (disassembly, hex_bytes)) | |
| 477 | |
| 478 | |
# main() runs only when the file is executed as a script; the module-level
# setup above runs on import as well.
if __name__ == '__main__':
  main()
| OLD | NEW |