OLD | NEW |
| (Empty) |
1 #!/usr/bin/python | |
2 # Copyright (c) 2013 The Native Client Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """ | |
Generate all acceptable regular instructions by traversing the validator DFA,
then run objdump, the new validator and the old validator on them.
9 """ | |
10 # TODO(shcherbina): get rid of this test once text-based specification is | |
11 # complete (https://code.google.com/p/nativeclient/issues/detail?id=3453). | |
12 | |
13 import itertools | |
14 import multiprocessing | |
15 import optparse | |
16 import os | |
17 import re | |
18 import subprocess | |
19 import sys | |
20 import tempfile | |
21 import traceback | |
22 | |
23 import dfa_parser | |
24 import dfa_traversal | |
25 import objdump_parser | |
26 import validator | |
27 | |
28 | |
# Single-byte opcodes that get special treatment in this script.
FWAIT = 0x9b  # fwait: a nop is inserted after it to keep objdump output sane
NOP = 0x90    # nop: used to pad bundles and as the separator after fwait
31 | |
32 | |
def IsRexPrefix(byte):
    """Return True if byte lies in the REX prefix range 0x40..0x4f."""
    return byte >= 0x40 and byte < 0x50
35 | |
36 | |
def Cached(f):
    """Decorator that memoizes f on its positional arguments.

    The arguments must be hashable. Each distinct argument tuple is computed
    once; later calls return the very same stored object (not a copy).
    """
    results = {}

    def CachedF(*args):
        # *args is already a tuple, so it can be used as the key directly.
        if args in results:
            return results[args]
        value = f(*args)
        results[args] = value
        return value

    return CachedF
45 | |
46 | |
class AssemblerError(Exception):
    """Raised when the assembler exits with a non-zero status."""
49 | |
50 | |
@Cached
def Assemble(bitness, asm):
    """Assemble a snippet with GNU as and return the bytes it produced.

    Args:
      bitness: 32 or 64; passed to the assembler as --32 / --64.
      asm: assembly source text.

    Returns:
      List of byte values (ints) generated for asm. The result is memoized
      by Cached, so callers must not modify the returned list in place.

    Raises:
      AssemblerError: the assembler exited with a non-zero status.
    """
    # Instead of parsing object files properly, two distinct sequences,
    # begin_mark and end_mark, are emitted around the code of interest.
    # The possibility that they occur somewhere else in the file is neglected.
    begin_mark = 'begin mark>>>'
    end_mark = '<<<end mark'

    lines = ['.byte %d' % ord(c) for c in begin_mark]
    lines.append(asm)
    lines.extend('.byte %d' % ord(c) for c in end_mark)
    asm_content = ''.join(line + '\n' for line in lines)

    # Created before the try block: if creation itself fails there is nothing
    # to clean up, and the finally clause must not touch an unbound name
    # (which would replace the real error with a NameError).
    obj_file = tempfile.NamedTemporaryFile(
        mode='w+b',
        suffix='.o',
        delete=False)
    try:
        proc = subprocess.Popen(
            [options.gas,
             '--%s' % bitness,
             '-o', obj_file.name],
            stdin=subprocess.PIPE)

        # communicate() feeds stdin and waits for the process to finish.
        proc.communicate(asm_content)
        if proc.returncode != 0:
            raise AssemblerError("Can't assemble '%s'" % asm)

        data = obj_file.read()
    finally:
        # Close on every path so a failing assembler does not leak the handle.
        obj_file.close()
        os.remove(obj_file.name)

    # Extract the data between begin_mark and end_mark.
    begin = data.find(begin_mark)
    assert begin != -1, 'begin_mark is missing'
    begin += len(begin_mark)
    end = data.find(end_mark, begin)
    assert end != -1, 'end_mark is missing'
    return [ord(c) for c in data[begin:end]]
96 | |
97 | |
class OldValidator(object):
    """Runs queued bundles through the old validator (ncval) in batches.

    Every bundle the old validator rejects has its comment recorded; the
    recorded comments are available through GetErrors().
    """

    # Number of queued bundles that triggers an ncval run.
    _BUNDLES_PER_RUN = 40

    def __init__(self):
        self._bundles = []  # pending (bundle, comment) pairs
        self._errors = []   # comments of the bundles ncval rejected

    def Validate(self, bundle, comment):
        """Queue a bundle and run ncval once the queue is full.

        Args:
          bundle: list of byte values. Rejections are mapped back to queue
              positions by dividing the error offset by validator.BUNDLE_SIZE,
              so every bundle has to be exactly that long.
          comment: opaque value stored in the error list if ncval rejects
              this bundle.
        """
        self._bundles.append((bundle, comment))

        if len(self._bundles) == self._BUNDLES_PER_RUN:
            self._Process()

    def _Process(self):
        """Run ncval on all queued bundles, record rejections, empty the queue."""
        code = []
        for bundle, _ in self._bundles:
            code.extend(bundle)
        hex_content = ' '.join('%02x' % byte for byte in code)

        assert len(hex_content) < 4096

        ncval = {32: options.ncval32, 64: options.ncval64}[options.bitness]
        proc = subprocess.Popen(
            [ncval, '--hex_text=-', '--max_errors=-1'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE)

        # stderr is not piped, so the second element is always None; it is
        # kept only so the assertion message has the same shape as before.
        stdout, stderr = proc.communicate(hex_content)
        assert proc.returncode == 0, (stdout, stderr)

        if '*** <input> is safe ***' not in stdout:
            assert '*** <input> IS UNSAFE ***' in stdout
            for index in self._RejectedBundles(stdout, hex_content):
                _, comment = self._bundles[index]
                self._errors.append(comment)

        self._bundles = []

    def _RejectedBundles(self, stdout, hex_content):
        """Return sorted queue indices of the bundles ncval complained about."""
        ignorable_lines = (
            '',
            '*** <input> IS UNSAFE ***',
            'Some instructions were replaced with HLTs.')

        rejected_bundles = set()
        for line in stdout.split('\n'):
            line = line.strip()
            if line in ignorable_lines:
                continue
            if line.startswith(
                    'VALIDATOR: Checking block alignment and jump targets'):
                continue
            m = re.match(r'VALIDATOR: ([0-9a-f]+): (.*)$', line, re.IGNORECASE)
            assert m is not None, (line, hex_content)
            error_offset = int(m.group(1), 16)
            rejected_bundles.add(error_offset // validator.BUNDLE_SIZE)

        # ncval said "unsafe", so at least one error line must have been seen.
        assert rejected_bundles
        return sorted(rejected_bundles)

    def GetErrors(self):
        """Flush any bundles still queued and return the list of errors."""
        if self._bundles:
            self._Process()
        return self._errors
160 | |
161 | |
def CheckFinalRestrictedRegister(
        sandboxing,
        instruction,
        disassembly,
        old_validator):
    """Verify the restricted register the validator reports after instruction.

    The bundle sandboxing + instruction (padded with nops) is validated and
    the restricted register reported for the instruction is extracted. If
    there is one, a memory access based on that register is appended, the
    new validator is asserted to accept the result, and the same bundle is
    queued for the old validator.

    Args:
      sandboxing: list of bytes placed before the instruction (may be empty).
      instruction: list of bytes of the instruction under test.
      disassembly: text describing the bundle, used in error reports.
      old_validator: object whose Validate(bundle, comment) receives the
          extended bundle.
    """
    def PadWithNops(code):
        assert len(code) <= validator.BUNDLE_SIZE
        return code + [NOP] * (validator.BUNDLE_SIZE - len(code))

    instruction_begin = len(sandboxing)
    instruction_end = instruction_begin + len(instruction)
    bundle = PadWithNops(sandboxing + instruction)

    # One-element list so that the nested callback can store into it.
    reported = [None]

    def Callback(begin, end, info):
        if begin == instruction_begin:
            assert end == instruction_end
            reported[0] = (
                (info & validator.RESTRICTED_REGISTER_MASK) >>
                validator.RESTRICTED_REGISTER_SHIFT)
        elif begin > instruction_begin:
            # Everything after the instruction has to be nop padding.
            assert bundle[begin:end] == [NOP]

    result = validator_inst.ValidateChunk(
        ''.join(map(chr, bundle)),
        bitness=options.bitness,
        callback=Callback,
        on_each_instruction=True)
    assert result, (disassembly, map(hex, bundle))

    final_restricted_register = reported[0]
    if final_restricted_register == validator.NO_REG:
        final_restricted_register = None

    assert final_restricted_register != validator.REG_R15, (
        'restricted register can not be r15')

    if final_restricted_register is None:
        return

    # Use the restricted register as an index in a memory access and make
    # sure both validators accept the combination.
    register_name = validator.REGISTER_NAMES[final_restricted_register]
    memory_reference = 'mov (%%r15, %s), %%al' % register_name
    extended_bundle = PadWithNops(
        sandboxing + instruction + Assemble(64, memory_reference))

    assert validator_inst.ValidateChunk(
        ''.join(map(chr, extended_bundle)),
        bitness=options.bitness), (
            extended_bundle, disassembly, memory_reference)

    old_validator.Validate(
        extended_bundle,
        (disassembly + '; ' + memory_reference, instruction))
210 | |
211 | |
def ValidateInstruction(
        instruction,
        disassembly,
        old_validator):
    """Run one instruction through the new validator and queue it for the old.

    Args:
      instruction: list of bytes of the instruction.
      disassembly: objdump text for the instruction, used in error reports.
      old_validator: receives every bundle the new validator accepted.

    Returns:
      Truthy if the new validator accepts the instruction on its own or, in
      the non-32-bit case, with at least one register (other than r15)
      restricted.
    """
    assert len(instruction) <= validator.BUNDLE_SIZE
    bundle = instruction + [NOP] * (validator.BUNDLE_SIZE - len(instruction))
    chunk = ''.join(map(chr, bundle))

    result = validator_inst.ValidateChunk(chunk, bitness=options.bitness)
    if result:
        old_validator.Validate(bundle, (disassembly, instruction))

    if options.bitness == 32:
        return result

    if result:
        CheckFinalRestrictedRegister(
            [], instruction, disassembly, old_validator)

    # Additionally, we try to restrict all possible
    # registers and check whether instruction would be accepted.
    for register, register_name in validator.REGISTER_NAMES.items():
        if register == validator.REG_R15:
            continue

        accepted = validator_inst.ValidateChunk(
            chunk,
            bitness=options.bitness,
            restricted_register=register)
        if not accepted:
            continue

        # Name of the 32-bit part of the register:
        #   %r8 -> %r8d
        #   %rax -> %eax
        if re.match(r'%r\d+$', register_name):
            register_name32 = register_name + 'd'
        else:
            assert register_name.startswith('%r')
            register_name32 = '%e' + register_name[2:]

        sandboxing = 'mov %%eax, %s' % register_name32
        CheckFinalRestrictedRegister(
            Assemble(64, sandboxing),
            instruction,
            sandboxing + '; ' + disassembly,
            old_validator)
        result = True

    return result
264 | |
265 | |
class WorkerState(object):
    """Collects the instructions of one traversal task and checks them.

    Attributes:
      total_instructions: number of instructions checked so far.
      num_valid: sum of the ValidateInstruction results for them.
      errors: errors reported by the old validator so far.
    """

    # Pending instructions are checked as soon as this many have accumulated.
    _MAX_PENDING = 1000000

    def __init__(self, prefix):
        self.total_instructions = 0
        self.num_valid = 0
        self._file_prefix = 'check_validator_%s_' % '_'.join(map(hex, prefix))
        self._instructions = []
        self.errors = []

    def ReceiveInstruction(self, bytes):
        """Queue one instruction (a list of byte values) for checking."""
        self._instructions.append(bytes)

        # Objdump prints crazy stuff when x87 instructions are prefixed with
        # fwait (especially when REX prefixes are involved). To avoid that,
        # we insert nops after each fwait.
        if (bytes == [FWAIT] or
                len(bytes) == 2 and IsRexPrefix(bytes[0]) and
                bytes[1] == FWAIT):
            self._instructions.append([NOP])

        if len(self._instructions) >= self._MAX_PENDING:
            self.CheckReceivedInstructions()

    def CheckReceivedInstructions(self):
        """Check the instructions accumulated so far and clear the list.

        The instructions are written to a temporary file and disassembled
        with objdump; each one is compared with objdump's bytes and passed
        to ValidateInstruction. Errors from the old validator are printed
        and appended to self.errors. The list is cleared only after a
        successful run.
        """
        if not self._instructions:
            return

        # Created before the try block so that the cleanup below never refers
        # to an unbound name when the file cannot be created.
        raw_file = tempfile.NamedTemporaryFile(
            mode='wb',
            prefix=self._file_prefix,
            suffix='.o',
            delete=False)
        try:
            try:
                for instr in self._instructions:
                    raw_file.write(''.join(map(chr, instr)))
            finally:
                raw_file.close()

            objdump_proc = subprocess.Popen(
                [options.objdump,
                 '--disassemble-all', '--disassemble-zeroes',
                 '-b', 'binary',
                 '-m', 'i386'] +
                {32: [], 64: ['-M', 'x86-64']}[options.bitness] +
                ['--insn-width', '15',
                 raw_file.name],
                stdout=subprocess.PIPE)

            objdump_iter = iter(objdump_parser.SkipHeader(objdump_proc.stdout))

            old_validator = OldValidator()
            for instr in self._instructions:
                # Objdump prints fwait with REX prefix in this ridiculous way:
                #   0: 41    fwait
                #   1: 9b    fwait
                # So in such cases we expect two lines from objdump.
                # TODO(shcherbina): get rid of this special handling once
                # https://code.google.com/p/nativeclient/issues/detail?id=3496
                # is fixed.
                if (len(instr) == 2 and IsRexPrefix(instr[0]) and
                        instr[1] == FWAIT):
                    expected_lines = 2
                else:
                    expected_lines = 1

                decoded = []
                for _ in range(expected_lines):
                    line = next(objdump_iter)
                    # Parse tab-separated line of the form
                    # 0:	f2 40 0f 10 00	rex movsd (%rax),%xmm0
                    _addr, more_bytes, disassembly = line.strip().split('\t')
                    decoded += [int(b, 16) for b in more_bytes.split()]

                assert decoded == instr, (map(hex, decoded), map(hex, instr))
                self.total_instructions += 1

                # When two lines were read, disassembly is the last one's text.
                self.num_valid += ValidateInstruction(
                    instr, disassembly, old_validator)

            # Make sure we read objdump output to the end.
            end = next(objdump_iter, None)
            assert end is None, end

            return_code = objdump_proc.wait()
            assert return_code == 0
        finally:
            os.remove(raw_file.name)

        errors = old_validator.GetErrors()
        for error in errors:
            print(error)
        self.errors += errors

        # Everything queued has been processed; start a fresh batch.
        self._instructions = []
355 | |
356 | |
def Worker(task):
    """Traverse one DFA subtree and check every instruction found in it.

    Args:
      task: (prefix, state_index) pair; prefix is the list of bytes leading
          to the state and state_index is its index in dfa.states.

    Returns:
      Tuple (prefix, total_instructions, num_valid, errors) taken from the
      WorkerState used for this task.
    """
    # Unpacked here instead of in the signature: tuple parameters were removed
    # from the language by PEP 3113, and the call convention stays the same.
    prefix, state_index = task
    worker_state = WorkerState(prefix)

    try:
        dfa_traversal.TraverseTree(
            dfa.states[state_index],
            final_callback=worker_state.ReceiveInstruction,
            prefix=prefix,
            anyfield=0)
        worker_state.CheckReceivedInstructions()
    except Exception:
        traceback.print_exc()  # because multiprocessing imap swallows traceback
        raise

    return (
        prefix,
        worker_state.total_instructions,
        worker_state.num_valid,
        worker_state.errors)
376 | |
377 | |
def ParseOptions():
    """Parse and validate the command line.

    Returns:
      (options, xml_file): the optparse values object and the single
      positional argument (path of the DFA xml file).

    Exits through parser.error() when an option is missing or invalid, and
    with status 1 when one of the ncval binaries does not exist.
    """
    parser = optparse.OptionParser(usage='%prog [options] xmlfile')

    parser.add_option('--bitness',
                      type=int,
                      help='The subarchitecture: 32 or 64')
    parser.add_option('--gas',
                      help='Path to GNU AS executable')
    parser.add_option('--objdump',
                      help='Path to objdump executable')
    parser.add_option('--validator_dll',
                      help='Path to librdfa_validator_dll')
    parser.add_option('--ncval32',
                      help='Path to old 32-bit ncval')
    parser.add_option('--ncval64',
                      help='Path to old 64-bit ncval')
    parser.add_option('--errors',
                      help='Where to save errors')

    options, args = parser.parse_args()

    if options.bitness not in [32, 64]:
        # The option is spelled --bitness; there is no -b short form.
        parser.error('specify --bitness 32 or --bitness 64')

    if not (options.gas and options.objdump and options.validator_dll):
        parser.error('specify path to gas, objdump, and validator_dll')

    if not (options.ncval32 and options.ncval64):
        parser.error(
            'specify path to old validator (32-bit and 64-bit versions)')

    if not options.errors:
        parser.error('specify file to save errors to')

    for ncval, platform in [(options.ncval32, 'x86-32'),
                            (options.ncval64, 'x86-64')]:
        if not os.path.exists(ncval):
            print('%s not found (try ./scons ncval platform=%s)'
                  % (ncval, platform))
            sys.exit(1)

    if len(args) != 1:
        parser.error('specify one xml file')

    (xml_file,) = args

    return options, xml_file
424 | |
425 | |
# Module-level state. It is set up here, at import time, to share the state
# graph between the workers spawned by multiprocessing. Passing it with every
# task is slow.
options, xml_file = ParseOptions()
dfa = dfa_parser.ParseXml(xml_file)

validator_inst = validator.Validator(validator_dll=options.validator_dll)
432 | |
433 | |
def main():
    """Check all regular instructions of the DFA and write the error report.

    The traversal is split into tasks that run in a multiprocessing pool.
    Progress and totals are printed, and the instructions accepted by the
    new validator but rejected by the old one are written to options.errors.
    """
    assert dfa.initial_state.is_accepting
    assert not dfa.initial_state.any_byte

    print('%s states' % len(dfa.states))

    num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

    # We can't just write 'num_suffixes[dfa.initial_state]' because
    # initial state is accepting.
    total_instructions = sum(
        num_suffixes[t.to_state]
        for t in dfa.initial_state.forward_transitions.values())
    print('%s regular instructions total' % total_instructions)

    tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)
    print('%s tasks' % len(tasks))

    total = 0
    num_valid = 0
    errors = []

    pool = multiprocessing.Pool()
    try:
        for prefix, count, valid_count, more_errors in pool.imap(Worker, tasks):
            print(', '.join(map(hex, prefix)))
            total += count
            num_valid += valid_count
            errors += more_errors
    finally:
        # Release the worker processes on every path. After a successful loop
        # all results have been consumed, so nothing is cut short; after a
        # failure the remaining tasks are abandoned.
        pool.terminate()
        pool.join()

    print('%s instructions were processed' % total)
    print('%s valid instructions' % num_valid)

    print('%s errors' % len(errors))

    errors.sort()
    with open(options.errors, 'w') as errors_file:
        errors_file.write(
            'Instructions accepted by new validator but rejected by old one:\n')
        for disassembly, instruction in errors:
            hex_bytes = ' '.join('%02x' % byte for byte in instruction)
            errors_file.write('%-50s %s\n' % (disassembly, hex_bytes))


if __name__ == '__main__':
    main()
OLD | NEW |