OLD | NEW |
| (Empty) |
1 #!/usr/bin/python | |
2 # Copyright (c) 2012 The Native Client Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 import glob | |
7 import optparse | |
8 import os | |
9 import re | |
10 import struct | |
11 import subprocess | |
12 import sys | |
13 import tempfile | |
14 | |
15 import test_format | |
16 | |
17 | |
18 BUNDLE_SIZE = 32 | |
19 | |
20 | |
21 def AssertEquals(actual, expected): | |
22 if actual != expected: | |
23 raise AssertionError('\nEXPECTED:\n"""\n%s"""\n\nACTUAL:\n"""\n%s"""' | |
24 % (expected, actual)) | |
25 | |
26 | |
27 def ParseHex(hex_content): | |
28   """Parse the content of an @hex section and yield chunks of binary data. | |
29 | |
30 Args: | |
31 hex_content: Content of @hex section as a string. | |
32 | |
33 Yields: | |
34     Chunks of binary data corresponding to lines of the given @hex section (as | |
35     strings). If a line ends with r'\\', the chunk continues on the following line. | |
36 """ | |
37 | |
38 bytes = [] | |
39 for line in hex_content.split('\n'): | |
40 line, sep, comment = line.partition('#') | |
41 line = line.strip() | |
42 if line == '': | |
43 continue | |
44 | |
45 if line.endswith(r'\\'): | |
46 line = line[:-2] | |
47 continuation = True | |
48 else: | |
49 continuation = False | |
50 | |
51 for byte in line.split(): | |
52 assert len(byte) == 2 | |
53 bytes.append(chr(int(byte, 16))) | |
54 | |
55 if not continuation: | |
56 assert len(bytes) > 0 | |
57 yield ''.join(bytes) | |
58 bytes = [] | |
59 | |
60 assert bytes == [], r'r"\\" should not appear on the last line' | |
61 | |
62 | |
63 def CreateElfContent(bits, text_segment): | |
64 e_ident = { | |
65 32: '\177ELF\1', | |
66 64: '\177ELF\2'}[bits] | |
67 e_machine = { | |
68 32: 3, | |
69 64: 62}[bits] | |
70 | |
71 e_phoff = 256 | |
72 e_phnum = 1 | |
73 e_phentsize = 0 | |
74 | |
75 elf_header_fmt = { | |
76 32: '<16sHHIIIIIHHHHHH', | |
77 64: '<16sHHIQQQIHHHHHH'}[bits] | |
78 | |
79 elf_header = struct.pack( | |
80 elf_header_fmt, | |
81 e_ident, 0, e_machine, 0, 0, e_phoff, 0, 0, 0, | |
82 e_phentsize, e_phnum, 0, 0, 0) | |
83 | |
84 p_type = 1 # PT_LOAD | |
85 p_flags = 5 # r-x | |
86 p_filesz = len(text_segment) | |
87 p_memsz = p_filesz | |
88 p_vaddr = 0 | |
89 p_offset = 512 | |
90 p_align = 0 | |
91 p_paddr = 0 | |
92 | |
93 pheader_fmt = { | |
94 32: '<IIIIIIII', | |
95 64: '<IIQQQQQQ'}[bits] | |
96 | |
97 pheader_fields = { | |
98 32: (p_type, p_offset, p_vaddr, p_paddr, | |
99 p_filesz, p_memsz, p_flags, p_align), | |
100 64: (p_type, p_flags, p_offset, p_vaddr, | |
101 p_paddr, p_filesz, p_memsz, p_align)}[bits] | |
102 | |
103 pheader = struct.pack(pheader_fmt, *pheader_fields) | |
104 | |
105 result = elf_header | |
106 assert len(result) <= e_phoff | |
107 result += '\0' * (e_phoff - len(result)) | |
108 result += pheader | |
109 assert len(result) <= p_offset | |
110 result += '\0' * (p_offset - len(result)) | |
111 result += text_segment | |
112 | |
113 return result | |
114 | |
115 | |
116 def RunRdfaValidator(options, data): | |
117   # Pad with NOPs so the data is a multiple of the bundle size. | |
118 data += (-len(data) % BUNDLE_SIZE) * '\x90' | |
119 assert len(data) % BUNDLE_SIZE == 0 | |
120 | |
121 tmp = tempfile.NamedTemporaryFile( | |
122 prefix='tmp_legacy_validator_', mode='wb', delete=False) | |
123 try: | |
124 tmp.write(CreateElfContent(options.bits, data)) | |
125 tmp.close() | |
126 | |
127 proc = subprocess.Popen([options.rdfaval, tmp.name], | |
128 stdout=subprocess.PIPE, | |
129 stderr=subprocess.PIPE) | |
130 stdout, stderr = proc.communicate() | |
131 assert stderr == '', stderr | |
132 return_code = proc.wait() | |
133 finally: | |
134 tmp.close() | |
135 os.remove(tmp.name) | |
136 | |
137 # Remove the carriage return characters that we get on Windows. | |
138 stdout = stdout.replace('\r', '') | |
139 return return_code, stdout | |
140 | |
141 | |
142 def ParseRdfaMessages(stdout): | |
143 """Get (offset, message) pairs from rdfa validator output. | |
144 | |
145 Args: | |
146 stdout: Output of rdfa validator as string. | |
147 | |
148 Yields: | |
149 Pairs (offset, message). | |
150 """ | |
151 for line in stdout.split('\n'): | |
152 line = line.strip() | |
153 if line == '': | |
154 continue | |
155 if re.match(r"(Valid|Invalid)\.$", line): | |
156 continue | |
157 | |
158 m = re.match(r'([0-9a-f]+): (.*)$', line, re.IGNORECASE) | |
159 assert m is not None, "can't parse line '%s'" % line | |
160 offset = int(m.group(1), 16) | |
161 message = m.group(2) | |
162 | |
163 if not message.startswith('warning - '): | |
164 yield offset, message | |
165 | |
166 | |
167 def RunRdfaWithNopPatching(options, data_chunks): | |
168 r"""Run RDFA validator with NOP patching for better error reporting. | |
169 | |
170 If the RDFA validator encounters an invalid instruction, it resumes validation | |
171 from the beginning of the next bundle, while the original, non-DFA-based | |
172   validators skip just one or two bytes and recover, and plenty of tests contain | |
173   more than one error in a single bundle. To mitigate such spurious | |
174   disagreements, the following procedure is used: when the RDFA validator | |
175   complains that a particular piece can't be decoded, the problematic line in | |
176   the @hex section (which usually corresponds to one instruction) is replaced | |
177   with NOPs and the validator is rerun from the beginning. This process may take | |
178   several iterations (in practice it always seems to converge). All errors | |
179   reported across these runs (minus duplicates) are taken as the validation | |
180   result. In a sense, this trick emulates line-level recovery as opposed to | |
181   bundle-level recovery. In practice it works well, and many spurious errors are | |
182   eliminated. To each error message we add the stage at which it was produced, | |
183   so we can distinguish 'primary' errors from additional ones. | |
184 | |
185   Example: suppose the machine codes DE AD and BE EF correspond to invalid | |
186   instructions. Let's take a look at what happens when we invoke | |
187   RunRdfaWithNopPatching(options, ['\xde\xad', '\xbe\xef']). First the RDFA | |
188   validator is run on the code '\xde\xad\xbe\xef\x90\x90\x90...'. It encounters | |
189   an undecipherable instruction, produces an error message at offset zero and | |
190   stops. Now we replace what is at offset zero ('\xde\xad') with the | |
191   corresponding number of NOPs, and run the RDFA validator again on | |
192   '\x90\x90\xbe\xef\x90\x90\x90...'. This time it decodes the first two NOPs | |
193   successfully and reports a problem at offset 2. In the next iteration of NOP | |
194   patching, BE EF is replaced with 90 90 as well; no decoding errors are | |
195   reported on the next run, so the whole process stops. Finally the combined | |
196   output looks like the following: | |
197 | |
198 0: [0] unrecognized instruction <- produced at stage 0 | |
199 2: [1] unrecognized instruction <- produced at stage 1 | |
200 return code: 1 <- return code at stage 0 | |
201 | |
202 Args: | |
203 options: Options as produced by optparse. | |
204 Relevant fields are .bits and .update. | |
205 data_chunks: List of strings containing binary data. For the described | |
206       heuristic to work better it is desirable (although not absolutely | |
207       required) that the strings correspond to individual instructions, as is | |
208       usually the case in the @hex section. | |
209 | |
210 Returns: | |
211 String representing combined output from all stages. Error messages are | |
212 of the form | |
213 <offset in hex>: [<stage>] <message> | |
214 """ | |
215 | |
216 data_chunks = list(data_chunks) | |
217 | |
218 offset_to_chunk = {} | |
219 offset = 0 | |
220 for i, chunk in enumerate(data_chunks): | |
221 offset_to_chunk[offset] = i | |
222 offset += len(chunk) | |
223 | |
224 first_return_code = None | |
225 messages = [] # list of triples (offset, stage, message) | |
226 messages_set = set() # set of pairs (offset, message) | |
227 stage = 0 | |
228 | |
229 while True: | |
230 return_code, stdout = RunRdfaValidator(options, ''.join(data_chunks)) | |
231 if first_return_code is None: | |
232 first_return_code = return_code | |
233 | |
234 nop_patched = False | |
235 | |
236 for offset, message in ParseRdfaMessages(stdout): | |
237 if (offset, message) in messages_set: | |
238 continue | |
239 messages.append((offset, stage, message)) | |
240 messages_set.add((offset, message)) | |
241 | |
242 if offset in offset_to_chunk and message == 'unrecognized instruction': | |
243 chunk_no = offset_to_chunk[offset] | |
244 nops_chunk = '\x90' * len(data_chunks[chunk_no]) | |
245 if nops_chunk != data_chunks[chunk_no]: | |
246 data_chunks[chunk_no] = nops_chunk | |
247 nop_patched = True | |
248 | |
249 if not nop_patched: | |
250 break | |
251 stage += 1 | |
252 | |
253 messages.sort(key=lambda (offset, stage, _): (offset, stage)) | |
254 | |
255 result = ''.join('%x: [%d] %s\n' % (offset, stage, message) | |
256 for offset, stage, message in messages) | |
257 result += 'return code: %d\n' % first_return_code | |
258 return result | |
259 | |
260 | |
261 def CheckValidJumpTargets(options, data_chunks): | |
262 """ | |
263 Check that the validator infers valid jump targets correctly. | |
264 | |
265 This test checks that the validator identifies instruction boundaries and | |
266   superinstructions correctly. In order to do that, it appends a jump at the end | |
267   of the given code, targeting each byte offset in turn. The jump should be valid | |
268   if and only if it targets a boundary between data chunks. | |
269 | |
270 Note that the same chunks as in RunRdfaWithNopPatching are used, but here they | |
271 play a different role. In RunRdfaWithNopPatching the partitioning into chunks | |
272 is only relevant when the whole snippet is invalid. Here, on the other hand, | |
273 we only care about valid snippets, and we use chunks to mark valid jump | |
274 targets. | |
275 | |
276 Args: | |
277 options: Options as produced by optparse. | |
278 data_chunks: List of strings containing binary data. Each such chunk is | |
279       expected to correspond to an indivisible instruction or superinstruction. | |
280 | |
281 Returns: | |
282 None. | |
283 """ | |
284 data = ''.join(data_chunks) | |
285   # Pad with NOPs so the data is a multiple of the bundle size. | |
286 data += (-len(data) % BUNDLE_SIZE) * '\x90' | |
287 assert len(data) % BUNDLE_SIZE == 0 | |
288 | |
289   # Since we check the validity of a jump target by appending a jump and | |
290   # validating the resulting code, we rely on the original snippet being valid. | |
291 return_code, _ = RunRdfaValidator(options, data) | |
292 assert return_code == 0, 'Can only validate jump targets on valid snippet' | |
293 | |
294 valid_jump_targets = set() | |
295 pos = 0 | |
296 for data_chunk in data_chunks: | |
297 valid_jump_targets.add(pos) | |
298 pos += len(data_chunk) | |
299 valid_jump_targets.add(pos) | |
300 | |
301 for i in range(pos + 1): | |
302 # Encode JMP with 32-bit relative target. | |
303 jump = '\xe9' + struct.pack('<i', i - (len(data) + 5)) | |
304 return_code, _ = RunRdfaValidator(options, data + jump) | |
305 if return_code == 0: | |
306 assert i in valid_jump_targets, ( | |
307 'Offset 0x%x was reported valid jump target' % i) | |
308 else: | |
309 assert i not in valid_jump_targets, ( | |
310 'Offset 0x%x was reported invalid jump target' % i) | |
311 | |
312 | |
313 def Test(options, items_list): | |
314 info = dict(items_list) | |
315 | |
316 if 'rdfa_output' in info: | |
317 data_chunks = list(ParseHex(info['hex'])) | |
318 stdout = RunRdfaWithNopPatching(options, data_chunks) | |
319 print ' Checking rdfa_output field...' | |
320 if options.update: | |
321 if stdout != info['rdfa_output']: | |
322 print ' Updating rdfa_output field...' | |
323 info['rdfa_output'] = stdout | |
324 else: | |
325 AssertEquals(stdout, info['rdfa_output']) | |
326 | |
327 last_line = re.search('return code: (-?\d+)\n$', info['rdfa_output']) | |
328 expected_return_code = int(last_line.group(1)) | |
329 | |
330 # This test only works for valid snippets, see CheckValidJumpTargets | |
331 # for details. | |
332 if expected_return_code == 0: | |
333 print ' Checking jump targets...' | |
334 CheckValidJumpTargets(options, data_chunks) | |
335 | |
336 # Update field values, but preserve their order. | |
337 items_list = [(field, info[field]) for field, _ in items_list] | |
338 | |
339 return items_list | |
340 | |
341 | |
342 def main(args): | |
343 parser = optparse.OptionParser() | |
344 parser.add_option('--rdfaval', default='validator_test', | |
345 help='Path to the ncval validator executable') | |
346 parser.add_option('--bits', | |
347 type=int, | |
348 help='The subarchitecture to run tests against: 32 or 64') | |
349 parser.add_option('--update', | |
350 default=False, | |
351 action='store_true', | |
352 help='Regenerate golden fields instead of testing') | |
353 | |
354 options, args = parser.parse_args(args) | |
355 | |
356 if options.bits not in [32, 64]: | |
357 parser.error('specify --bits 32 or --bits 64') | |
358 | |
359 if len(args) == 0: | |
360 parser.error('No test files specified') | |
361 processed = 0 | |
362 for glob_expr in args: | |
363 test_files = sorted(glob.glob(glob_expr)) | |
364 if len(test_files) == 0: | |
365 raise AssertionError( | |
366 '%r matched no files, which was probably not intended' % glob_expr) | |
367 for test_file in test_files: | |
368 print 'Testing %s...' % test_file | |
369 tests = test_format.LoadTestFile(test_file) | |
370 tests = [Test(options, test) for test in tests] | |
371 if options.update: | |
372 test_format.SaveTestFile(tests, test_file) | |
373 processed += 1 | |
374 print '%s test files were processed.' % processed | |
375 | |
376 | |
377 if __name__ == '__main__': | |
378 main(sys.argv[1:]) | |
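
The @hex format consumed by ParseHex (line-oriented hex bytes, '#' comments, and a trailing '\\' pair that joins a line with the next one) can be illustrated with a small standalone sketch. The sample section and the helper name below are hypothetical, and the sketch follows the module's Python 2 string handling rather than importing the module itself:

  def ParseHexSketch(hex_content):
    # One chunk of bytes per logical line; '#' starts a comment, and a line
    # whose stripped form ends with two backslashes is joined with the next.
    chunk = []
    for line in hex_content.split('\n'):
      line = line.partition('#')[0].strip()
      if not line:
        continue
      continued = line.endswith('\\\\')
      if continued:
        line = line[:-2]
      chunk.extend(chr(int(byte, 16)) for byte in line.split())
      if not continued and chunk:
        yield ''.join(chunk)
        chunk = []

  sample = '90  # nop\nde \\\\\nad  # continuation: DE AD form a single chunk'
  assert list(ParseHexSketch(sample)) == ['\x90', '\xde\xad']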
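
CreateElfContent packs a minimal ELF file header and a single PT_LOAD program header around the text segment. As a cross-check on the format strings it uses: with the '<' prefix struct inserts no padding, so the packed sizes should equal the standard header sizes (52-byte Elf32_Ehdr, 64-byte Elf64_Ehdr, 32-byte Elf32_Phdr, 56-byte Elf64_Phdr). A quick sketch, reusing only the format strings from the listing:

  import struct

  # ELF file header format strings copied from CreateElfContent.
  elf_header_fmt = {32: '<16sHHIIIIIHHHHHH',
                    64: '<16sHHIQQQIHHHHHH'}
  assert struct.calcsize(elf_header_fmt[32]) == 52  # sizeof(Elf32_Ehdr)
  assert struct.calcsize(elf_header_fmt[64]) == 64  # sizeof(Elf64_Ehdr)

  # Program header format strings, likewise.
  pheader_fmt = {32: '<IIIIIIII', 64: '<IIQQQQQQ'}
  assert struct.calcsize(pheader_fmt[32]) == 32  # sizeof(Elf32_Phdr)
  assert struct.calcsize(pheader_fmt[64]) == 56  # sizeof(Elf64_Phdr)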
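
Both RunRdfaValidator and CheckValidJumpTargets pad the code with (-len(data) % BUNDLE_SIZE) NOP bytes. Since Python's modulo with a positive modulus is always non-negative, this expression is exactly the number of bytes needed to reach the next 32-byte bundle boundary (zero if the data is already aligned). A quick check:

  BUNDLE_SIZE = 32

  for length in (0, 1, 31, 32, 33, 45):
    pad = -length % BUNDLE_SIZE
    assert 0 <= pad < BUNDLE_SIZE
    assert (length + pad) % BUNDLE_SIZE == 0
  # For example, -45 % 32 == 19, so a 45-byte snippet gets 19 NOPs of padding.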
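
The probe instruction in CheckValidJumpTargets is a near JMP with a 32-bit relative displacement. The displacement is measured from the end of the 5-byte instruction, which is appended at offset len(data), hence the len(data) + 5 bias in the encoding. A small sketch of the same arithmetic (the helper name is hypothetical, and byte strings follow the module's Python 2 conventions):

  import struct

  def EncodeJmpRel32(target_offset, code_size):
    # 0xe9 is the near JMP opcode; its rel32 operand is relative to the
    # address just past the 5-byte instruction, i.e. code_size + 5.
    return '\xe9' + struct.pack('<i', target_offset - (code_size + 5))

  # Jumping back to offset 0 from the end of one 32-byte bundle gives a
  # displacement of -(32 + 5) = -37.
  assert EncodeJmpRel32(0, 32) == '\xe9\xdb\xff\xff\xff'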