Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: src/trusted/validator_x86/testscripts/run_rdfa_validator_tests.py

Issue 625923004: Delete old x86 validator. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client
Patch Set: rebase master Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright (c) 2012 The Native Client Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 import glob
7 import optparse
8 import os
9 import re
10 import struct
11 import subprocess
12 import sys
13 import tempfile
14
15 import test_format
16
17
# Bundle size in bytes: code handed to the validator is NOP-padded so that its
# length is a multiple of this value.
BUNDLE_SIZE = 32
19
20
def AssertEquals(actual, expected):
  """Raise AssertionError quoting both values when actual != expected.

  Args:
    actual: Value that was produced.
    expected: Value that was wanted.

  Raises:
    AssertionError: If the two values differ; the message shows the expected
        value first and the actual value second.
  """
  if actual != expected:
    template = '\nEXPECTED:\n"""\n%s"""\n\nACTUAL:\n"""\n%s"""'
    raise AssertionError(template % (expected, actual))
25
26
def ParseHex(hex_content):
  """Decode the text of an @hex section into chunks of binary data.

  Everything from '#' to the end of a line is ignored, as are lines that are
  empty after stripping. Each remaining line is a whitespace-separated list
  of two-digit hexadecimal bytes. A line that ends with two backslash
  characters is merged with the following line(s) into a single chunk.

  Args:
    hex_content: Content of @hex section as a string.

  Yields:
    One string per (possibly continued) line, made of the decoded bytes.
  """
  pending = []
  for raw_line in hex_content.split('\n'):
    code = raw_line.partition('#')[0].strip()
    if not code:
      continue

    continued = code.endswith(r'\\')
    if continued:
      # Drop the two-character continuation marker.
      code = code[:-2]

    for token in code.split():
      assert len(token) == 2
      pending.append(chr(int(token, 16)))

    if continued:
      # Keep accumulating; the chunk is finished by a later line.
      continue

    assert len(pending) > 0
    yield ''.join(pending)
    pending = []

  assert pending == [], r'r"\\" should not appear on the last line'
61
62
def CreateElfContent(bits, text_segment):
  """Build a minimal ELF image containing a single executable segment.

  The image consists of an ELF header, one PT_LOAD (r-x) program header at
  file offset 256 and the segment contents at file offset 512; the gaps in
  between are zero-filled. Every header field not set explicitly below is
  written as zero.

  All literals are byte strings so that the binary data is never mixed with
  text; on Python 2 (2.6+) this is identical to using plain str literals.

  Args:
    bits: 32 or 64; selects the ELF class, machine type and field layout.
    text_segment: Byte string with the contents of the segment.

  Returns:
    Byte string with the complete content of the ELF file.

  Raises:
    KeyError: If bits is neither 32 nor 64.
  """
  # Magic number followed by EI_CLASS (1 = ELFCLASS32, 2 = ELFCLASS64);
  # struct pads the rest of the 16-byte e_ident with zeros.
  e_ident = {
      32: b'\177ELF\1',
      64: b'\177ELF\2'}[bits]
  # 3 = EM_386, 62 = EM_X86_64.
  e_machine = {
      32: 3,
      64: 62}[bits]

  e_phoff = 256
  e_phnum = 1
  e_phentsize = 0

  # e_ident, e_type, e_machine, e_version, e_entry, e_phoff, e_shoff,
  # e_flags, e_ehsize, e_phentsize, e_phnum, e_shentsize, e_shnum,
  # e_shstrndx.
  elf_header_fmt = {
      32: '<16sHHIIIIIHHHHHH',
      64: '<16sHHIQQQIHHHHHH'}[bits]

  elf_header = struct.pack(
      elf_header_fmt,
      e_ident, 0, e_machine, 0, 0, e_phoff, 0, 0, 0,
      e_phentsize, e_phnum, 0, 0, 0)

  p_type = 1  # PT_LOAD
  p_flags = 5  # r-x
  p_filesz = len(text_segment)
  p_memsz = p_filesz
  p_vaddr = 0
  p_offset = 512
  p_align = 0
  p_paddr = 0

  pheader_fmt = {
      32: '<IIIIIIII',
      64: '<IIQQQQQQ'}[bits]

  # The 32-bit and 64-bit program headers order their fields differently
  # (p_flags moves right after p_type in the 64-bit layout).
  pheader_fields = {
      32: (p_type, p_offset, p_vaddr, p_paddr,
           p_filesz, p_memsz, p_flags, p_align),
      64: (p_type, p_flags, p_offset, p_vaddr,
           p_paddr, p_filesz, p_memsz, p_align)}[bits]

  pheader = struct.pack(pheader_fmt, *pheader_fields)

  result = elf_header
  assert len(result) <= e_phoff
  result += b'\0' * (e_phoff - len(result))
  result += pheader
  assert len(result) <= p_offset
  result += b'\0' * (p_offset - len(result))
  result += text_segment

  return result
114
115
def RunRdfaValidator(options, data):
  """Run the RDFA validator executable on a snippet of machine code.

  The snippet is padded with NOPs (0x90) to a whole number of bundles,
  wrapped into an ELF file by CreateElfContent, written to a temporary file
  and passed to the validator as its only argument. The temporary file is
  removed afterwards, whether or not the run succeeded.

  Args:
    options: Options as produced by optparse. Relevant fields are .rdfaval
        (path to the validator) and .bits.
    data: Machine code as a string.

  Returns:
    Pair (return_code, stdout), where stdout has all carriage return
    characters removed.

  Raises:
    AssertionError: If the validator wrote anything to stderr.
  """
  padding = -len(data) % BUNDLE_SIZE
  data = data + '\x90' * padding
  assert len(data) % BUNDLE_SIZE == 0

  elf_file = tempfile.NamedTemporaryFile(
      prefix='tmp_legacy_validator_', mode='wb', delete=False)
  try:
    elf_file.write(CreateElfContent(options.bits, data))
    # Close (and thereby flush) the file before the validator reads it.
    elf_file.close()

    validator = subprocess.Popen(
        [options.rdfaval, elf_file.name],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    stdout, stderr = validator.communicate()
    assert stderr == '', stderr
    return_code = validator.wait()
  finally:
    # Closing twice is harmless; this covers a failure before the first close.
    elf_file.close()
    os.remove(elf_file.name)

  # Windows output contains carriage returns; strip them.
  return return_code, stdout.replace('\r', '')
140
141
142 def ParseRdfaMessages(stdout):
143 """Get (offset, message) pairs from rdfa validator output.
144
145 Args:
146 stdout: Output of rdfa validator as string.
147
148 Yields:
149 Pairs (offset, message).
150 """
151 for line in stdout.split('\n'):
152 line = line.strip()
153 if line == '':
154 continue
155 if re.match(r"(Valid|Invalid)\.$", line):
156 continue
157
158 m = re.match(r'([0-9a-f]+): (.*)$', line, re.IGNORECASE)
159 assert m is not None, "can't parse line '%s'" % line
160 offset = int(m.group(1), 16)
161 message = m.group(2)
162
163 if not message.startswith('warning - '):
164 yield offset, message
165
166
def RunRdfaWithNopPatching(options, data_chunks):
  r"""Run RDFA validator with NOP patching for better error reporting.

  If the RDFA validator encounters an invalid instruction, it resumes
  validation from the beginning of the next bundle, while the original,
  non-DFA-based validators skip maybe one or two bytes and recover. And there
  are plenty of tests where there is more than one error in a single bundle.
  To mitigate such spurious disagreements, the following procedure is used:
  when RDFA complains that a particular piece can't be decoded, the
  problematic line in the @hex section (which usually corresponds to one
  instruction) is replaced with NOPs and the validator is rerun from the
  beginning. This process may take several iterations (it seems it always
  converges in practice). All errors reported on all such runs (sans
  duplicate ones) are taken as the validation result. So, in a sense, this
  trick is to emulate line-level recovery as opposed to bundle-level. In
  practice it turns out ok, and lots of spurious errors are eliminated. To
  each error message we add the stage at which it was produced, so we can
  distinguish 'primary' errors from additional ones.

  Example. Suppose DE AD and BE EF machine codes correspond to invalid
  instructions. Let's take a look at what happens when we invoke
  RunRdfaWithNopPatching(options, ['\de\ad', '\be\ef']). First the RDFA
  validator is run on the code '\de\ad\be\ef\90\90\90...'. It encounters an
  undecipherable instruction, produces an error message at offset zero and
  stops. Now we replace what is at offset zero ('\de\ad') with the
  corresponding amount of NOPs, and run the RDFA validator again on
  '\90\90\be\ef\90\90\90...'. This time it decodes the first two NOPs
  successfully and reports a problem at offset 2. In the next iteration of
  NOP patching BE EF is replaced with 90 90 as well, no decoding errors are
  reported on the next run so the whole process stops. Finally the combined
  output looks like the following:

    0: [0] unrecognized instruction   <- produced at stage 0
    2: [1] unrecognized instruction   <- produced at stage 1
    return code: 1                    <- return code at stage 0

  Args:
    options: Options as produced by optparse. They are only forwarded to
        RunRdfaValidator, which reads .rdfaval and .bits.
    data_chunks: List of strings containing binary data. For the described
        heuristic to work better it is desirable (although not absolutely
        required) that strings correspond to singular instructions, as it
        usually happens in @hex section.

  Returns:
    String representing combined output from all stages. Error messages are
    of the form
      <offset in hex>: [<stage>] <message>
    sorted by offset and then by stage, followed by a final line
      return code: <return code of the first run>
  """

  # Work on a private copy: chunks are overwritten with NOPs below.
  data_chunks = list(data_chunks)

  # Map from the starting offset of each chunk to its index.
  offset_to_chunk = {}
  offset = 0
  for i, chunk in enumerate(data_chunks):
    offset_to_chunk[offset] = i
    offset += len(chunk)

  first_return_code = None
  messages = []  # list of triples (offset, stage, message)
  messages_set = set()  # set of pairs (offset, message)
  stage = 0

  while True:
    return_code, stdout = RunRdfaValidator(options, ''.join(data_chunks))
    if first_return_code is None:
      first_return_code = return_code

    nop_patched = False

    for offset, message in ParseRdfaMessages(stdout):
      if (offset, message) in messages_set:
        continue
      messages.append((offset, stage, message))
      messages_set.add((offset, message))

      if offset in offset_to_chunk and message == 'unrecognized instruction':
        chunk_no = offset_to_chunk[offset]
        nops_chunk = '\x90' * len(data_chunks[chunk_no])
        # A chunk that already consists of NOPs cannot be patched any
        # further; not counting it as a patch lets the loop terminate.
        if nops_chunk != data_chunks[chunk_no]:
          data_chunks[chunk_no] = nops_chunk
          nop_patched = True

    if not nop_patched:
      break
    stage += 1

  # Index-based key instead of a tuple-unpacking lambda parameter, which is
  # Python-2-only syntax.
  messages.sort(key=lambda entry: (entry[0], entry[1]))

  result = ''.join('%x: [%d] %s\n' % entry for entry in messages)
  result += 'return code: %d\n' % first_return_code
  return result
259
260
def CheckValidJumpTargets(options, data_chunks):
  """Check that the validator infers valid jump targets correctly.

  This verifies that the validator identifies instruction boundaries and
  superinstructions correctly. For every byte offset inside the given code, a
  jump to that offset is appended after the (bundle-padded) code and the
  result is validated. The jump must be accepted if and only if its target is
  a boundary between data chunks.

  The chunks are the same as those given to RunRdfaWithNopPatching, but they
  play a different role here: only valid snippets are considered, and the
  chunk boundaries mark the allowed jump targets.

  Args:
    options: Options as produced by optparse.
    data_chunks: List of strings containing binary data. Each such chunk is
        expected to correspond to indivisible instruction or superinstruction.

  Returns:
    None.

  Raises:
    AssertionError: If the snippet itself is not valid, or if the validator's
        verdict for some offset disagrees with the chunk boundaries.
  """
  code = ''.join(data_chunks)
  # Pad with NOPs up to a whole number of bundles.
  code += '\x90' * (-len(code) % BUNDLE_SIZE)
  assert len(code) % BUNDLE_SIZE == 0

  # The probe below appends a jump and validates the whole piece, so the
  # original snippet has to be valid on its own.
  return_code, _ = RunRdfaValidator(options, code)
  assert return_code == 0, 'Can only validate jump targets on valid snippet'

  # Offsets at which a chunk starts, plus the end of the last chunk.
  boundaries = set([0])
  end = 0
  for chunk in data_chunks:
    end += len(chunk)
    boundaries.add(end)

  for target in range(end + 1):
    # JMP rel32: the displacement is relative to the end of the 5-byte jump,
    # which itself starts right after the padded code.
    displacement = target - (len(code) + 5)
    jump = '\xe9' + struct.pack('<i', displacement)
    return_code, _ = RunRdfaValidator(options, code + jump)
    if return_code == 0:
      assert target in boundaries, (
          'Offset 0x%x was reported valid jump target' % target)
    else:
      assert target not in boundaries, (
          'Offset 0x%x was reported invalid jump target' % target)
311
312
def Test(options, items_list):
  """Check, or regenerate, the rdfa_output field of a single test case.

  When the test case has an 'rdfa_output' field, its 'hex' field is decoded
  and validated with RunRdfaWithNopPatching. With options.update set, the
  field is replaced by the fresh output; otherwise the fresh output must
  equal the stored one. If the resulting output records return code 0, the
  jump targets are checked as well. Test cases without an 'rdfa_output'
  field are passed through unchanged.

  Args:
    options: Options as produced by optparse.
    items_list: List of (field, value) pairs describing the test case.

  Returns:
    List of (field, value) pairs with the same fields in the same order as
    items_list, carrying the possibly updated values.

  Raises:
    AssertionError: If the validator output differs from the stored one (when
        not updating) or a jump target check fails.
  """
  info = dict(items_list)

  if 'rdfa_output' in info:
    data_chunks = list(ParseHex(info['hex']))
    stdout = RunRdfaWithNopPatching(options, data_chunks)
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid on Python 3.
    print(' Checking rdfa_output field...')
    if options.update:
      if stdout != info['rdfa_output']:
        print(' Updating rdfa_output field...')
        info['rdfa_output'] = stdout
    else:
      AssertEquals(stdout, info['rdfa_output'])

    # Raw string so that \d is unambiguously a regex escape.
    last_line = re.search(r'return code: (-?\d+)\n$', info['rdfa_output'])
    expected_return_code = int(last_line.group(1))

    # This test only works for valid snippets, see CheckValidJumpTargets
    # for details.
    if expected_return_code == 0:
      print(' Checking jump targets...')
      CheckValidJumpTargets(options, data_chunks)

  # Update field values, but preserve their order.
  items_list = [(field, info[field]) for field, _ in items_list]

  return items_list
340
341
def main(args):
  """Run the RDFA validator tests named on the command line.

  Every positional argument is a glob expression; each matching file is
  loaded with test_format.LoadTestFile, every test in it is run through
  Test(), and with --update the file is written back with
  test_format.SaveTestFile.

  Args:
    args: Command-line arguments, without the program name.

  Raises:
    SystemExit: Via the option parser, if --bits is not 32 or 64 or no test
        files are given.
    AssertionError: If a glob expression matches no files, or a test fails.
  """
  parser = optparse.OptionParser()
  parser.add_option(
      '--rdfaval',
      default='validator_test',
      help='Path to the ncval validator executable')
  parser.add_option(
      '--bits',
      type=int,
      help='The subarchitecture to run tests against: 32 or 64')
  parser.add_option(
      '--update',
      default=False,
      action='store_true',
      help='Regenerate golden fields instead of testing')

  options, patterns = parser.parse_args(args)

  if options.bits not in (32, 64):
    parser.error('specify --bits 32 or --bits 64')

  if not patterns:
    parser.error('No test files specified')

  processed = 0
  for pattern in patterns:
    matched = glob.glob(pattern)
    matched.sort()
    if not matched:
      raise AssertionError(
          '%r matched no files, which was probably not intended' % pattern)
    for test_file in matched:
      print('Testing %s...' % test_file)
      loaded = test_format.LoadTestFile(test_file)
      results = [Test(options, test) for test in loaded]
      if options.update:
        test_format.SaveTestFile(results, test_file)
      processed += 1
  print('%s test files were processed.' % processed)
375
376
# Script entry point: hand the command-line arguments (without the program
# name) to main().
if __name__ == '__main__':
  script_args = sys.argv[1:]
  main(script_args)
OLDNEW
« no previous file with comments | « src/trusted/validator_x86/testscripts/run_old_validator_tests.py ('k') | src/trusted/validator_x86/testscripts/test_format.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698