| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/python | |
| 2 # Copyright (c) 2012 The Native Client Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 import glob | |
| 7 import optparse | |
| 8 import os | |
| 9 import re | |
| 10 import struct | |
| 11 import subprocess | |
| 12 import sys | |
| 13 import tempfile | |
| 14 | |
| 15 import test_format | |
| 16 | |
| 17 | |
# Size in bytes of a code bundle; snippets are NOP-padded to a multiple of
# this before validation (see RunRdfaValidator / CheckValidJumpTargets).
BUNDLE_SIZE = 32
| 19 | |
| 20 | |
def AssertEquals(actual, expected):
  """Raise AssertionError showing both values unless actual == expected."""
  if actual == expected:
    return
  raise AssertionError('\nEXPECTED:\n"""\n%s"""\n\nACTUAL:\n"""\n%s"""'
                       % (expected, actual))
| 25 | |
| 26 | |
def ParseHex(hex_content):
  """Parse content of @hex section and return binary data

  Args:
    hex_content: Content of @hex section as a string.

  Yields:
    Chunks of binary data corresponding to lines of given @hex section (as
    strings). If line ends with r'\\', chunk is continued on the following line.
  """

  # Accumulator for the current chunk.  (Renamed from 'bytes', which
  # shadowed the builtin of the same name.)
  chunk = []
  for line in hex_content.split('\n'):
    # Drop '#'-comments and surrounding whitespace.
    line, sep, comment = line.partition('#')
    line = line.strip()
    if line == '':
      continue

    # A trailing r'\\' marks a chunk that continues on the next line.
    if line.endswith(r'\\'):
      line = line[:-2]
      continuation = True
    else:
      continuation = False

    for byte in line.split():
      assert len(byte) == 2, 'byte %r is not exactly two hex digits' % byte
      chunk.append(chr(int(byte, 16)))

    if not continuation:
      assert len(chunk) > 0
      yield ''.join(chunk)
      chunk = []

  assert chunk == [], r'r"\\" should not appear on the last line'
| 61 | |
| 62 | |
def CreateElfContent(bits, text_segment):
  """Wrap machine code in a minimal 32- or 64-bit ELF image.

  The image has a single executable (r-x) PT_LOAD segment containing
  text_segment, loaded at virtual address 0.

  Args:
    bits: Subarchitecture, 32 or 64.
    text_segment: Binary code as a string.

  Returns:
    Raw bytes of the ELF file as a string.
  """
  # Per-subarchitecture constants; indexing raises KeyError for bad 'bits'.
  ident = {32: '\177ELF\1', 64: '\177ELF\2'}[bits]
  machine = {32: 3, 64: 62}[bits]            # EM_386 / EM_X86_64
  ehdr_fmt = {32: '<16sHHIIIIIHHHHHH', 64: '<16sHHIQQQIHHHHHH'}[bits]
  phdr_fmt = {32: '<IIIIIIII', 64: '<IIQQQQQQ'}[bits]

  phoff = 256           # file offset of the program header table
  segment_offset = 512  # file offset of the loaded code

  # ELF header: one program header entry (e_phnum=1); e_phentsize and all
  # remaining fields are deliberately left zero.
  ehdr = struct.pack(ehdr_fmt,
                     ident, 0, machine, 0, 0, phoff, 0, 0, 0,
                     0, 1, 0, 0, 0)

  size = len(text_segment)
  # Program header: p_type=1 (PT_LOAD), p_flags=5 (r-x), vaddr/paddr 0,
  # p_align 0.  Field order differs between the 32- and 64-bit layouts.
  if bits == 32:
    phdr = struct.pack(phdr_fmt, 1, segment_offset, 0, 0, size, size, 5, 0)
  else:
    phdr = struct.pack(phdr_fmt, 1, 5, segment_offset, 0, 0, size, size, 0)

  assert len(ehdr) <= phoff
  image = ehdr + '\0' * (phoff - len(ehdr))
  image += phdr
  assert len(image) <= segment_offset
  image += '\0' * (segment_offset - len(image))
  return image + text_segment
| 114 | |
| 115 | |
def RunRdfaValidator(options, data):
  """Run the RDFA validator executable on a code snippet.

  The snippet is NOP-padded to a whole number of bundles, wrapped in a
  minimal ELF file, and fed to the validator named by options.rdfaval.

  Args:
    options: Options as produced by optparse (.bits and .rdfaval are used).
    data: Binary code as a string.

  Returns:
    Pair (exit code, stdout text with carriage returns removed).
  """
  # Pad with NOPs up to a bundle boundary.
  data += '\x90' * ((-len(data)) % BUNDLE_SIZE)
  assert len(data) % BUNDLE_SIZE == 0

  tmp = tempfile.NamedTemporaryFile(
      prefix='tmp_legacy_validator_', mode='wb', delete=False)
  try:
    tmp.write(CreateElfContent(options.bits, data))
    tmp.close()

    proc = subprocess.Popen([options.rdfaval, tmp.name],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    # The validator is expected to keep stderr silent.
    assert stderr == '', stderr
    return_code = proc.wait()
  finally:
    tmp.close()
    os.remove(tmp.name)

  # Remove the carriage return characters that we get on Windows.
  return return_code, stdout.replace('\r', '')
| 140 | |
| 141 | |
def ParseRdfaMessages(stdout):
  """Extract (offset, message) pairs from rdfa validator output.

  Args:
    stdout: Output of rdfa validator as string.

  Yields:
    Pairs (offset, message).  Warning lines are skipped.
  """
  for raw_line in stdout.split('\n'):
    text = raw_line.strip()
    # Ignore blank lines and the final 'Valid.'/'Invalid.' verdict.
    if text == '' or re.match(r"(Valid|Invalid)\.$", text):
      continue

    match = re.match(r'([0-9a-f]+): (.*)$', text, re.IGNORECASE)
    assert match is not None, "can't parse line '%s'" % text
    offset = int(match.group(1), 16)
    message = match.group(2)

    # Only real errors are reported; warnings are dropped.
    if message.startswith('warning - '):
      continue
    yield offset, message
| 165 | |
| 166 | |
def RunRdfaWithNopPatching(options, data_chunks):
  """Run the RDFA validator, patching undecodable chunks with NOPs.

  When the RDFA validator hits an instruction it cannot decode, it resumes
  only at the next bundle boundary, so several errors inside one bundle are
  otherwise lost.  To emulate finer-grained recovery, whenever the validator
  reports 'unrecognized instruction' at an offset that starts one of the
  given chunks, that whole chunk is overwritten with NOPs and the validator
  is rerun from scratch.  This repeats until a run patches nothing new.
  All distinct (offset, message) errors from all runs are collected, each
  tagged with the stage (iteration number) that produced it, so primary
  errors can be told apart from follow-up ones.  The reported return code is
  the one from the very first (unpatched) run.

  Args:
    options: Options as produced by optparse (.bits and .update relevant).
    data_chunks: List of strings containing binary data.  The heuristic
        works best when each string is a single instruction, as in @hex
        sections.

  Returns:
    String with one line per error, formatted as
        <offset in hex>: [<stage>] <message>
    sorted by (offset, stage), followed by a 'return code: N' line.
  """
  chunks = list(data_chunks)

  # Map the byte offset of each chunk start to its index in 'chunks'.
  chunk_index_at_offset = {}
  pos = 0
  for idx, piece in enumerate(chunks):
    chunk_index_at_offset[pos] = idx
    pos += len(piece)

  first_return_code = None
  collected = []   # (offset, stage, message) triples in discovery order
  seen = set()     # (offset, message) pairs already collected
  stage = 0

  while True:
    return_code, stdout = RunRdfaValidator(options, ''.join(chunks))
    # Keep the return code of the very first, unpatched run.
    if first_return_code is None:
      first_return_code = return_code

    patched_anything = False

    for offset, message in ParseRdfaMessages(stdout):
      if (offset, message) in seen:
        continue
      seen.add((offset, message))
      collected.append((offset, stage, message))

      # Patch only when the failure starts exactly at a chunk boundary.
      if message == 'unrecognized instruction':
        idx = chunk_index_at_offset.get(offset)
        if idx is not None:
          nops = '\x90' * len(chunks[idx])
          if chunks[idx] != nops:
            chunks[idx] = nops
            patched_anything = True

    if not patched_anything:
      break
    stage += 1

  # Stable sort keeps equal (offset, stage) entries in discovery order.
  collected.sort(key=lambda triple: (triple[0], triple[1]))

  result = ''.join('%x: [%d] %s\n' % triple for triple in collected)
  result += 'return code: %d\n' % first_return_code
  return result
| 259 | |
| 260 | |
def CheckValidJumpTargets(options, data_chunks):
  """Check that the validator accepts jumps only to chunk boundaries.

  The boundaries between data_chunks mark instruction/superinstruction
  starts, so a jump should validate if and only if it lands on one.  For
  every byte offset up to the end of the (unpadded) code, a rel32 JMP to
  that offset is appended after the padded snippet and the whole thing is
  revalidated; the verdict must match boundary membership.

  Note that the same chunks as in RunRdfaWithNopPatching are used, but here
  they mark valid jump targets rather than patchable units, and only valid
  snippets are meaningful.

  Args:
    options: Options as produced by optparse.
    data_chunks: List of strings containing binary data.  Each chunk is
        expected to be an indivisible instruction or superinstruction.

  Returns:
    None.
  """
  code = ''.join(data_chunks)
  # NOP-pad to a whole number of bundles.
  code += '\x90' * ((-len(code)) % BUNDLE_SIZE)
  assert len(code) % BUNDLE_SIZE == 0

  # Appending a jump and validating the result only makes sense if the
  # original snippet is itself valid.
  return_code, _ = RunRdfaValidator(options, code)
  assert return_code == 0, 'Can only validate jump targets on valid snippet'

  # Every chunk start, plus the end of the last chunk, is a valid target.
  boundaries = set()
  offset = 0
  for piece in data_chunks:
    boundaries.add(offset)
    offset += len(piece)
  boundaries.add(offset)

  for target in range(offset + 1):
    # JMP rel32, placed right after the padded code, aimed at 'target'.
    jump = '\xe9' + struct.pack('<i', target - (len(code) + 5))
    return_code, _ = RunRdfaValidator(options, code + jump)
    if return_code == 0:
      assert target in boundaries, (
          'Offset 0x%x was reported valid jump target' % target)
    else:
      assert target not in boundaries, (
          'Offset 0x%x was reported invalid jump target' % target)
| 311 | |
| 312 | |
| 313 def Test(options, items_list): | |
| 314 info = dict(items_list) | |
| 315 | |
| 316 if 'rdfa_output' in info: | |
| 317 data_chunks = list(ParseHex(info['hex'])) | |
| 318 stdout = RunRdfaWithNopPatching(options, data_chunks) | |
| 319 print ' Checking rdfa_output field...' | |
| 320 if options.update: | |
| 321 if stdout != info['rdfa_output']: | |
| 322 print ' Updating rdfa_output field...' | |
| 323 info['rdfa_output'] = stdout | |
| 324 else: | |
| 325 AssertEquals(stdout, info['rdfa_output']) | |
| 326 | |
| 327 last_line = re.search('return code: (-?\d+)\n$', info['rdfa_output']) | |
| 328 expected_return_code = int(last_line.group(1)) | |
| 329 | |
| 330 # This test only works for valid snippets, see CheckValidJumpTargets | |
| 331 # for details. | |
| 332 if expected_return_code == 0: | |
| 333 print ' Checking jump targets...' | |
| 334 CheckValidJumpTargets(options, data_chunks) | |
| 335 | |
| 336 # Update field values, but preserve their order. | |
| 337 items_list = [(field, info[field]) for field, _ in items_list] | |
| 338 | |
| 339 return items_list | |
| 340 | |
| 341 | |
| 342 def main(args): | |
| 343 parser = optparse.OptionParser() | |
| 344 parser.add_option('--rdfaval', default='validator_test', | |
| 345 help='Path to the ncval validator executable') | |
| 346 parser.add_option('--bits', | |
| 347 type=int, | |
| 348 help='The subarchitecture to run tests against: 32 or 64') | |
| 349 parser.add_option('--update', | |
| 350 default=False, | |
| 351 action='store_true', | |
| 352 help='Regenerate golden fields instead of testing') | |
| 353 | |
| 354 options, args = parser.parse_args(args) | |
| 355 | |
| 356 if options.bits not in [32, 64]: | |
| 357 parser.error('specify --bits 32 or --bits 64') | |
| 358 | |
| 359 if len(args) == 0: | |
| 360 parser.error('No test files specified') | |
| 361 processed = 0 | |
| 362 for glob_expr in args: | |
| 363 test_files = sorted(glob.glob(glob_expr)) | |
| 364 if len(test_files) == 0: | |
| 365 raise AssertionError( | |
| 366 '%r matched no files, which was probably not intended' % glob_expr) | |
| 367 for test_file in test_files: | |
| 368 print 'Testing %s...' % test_file | |
| 369 tests = test_format.LoadTestFile(test_file) | |
| 370 tests = [Test(options, test) for test in tests] | |
| 371 if options.update: | |
| 372 test_format.SaveTestFile(tests, test_file) | |
| 373 processed += 1 | |
| 374 print '%s test files were processed.' % processed | |
| 375 | |
| 376 | |
# Script entry point: forward command-line arguments (sans program name).
if __name__ == '__main__':
  main(sys.argv[1:])
| OLD | NEW |