Index: src/trusted/validator_x86/testscripts/run_rdfa_validator_tests.py |
diff --git a/src/trusted/validator_x86/testscripts/run_rdfa_validator_tests.py b/src/trusted/validator_x86/testscripts/run_rdfa_validator_tests.py |
deleted file mode 100644 |
index b5c877303b238290073e8a3c67595018f4e1a405..0000000000000000000000000000000000000000 |
--- a/src/trusted/validator_x86/testscripts/run_rdfa_validator_tests.py |
+++ /dev/null |
@@ -1,378 +0,0 @@ |
-#!/usr/bin/python |
-# Copyright (c) 2012 The Native Client Authors. All rights reserved. |
-# Use of this source code is governed by a BSD-style license that can be |
-# found in the LICENSE file. |
- |
-import glob |
-import optparse |
-import os |
-import re |
-import struct |
-import subprocess |
-import sys |
-import tempfile |
- |
-import test_format |
- |
- |
-BUNDLE_SIZE = 32 |
- |
- |
-def AssertEquals(actual, expected): |
- if actual != expected: |
- raise AssertionError('\nEXPECTED:\n"""\n%s"""\n\nACTUAL:\n"""\n%s"""' |
- % (expected, actual)) |
- |
- |
-def ParseHex(hex_content): |
- """Parse content of @hex section and return binary data |
- |
- Args: |
- hex_content: Content of @hex section as a string. |
- |
- Yields: |
- Chunks of binary data corresponding to lines of given @hex section (as |
- strings). If line ends with r'\\', chunk is continued on the following line. |
- """ |
- |
- bytes = [] |
- for line in hex_content.split('\n'): |
- line, sep, comment = line.partition('#') |
- line = line.strip() |
- if line == '': |
- continue |
- |
- if line.endswith(r'\\'): |
- line = line[:-2] |
- continuation = True |
- else: |
- continuation = False |
- |
- for byte in line.split(): |
- assert len(byte) == 2 |
- bytes.append(chr(int(byte, 16))) |
- |
- if not continuation: |
- assert len(bytes) > 0 |
- yield ''.join(bytes) |
- bytes = [] |
- |
- assert bytes == [], r'r"\\" should not appear on the last line' |
- |
- |
-def CreateElfContent(bits, text_segment): |
- e_ident = { |
- 32: '\177ELF\1', |
- 64: '\177ELF\2'}[bits] |
- e_machine = { |
- 32: 3, |
- 64: 62}[bits] |
- |
- e_phoff = 256 |
- e_phnum = 1 |
- e_phentsize = 0 |
- |
- elf_header_fmt = { |
- 32: '<16sHHIIIIIHHHHHH', |
- 64: '<16sHHIQQQIHHHHHH'}[bits] |
- |
- elf_header = struct.pack( |
- elf_header_fmt, |
- e_ident, 0, e_machine, 0, 0, e_phoff, 0, 0, 0, |
- e_phentsize, e_phnum, 0, 0, 0) |
- |
- p_type = 1 # PT_LOAD |
- p_flags = 5 # r-x |
- p_filesz = len(text_segment) |
- p_memsz = p_filesz |
- p_vaddr = 0 |
- p_offset = 512 |
- p_align = 0 |
- p_paddr = 0 |
- |
- pheader_fmt = { |
- 32: '<IIIIIIII', |
- 64: '<IIQQQQQQ'}[bits] |
- |
- pheader_fields = { |
- 32: (p_type, p_offset, p_vaddr, p_paddr, |
- p_filesz, p_memsz, p_flags, p_align), |
- 64: (p_type, p_flags, p_offset, p_vaddr, |
- p_paddr, p_filesz, p_memsz, p_align)}[bits] |
- |
- pheader = struct.pack(pheader_fmt, *pheader_fields) |
- |
- result = elf_header |
- assert len(result) <= e_phoff |
- result += '\0' * (e_phoff - len(result)) |
- result += pheader |
- assert len(result) <= p_offset |
- result += '\0' * (p_offset - len(result)) |
- result += text_segment |
- |
- return result |
- |
- |
-def RunRdfaValidator(options, data): |
- # Add nops to make it bundle-sized. |
- data += (-len(data) % BUNDLE_SIZE) * '\x90' |
- assert len(data) % BUNDLE_SIZE == 0 |
- |
- tmp = tempfile.NamedTemporaryFile( |
- prefix='tmp_legacy_validator_', mode='wb', delete=False) |
- try: |
- tmp.write(CreateElfContent(options.bits, data)) |
- tmp.close() |
- |
- proc = subprocess.Popen([options.rdfaval, tmp.name], |
- stdout=subprocess.PIPE, |
- stderr=subprocess.PIPE) |
- stdout, stderr = proc.communicate() |
- assert stderr == '', stderr |
- return_code = proc.wait() |
- finally: |
- tmp.close() |
- os.remove(tmp.name) |
- |
- # Remove the carriage return characters that we get on Windows. |
- stdout = stdout.replace('\r', '') |
- return return_code, stdout |
- |
- |
-def ParseRdfaMessages(stdout): |
- """Get (offset, message) pairs from rdfa validator output. |
- |
- Args: |
- stdout: Output of rdfa validator as string. |
- |
- Yields: |
- Pairs (offset, message). |
- """ |
- for line in stdout.split('\n'): |
- line = line.strip() |
- if line == '': |
- continue |
- if re.match(r"(Valid|Invalid)\.$", line): |
- continue |
- |
- m = re.match(r'([0-9a-f]+): (.*)$', line, re.IGNORECASE) |
- assert m is not None, "can't parse line '%s'" % line |
- offset = int(m.group(1), 16) |
- message = m.group(2) |
- |
- if not message.startswith('warning - '): |
- yield offset, message |
- |
- |
-def RunRdfaWithNopPatching(options, data_chunks): |
- r"""Run RDFA validator with NOP patching for better error reporting. |
- |
- If the RDFA validator encounters an invalid instruction, it resumes validation |
- from the beginning of the next bundle, while the original, non-DFA-based |
- validators skip maybe one or two bytes and recover. And there are plenty of |
- tests where there are more than one error in a single bundle. To mitigate such |
- spurious disagreements, the following procedure is used: when RDFA complaints |
- that particular piece can't be decoded, the problematic line in @hex section |
- (which usually corresponds to one instruction) is replaced with NOPs and the |
- validator is rerun from the beginning. This process may take several |
- iterations (it seems it always converges in practice). All errors reported on |
- all such runs (sans duplicate ones) are taken as validation result. So, in a |
- sense, this trick is to emulate line-level recovery as opposed to bundle- |
- level. In practice it turns out ok, and lots of spurious errors are |
- eliminated. To each error message we add the stage at which it was produced, |
- so we can destinguish 'primary' errors from additional ones. |
- |
- Example. Suppose DE AD and BE EF machine codes correspond to invalid |
- instructions. Lets take a look at what happens when we invoke |
- RunRdfaWithNopPatching(options, ['\de\ad', '\be\ef']). First the RDFA |
- validator is run on the code '\de\ad\be\ef\90\90\90...'. It encounters an |
- undecipherable instruction, produces an error message at offset zero and |
- stops. Now we replace what is at offset zero ('\de\ad') with corresponding |
- amount of nops, and run the RDFA validator again on |
- '\90\90\be\ef\90\90\90...'. This time it decodes first two NOPs sucessfully |
- and reports problem at offset 2. In the next iteration of NOP patching BE EF |
- is replaced with 90 90 as well, no decoding errors are reported on the next |
- run so the whole process stops. Finally the combined output looks like |
- following: |
- |
- 0: [0] unrecognized instruction <- produced at stage 0 |
- 2: [1] unrecognized instruction <- produced at stage 1 |
- return code: 1 <- return code at stage 0 |
- |
- Args: |
- options: Options as produced by optparse. |
- Relevant fields are .bits and .update. |
- data_chunks: List of strings containing binary data. For the described |
- heuristic to work better it is desirable (although not absolutelty |
- required) that strings correspond to singular instructions, as it |
- usually happens in @hex section. |
- |
- Returns: |
- String representing combined output from all stages. Error messages are |
- of the form |
- <offset in hex>: [<stage>] <message> |
- """ |
- |
- data_chunks = list(data_chunks) |
- |
- offset_to_chunk = {} |
- offset = 0 |
- for i, chunk in enumerate(data_chunks): |
- offset_to_chunk[offset] = i |
- offset += len(chunk) |
- |
- first_return_code = None |
- messages = [] # list of triples (offset, stage, message) |
- messages_set = set() # set of pairs (offset, message) |
- stage = 0 |
- |
- while True: |
- return_code, stdout = RunRdfaValidator(options, ''.join(data_chunks)) |
- if first_return_code is None: |
- first_return_code = return_code |
- |
- nop_patched = False |
- |
- for offset, message in ParseRdfaMessages(stdout): |
- if (offset, message) in messages_set: |
- continue |
- messages.append((offset, stage, message)) |
- messages_set.add((offset, message)) |
- |
- if offset in offset_to_chunk and message == 'unrecognized instruction': |
- chunk_no = offset_to_chunk[offset] |
- nops_chunk = '\x90' * len(data_chunks[chunk_no]) |
- if nops_chunk != data_chunks[chunk_no]: |
- data_chunks[chunk_no] = nops_chunk |
- nop_patched = True |
- |
- if not nop_patched: |
- break |
- stage += 1 |
- |
- messages.sort(key=lambda (offset, stage, _): (offset, stage)) |
- |
- result = ''.join('%x: [%d] %s\n' % (offset, stage, message) |
- for offset, stage, message in messages) |
- result += 'return code: %d\n' % first_return_code |
- return result |
- |
- |
-def CheckValidJumpTargets(options, data_chunks): |
- """ |
- Check that the validator infers valid jump targets correctly. |
- |
- This test checks that the validator identifies instruction boundaries and |
- superinstructions correctly. In order to do that, it attempts to append a jump |
- to each byte at the end of the given code. Jump should be valid if and only if |
- it goes to the boundary between data chunks. |
- |
- Note that the same chunks as in RunRdfaWithNopPatching are used, but here they |
- play a different role. In RunRdfaWithNopPatching the partitioning into chunks |
- is only relevant when the whole snippet is invalid. Here, on the other hand, |
- we only care about valid snippets, and we use chunks to mark valid jump |
- targets. |
- |
- Args: |
- options: Options as produced by optparse. |
- data_chunks: List of strings containing binary data. Each such chunk is |
- expected to correspond to indivisible instruction or superinstruction. |
- |
- Returns: |
- None. |
- """ |
- data = ''.join(data_chunks) |
- # Add nops to make it bundle-sized. |
- data += (-len(data) % BUNDLE_SIZE) * '\x90' |
- assert len(data) % BUNDLE_SIZE == 0 |
- |
- # Since we check validity of jump target by adding jump and validating |
- # resulting piece, we rely on validity of original snippet. |
- return_code, _ = RunRdfaValidator(options, data) |
- assert return_code == 0, 'Can only validate jump targets on valid snippet' |
- |
- valid_jump_targets = set() |
- pos = 0 |
- for data_chunk in data_chunks: |
- valid_jump_targets.add(pos) |
- pos += len(data_chunk) |
- valid_jump_targets.add(pos) |
- |
- for i in range(pos + 1): |
- # Encode JMP with 32-bit relative target. |
- jump = '\xe9' + struct.pack('<i', i - (len(data) + 5)) |
- return_code, _ = RunRdfaValidator(options, data + jump) |
- if return_code == 0: |
- assert i in valid_jump_targets, ( |
- 'Offset 0x%x was reported valid jump target' % i) |
- else: |
- assert i not in valid_jump_targets, ( |
- 'Offset 0x%x was reported invalid jump target' % i) |
- |
- |
-def Test(options, items_list): |
- info = dict(items_list) |
- |
- if 'rdfa_output' in info: |
- data_chunks = list(ParseHex(info['hex'])) |
- stdout = RunRdfaWithNopPatching(options, data_chunks) |
- print ' Checking rdfa_output field...' |
- if options.update: |
- if stdout != info['rdfa_output']: |
- print ' Updating rdfa_output field...' |
- info['rdfa_output'] = stdout |
- else: |
- AssertEquals(stdout, info['rdfa_output']) |
- |
- last_line = re.search('return code: (-?\d+)\n$', info['rdfa_output']) |
- expected_return_code = int(last_line.group(1)) |
- |
- # This test only works for valid snippets, see CheckValidJumpTargets |
- # for details. |
- if expected_return_code == 0: |
- print ' Checking jump targets...' |
- CheckValidJumpTargets(options, data_chunks) |
- |
- # Update field values, but preserve their order. |
- items_list = [(field, info[field]) for field, _ in items_list] |
- |
- return items_list |
- |
- |
-def main(args): |
- parser = optparse.OptionParser() |
- parser.add_option('--rdfaval', default='validator_test', |
- help='Path to the ncval validator executable') |
- parser.add_option('--bits', |
- type=int, |
- help='The subarchitecture to run tests against: 32 or 64') |
- parser.add_option('--update', |
- default=False, |
- action='store_true', |
- help='Regenerate golden fields instead of testing') |
- |
- options, args = parser.parse_args(args) |
- |
- if options.bits not in [32, 64]: |
- parser.error('specify --bits 32 or --bits 64') |
- |
- if len(args) == 0: |
- parser.error('No test files specified') |
- processed = 0 |
- for glob_expr in args: |
- test_files = sorted(glob.glob(glob_expr)) |
- if len(test_files) == 0: |
- raise AssertionError( |
- '%r matched no files, which was probably not intended' % glob_expr) |
- for test_file in test_files: |
- print 'Testing %s...' % test_file |
- tests = test_format.LoadTestFile(test_file) |
- tests = [Test(options, test) for test in tests] |
- if options.update: |
- test_format.SaveTestFile(tests, test_file) |
- processed += 1 |
- print '%s test files were processed.' % processed |
- |
- |
-if __name__ == '__main__': |
- main(sys.argv[1:]) |