Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright (c) 2012 The Native Client Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import copy | |
| 6 import re | |
| 7 | |
| 8 import asm | |
| 9 import utils | |
| 10 import val_runner | |
| 11 | |
| 12 | |
class Instruction(object):
  """One section of a test file: a single instruction and its expected output.

  An instruction is described by 'field: value' lines (see Parse) and is
  rendered back to the same form by ToLines.
  """

  __slots__ = [
      'offset',  # position of the instruction within the test's code
      'asm',     # assembly text, or None if only hex was given
      'hex',     # readable hex form of `data`
      'data',    # raw machine code of the instruction
      'outs',    # {validator: [message, ...], ...}
  ]

  # Suffix that turns a validator name into a field name, e.g. 'nc_out'.
  _out_suffix = '_out'

  # Fields that may be given directly as 'field: value' lines.
  _parse_fields = ('offset', 'asm', 'hex')

  def __init__(self):
    self.offset = None
    self.hex = None
    self.asm = None
    # Initialised so that every slot exists even before Parse fills it in.
    self.data = None
    self.outs = dict((v, []) for v in val_runner.VALIDATORS)

  @property
  def size(self):
    """Number of bytes in the instruction's machine code."""
    return len(self.data)

  @property
  def end_offset(self):
    """Offset of the first byte after this instruction."""
    return self.offset + self.size

  def CheckAsm(self, bits):
    """Check that assembling `asm` reproduces `data`.

    Does nothing when the instruction has no assembly text.

    NOTE(review, Mark Seaborn, 2012/09/27): This method doesn't seem to be
    called.

    Args:
      bits: passed through to asm.Assemble.

    Raises:
      AssertionError: the assembled bytes differ from `data`; the exception
          carries (assembled hex, stored hex).
    """
    if self.asm is None:
      return
    asm_data = asm.Assemble(bits, self.asm)
    if self.data != asm_data:
      # Raised explicitly (not via `assert`) so the check survives `python -O`.
      raise AssertionError((utils.DataToReadableHex(asm_data),
                            utils.DataToReadableHex(self.data)))

  def __repr__(self):
    label = self.asm or self.hex
    try:
      return 'Instr(%s)@0x%x' % (label, self.offset)
    except TypeError:
      # offset is not an integer yet: None on a fresh instance, or the raw
      # text stored by Parse.  repr() must not fail while debugging.
      return 'Instr(%s)@%r' % (label, self.offset)

  @staticmethod
  def Parse(bits, lines):
    """Build an Instruction from the lines of one section.

    Each line has the form 'field: value'.  A field ending in '_out' adds an
    expected message for the validator named by the rest of the field name;
    the other accepted fields are 'offset', 'asm' and 'hex', each at most
    once.  The offset is stored as the stripped text from the line.

    Args:
      bits: passed through to asm.Assemble when no hex is given.
      lines: iterable of strings.

    Returns:
      Instruction with `data` and `hex` filled in.

    Raises:
      AssertionError: a line is malformed, names an unknown field or
          validator, repeats a field, or the section has neither hex nor asm.
    """
    instr = Instruction()
    suffix = Instruction._out_suffix

    for line in lines:
      if ':' not in line:
        raise AssertionError("can't parse line '%s'" % line)
      field, value = line.split(':', 1)
      field = field.strip()
      value = value.strip()

      if field.endswith(suffix):
        validator = field[:-len(suffix)]
        if validator not in instr.outs:
          raise AssertionError('unknown validator %s' % validator)
        instr.outs[validator].append(value)
        continue

      if field not in Instruction._parse_fields:
        raise AssertionError('unrecognized field %s' % field)
      if getattr(instr, field) is not None:
        raise AssertionError('field %s is already set' % field)
      setattr(instr, field, value)

    if instr.hex is not None:
      instr.data = utils.ReadableHexToData(instr.hex)
    else:
      # NOTE(review, Mark Seaborn, 2012/09/27): "Hmm, having a fallback here
      # means that in normal t..." (comment truncated in the review export).
      if instr.asm is None:
        raise AssertionError('instruction has neither hex nor asm')
      instr.data = asm.Assemble(bits, instr.asm)
      instr.hex = utils.DataToReadableHex(instr.data)
    return instr

  def ToLines(self, offset=None):
    """Render the instruction as 'field: value' lines.

    Args:
      offset: offset the reader would assume for this instruction; the
          'offset' line is emitted only when the actual offset differs.

    Returns:
      List of strings: offset (if needed), asm, hex, then the expected
      messages of every validator in val_runner.VALIDATORS order.
    """
    lines = []
    if self.offset != offset:
      lines.append('offset: %s' % self.offset)
    if self.asm is not None:
      lines.append('asm: %s' % self.asm)
    if self.hex is not None:
      lines.append('hex: %s' % self.hex)

    for validator in val_runner.VALIDATORS:
      field = validator + Instruction._out_suffix
      for msg in self.outs[validator]:
        lines.append('%s: %s' % (field, msg))

    return lines
| 84 | |
| 85 | |
class Test(object):
  """A parsed test file: a header section followed by instruction sections.

  Each separator/section is a list of strings.
  Number of separators always exceeds number of sections by one, because
  they interleave, starting and finishing with (possibly empty) separator.
  """

  # NOTE(review, Mark Seaborn, 2012/09/27): "Some comments on what these
  # fields mean would be h..." (comment truncated in the review export).
  __slots__ = [
      'bits',          # int taken from the 'BITS' header line
      'separators',    # runs of blank/comment lines between sections
      'sections',      # runs of content lines; sections[0] is the header
      'instructions',  # Instruction objects parsed from sections[1:]
      'safe',          # True/False from 'OUTCOME: valid/invalid', else None
  ]

  # Code is padded to a whole number of bundles of this many bytes.
  _bundle_size = 32

  # Byte used for padding and for patching out undecodable instructions.
  _nop = '\x90'

  @staticmethod
  def Parse(lines):
    """Parse a test from an iterable of text lines.

    Blank lines and lines starting with '#' are separators; everything else
    belongs to a section.  The first section is the header and may contain
    'BITS: <int>' and 'OUTCOME: valid|invalid'; every further section
    describes one instruction.

    Args:
      lines: iterable of strings.

    Returns:
      Test with all fields filled in.

    Raises:
      AssertionError: the header is missing or malformed, lacks BITS, or an
          instruction section is rejected by Instruction.Parse.
    """
    test = Test()
    test.bits = None
    test.safe = None
    test.separators = [[]]
    test.sections = []

    # Parser state: whether we are currently parsing
    # separator (comments/blank lines) or section.
    in_section = False

    for line in lines:
      line = line.strip()
      is_sep = line == '' or line.startswith('#')

      if is_sep:
        if in_section:
          test.separators.append([line])
        else:
          test.separators[-1].append(line)
      elif in_section:
        test.sections[-1].append(line)
      else:
        test.sections.append([line])

      in_section = not is_sep

    if in_section:
      # Input ended inside a section; close it with an empty separator.
      test.separators.append([])

    assert len(test.separators) == len(test.sections) + 1

    # header section is required; it specifies BITS and OUTCOME
    if not test.sections:
      raise AssertionError('header section is missing')

    for line in test.sections[0]:
      m = re.match(r'(.*):\s*(.*)$', line)
      if m is None:
        raise AssertionError("can't parse header line '%s'" % line)
      field, value = m.groups()
      if field == 'BITS':
        test.bits = int(value)
      elif field == 'OUTCOME':
        if value not in ('valid', 'invalid'):
          raise AssertionError('Unrecognized OUTCOME %s' % value)
        test.safe = value == 'valid'
      else:
        raise AssertionError('Unrecognized field %s in special section' %
                             field)

    if test.bits is None:
      raise AssertionError('BITS is not specified in header section')

    test.instructions = []
    offset = 0
    for section in test.sections[1:]:
      # Instruction.Parse always fills in both data and hex.
      instr = Instruction.Parse(test.bits, section)

      if instr.offset is not None:
        instr.offset = int(instr.offset)
      else:
        # No explicit offset: place right after the previous instruction.
        instr.offset = offset
      test.instructions.append(instr)
      offset = instr.end_offset

    return test

  def Print(self, fout):
    """Write the test to `fout` in the format accepted by Parse.

    The header and instruction sections are regenerated from the current
    field values (and stored back into `sections`); separators are written
    unchanged.

    Args:
      fout: object with a write(str) method.
    """
    header = ['BITS: %s' % self.bits]
    if self.safe is not None:
      header.append('OUTCOME: valid' if self.safe else 'OUTCOME: invalid')
    self.sections[0] = header

    offset = 0
    for i, instr in enumerate(self.instructions):
      # ToLines omits the offset line when the instruction directly follows
      # the previous one.
      self.sections[i + 1] = instr.ToLines(offset)
      offset = instr.end_offset

    assert len(self.separators) == len(self.sections) + 1
    groups = []
    for sep, sec in zip(self.separators, self.sections):
      groups.append(sep)
      groups.append(sec)
    groups.append(self.separators[-1])

    for group in groups:
      for line in group:
        fout.write('%s\n' % line)

  def PrepareCode(self):
    """Lay the instructions out at their offsets in a nop-filled buffer.

    Returns:
      String whose length is the end of the last instruction rounded up to
      a multiple of 32; gaps between instructions are filled with '\\x90'.
      Empty when there are no instructions.
    """
    # The `or [0]` keeps max() from failing on a test without instructions.
    code_size = max([i.end_offset for i in self.instructions] or [0])
    bundle = Test._bundle_size
    code_size = ((code_size - 1) // bundle + 1) * bundle
    code = [Test._nop] * code_size

    for i in self.instructions:
      code[i.offset : i.end_offset] = list(i.data)

    return ''.join(code)

  def RunValidator(self, validator):
    """Run one validator on the test's code and collect its results.

    Args:
      validator: one of val_runner.VALIDATORS.

    Returns:
      For 'nc', whatever val_runner.RunValidator returns.  For any other
      validator, a val_runner.ValidationResults combining all passes of the
      error-recovery emulation described below.
    """
    assert validator in val_runner.VALIDATORS
    if validator == 'nc':
      return val_runner.RunValidator(validator, self.bits, self.PrepareCode())

    # Work on a copy: instruction data is overwritten with nops below.
    test = copy.deepcopy(self)

    # When RDFA validator encounters invalid instruction, it stops processing
    # current 32-byte bundle and moves on straight to the next one, while
    # prod. validator recovers from error and continues decoding.
    # To avoid many spurious errors, some kind of error recovery for RDFA
    # is emulated in the following process:
    #
    # Whenever instruction can't be decoded, the whole section containing it
    # is replaced with nops, and validation is repeated from start. This
    # patching can happen several times. As a result of validation we take
    # union of all errors discovered on all passes.
    # This approach is expected to work well only when partition to sections
    # matches closely partition to instructions, which is reasonable assumption
    # (typical section is a single instruction or few instructions forming
    # pseudoinstruction).
    #
    # This procedure is more or less safe (in a sense that it's unlikely
    # to mask any bugs in validator), because first it is guaranteed to
    # preserve all errors found in the first pass (on unmodified code),
    # and second all spurious errors incorrectly introduced by patching
    # will likely be spotted as discrepancies compared to prod. validator.
    #
    # NOTE(review, Mark Seaborn, 2012/09/27): "This means you're relying on
    # having the prod valid..." (comment truncated in the review export).

    errors = set()
    safe = True

    while True:
      res = val_runner.RunValidator(validator, test.bits, test.PrepareCode())
      safe = safe and res.safe
      errors.update(res.errors)

      patched = False
      for loc, msg in res.errors:
        if msg != 'DFA error in validator':
          continue
        for i in test.instructions:
          if i.offset != loc:
            continue
          nops = Test._nop * i.size
          # Only count a patch when it changes something; otherwise the
          # loop would never terminate.
          if i.data != nops:
            i.data = nops
            patched = True
      if not patched:
        break

    return val_runner.ValidationResults(safe=safe, errors=sorted(errors))

  def ExpectedErrors(self, validator):
    """List the errors the test file expects from `validator`.

    A message of the form '[at +<delta>] <text>' is reported at the
    instruction's offset plus <delta>; any other message is reported at the
    instruction's offset.

    Args:
      validator: key into each instruction's `outs`.

    Returns:
      List of (offset, message) tuples in instruction order.
    """
    # Matches messages of the form
    #   [at +<delta>] <error message>
    delta_re = re.compile(r'\[at \+(\d+)\]\s(.*)$')

    errors = []
    for i in self.instructions:
      for msg in i.outs[validator]:
        m = delta_re.match(msg)
        if m is not None:
          delta = int(m.group(1))
          msg = m.group(2)
        else:
          delta = 0
        errors.append((i.offset + delta, msg))
    return errors
| OLD | NEW |