OLD | NEW |
| (Empty) |
1 #!/usr/bin/python | |
2 # Copyright (c) 2012 The Native Client Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 import glob | |
7 import optparse | |
8 import os | |
9 import re | |
10 import struct | |
11 import subprocess | |
12 import sys | |
13 import tempfile | |
14 | |
15 import test_format | |
16 | |
17 | |
18 BUNDLE_SIZE = 32 | |
19 | |
20 | |
21 def AssertEquals(actual, expected): | |
22 if actual != expected: | |
23 raise AssertionError('\nEXPECTED:\n"""\n%s"""\n\nACTUAL:\n"""\n%s"""' | |
24 % (expected, actual)) | |
25 | |
26 | |
27 def ParseHex(hex_content): | |
28   """Parse the content of an @hex section and yield chunks of binary data. | |
29 | |
30 Args: | |
31 hex_content: Content of @hex section as a string. | |
32 | |
33 Yields: | |
34     Chunks of binary data corresponding to lines of the given @hex section (as | |
35     strings). If a line ends with r'\\', the chunk continues on the following line. | |
36 """ | |
37 | |
38 bytes = [] | |
39 for line in hex_content.split('\n'): | |
40 line, sep, comment = line.partition('#') | |
41 line = line.strip() | |
42 if line == '': | |
43 continue | |
44 | |
45 if line.endswith(r'\\'): | |
46 line = line[:-2] | |
47 continuation = True | |
48 else: | |
49 continuation = False | |
50 | |
51 for byte in line.split(): | |
52 assert len(byte) == 2 | |
53 bytes.append(chr(int(byte, 16))) | |
54 | |
55 if not continuation: | |
56 assert len(bytes) > 0 | |
57 yield ''.join(bytes) | |
58 bytes = [] | |
59 | |
60 assert bytes == [], r'r"\\" should not appear on the last line' | |
61 | |
62 | |
63 def CreateElfContent(bits, text_segment): | |
64 e_ident = { | |
65 32: '\177ELF\1', | |
66 64: '\177ELF\2'}[bits] | |
67 e_machine = { | |
68 32: 3, | |
69 64: 62}[bits] | |
70 | |
71 e_phoff = 256 | |
72 e_phnum = 1 | |
73 e_phentsize = 0 | |
74 | |
75 elf_header_fmt = { | |
76 32: '<16sHHIIIIIHHHHHH', | |
77 64: '<16sHHIQQQIHHHHHH'}[bits] | |
78 | |
79 elf_header = struct.pack( | |
80 elf_header_fmt, | |
81 e_ident, 0, e_machine, 0, 0, e_phoff, 0, 0, 0, | |
82 e_phentsize, e_phnum, 0, 0, 0) | |
83 | |
84 p_type = 1 # PT_LOAD | |
85 p_flags = 5 # r-x | |
86 p_filesz = len(text_segment) | |
87 p_memsz = p_filesz | |
88 p_vaddr = 0 | |
89 p_offset = 512 | |
90 p_align = 0 | |
91 p_paddr = 0 | |
92 | |
93 pheader_fmt = { | |
94 32: '<IIIIIIII', | |
95 64: '<IIQQQQQQ'}[bits] | |
96 | |
97 pheader_fields = { | |
98 32: (p_type, p_offset, p_vaddr, p_paddr, | |
99 p_filesz, p_memsz, p_flags, p_align), | |
100 64: (p_type, p_flags, p_offset, p_vaddr, | |
101 p_paddr, p_filesz, p_memsz, p_align)}[bits] | |
102 | |
103 pheader = struct.pack(pheader_fmt, *pheader_fields) | |
104 | |
105 result = elf_header | |
106 assert len(result) <= e_phoff | |
107 result += '\0' * (e_phoff - len(result)) | |
108 result += pheader | |
109 assert len(result) <= p_offset | |
110 result += '\0' * (p_offset - len(result)) | |
111 result += text_segment | |
112 | |
113 return result | |
114 | |
115 | |
116 def RunRdfaValidator(options, data): | |
117   # Pad with NOPs so the data is a multiple of the bundle size. | |
118 data += (-len(data) % BUNDLE_SIZE) * '\x90' | |
119 assert len(data) % BUNDLE_SIZE == 0 | |
120 | |
121 tmp = tempfile.NamedTemporaryFile( | |
122 prefix='tmp_legacy_validator_', mode='wb', delete=False) | |
123 try: | |
124 tmp.write(CreateElfContent(options.bits, data)) | |
125 tmp.close() | |
126 | |
127 proc = subprocess.Popen([options.rdfaval, tmp.name], | |
128 stdout=subprocess.PIPE, | |
129 stderr=subprocess.PIPE) | |
130 stdout, stderr = proc.communicate() | |
131 assert stderr == '', stderr | |
132 return_code = proc.wait() | |
133 finally: | |
134 tmp.close() | |
135 os.remove(tmp.name) | |
136 | |
137 # Remove the carriage return characters that we get on Windows. | |
138 stdout = stdout.replace('\r', '') | |
139 return return_code, stdout | |
140 | |
141 | |
142 def ParseRdfaMessages(stdout): | |
143 """Get (offset, message) pairs from rdfa validator output. | |
144 | |
145 Args: | |
146 stdout: Output of rdfa validator as string. | |
147 | |
148 Yields: | |
149 Pairs (offset, message). | |
150 """ | |
151 for line in stdout.split('\n'): | |
152 line = line.strip() | |
153 if line == '': | |
154 continue | |
155 if re.match(r"(Valid|Invalid)\.$", line): | |
156 continue | |
157 | |
158 m = re.match(r'([0-9a-f]+): (.*)$', line, re.IGNORECASE) | |
159 assert m is not None, "can't parse line '%s'" % line | |
160 offset = int(m.group(1), 16) | |
161 message = m.group(2) | |
162 | |
163 if not message.startswith('warning - '): | |
164 yield offset, message | |
165 | |
166 | |
167 def RunRdfaWithNopPatching(options, data_chunks): | |
168 r"""Run RDFA validator with NOP patching for better error reporting. | |
169 | |
170 If the RDFA validator encounters an invalid instruction, it resumes validation | |
171 from the beginning of the next bundle, while the original, non-DFA-based | |
172   validators skip just one or two bytes and recover, and plenty of tests contain | |
173   more than one error in a single bundle. To mitigate such spurious | |
174   disagreements, the following procedure is used: when the RDFA validator | |
175   complains that a particular piece can't be decoded, the problematic line in | |
176   the @hex section (which usually corresponds to one instruction) is replaced | |
177   with NOPs and the validator is rerun from the beginning. This process may take | |
178   several iterations (in practice it always seems to converge). All errors | |
179   reported across these runs (minus duplicates) are taken as the validation | |
180   result. In a sense, this trick emulates line-level recovery as opposed to | |
181   bundle-level recovery. In practice it works well, and many spurious errors are | |
182   eliminated. To each error message we add the stage at which it was produced, | |
183   so we can distinguish 'primary' errors from additional ones. | |
184 | |
185   Example: suppose the machine codes DE AD and BE EF correspond to invalid | |
186   instructions. Let's take a look at what happens when we invoke | |
187   RunRdfaWithNopPatching(options, ['\xde\xad', '\xbe\xef']). First the RDFA | |
188   validator is run on the code '\xde\xad\xbe\xef\x90\x90\x90...'. It encounters | |
189   an undecipherable instruction, produces an error message at offset zero and | |
190   stops. Now we replace what is at offset zero ('\xde\xad') with the | |
191   corresponding number of NOPs, and run the RDFA validator again on | |
192   '\x90\x90\xbe\xef\x90\x90\x90...'. This time it decodes the first two NOPs | |
193   successfully and reports a problem at offset 2. In the next iteration of NOP | |
194   patching, BE EF is replaced with 90 90 as well; no decoding errors are | |
195   reported on the next run, so the whole process stops. Finally the combined | |
196   output looks like the following: | |
197 | |
198 0: [0] unrecognized instruction <- produced at stage 0 | |
199 2: [1] unrecognized instruction <- produced at stage 1 | |
200 return code: 1 <- return code at stage 0 | |
201 | |
202 Args: | |
203 options: Options as produced by optparse. | |
204 Relevant fields are .bits and .update. | |
205 data_chunks: List of strings containing binary data. For the described | |
206       heuristic to work better it is desirable (although not absolutely | |
207       required) that the strings correspond to individual instructions, as is | |
208       usually the case in the @hex section. | |
209 | |
210 Returns: | |
211 String representing combined output from all stages. Error messages are | |
212 of the form | |
213 <offset in hex>: [<stage>] <message> | |
214 """ | |
215 | |
216 data_chunks = list(data_chunks) | |
217 | |
218 offset_to_chunk = {} | |
219 offset = 0 | |
220 for i, chunk in enumerate(data_chunks): | |
221 offset_to_chunk[offset] = i | |
222 offset += len(chunk) | |
223 | |
224 first_return_code = None | |
225 messages = [] # list of triples (offset, stage, message) | |
226 messages_set = set() # set of pairs (offset, message) | |
227 stage = 0 | |
228 | |
229 while True: | |
230 return_code, stdout = RunRdfaValidator(options, ''.join(data_chunks)) | |
231 if first_return_code is None: | |
232 first_return_code = return_code | |
233 | |
234 nop_patched = False | |
235 | |
236 for offset, message in ParseRdfaMessages(stdout): | |
237 if (offset, message) in messages_set: | |
238 continue | |
239 messages.append((offset, stage, message)) | |
240 messages_set.add((offset, message)) | |
241 | |
242 if offset in offset_to_chunk and message == 'unrecognized instruction': | |
243 chunk_no = offset_to_chunk[offset] | |
244 nops_chunk = '\x90' * len(data_chunks[chunk_no]) | |
245 if nops_chunk != data_chunks[chunk_no]: | |
246 data_chunks[chunk_no] = nops_chunk | |
247 nop_patched = True | |
248 | |
249 if not nop_patched: | |
250 break | |
251 stage += 1 | |
252 | |
253 messages.sort(key=lambda (offset, stage, _): (offset, stage)) | |
254 | |
255 result = ''.join('%x: [%d] %s\n' % (offset, stage, message) | |
256 for offset, stage, message in messages) | |
257 result += 'return code: %d\n' % first_return_code | |
258 return result | |
259 | |
260 | |
261 def CheckValidJumpTargets(options, data_chunks): | |
262 """ | |
263 Check that the validator infers valid jump targets correctly. | |
264 | |
265 This test checks that the validator identifies instruction boundaries and | |
266   superinstructions correctly. In order to do that, it appends a jump at the end | |
267   of the given code, targeting each byte offset in turn. The jump should be valid | |
268   if and only if it targets a boundary between data chunks. | |
269 | |
270 Note that the same chunks as in RunRdfaWithNopPatching are used, but here they | |
271 play a different role. In RunRdfaWithNopPatching the partitioning into chunks | |
272 is only relevant when the whole snippet is invalid. Here, on the other hand, | |
273 we only care about valid snippets, and we use chunks to mark valid jump | |
274 targets. | |
275 | |
276 Args: | |
277 options: Options as produced by optparse. | |
278 data_chunks: List of strings containing binary data. Each such chunk is | |
279       expected to correspond to an indivisible instruction or superinstruction. | |
280 | |
281 Returns: | |
282 None. | |
283 """ | |
284 data = ''.join(data_chunks) | |
285   # Pad with NOPs so the data is a multiple of the bundle size. | |
286 data += (-len(data) % BUNDLE_SIZE) * '\x90' | |
287 assert len(data) % BUNDLE_SIZE == 0 | |
288 | |
289   # Since we check the validity of a jump target by appending a jump and | |
290   # validating the resulting code, we rely on the original snippet being valid. | |
291 return_code, _ = RunRdfaValidator(options, data) | |
292 assert return_code == 0, 'Can only validate jump targets on valid snippet' | |
293 | |
294 valid_jump_targets = set() | |
295 pos = 0 | |
296 for data_chunk in data_chunks: | |
297 valid_jump_targets.add(pos) | |
298 pos += len(data_chunk) | |
299 valid_jump_targets.add(pos) | |
300 | |
301 for i in range(pos + 1): | |
302 # Encode JMP with 32-bit relative target. | |
303 jump = '\xe9' + struct.pack('<i', i - (len(data) + 5)) | |
304 return_code, _ = RunRdfaValidator(options, data + jump) | |
305 if return_code == 0: | |
306 assert i in valid_jump_targets, ( | |
307 'Offset 0x%x was reported valid jump target' % i) | |
308 else: | |
309 assert i not in valid_jump_targets, ( | |
310 'Offset 0x%x was reported invalid jump target' % i) | |
311 | |
312 | |
313 def Test(options, items_list): | |
314 info = dict(items_list) | |
315 | |
316 if 'rdfa_output' in info: | |
317 data_chunks = list(ParseHex(info['hex'])) | |
318 stdout = RunRdfaWithNopPatching(options, data_chunks) | |
319 print ' Checking rdfa_output field...' | |
320 if options.update: | |
321 if stdout != info['rdfa_output']: | |
322 print ' Updating rdfa_output field...' | |
323 info['rdfa_output'] = stdout | |
324 else: | |
325 AssertEquals(stdout, info['rdfa_output']) | |
326 | |
327 last_line = re.search('return code: (-?\d+)\n$', info['rdfa_output']) | |
328 expected_return_code = int(last_line.group(1)) | |
329 | |
330 # This test only works for valid snippets, see CheckValidJumpTargets | |
331 # for details. | |
332 if expected_return_code == 0: | |
333 print ' Checking jump targets...' | |
334 CheckValidJumpTargets(options, data_chunks) | |
335 | |
336 # Update field values, but preserve their order. | |
337 items_list = [(field, info[field]) for field, _ in items_list] | |
338 | |
339 return items_list | |
340 | |
341 | |
342 def main(args): | |
343 parser = optparse.OptionParser() | |
344 parser.add_option('--rdfaval', default='validator_test', | |
345 help='Path to the ncval validator executable') | |
346 parser.add_option('--bits', | |
347 type=int, | |
348 help='The subarchitecture to run tests against: 32 or 64') | |
349 parser.add_option('--update', | |
350 default=False, | |
351 action='store_true', | |
352 help='Regenerate golden fields instead of testing') | |
353 | |
354 options, args = parser.parse_args(args) | |
355 | |
356 if options.bits not in [32, 64]: | |
357 parser.error('specify --bits 32 or --bits 64') | |
358 | |
359 if len(args) == 0: | |
360 parser.error('No test files specified') | |
361 processed = 0 | |
362 for glob_expr in args: | |
363 test_files = sorted(glob.glob(glob_expr)) | |
364 if len(test_files) == 0: | |
365 raise AssertionError( | |
366 '%r matched no files, which was probably not intended' % glob_expr) | |
367 for test_file in test_files: | |
368 print 'Testing %s...' % test_file | |
369 tests = test_format.LoadTestFile(test_file) | |
370 tests = [Test(options, test) for test in tests] | |
371 if options.update: | |
372 test_format.SaveTestFile(tests, test_file) | |
373 processed += 1 | |
374 print '%s test files were processed.' % processed | |
375 | |
376 | |
377 if __name__ == '__main__': | |
378 main(sys.argv[1:]) | |
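
The @hex format consumed by ParseHex (line-oriented hex bytes, '#' comments, and a trailing '\\' pair that joins a line with the next one) can be illustrated with a small standalone sketch. The sample section and the helper name below are hypothetical, and the sketch follows the module's Python 2 string handling rather than importing the module itself:

  def ParseHexSketch(hex_content):
    # One chunk of bytes per logical line; '#' starts a comment, and a line
    # whose stripped form ends with two backslashes is joined with the next.
    chunk = []
    for line in hex_content.split('\n'):
      line = line.partition('#')[0].strip()
      if not line:
        continue
      continued = line.endswith('\\\\')
      if continued:
        line = line[:-2]
      chunk.extend(chr(int(byte, 16)) for byte in line.split())
      if not continued and chunk:
        yield ''.join(chunk)
        chunk = []

  sample = '90  # nop\nde \\\\\nad  # continuation: DE AD form a single chunk'
  assert list(ParseHexSketch(sample)) == ['\x90', '\xde\xad']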
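
CreateElfContent packs a minimal ELF file header and a single PT_LOAD program header around the text segment. As a cross-check on the format strings it uses: with the '<' prefix struct inserts no padding, so the packed sizes should equal the standard header sizes (52-byte Elf32_Ehdr, 64-byte Elf64_Ehdr, 32-byte Elf32_Phdr, 56-byte Elf64_Phdr). A quick sketch, reusing only the format strings from the listing:

  import struct

  # ELF file header format strings copied from CreateElfContent.
  elf_header_fmt = {32: '<16sHHIIIIIHHHHHH',
                    64: '<16sHHIQQQIHHHHHH'}
  assert struct.calcsize(elf_header_fmt[32]) == 52  # sizeof(Elf32_Ehdr)
  assert struct.calcsize(elf_header_fmt[64]) == 64  # sizeof(Elf64_Ehdr)

  # Program header format strings, likewise.
  pheader_fmt = {32: '<IIIIIIII', 64: '<IIQQQQQQ'}
  assert struct.calcsize(pheader_fmt[32]) == 32  # sizeof(Elf32_Phdr)
  assert struct.calcsize(pheader_fmt[64]) == 56  # sizeof(Elf64_Phdr)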
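
Both RunRdfaValidator and CheckValidJumpTargets pad the code with (-len(data) % BUNDLE_SIZE) NOP bytes. Since Python's modulo with a positive modulus is always non-negative, this expression is exactly the number of bytes needed to reach the next 32-byte bundle boundary (zero if the data is already aligned). A quick check:

  BUNDLE_SIZE = 32

  for length in (0, 1, 31, 32, 33, 45):
    pad = -length % BUNDLE_SIZE
    assert 0 <= pad < BUNDLE_SIZE
    assert (length + pad) % BUNDLE_SIZE == 0
  # For example, -45 % 32 == 19, so a 45-byte snippet gets 19 NOPs of padding.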
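
The probe instruction in CheckValidJumpTargets is a near JMP with a 32-bit relative displacement. The displacement is measured from the end of the 5-byte instruction, which is appended at offset len(data), hence the len(data) + 5 bias in the encoding. A small sketch of the same arithmetic (the helper name is hypothetical, and byte strings follow the module's Python 2 conventions):

  import struct

  def EncodeJmpRel32(target_offset, code_size):
    # 0xe9 is the near JMP opcode; its rel32 operand is relative to the
    # address just past the 5-byte instruction, i.e. code_size + 5.
    return '\xe9' + struct.pack('<i', target_offset - (code_size + 5))

  # Jumping back to offset 0 from the end of one 32-byte bundle gives a
  # displacement of -(32 + 5) = -37.
  assert EncodeJmpRel32(0, 32) == '\xe9\xdb\xff\xff\xff'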