src/trusted/validator_ragel/verify_regular_instructions_old.py - Issue 636933004: stop building/testing old x86 validator.

Side by Side Diff: src/trusted/validator_ragel/verify_regular_instructions_old.py

Issue 636933004: stop building/testing old x86 validator. (Closed) Base URL: svn://svn.chromium.org/native_client/trunk/src/native_client

Patch Set: rebase master Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 #!/usr/bin/python

2 # Copyright (c) 2013 The Native Client Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.

5

6 """

7 Generate all acceptable regular instructions by traversing validator DFA

8 and run objdump, new and old validator on them.

9 """

10 # TODO(shcherbina): get rid of this test once text-based specification is

11 # complete (https://code.google.com/p/nativeclient/issues/detail?id=3453).

12

13 import itertools

14 import multiprocessing

15 import optparse

16 import os

17 import re

18 import subprocess

19 import sys

20 import tempfile

21 import traceback

22

23 import dfa_parser

24 import dfa_traversal

25 import objdump_parser

26 import validator

27

28

# Single-byte opcodes that are referred to by name elsewhere in this file.
NOP = 0x90
FWAIT = 0x9b

31

32

def IsRexPrefix(byte):
  """Tell whether byte falls into the half-open range [0x40, 0x50)."""
  return byte >= 0x40 and byte < 0x50

35

36

def Cached(f):
  """Decorator that memoizes f on its positional arguments.

  The wrapper accepts positional arguments only, and they must be hashable
  because the argument tuple is used as a dictionary key. Remembered results
  are kept for the lifetime of the wrapper.
  """
  results = {}

  def CachedF(*args):
    # *args already arrives as a tuple, so it can serve as the key directly.
    if args in results:
      return results[args]
    value = f(*args)
    results[args] = value
    return value

  return CachedF

45

46

class AssemblerError(Exception):
  """Signals that a piece of assembly could not be assembled."""

49

50

@Cached
def Assemble(bitness, asm):
  """Assemble a snippet and return its machine code as a list of byte values.

  The snippet is piped to the assembler given by options.gas. Instead of
  parsing the object file properly, two distinct byte sequences (begin_mark
  and end_mark) are emitted around the code of interest and whatever lies
  between them in the object file is returned. The possibility that a marker
  occurs somewhere else in the file is neglected.

  Args:
    bitness: passed to the assembler as the '--<bitness>' flag.
    asm: assembly source text.

  Returns:
    List of integer byte values found between the two markers.

  Raises:
    AssemblerError: the assembler exited with a non-zero status.
  """
  begin_mark = 'begin mark>>>'
  end_mark = '<<<end mark'

  # Created before entering the try block: if creating the temporary file
  # fails, the cleanup code must not trip over an unbound obj_file.
  obj_file = tempfile.NamedTemporaryFile(
      mode='w+b',
      suffix='.o',
      delete=False)
  try:
    proc = subprocess.Popen(
        [options.gas,
         '--%s' % bitness,
         '-o', obj_file.name],
        stdin=subprocess.PIPE)

    asm_lines = ['.byte %d' % ord(c) for c in begin_mark]
    asm_lines.append('%s' % asm)
    asm_lines.extend('.byte %d' % ord(c) for c in end_mark)
    asm_content = '\n'.join(asm_lines) + '\n'

    proc.communicate(asm_content)
    return_code = proc.wait()
    if return_code != 0:
      raise AssemblerError("Can't assemble '%s'" % asm)

    data = obj_file.read()

    # Extract the data between begin_mark and end_mark.
    begin = data.find(begin_mark)
    assert begin != -1, 'begin_mark is missing'
    begin += len(begin_mark)
    end = data.find(end_mark, begin)
    assert end != -1, 'end_mark is missing'
    return [ord(c) for c in data[begin:end]]

  finally:
    # Close on every path (including assembler failure) so the handle does
    # not leak, then delete the file we asked tempfile not to delete.
    obj_file.close()
    os.remove(obj_file.name)

96

97

class OldValidator(object):
  """Feeds bundles to the old validator (ncval) in batches.

  Each bundle is queued together with a caller-supplied comment. Once 40
  bundles are queued they are handed to ncval in one run, and the comments
  of the bundles in which ncval reported an error are collected.
  """

  def __init__(self):
    self._errors = []
    self._bundles = []

  def Validate(self, bundle, comment):
    """Queue bundle (a list of byte values) tagged with comment."""
    self._bundles.append((bundle, comment))
    if len(self._bundles) == 40:
      self._Process()

  def _Process(self):
    """Run ncval over the queued bundles, record rejections, empty the queue."""
    all_bytes = [byte for instr, _ in self._bundles for byte in instr]
    hex_content = ' '.join('%02x' % byte for byte in all_bytes)
    hex_content = hex_content.replace('0x', '')

    assert len(hex_content) < 4096

    ncval_by_bitness = {32: options.ncval32, 64: options.ncval64}
    # The hex text is supplied to ncval on its standard input.
    proc = subprocess.Popen(
        [ncval_by_bitness[options.bitness],
         '--hex_text=-',
         '--max_errors=-1'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE)

    stdout, stderr = proc.communicate(hex_content)
    return_code = proc.wait()
    assert return_code == 0, (stdout, stderr)

    if '* <input> is safe *' in stdout:
      self._bundles = []
      return

    assert '* <input> IS UNSAFE *' in stdout

    # Output lines that carry no per-offset error information.
    noise = (
        '',
        '* <input> IS UNSAFE *',
        'Some instructions were replaced with HLTs.')

    rejected = set()
    for raw_line in stdout.split('\n'):
      line = raw_line.strip()
      if line in noise:
        continue
      if line.startswith(
          'VALIDATOR: Checking block alignment and jump targets'):
        continue
      m = re.match(r'VALIDATOR: ([0-9a-f]+): (.*)$', line, re.IGNORECASE)
      assert m is not None, (line, hex_content)
      # Map the reported hexadecimal offset to the index of its bundle.
      rejected.add(int(m.group(1), 16) // validator.BUNDLE_SIZE)

    assert len(rejected) != 0
    for index in sorted(rejected):
      self._errors.append(self._bundles[index][1])

    self._bundles = []

  def GetErrors(self):
    """Process whatever is still queued and return the collected comments."""
    if self._bundles:
      self._Process()
    return self._errors

160

161

def CheckFinalRestrictedRegister(
    sandboxing,
    instruction,
    disassembly,
    old_validator):
  """Cross-check the restricted register left behind by instruction.

  The bytes of sandboxing followed by instruction are padded with NOPs to a
  full bundle and validated with the new validator, which must accept them.
  The restricted register reported for the instruction that starts right
  after sandboxing is extracted. If there is one, a memory access based on
  %r15 and indexed by that register is appended; the new validator must
  accept the result, which is then queued for the old validator as well.

  Args:
    sandboxing: list of byte values placed before the instruction.
    instruction: list of byte values of the instruction under test.
    disassembly: text describing the bundle, used in messages.
    old_validator: object whose Validate(bundle, comment) receives the
        extended bundle.
  """
  def PadToBundle(code):
    assert len(code) <= validator.BUNDLE_SIZE
    return code + [NOP] * (validator.BUNDLE_SIZE - len(code))

  padded = PadToBundle(sandboxing + instruction)
  instruction_start = len(sandboxing)

  # A mutable cell, so that the nested callback can store its finding.
  seen = {'register': None}

  def Callback(begin, end, info):
    if begin == instruction_start:
      assert end == instruction_start + len(instruction)
      seen['register'] = (
          (info & validator.RESTRICTED_REGISTER_MASK) >>
          validator.RESTRICTED_REGISTER_SHIFT)
    elif begin > instruction_start:
      # Everything after the instruction is padding.
      assert padded[begin:end] == [NOP]

  result = validator_inst.ValidateChunk(
      ''.join(chr(b) for b in padded),
      bitness=options.bitness,
      callback=Callback,
      on_each_instruction=True)
  assert result, (disassembly, map(hex, padded))

  final_restricted_register = seen['register']
  if final_restricted_register == validator.NO_REG:
    final_restricted_register = None

  assert final_restricted_register != validator.REG_R15, (
      'restricted register can not be r15')

  if final_restricted_register is None:
    return

  register_name = validator.REGISTER_NAMES[final_restricted_register]
  memory_reference = 'mov (%%r15, %s), %%al' % register_name
  bundle = PadToBundle(
      sandboxing + instruction + Assemble(64, memory_reference))

  assert validator_inst.ValidateChunk(
      ''.join(chr(b) for b in bundle),
      bitness=options.bitness), (bundle, disassembly, memory_reference)

  old_validator.Validate(
      bundle,
      (disassembly + '; ' + memory_reference, instruction))

210

211

def ValidateInstruction(
    instruction,
    disassembly,
    old_validator):
  """Validate one instruction with the new validator and queue it for the old.

  The instruction is padded with NOPs to a full bundle. Whenever the new
  validator accepts the bundle it is also queued for the old validator.
  When options.bitness is not 32, the final restricted register is checked
  as well, and the instruction is retried with every register except r15
  restricted beforehand.

  Args:
    instruction: list of byte values.
    disassembly: disassembly text of the instruction.
    old_validator: object whose Validate(bundle, comment) collects bundles.

  Returns:
    The new validator's verdict for the plain bundle, or True if the
    instruction was accepted with at least one restricted register.
  """
  assert len(instruction) <= validator.BUNDLE_SIZE
  padding = [NOP] * (validator.BUNDLE_SIZE - len(instruction))
  bundle = instruction + padding
  chunk = ''.join(map(chr, bundle))

  result = validator_inst.ValidateChunk(chunk, bitness=options.bitness)
  if result:
    old_validator.Validate(bundle, (disassembly, instruction))

  if options.bitness == 32:
    return result

  if result:
    CheckFinalRestrictedRegister([], instruction, disassembly, old_validator)

  # Additionally, we try to restrict all possible
  # registers and check whether instruction would be accepted.
  for register, register_name in validator.REGISTER_NAMES.items():
    if register == validator.REG_R15:
      continue

    accepted = validator_inst.ValidateChunk(
        chunk,
        bitness=options.bitness,
        restricted_register=register)
    if not accepted:
      continue

    # Derive the 32-bit register name:
    #   %r8 -> %r8d
    #   %rax -> %eax
    if re.match(r'%r\d+$', register_name):
      register_name += 'd'
    else:
      assert register_name.startswith('%r')
      register_name = '%e' + register_name[2:]

    sandboxing = 'mov %%eax, %s' % register_name
    CheckFinalRestrictedRegister(
        Assemble(64, sandboxing),
        instruction,
        sandboxing + '; ' + disassembly,
        old_validator)
    result = True

  return result

264

265

class WorkerState(object):
  """Collects instructions from DFA traversal and checks them in batches.

  Instructions (lists of byte values) are accumulated by ReceiveInstruction.
  A batch is written to a temporary file and disassembled with objdump; the
  bytes objdump decodes must match each instruction exactly, and every
  instruction is then passed to ValidateInstruction.

  Attributes:
    total_instructions: number of instructions checked so far.
    num_valid: running sum of the values returned by ValidateInstruction.
    errors: comments collected from the old validator for rejected bundles.
  """

  def __init__(self, prefix):
    self.total_instructions = 0
    self.num_valid = 0
    self._file_prefix = 'check_validator_%s_' % '_'.join(map(hex, prefix))
    self._instructions = []
    self.errors = []

  def ReceiveInstruction(self, bytes):
    """Accumulate one instruction, checking the batch once it grows large."""
    self._instructions.append(bytes)

    # Objdump prints crazy stuff when x87 instructions are prefixed with
    # fwait (especially when REX prefixes are involved). To avoid that,
    # we insert nops after each fwait.
    if (bytes == [FWAIT] or
        len(bytes) == 2 and IsRexPrefix(bytes[0]) and bytes[1] == FWAIT):
      self._instructions.append([NOP])

    if len(self._instructions) >= 1000000:
      self.CheckReceivedInstructions()

  def CheckReceivedInstructions(self):
    """Check instructions accumulated so far and clear the list."""
    if len(self._instructions) == 0:
      return

    # Created before entering the try block: if creating the temporary file
    # fails, the cleanup code must not trip over an unbound raw_file.
    raw_file = tempfile.NamedTemporaryFile(
        mode='wb',
        prefix=self._file_prefix,
        suffix='.o',
        delete=False)
    try:
      for instr in self._instructions:
        raw_file.write(''.join(map(chr, instr)))
      raw_file.close()

      objdump_proc = subprocess.Popen(
          [options.objdump,
           '--disassemble-all', '--disassemble-zeroes',
           '-b', 'binary',
           '-m', 'i386'] +
          {32: [], 64: ['-M', 'x86-64']}[options.bitness] +
          ['--insn-width', '15',
           raw_file.name],
          stdout=subprocess.PIPE)

      objdump_iter = iter(objdump_parser.SkipHeader(objdump_proc.stdout))

      old_validator = OldValidator()
      for instr in self._instructions:
        # Objdump prints fwait with REX prefix in this ridiculous way:
        #   0: 41 fwait
        #   1: 9b fwait
        # So in such cases we expect two lines from objdump.
        # TODO(shcherbina): get rid of this special handling once
        # https://code.google.com/p/nativeclient/issues/detail?id=3496 is fixed.
        if len(instr) == 2 and IsRexPrefix(instr[0]) and instr[1] == FWAIT:
          expected_lines = 2
        else:
          expected_lines = 1

        decoded = []
        for _ in range(expected_lines):
          line = next(objdump_iter)
          # Parse tab-separated line of the form
          # 0: f2 40 0f 10 00 rex movsd (%rax),%xmm0
          _, more_bytes, disassembly = line.strip().split('\t')
          decoded += [int(b, 16) for b in more_bytes.split()]

        assert decoded == instr, (map(hex, decoded), map(hex, instr))
        self.total_instructions += 1

        self.num_valid += ValidateInstruction(instr, disassembly, old_validator)

      # Make sure we read objdump output to the end.
      end = next(objdump_iter, None)
      assert end is None, end

      return_code = objdump_proc.wait()
      assert return_code == 0

    finally:
      # close() is harmless if the file was already closed above; it only
      # matters when writing the instructions failed half-way.
      raw_file.close()
      os.remove(raw_file.name)

    errors = old_validator.GetErrors()
    for error in errors:
      print(error)
    self.errors += errors

    # Clearing here, rather than in the callers, guarantees that no batch
    # can ever be checked twice.
    self._instructions = []

355

356

def Worker(task):
  """Traverse one DFA subtree and check every instruction found in it.

  Args:
    task: a (prefix, state_index) pair. prefix is the sequence of byte
        values handed to the traversal and state_index selects the entry
        of dfa.states at which the traversal starts.

  Returns:
    Tuple (prefix, total_instructions, num_valid, errors) taken from the
    WorkerState that received the instructions.
  """
  # Unpacked in the body rather than in the signature: tuple parameters
  # are not valid syntax in Python 3.
  prefix, state_index = task
  worker_state = WorkerState(prefix)

  try:
    dfa_traversal.TraverseTree(
        dfa.states[state_index],
        final_callback=worker_state.ReceiveInstruction,
        prefix=prefix,
        anyfield=0)
    worker_state.CheckReceivedInstructions()
  except Exception:
    traceback.print_exc()  # because multiprocessing imap swallows traceback
    raise

  return (
      prefix,
      worker_state.total_instructions,
      worker_state.num_valid,
      worker_state.errors)

376

377

def ParseOptions():
  """Parse and check the command line (taken from sys.argv).

  Returns:
    Tuple (options, xml_file): the optparse values object and the single
    positional argument.

  Exits the process through parser.error() when a required option or the
  positional argument is missing, and with status 1 when one of the old
  validator binaries does not exist.
  """
  parser = optparse.OptionParser(usage='%prog [options] xmlfile')

  parser.add_option('--bitness',
                    type=int,
                    help='The subarchitecture: 32 or 64')
  parser.add_option('--gas',
                    help='Path to GNU AS executable')
  parser.add_option('--objdump',
                    help='Path to objdump executable')
  parser.add_option('--validator_dll',
                    help='Path to librdfa_validator_dll')
  parser.add_option('--ncval32',
                    help='Path to old 32-bit ncval')
  parser.add_option('--ncval64',
                    help='Path to old 64-bit ncval')
  parser.add_option('--errors',
                    help='Where to save errors')

  options, args = parser.parse_args()

  if options.bitness not in [32, 64]:
    # The option has no short form, so the hint names the real flag.
    parser.error('specify --bitness 32 or --bitness 64')

  if not (options.gas and options.objdump and options.validator_dll):
    parser.error('specify path to gas, objdump, and validator_dll')

  if not (options.ncval32 and options.ncval64):
    parser.error('specify path to old validator (32-bit and 64-bit versions)')

  if not options.errors:
    parser.error('specify file to save errors to')

  for ncval, platform in ((options.ncval32, 'x86-32'),
                          (options.ncval64, 'x86-64')):
    if not os.path.exists(ncval):
      print('%s not found (try ./scons ncval platform=%s)' % (ncval, platform))
      sys.exit(1)

  if len(args) != 1:
    parser.error('specify one xml file')

  (xml_file,) = args

  return options, xml_file

424

425

# The command line and the DFA are processed at module level on purpose:
# this shares the state graph between the workers spawned by
# multiprocessing, whereas passing it along every time is slow.
options, xml_file = ParseOptions()
dfa = dfa_parser.ParseXml(xml_file)

validator_inst = validator.Validator(validator_dll=options.validator_dll)

432

433

def main():
  """Check all regular instructions and write the disagreements to a file.

  The DFA is split into traversal tasks that are distributed over a pool of
  worker processes. Progress and totals are printed, and the instructions
  accepted by the new validator but rejected by the old one are written,
  sorted, to the file named by options.errors.
  """
  assert dfa.initial_state.is_accepting
  assert not dfa.initial_state.any_byte

  print('%s states' % len(dfa.states))

  num_suffixes = dfa_traversal.GetNumSuffixes(dfa.initial_state)

  # We can't just write 'num_suffixes[dfa.initial_state]' because
  # initial state is accepting.
  total_instructions = sum(
      num_suffixes[t.to_state]
      for t in dfa.initial_state.forward_transitions.values())
  print('%s regular instructions total' % total_instructions)

  tasks = dfa_traversal.CreateTraversalTasks(dfa.states, dfa.initial_state)
  print('%s tasks' % len(tasks))

  total = 0
  num_valid = 0
  errors = []

  pool = multiprocessing.Pool()
  try:
    for prefix, count, valid_count, more_errors in pool.imap(Worker, tasks):
      print(', '.join(map(hex, prefix)))
      total += count
      num_valid += valid_count
      errors += more_errors
  finally:
    # Do not leave worker processes behind. On success every result has
    # been consumed already; on failure the remaining work is pointless.
    pool.terminate()
    pool.join()

  print('%s instructions were processed' % total)
  print('%s valid instructions' % num_valid)

  print('%s errors' % len(errors))

  errors.sort()
  with open(options.errors, 'w') as errors_file:
    errors_file.write(
        'Instructions accepted by new validator but rejected by old one:\n')
    for disassembly, instruction in errors:
      hex_bytes = ' '.join('%02x' % byte for byte in instruction)
      errors_file.write('%-50s %s\n' % (disassembly, hex_bytes))


if __name__ == '__main__':
  main()

OLD	NEW

« no previous file with comments | « src/trusted/validator_ragel/docs/testing.html ('k') | src/trusted/validator_x86/build.scons » ('j') | no next file with comments »