| Index: src/trusted/validator/x86/testing/tf/asm.py
|
| diff --git a/src/trusted/validator/x86/testing/tf/asm.py b/src/trusted/validator/x86/testing/tf/asm.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..3b2efb65820c8d286117e7bf054ec42517d4b43d
|
| --- /dev/null
|
| +++ b/src/trusted/validator/x86/testing/tf/asm.py
|
| @@ -0,0 +1,121 @@
|
| +# Copyright (c) 2012 The Native Client Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +import re
|
| +import subprocess
|
| +
|
| +import utils
|
| +
|
| +
|
class AssemblerError(Exception):
  """Raised when the external assembler fails to assemble the given source."""
|
| +
|
| +
|
def Assemble(bits, asm):
  """Assemble a snippet with 'as' and return the resulting machine code.

  Instead of parsing the object file properly, two distinctive byte sequences
  (a begin mark and an end mark) are emitted around the code of interest, and
  whatever lies between them in the assembler's output file is returned. The
  possibility that either mark occurs somewhere else in the file is neglected.

  Args:
    bits: Bitness; passed to the assembler as the '--<bits>' option.
    asm: Assembler source text placed between the two marks.

  Returns:
    The bytes found between the begin mark and the end mark in the output
    file produced by 'as'.

  Raises:
    AssemblerError: If 'as' exits with a non-zero status.
    AssertionError: If either mark cannot be found in the output file.
  """
  # The marks are byte strings because they are searched for in the object
  # file, which is read back in binary mode.
  begin_mark = b'begin mark qwert23904!>>>'
  end_mark = b'<<<end mark qwe213908!!'

  with utils.TempFile(mode='w') as asm_file:
    with utils.TempFile(mode='w+b') as out_file:

      # Iterating over a bytearray yields integer byte values on both
      # Python 2 and Python 3.
      for byte in bytearray(begin_mark):
        asm_file.write('.byte %d\n' % byte)
      asm_file.write('%s\n' % asm)
      for byte in bytearray(end_mark):
        asm_file.write('.byte %d\n' % byte)
      # Make sure the source is on disk before the assembler reads it.
      asm_file.flush()

      # TODO(shcherbina): deal somehow with the fact that 'as' is only
      # available on Linux.
      result = subprocess.call([
          'as',
          '--%s' % bits,
          asm_file.name,
          '-o',
          out_file.name,
      ])
      if result != 0:
        raise AssemblerError("Can't assemble '%s'" % asm)

      data = out_file.read()

  # Extract the data between begin_mark and end_mark.
  # Explicit raises (rather than assert statements) keep these checks active
  # even when Python runs with optimizations enabled.
  begin = data.find(begin_mark)
  if begin == -1:
    raise AssertionError('begin_mark is missing')
  begin += len(begin_mark)
  end = data.find(end_mark, begin)
  if end == -1:
    raise AssertionError('end_mark is missing')
  return data[begin:end]
|
| +
|
| +
|
def Disassemble(bits, data):
  """Disassemble raw machine code with objdump.

  Args:
    bits: Bitness (32 or 64); selects the objdump '-M' option.
    data: Machine code to disassemble; written verbatim to a temporary file.

  Returns:
    The instructions extracted from objdump's output, joined with '; '.

  Raises:
    ValueError: If bits is neither 32 nor 64.
  """
  # Validate before doing any I/O, so an unsupported bitness is reported
  # clearly instead of surfacing later as an unbound local variable.
  arch_options = {32: '-Mi386', 64: '-Mx86-64'}
  if bits not in arch_options:
    raise ValueError('Unsupported bitness: %r (expected 32 or 64)' % (bits,))
  arch = arch_options[bits]

  with utils.TempFile(mode='wb') as binary_file:
    binary_file.write(data)
    # Make sure the bytes are on disk before objdump reads the file.
    binary_file.flush()

    # TODO(shcherbina): objdump would only be available on Linux
    output = utils.CheckOutput([
        'objdump', '-mi386', arch, '-D', '-b', 'binary', binary_file.name])

  # Parse disassembler output in the form
  #    0:  66 0f be 04 10        movsbw (%eax,%edx,1),%ax
  # and extract instruction ('movsbw (%eax,%edx,1),%ax' in this case).
  line_re = re.compile(r'\s*[0-9a-f]+:\s([0-9a-f]{2}\s)+\s*(.*)$',
                       re.IGNORECASE)

  result = []
  for line in output.split('\n'):
    m = line_re.match(line)
    if m is None:
      # Not an instruction line (header, blank line, etc.).
      continue
    # NOTE(review): a line that carries only hex bytes and no mnemonic would
    # contribute an empty string here -- confirm whether objdump emits such
    # lines for long instructions and whether callers expect that.
    result.append(m.group(2).strip())

  return '; '.join(result)
|
| +
|
| +
|
def DisassembleReversibly(bits, data):
  """Try to disassemble in such a way that the assembler returns the input.

  Try to find such `a` that Assemble(bits, a) == data. Sometimes it's
  impossible, for instance when instruction prefixes come in an
  unconventional order (in these cases None is returned).

  Args:
    bits: Bitness (32 or 64).
    data: String that is treated as machine code.

  Returns:
    Assembler instruction (or semicolon-separated instructions)
    as a string or None.
  """

  a = Disassemble(bits, data)

  # Dirty hack to use relative offset in jumps.
  # We take instruction like
  #   jmp 0x05
  # and convert it to equivalent relative form
  #   jmp .+5
  m = re.match(r'(callq?|j..?)\s+0x([0-9a-f]+)$', a, re.IGNORECASE)
  if m is not None:
    offset = int(m.group(2), 16)
    # Make it signed. The printed target is interpreted as a two's-complement
    # number of the current bitness, so that a backward target shown as a
    # large unsigned value (32 or 64 bits wide) becomes a negative offset.
    if offset >= 2 ** (bits - 1):
      offset -= 2 ** bits
    a = '%s .%+d' % (m.group(1), offset)

  try:
    if Assemble(bits, a) == data:
      return a
  except AssemblerError:
    # Text that does not assemble simply means there is no reversible form.
    pass

  return None
|
|
|