| Index: tools/valgrind/asan/third_party/asan_symbolize.py
|
| diff --git a/tools/valgrind/asan/third_party/asan_symbolize.py b/tools/valgrind/asan/third_party/asan_symbolize.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..59fceaaed814c031fc08ea301f146cfe862a3e8a
|
| --- /dev/null
|
| +++ b/tools/valgrind/asan/third_party/asan_symbolize.py
|
| @@ -0,0 +1,479 @@
|
| +#!/usr/bin/env python
|
| +#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
|
| +#
|
| +# The LLVM Compiler Infrastructure
|
| +#
|
| +# This file is distributed under the University of Illinois Open Source
|
| +# License. See LICENSE.TXT for details.
|
| +#
|
| +#===------------------------------------------------------------------------===#
|
| +import argparse
|
| +import bisect
|
| +import getopt
|
| +import os
|
| +import re
|
| +import subprocess
|
| +import sys
|
| +
|
# Module-wide state. The command-line handling at the bottom of the file
# overwrites most of these before symbolization starts.

# Cache of ChainSymbolizer objects, keyed by binary path.
symbolizers = {}
# When true, symbolizer command lines and their inputs are echoed to stdout.
DEBUG = False
# Whether the external symbolizers are asked to demangle function names.
demangle = False
# Optional prefix prepended to the 'addr2line' tool name.
binutils_prefix = None
# Optional path prepended to binary names by sysroot_path_filter().
sysroot_path = None
# Optional callable applied to each binary name before symbolization.
binary_name_filter = None
# Optional list of regex patterns that fix_filename() cuts from file names.
fix_filename_patterns = None
# File object the log is read from.
logfile = sys.stdin
|
| +
|
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Return file_name with uninteresting prefixes and locations rewritten.

    Every pattern in the module-level fix_filename_patterns (when set) is
    removed together with everything preceding it. Locations inside
    asan_*.cc files collapse to '_asan_rtl_', and 'crtstuff.c:0' becomes
    '???:0'.
    """
    for prefix_pattern in fix_filename_patterns or ():
        file_name = re.sub('.*' + prefix_pattern, '', file_name)
    fixed_rewrites = (
        ('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_'),
        ('.*crtstuff.c:0', '???:0'),
    )
    for pattern, replacement in fixed_rewrites:
        file_name = re.sub(pattern, replacement, file_name)
    return file_name
|
| +
|
def sysroot_path_filter(binary_name):
    """Return binary_name prefixed with the module-level sysroot_path."""
    relocated_name = sysroot_path + binary_name
    return relocated_name
|
| +
|
def guess_arch(addr):
    """Guess the architecture from the textual width of an address.

    Args:
      addr: address string such as '0x7f6e35cf2e45'.
    Returns:
      'x86_64' when addr is longer than 10 characters ('0x' plus 8 hex
      digits), 'i386' otherwise.
    """
    widest_32bit_addr = len('0x') + 8
    return 'x86_64' if len(addr) > widest_32bit_addr else 'i386'
|
| +
|
class Symbolizer(object):
    """Common interface of all symbolizers; this base never resolves anything."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers). The base implementation
            always returns None.
        """
        return None
|
| +
|
| +
|
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to an llvm-symbolizer child process over pipes."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Remember the configuration and start the child process.

        Args:
            symbolizer_path: name or path of the llvm-symbolizer executable.
            default_arch: value passed as --default-arch.
            system: OS name; on 'Darwin' the dsym hints are forwarded.
            dsym_hints: optional iterable of --dsym-hint values.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # Use None as the default and create the list here: a literal []
        # default would be one object shared by every instance.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start llvm-symbolizer; return the Popen object or None on OSError."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=short',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            cmd.extend('--dsym-hint=%s' % hint for hint in self.dsym_hints)
        if DEBUG:
            print(' '.join(cmd))
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        except OSError:
            # The executable could not be started; symbolize() then returns
            # None so that the next symbolizer in the chain is consulted.
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns a list with one '<addr> in <function> <file>' string per
        reported frame, or None when the child is unavailable, fails, or
        reports only unknown ('??') frames.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            # Harmless on an unbuffered pipe; ensures the request is sent
            # if the pipe is buffered.
            self.pipe.stdin.flush()
            # The reply is read as (function name, file name) line pairs
            # until an empty line is seen.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure means "not symbolized by this tool".
            result = []
        return result or None
|
| +
|
| +
|
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the executable via the environment.

    The path is taken from LLVM_SYMBOLIZER_PATH, then ASAN_SYMBOLIZER_PATH;
    when neither is set, 'llvm-symbolizer' is assumed to be in PATH.

    Args:
        system: OS name forwarded to LLVMSymbolizer.
        default_arch: default architecture forwarded to LLVMSymbolizer.
        dsym_hints: optional iterable of dsym hints; defaults to no hints.
    """
    if dsym_hints is None:
        # Build a new list per call instead of sharing one default object.
        dsym_hints = []
    symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                       os.getenv('ASAN_SYMBOLIZER_PATH') or
                       # Assume llvm-symbolizer is in PATH.
                       'llvm-symbolizer')
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
|
| +
|
| +
|
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that drives one addr2line child process for one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start '<binutils_prefix>addr2line -f [--demangle] -e <binary>'."""
        tool = 'addr2line'
        if binutils_prefix:
            tool = binutils_prefix + tool
        cmd = [tool, '-f']
        if demangle:
            cmd.append('--demangle')
        cmd.extend(['-e', self.binary])
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a binary other than the one this object was created
        for; otherwise always returns a one-element list, with empty function
        and file names if talking to addr2line failed.
        """
        if binary != self.binary:
            return None
        try:
            self.pipe.stdin.write('%s\n' % (offset,))
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            function_name = ''
            file_name = ''
        location = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, location)]
|
| +
|
| +
|
class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output. Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork a child on a pty and exec args in it.

        Args:
            args: command line of the child, args[0] being the program.
            close_stderr: when true, the child's fd 2 is replaced with
                /dev/null before exec.
        """
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        child_pid, master_fd = pty.fork()
        if child_pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                null_fd = os.open('/dev/null', 0)
                os.dup2(null_fd, 2)
            os.execvp(args[0], args)
        else:
            # Disable echoing (index 3 of the attribute list holds the
            # local-mode flags).
            tty_attrs = termios.tcgetattr(master_fd)
            tty_attrs[3] &= ~termios.ECHO
            termios.tcsetattr(master_fd, termios.TCSANOW, tty_attrs)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(master_fd, "r", 1)
            self.w = os.fdopen(os.dup(master_fd), "w", 1)

    def convert(self, line):
        """Send one line to the child and return its next line of output."""
        request = line + "\n"
        self.w.write(request)
        return self.readline()

    def readline(self):
        """Read one line from the child, with trailing whitespace removed."""
        reply = self.r.readline()
        return reply.rstrip()
|
| +
|
| +
|
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that queries an 'atos' child process for one binary."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Start 'atos -o <binary> -arch <arch>' behind a line converter."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        self.atos = UnbufferedLineConverter(
            ['atos', '-o', self.binary, '-arch', self.arch],
            close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a binary other than the one this object serves,
        otherwise a one-element list built from the atos reply.
        """
        if binary != self.binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip lines containing the "got symbolicator for" banner.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print(' '.join(['atos_line: ', atos_line]))
        if not match:
            # Unrecognized reply: pass it through verbatim.
            return ['%s in %s' % (addr, atos_line)]
        # Drop the parenthesized argument list from the function name.
        function_name = re.sub(r'\(.*?\)', '', match.group(1))
        file_name = fix_filename(match.group(3))
        return ['%s in %s %s' % (addr, function_name, file_name)]
|
| +
|
| +
|
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries each symbolizer of a list in order until one yields a result."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns the first truthy result produced by a member of the chain,
        or None when no member produces one. Falsy entries in the list
        (e.g. None) are skipped.
        """
        for candidate in self.symbolizer_list:
            if not candidate:
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add symbolizer to the end of the chain."""
        self.symbolizer_list.append(symbolizer)
|
| +
|
| +
|
def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for binary, or None if unavailable.

    The symbol file name is binary plus the value of the BREAKPAD_SUFFIX
    environment variable. None is returned when the variable is unset or
    empty, or when that file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        return None
    filename = binary + suffix
    if not os.access(filename, os.F_OK):
        return None
    return BreakpadSymbolizer(filename)
|
| +
|
| +
|
def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform's native symbolizer for binary.

    Args:
        system: OS name as reported by os.uname()[0].
        addr: an address from the binary, used on Darwin to guess the arch.
        binary: path of the binary to symbolize.
    Returns:
        A DarwinSymbolizer on 'Darwin', an Addr2LineSymbolizer on 'Linux'
        and 'FreeBSD', and None for any other system.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    # SymbolizationLoop accepts FreeBSD as a supported system, so it needs a
    # fallback symbolizer here as well; addr2line is used as on Linux.
    if system in ('Linux', 'FreeBSD'):
        return Addr2LineSymbolizer(binary)
    return None
|
| +
|
| +
|
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad symbol file loaded in memory."""

    def __init__(self, filename):
        """Load and index the Breakpad symbol file at filename."""
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Close the file as soon as it has been read.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        # Source file names, indexed by their FILE record number.
        self.files = []
        # Function/public symbol names keyed by start address.
        self.symbols = {}
        # Sorted start addresses of all line records.
        self.address_list = []
        # Line record start address ->
        #   (function address, size, line number, file number).
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the records that follow the MODULE line."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Tolerate blank lines instead of failing on fragments[0].
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK', 'INFO', 'INLINE', 'INLINE_ORIGIN'):
                # Records that carry nothing needed for address lookup.
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A PUBLIC name already recorded for this address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, file name, line number) covering addr, or None."""
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record that starts below addr.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None when binary is not the module named in the symbol file
        or when no line record covers offset.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        if DEBUG:
            # Diagnostic only; the caller prints the symbolized frames.
            print(result)
        return result
|
| +
|
| +
|
class SymbolizationLoop(object):
    """Reads a log line by line and replaces stack frames with symbolized ones."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """Select the line handler for the current platform.

        Args:
            binary_name_filter: optional callable mapping a binary name from
                the log to the name that should be symbolized.
            dsym_hint_producer: optional callable returning dsym hints for a
                binary; consulted on Darwin only.
        Raises:
            Exception: on a non-Windows system other than Linux, Darwin or
                FreeBSD.
        """
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done
            # here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set()
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset):
        """Return the symbolized frames for one address of binary."""
        # For a binary not seen before, pick the llvm-symbolizer to use.
        # On Darwin, if the dsym hint producer is present:
        #  1. collect the hints for this binary;
        #  2. if all of them are already known and a symbolizer exists,
        #     reuse |last_llvm_symbolizer|, which was started with the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        # In every other case a new symbolizer is created for the binary.
        if binary not in self.llvm_symbolizers:
            use_new_symbolizer = True
            if self.system == 'Darwin' and self.dsym_hint_producer:
                dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                use_new_symbolizer = bool(
                    dsym_hints_for_binary - self.dsym_hints)
                self.dsym_hints |= dsym_hints_for_binary
            if not (self.last_llvm_symbolizer and not use_new_symbolizer):
                self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                    self.system, guess_arch(addr), self.dsym_hints)
            self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary),
                 self.llvm_symbolizers[binary]])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format symbolized frames, numbering them from self.frame_no.

        Returns the current log line unchanged when there are no frames.
        """
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Process every line of the module-level logfile and print it."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize line if it is a stack frame, else return it as is."""
        self.current_line = line.rstrip()
        #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line and original_binary != binary:
            # The filtered name gave nothing: retry with the name exactly as
            # it appears in the log (previously the filtered name was
            # retried, which could not change the outcome).
            symbolized_line = self.symbolize_address(
                addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)
|
| +
|
| +
|
if __name__ == '__main__':
    # Command-line entry point: parse the options into the module-level
    # settings, then symbolize the chosen log.
    usage_example = (
        'Example of use:\n'
        'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
        '-s "$HOME/SymbolFiles" < asan.log')
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog=usage_example)
    parser.add_argument(
        'path_to_cut', nargs='*',
        help='pattern to be cut from the result file path ')
    parser.add_argument(
        '-d', '--demangle', action='store_true',
        help='demangle function names')
    parser.add_argument(
        '-s', metavar='SYSROOT',
        help='set path to sysroot for sanitized binaries')
    parser.add_argument(
        '-c', metavar='CROSS_COMPILE',
        help='set prefix for binutils')
    parser.add_argument(
        '-l', '--logfile', default=sys.stdin,
        type=argparse.FileType('r'),
        help='set log file name to parse, default is stdin')
    args = parser.parse_args()

    # Each setting is overridden only when its option was actually given.
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    if args.demangle:
        demangle = True
    if args.s:
        sysroot_path = args.s
        binary_name_filter = sysroot_path_filter
    if args.c:
        binutils_prefix = args.c
    logfile = args.logfile or sys.stdin

    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()
|
|
|