Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(242)

Unified Diff: tools/valgrind/asan/third_party/asan_symbolize.py

Issue 10987049: Move the symbolization code to tools/valgrind/asan/asan_symbolize_trunk.py (it's now a copy of the … (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/valgrind/asan/third_party/__init__.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/valgrind/asan/third_party/asan_symbolize.py
===================================================================
--- tools/valgrind/asan/third_party/asan_symbolize.py (revision 0)
+++ tools/valgrind/asan/third_party/asan_symbolize.py (revision 0)
@@ -0,0 +1,389 @@
+#!/usr/bin/env python
+#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+import bisect
+import os
+import re
+import subprocess
+import sys
+
+llvm_symbolizer = None
+symbolizers = {}
+filetypes = {}
+vmaddrs = {}
+DEBUG = False
+
+
+# FIXME: merge the code that calls fix_filename().
+def fix_filename(file_name):
+ for path_to_cut in sys.argv[1:]:
+ file_name = re.sub('.*' + path_to_cut, '', file_name)
+ file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
+ file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
+ return file_name
+
+
+class Symbolizer(object):
+ def __init__(self):
+ pass
+
+ def symbolize(self, addr, binary, offset):
+ """Symbolize the given address (pair of binary and offset).
+
+ Overriden in subclasses.
+ Args:
+ addr: virtual address of an instruction.
+ binary: path to executable/shared object containing this instruction.
+ offset: instruction offset in the @binary.
+ Returns:
+ list of strings (one string for each inlined frame) describing
+ the code locations for this instruction (that is, function name, file
+ name, line and column numbers).
+ """
+ return None
+
+
+class LLVMSymbolizer(Symbolizer):
+ def __init__(self, symbolizer_path):
+ super(LLVMSymbolizer, self).__init__()
+ self.symbolizer_path = symbolizer_path
+ self.pipe = self.open_llvm_symbolizer()
+
+ def open_llvm_symbolizer(self):
+ if not os.path.exists(self.symbolizer_path):
+ return None
+ cmd = [self.symbolizer_path,
+ '--use-symbol-table=true',
+ '--demangle=false',
+ '--functions=true',
+ '--inlining=true']
+ if DEBUG:
+ print ' '.join(cmd)
+ return subprocess.Popen(cmd, stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE)
+
+ def symbolize(self, addr, binary, offset):
+ """Overrides Symbolizer.symbolize."""
+ if not self.pipe:
+ return None
+ result = []
+ try:
+ symbolizer_input = '%s %s' % (binary, offset)
+ if DEBUG:
+ print symbolizer_input
+ print >> self.pipe.stdin, symbolizer_input
+ while True:
+ function_name = self.pipe.stdout.readline().rstrip()
+ if not function_name:
+ break
+ file_name = self.pipe.stdout.readline().rstrip()
+ file_name = fix_filename(file_name)
+ if (not function_name.startswith('??') and
+ not file_name.startswith('??')):
+ # Append only valid frames.
+ result.append('%s in %s %s' % (addr, function_name,
+ file_name))
+ except Exception:
+ result = []
+ if not result:
+ result = None
+ return result
+
+
+def LLVMSymbolizerFactory(system):
+ if system == 'Linux':
+ symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
+ if not symbolizer_path:
+ # Assume llvm-symbolizer is in PATH.
+ symbolizer_path = 'llvm-symbolizer'
+ return LLVMSymbolizer(symbolizer_path)
+ return None
+
+
+class Addr2LineSymbolizer(Symbolizer):
+ def __init__(self, binary):
+ super(Addr2LineSymbolizer, self).__init__()
+ self.binary = binary
+ self.pipe = self.open_addr2line()
+
+ def open_addr2line(self):
+ cmd = ['addr2line', '-f', '-e', self.binary]
+ if DEBUG:
+ print ' '.join(cmd)
+ return subprocess.Popen(cmd,
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+
+ def symbolize(self, addr, binary, offset):
+ """Overrides Symbolizer.symbolize."""
+ if self.binary != binary:
+ return None
+ try:
+ print >> self.pipe.stdin, offset
+ function_name = self.pipe.stdout.readline().rstrip()
+ file_name = self.pipe.stdout.readline().rstrip()
+ except Exception:
+ function_name = ''
+ file_name = ''
+ file_name = fix_filename(file_name)
+ return ['%s in %s %s' % (addr, function_name, file_name)]
+
+
+class DarwinSymbolizer(Symbolizer):
+ def __init__(self, addr, binary):
+ super(DarwinSymbolizer, self).__init__()
+ self.binary = binary
+ # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
+ if len(addr) > 10:
+ self.arch = 'x86_64'
+ else:
+ self.arch = 'i386'
+ self.vmaddr = None
+ self.pipe = None
+
+ def get_binary_vmaddr(self):
+ """Get the slide value to be added to the address.
+
+ We're looking for the following piece in otool -l output:
+ Load command 0
+ cmd LC_SEGMENT
+ cmdsize 736
+ segname __TEXT
+ vmaddr 0x00000000
+ """
+ if self.vmaddr:
+ return self.vmaddr
+ cmdline = ['otool', '-l', self.binary]
+ pipe = subprocess.Popen(cmdline,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE)
+ is_text = False
+ vmaddr = 0
+ for line in pipe.stdout:
+ line = line.strip()
+ if line.startswith('segname'):
+ is_text = (line == 'segname __TEXT')
+ continue
+ if line.startswith('vmaddr') and is_text:
+ sv = line.split(' ')
+ vmaddr = int(sv[-1], 16)
+ break
+ self.vmaddr = vmaddr
+ return self.vmaddr
+
+ def write_addr_to_pipe(self, offset):
+ slide = self.get_binary_vmaddr()
+ print >> self.pipe.stdin, '0x%x' % (int(offset, 16) + slide)
+
+ def open_atos(self):
+ if DEBUG:
+ print 'atos -o %s -arch %s' % (self.binary, self.arch)
+ cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
+ self.pipe = subprocess.Popen(cmdline,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ def symbolize(self, addr, binary, offset):
+ """Overrides Symbolizer.symbolize."""
+ if self.binary != binary:
+ return None
+ self.open_atos()
+ self.write_addr_to_pipe(offset)
+ self.pipe.stdin.close()
+ atos_line = self.pipe.stdout.readline().rstrip()
+ # A well-formed atos response looks like this:
+ # foo(type1, type2) (in object.name) (filename.cc:80)
+ match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
+ if DEBUG:
+ print 'atos_line: ', atos_line
+ if match:
+ function_name = match.group(1)
+ function_name = re.sub('\(.*?\)', '', function_name)
+ file_name = fix_filename(match.group(3))
+ return ['%s in %s %s' % (addr, function_name, file_name)]
+ else:
+ return ['%s in %s' % (addr, atos_line)]
+
+
+# Chain several symbolizers so that if one symbolizer fails, we fall back
+# to the next symbolizer in chain.
+class ChainSymbolizer(Symbolizer):
+ def __init__(self, symbolizer_list):
+ super(ChainSymbolizer, self).__init__()
+ self.symbolizer_list = symbolizer_list
+
+ def symbolize(self, addr, binary, offset):
+ """Overrides Symbolizer.symbolize."""
+ for symbolizer in self.symbolizer_list:
+ if symbolizer:
+ result = symbolizer.symbolize(addr, binary, offset)
+ if result:
+ return result
+ return None
+
+ def append_symbolizer(self, symbolizer):
+ self.symbolizer_list.append(symbolizer)
+
+
+def BreakpadSymbolizerFactory(binary):
+ suffix = os.getenv('BREAKPAD_SUFFIX')
+ if suffix:
+ filename = binary + suffix
+ if os.access(filename, os.F_OK):
+ return BreakpadSymbolizer(filename)
+ return None
+
+
+def SystemSymbolizerFactory(system, addr, binary):
+ if system == 'Darwin':
+ return DarwinSymbolizer(addr, binary)
+ elif system == 'Linux':
+ return Addr2LineSymbolizer(binary)
+
+
+class BreakpadSymbolizer(Symbolizer):
+ def __init__(self, filename):
+ super(BreakpadSymbolizer, self).__init__()
+ self.filename = filename
+ lines = file(filename).readlines()
+ self.files = []
+ self.symbols = {}
+ self.address_list = []
+ self.addresses = {}
+ # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
+ fragments = lines[0].rstrip().split()
+ self.arch = fragments[2]
+ self.debug_id = fragments[3]
+ self.binary = ' '.join(fragments[4:])
+ self.parse_lines(lines[1:])
+
+ def parse_lines(self, lines):
+ cur_function_addr = ''
+ for line in lines:
+ fragments = line.split()
+ if fragments[0] == 'FILE':
+ assert int(fragments[1]) == len(self.files)
+ self.files.append(' '.join(fragments[2:]))
+ elif fragments[0] == 'PUBLIC':
+ self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
+ elif fragments[0] in ['CFI', 'STACK']:
+ pass
+ elif fragments[0] == 'FUNC':
+ cur_function_addr = int(fragments[1], 16)
+ if not cur_function_addr in self.symbols.keys():
+ self.symbols[cur_function_addr] = ' '.join(fragments[4:])
+ else:
+ # Line starting with an address.
+ addr = int(fragments[0], 16)
+ self.address_list.append(addr)
+ # Tuple of symbol address, size, line, file number.
+ self.addresses[addr] = (cur_function_addr,
+ int(fragments[1], 16),
+ int(fragments[2]),
+ int(fragments[3]))
+ self.address_list.sort()
+
+ def get_sym_file_line(self, addr):
+ key = None
+ if addr in self.addresses.keys():
+ key = addr
+ else:
+ index = bisect.bisect_left(self.address_list, addr)
+ if index == 0:
+ return None
+ else:
+ key = self.address_list[index - 1]
+ sym_id, size, line_no, file_no = self.addresses[key]
+ symbol = self.symbols[sym_id]
+ filename = self.files[file_no]
+ if addr < key + size:
+ return symbol, filename, line_no
+ else:
+ return None
+
+ def symbolize(self, addr, binary, offset):
+ if self.binary != binary:
+ return None
+ res = self.get_sym_file_line(int(offset, 16))
+ if res:
+ function_name, file_name, line_no = res
+ result = ['%s in %s %s:%d' % (
+ addr, function_name, file_name, line_no)]
+ print result
+ return result
+ else:
+ return None
+
+
+class SymbolizationLoop(object):
+ def __init__(self, binary_name_filter=None):
+ # Used by clients who may want to supply a different binary name.
+ # E.g. in Chrome several binaries may share a single .dSYM.
+ self.binary_name_filter = binary_name_filter
+ self.system = os.uname()[0]
+ if self.system in ['Linux', 'Darwin']:
+ self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
+ else:
+ raise Exception('Unknown system')
+
+ def symbolize_address(self, addr, binary, offset):
+ # Use the chain of symbolizers:
+ # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
+ # (fall back to next symbolizer if the previous one fails).
+ if not binary in symbolizers:
+ symbolizers[binary] = ChainSymbolizer(
+ [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
+ result = symbolizers[binary].symbolize(addr, binary, offset)
+ if result is None:
+ # Initialize system symbolizer only if other symbolizers failed.
+ symbolizers[binary].append_symbolizer(
+ SystemSymbolizerFactory(self.system, addr, binary))
+ result = symbolizers[binary].symbolize(addr, binary, offset)
+ # The system symbolizer must produce some result.
+ assert result
+ return result
+
+ def print_symbolized_lines(self, symbolized_lines):
+ if not symbolized_lines:
+ print self.current_line
+ else:
+ for symbolized_frame in symbolized_lines:
+ print ' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip()
+ self.frame_no += 1
+
+ def process_stdin(self):
+ self.frame_no = 0
+ for line in sys.stdin:
+ self.current_line = line.rstrip()
+ #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
+ stack_trace_line_format = (
+ '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
+ match = re.match(stack_trace_line_format, line)
+ if not match:
+ print self.current_line
+ continue
+ if DEBUG:
+ print line
+ _, frameno_str, addr, binary, offset = match.groups()
+ if frameno_str == '0':
+ # Assume that frame #0 is the first frame of new stack trace.
+ self.frame_no = 0
+ original_binary = binary
+ if self.binary_name_filter:
+ binary = self.binary_name_filter(binary)
+ symbolized_line = self.symbolize_address(addr, binary, offset)
+ if not symbolized_line:
+ if original_binary != binary:
+ symbolized_line = self.symbolize_address(addr, binary, offset)
+ self.print_symbolized_lines(symbolized_line)
+
+
+if __name__ == '__main__':
+ loop = SymbolizationLoop()
+ loop.process_stdin()
Property changes on: tools/valgrind/asan/third_party/asan_symbolize.py
___________________________________________________________________
Added: svn:executable
+ *
Added: svn:eol-style
+ LF
« no previous file with comments | « tools/valgrind/asan/third_party/__init__.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698