| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# | 2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
| 3 # | 3 # |
| 4 # The LLVM Compiler Infrastructure | 4 # The LLVM Compiler Infrastructure |
| 5 # | 5 # |
| 6 # This file is distributed under the University of Illinois Open Source | 6 # This file is distributed under the University of Illinois Open Source |
| 7 # License. See LICENSE.TXT for details. | 7 # License. See LICENSE.TXT for details. |
| 8 # | 8 # |
| 9 #===------------------------------------------------------------------------===# | 9 #===------------------------------------------------------------------------===# |
| 10 import argparse |
| 10 import bisect | 11 import bisect |
| 11 import getopt | 12 import getopt |
| 12 import os | 13 import os |
| 13 import pty | 14 import pty |
| 14 import re | 15 import re |
| 15 import subprocess | 16 import subprocess |
| 16 import sys | 17 import sys |
| 17 import termios | 18 import termios |
| 18 | 19 |
| 19 symbolizers = {} | 20 symbolizers = {} |
| 20 DEBUG = False | 21 DEBUG = False |
| 21 demangle = False; | 22 demangle = False |
| 22 | 23 binutils_prefix = None |
| 24 sysroot_path = None |
| 25 binary_name_filter = None |
| 26 fix_filename_patterns = None |
| 27 logfile = None |
| 23 | 28 |
| 24 # FIXME: merge the code that calls fix_filename(). | 29 # FIXME: merge the code that calls fix_filename(). |
| 25 def fix_filename(file_name): | 30 def fix_filename(file_name): |
| 26 for path_to_cut in sys.argv[1:]: | 31 if fix_filename_patterns: |
| 27 file_name = re.sub('.*' + path_to_cut, '', file_name) | 32 for path_to_cut in fix_filename_patterns: |
| 33 file_name = re.sub('.*' + path_to_cut, '', file_name) |
| 28 file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) | 34 file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) |
| 29 file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) | 35 file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) |
| 30 return file_name | 36 return file_name |
| 31 | 37 |
| 32 def GuessArch(addr): | 38 def sysroot_path_filter(binary_name): |
| 39 return sysroot_path + binary_name |
| 40 |
| 41 def guess_arch(addr): |
| 33 # Guess which arch we're running. 10 = len('0x') + 8 hex digits. | 42 # Guess which arch we're running. 10 = len('0x') + 8 hex digits. |
| 34 if len(addr) > 10: | 43 if len(addr) > 10: |
| 35 return 'x86_64' | 44 return 'x86_64' |
| 36 else: | 45 else: |
| 37 return 'i386' | 46 return 'i386' |
| 38 | 47 |
| 39 class Symbolizer(object): | 48 class Symbolizer(object): |
| 40 def __init__(self): | 49 def __init__(self): |
| 41 pass | 50 pass |
| 42 | 51 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 53 the code locations for this instruction (that is, function name, file | 62 the code locations for this instruction (that is, function name, file |
| 54 name, line and column numbers). | 63 name, line and column numbers). |
| 55 """ | 64 """ |
| 56 return None | 65 return None |
| 57 | 66 |
| 58 | 67 |
| 59 class LLVMSymbolizer(Symbolizer): | 68 class LLVMSymbolizer(Symbolizer): |
| 60 def __init__(self, symbolizer_path, addr): | 69 def __init__(self, symbolizer_path, addr): |
| 61 super(LLVMSymbolizer, self).__init__() | 70 super(LLVMSymbolizer, self).__init__() |
| 62 self.symbolizer_path = symbolizer_path | 71 self.symbolizer_path = symbolizer_path |
| 63 self.default_arch = GuessArch(addr) | 72 self.default_arch = guess_arch(addr) |
| 64 self.pipe = self.open_llvm_symbolizer() | 73 self.pipe = self.open_llvm_symbolizer() |
| 65 | 74 |
| 66 def open_llvm_symbolizer(self): | 75 def open_llvm_symbolizer(self): |
| 67 cmd = [self.symbolizer_path, | 76 cmd = [self.symbolizer_path, |
| 68 '--use-symbol-table=true', | 77 '--use-symbol-table=true', |
| 69 '--demangle=%s' % demangle, | 78 '--demangle=%s' % demangle, |
| 70 '--functions=short', | 79 '--functions=short', |
| 71 '--inlining=true', | 80 '--inlining=true', |
| 72 '--default-arch=%s' % self.default_arch] | 81 '--default-arch=%s' % self.default_arch] |
| 73 if DEBUG: | 82 if DEBUG: |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 117 return LLVMSymbolizer(symbolizer_path, addr) | 126 return LLVMSymbolizer(symbolizer_path, addr) |
| 118 | 127 |
| 119 | 128 |
| 120 class Addr2LineSymbolizer(Symbolizer): | 129 class Addr2LineSymbolizer(Symbolizer): |
| 121 def __init__(self, binary): | 130 def __init__(self, binary): |
| 122 super(Addr2LineSymbolizer, self).__init__() | 131 super(Addr2LineSymbolizer, self).__init__() |
| 123 self.binary = binary | 132 self.binary = binary |
| 124 self.pipe = self.open_addr2line() | 133 self.pipe = self.open_addr2line() |
| 125 | 134 |
| 126 def open_addr2line(self): | 135 def open_addr2line(self): |
| 127 cmd = ['addr2line', '-f'] | 136 addr2line_tool = 'addr2line' |
| 137 if binutils_prefix: |
| 138 addr2line_tool = binutils_prefix + addr2line_tool |
| 139 cmd = [addr2line_tool, '-f'] |
| 128 if demangle: | 140 if demangle: |
| 129 cmd += ['--demangle'] | 141 cmd += ['--demangle'] |
| 130 cmd += ['-e', self.binary] | 142 cmd += ['-e', self.binary] |
| 131 if DEBUG: | 143 if DEBUG: |
| 132 print ' '.join(cmd) | 144 print ' '.join(cmd) |
| 133 return subprocess.Popen(cmd, | 145 return subprocess.Popen(cmd, |
| 134 stdin=subprocess.PIPE, stdout=subprocess.PIPE) | 146 stdin=subprocess.PIPE, stdout=subprocess.PIPE) |
| 135 | 147 |
| 136 def symbolize(self, addr, binary, offset): | 148 def symbolize(self, addr, binary, offset): |
| 137 """Overrides Symbolizer.symbolize.""" | 149 """Overrides Symbolizer.symbolize.""" |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 175 return self.readline() | 187 return self.readline() |
| 176 | 188 |
| 177 def readline(self): | 189 def readline(self): |
| 178 return self.r.readline().rstrip() | 190 return self.r.readline().rstrip() |
| 179 | 191 |
| 180 | 192 |
| 181 class DarwinSymbolizer(Symbolizer): | 193 class DarwinSymbolizer(Symbolizer): |
| 182 def __init__(self, addr, binary): | 194 def __init__(self, addr, binary): |
| 183 super(DarwinSymbolizer, self).__init__() | 195 super(DarwinSymbolizer, self).__init__() |
| 184 self.binary = binary | 196 self.binary = binary |
| 185 self.arch = GuessArch(addr) | 197 self.arch = guess_arch(addr) |
| 186 self.open_atos() | 198 self.open_atos() |
| 187 | 199 |
| 188 def open_atos(self): | 200 def open_atos(self): |
| 189 if DEBUG: | 201 if DEBUG: |
| 190 print 'atos -o %s -arch %s' % (self.binary, self.arch) | 202 print 'atos -o %s -arch %s' % (self.binary, self.arch) |
| 191 cmdline = ['atos', '-o', self.binary, '-arch', self.arch] | 203 cmdline = ['atos', '-o', self.binary, '-arch', self.arch] |
| 192 self.atos = UnbufferedLineConverter(cmdline, close_stderr=True) | 204 self.atos = UnbufferedLineConverter(cmdline, close_stderr=True) |
| 193 | 205 |
| 194 def symbolize(self, addr, binary, offset): | 206 def symbolize(self, addr, binary, offset): |
| 195 """Overrides Symbolizer.symbolize.""" | 207 """Overrides Symbolizer.symbolize.""" |
| (...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 321 else: | 333 else: |
| 322 return None | 334 return None |
| 323 | 335 |
| 324 | 336 |
| 325 class SymbolizationLoop(object): | 337 class SymbolizationLoop(object): |
| 326 def __init__(self, binary_name_filter=None): | 338 def __init__(self, binary_name_filter=None): |
| 327 # Used by clients who may want to supply a different binary name. | 339 # Used by clients who may want to supply a different binary name. |
| 328 # E.g. in Chrome several binaries may share a single .dSYM. | 340 # E.g. in Chrome several binaries may share a single .dSYM. |
| 329 self.binary_name_filter = binary_name_filter | 341 self.binary_name_filter = binary_name_filter |
| 330 self.system = os.uname()[0] | 342 self.system = os.uname()[0] |
| 331 if self.system not in ['Linux', 'Darwin']: | 343 if self.system not in ['Linux', 'Darwin', 'FreeBSD']: |
| 332 raise Exception('Unknown system') | 344 raise Exception('Unknown system') |
| 333 self.llvm_symbolizer = None | 345 self.llvm_symbolizer = None |
| 334 self.frame_no = 0 | 346 self.frame_no = 0 |
| 335 | 347 |
| 336 def symbolize_address(self, addr, binary, offset): | 348 def symbolize_address(self, addr, binary, offset): |
| 337 # Initialize llvm-symbolizer lazily. | 349 # Initialize llvm-symbolizer lazily. |
| 338 if not self.llvm_symbolizer: | 350 if not self.llvm_symbolizer: |
| 339 self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr) | 351 self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr) |
| 340 # Use the chain of symbolizers: | 352 # Use the chain of symbolizers: |
| 341 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos | 353 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos |
| (...skipping 14 matching lines...) Expand all Loading... |
| 356 def get_symbolized_lines(self, symbolized_lines): | 368 def get_symbolized_lines(self, symbolized_lines): |
| 357 if not symbolized_lines: | 369 if not symbolized_lines: |
| 358 return [self.current_line] | 370 return [self.current_line] |
| 359 else: | 371 else: |
| 360 result = [] | 372 result = [] |
| 361 for symbolized_frame in symbolized_lines: | 373 for symbolized_frame in symbolized_lines: |
| 362 result.append(' #%s %s' % (str(self.frame_no), symbolized_frame.rstrip())) | 374 result.append(' #%s %s' % (str(self.frame_no), symbolized_frame.rstrip())) |
| 363 self.frame_no += 1 | 375 self.frame_no += 1 |
| 364 return result | 376 return result |
| 365 | 377 |
| 366 def process_stdin(self): | 378 def process_logfile(self): |
| 367 self.frame_no = 0 | 379 self.frame_no = 0 |
| 368 while True: | 380 while True: |
| 369 line = sys.stdin.readline() | 381 line = logfile.readline() |
| 370 if not line: | 382 if not line: |
| 371 break | 383 break |
| 372 processed = self.process_line(line) | 384 processed = self.process_line(line) |
| 373 print ''.join(processed) | 385 print '\n'.join(processed) |
| 374 | 386 |
| 375 def process_line(self, line): | 387 def process_line(self, line): |
| 376 self.current_line = line.rstrip() | 388 self.current_line = line.rstrip() |
| 377 #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) | 389 #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) |
| 378 stack_trace_line_format = ( | 390 stack_trace_line_format = ( |
| 379 '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') | 391 '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') |
| 380 match = re.match(stack_trace_line_format, line) | 392 match = re.match(stack_trace_line_format, line) |
| 381 if not match: | 393 if not match: |
| 382 return [self.current_line] | 394 return [self.current_line] |
| 383 if DEBUG: | 395 if DEBUG: |
| 384 print line | 396 print line |
| 385 _, frameno_str, addr, binary, offset = match.groups() | 397 _, frameno_str, addr, binary, offset = match.groups() |
| 386 if frameno_str == '0': | 398 if frameno_str == '0': |
| 387 # Assume that frame #0 is the first frame of new stack trace. | 399 # Assume that frame #0 is the first frame of new stack trace. |
| 388 self.frame_no = 0 | 400 self.frame_no = 0 |
| 389 original_binary = binary | 401 original_binary = binary |
| 390 if self.binary_name_filter: | 402 if self.binary_name_filter: |
| 391 binary = self.binary_name_filter(binary) | 403 binary = self.binary_name_filter(binary) |
| 392 symbolized_line = self.symbolize_address(addr, binary, offset) | 404 symbolized_line = self.symbolize_address(addr, binary, offset) |
| 393 if not symbolized_line: | 405 if not symbolized_line: |
| 394 if original_binary != binary: | 406 if original_binary != binary: |
| 395 symbolized_line = self.symbolize_address(addr, binary, offset) | 407 symbolized_line = self.symbolize_address(addr, binary, offset) |
| 396 return self.get_symbolized_lines(symbolized_line) | 408 return self.get_symbolized_lines(symbolized_line) |
| 397 | 409 |
| 398 | 410 |
| 399 if __name__ == '__main__': | 411 if __name__ == '__main__': |
| 400 opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"]) | 412 parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, |
| 401 for o, a in opts: | 413 description='ASan symbolization script', |
| 402 if o in ("-d", "--demangle"): | 414 epilog='''Example of use: |
| 403 demangle = True; | 415 asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log''') |
| 404 loop = SymbolizationLoop() | 416 parser.add_argument('path_to_cut', nargs='*', |
| 405 loop.process_stdin() | 417 help='pattern to be cut from the result file path ') |
| 418 parser.add_argument('-d','--demangle', action='store_true', |
| 419 help='demangle function names') |
| 420 parser.add_argument('-s', metavar='SYSROOT', |
| 421 help='set path to sysroot for sanitized binaries') |
| 422 parser.add_argument('-c', metavar='CROSS_COMPILE', |
| 423 help='set prefix for binutils') |
| 424 parser.add_argument('-l','--logfile', default=sys.stdin, type=argparse.FileType('r'), |
| 425 help='set log file name to parse, default is stdin') |
| 426 args = parser.parse_args() |
| 427 if args.path_to_cut: |
| 428 fix_filename_patterns = args.path_to_cut |
| 429 if args.demangle: |
| 430 demangle = True |
| 431 if args.s: |
| 432 binary_name_filter = sysroot_path_filter |
| 433 sysroot_path = args.s |
| 434 if args.c: |
| 435 binutils_prefix = args.c |
| 436 if args.logfile: |
| 437 logfile = args.logfile |
| 438 else: |
| 439 logfile = sys.stdin |
| 440 loop = SymbolizationLoop(binary_name_filter) |
| 441 loop.process_logfile() |
| OLD | NEW |