OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
| 3 # |
| 4 # The LLVM Compiler Infrastructure |
| 5 # |
| 6 # This file is distributed under the University of Illinois Open Source |
| 7 # License. See LICENSE.TXT for details. |
| 8 # |
| 9 #===------------------------------------------------------------------------===# |
| 10 import argparse |
| 11 import bisect |
| 12 import getopt |
| 13 import os |
| 14 import re |
| 15 import subprocess |
| 16 import sys |
| 17 |
# Cache of symbolizer chains, keyed by binary path (filled in by
# SymbolizationLoop.symbolize_address).
symbolizers = {}
# When True, the commands and inputs sent to the external tools are printed.
DEBUG = False
# Whether the external symbolizers are asked to demangle function names.
demangle = False
# Prefix prepended to the addr2line tool name, or None for no prefix.
binutils_prefix = None
# Path that sysroot_path_filter() prepends to binary names, or None.
sysroot_path = None
# Optional callable mapping a binary name from the log to the one to open.
binary_name_filter = None
# Patterns that fix_filename() cuts from reported file paths, or None.
fix_filename_patterns = None
# File object the log is read from.
logfile = sys.stdin
| 26 |
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Return file_name rewritten for display.

    Every pattern in the module-level fix_filename_patterns (when set) is cut
    from the name together with everything before it.  After that, ASan
    runtime locations are replaced by '_asan_rtl_' and 'crtstuff.c:0' by
    '???:0'.
    """
    cleaned = file_name
    for prefix_pattern in fix_filename_patterns or ():
        cleaned = re.sub('.*' + prefix_pattern, '', cleaned)
    builtin_rewrites = (
        ('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_'),
        ('.*crtstuff.c:0', '???:0'),
    )
    for pattern, replacement in builtin_rewrites:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
| 35 |
def sysroot_path_filter(binary_name):
    """Return binary_name with the module-level sysroot_path prepended."""
    rooted_name = sysroot_path + binary_name
    return rooted_name
| 38 |
def guess_arch(addr):
    """Guess the architecture from the textual width of an address.

    An address longer than 10 characters (len('0x') + 8 hex digits) is taken
    to be 64-bit and yields 'x86_64'; anything else yields 'i386'.
    """
    return 'x86_64' if len(addr) > 10 else 'i386'
| 45 |
class Symbolizer(object):
    """Base class of all symbolizers; it never produces a result itself."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Subclasses override this method.

        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).  This base implementation
            always returns None.
        """
        return None
| 64 |
| 65 |
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to one llvm-symbolizer child process over pipes."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Store the configuration and try to start the child process.

        Args:
            symbolizer_path: executable to launch.
            default_arch: value passed as --default-arch.
            system: platform name; .dSYM hints are only passed on 'Darwin'.
            dsym_hints: iterable of values for --dsym-hint, or None for none.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # A None default avoids sharing one mutable list between instances.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Launch the symbolizer; return the Popen object, or None on OSError."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=short',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            for hint in self.dsym_hints:
                cmd.append('--dsym-hint=%s' % hint)
        if DEBUG:
            print(' '.join(cmd))
        try:
            result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE)
        except OSError:
            # The executable could not be started; symbolize() then always
            # returns None.
            result = None
        return result

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Sends '"<binary>" <offset>' to the child and reads back pairs of
        lines (function name, then file name) until an empty line.

        Returns:
            A list with one '<addr> in <function> <file>' string per frame
            that has a known function or file, or None when the child is not
            running, the exchange fails, or no such frame was reported.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Deliberately best-effort: any failure while talking to the
            # child is reported as "no result" rather than raised.
            result = []
        if not result:
            result = None
        return result
| 120 |
| 121 |
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the executable via the environment.

    The path is taken from LLVM_SYMBOLIZER_PATH, then ASAN_SYMBOLIZER_PATH;
    when neither is set to a non-empty value, 'llvm-symbolizer' is used and
    is assumed to be in PATH.

    Args:
        system: platform name forwarded to LLVMSymbolizer.
        default_arch: architecture forwarded to LLVMSymbolizer.
        dsym_hints: iterable of .dSYM hints, or None for an empty list.
    """
    # A None default avoids sharing one mutable list between calls.
    if dsym_hints is None:
        dsym_hints = []
    symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                       os.getenv('ASAN_SYMBOLIZER_PATH') or
                       'llvm-symbolizer')
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
| 130 |
| 131 |
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that drives an addr2line child process for one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start addr2line for self.binary and return the Popen object.

        The tool name gets the module-level binutils_prefix prepended when
        that is set, and --demangle is passed when demangling is enabled.
        """
        tool = 'addr2line'
        if binutils_prefix:
            tool = binutils_prefix + tool
        argv = [tool, '-f']
        if demangle:
            argv.append('--demangle')
        argv.extend(['-e', self.binary])
        if DEBUG:
            print(' '.join(argv))
        return subprocess.Popen(argv, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for any binary other than the one this instance was
        created for.  Otherwise it always returns a one-element list; the
        function and file names are empty when the exchange with the child
        fails.
        """
        if binary != self.binary:
            return None
        try:
            self.pipe.stdin.write(str(offset) + '\n')
            reply = (self.pipe.stdout.readline().rstrip(),
                     self.pipe.stdout.readline().rstrip())
        except Exception:
            reply = ('', '')
        function_name, file_name = reply
        return ['%s in %s %s' % (addr, function_name, fix_filename(file_name))]
| 164 |
| 165 |
class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output.  Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork a child on a pty and exec args in it.

        Args:
            args: command line; args[0] is the program, looked up via PATH.
            close_stderr: when True, the child's stderr is redirected to
                /dev/null before the exec.
        """
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            try:
                if close_stderr:
                    # Open for writing so that the child's writes to stderr
                    # are discarded instead of failing.
                    dev_null = os.open('/dev/null', os.O_WRONLY)
                    os.dup2(dev_null, 2)
                os.execvp(args[0], args)
            finally:
                # Only reached when the exec (or the redirection) failed.
                # Leave immediately so the child never continues running the
                # parent's code.
                os._exit(127)
        else:
            # Disable echoing.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send line (plus a newline) to the child and return its next reply."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Return the child's next output line with trailing whitespace cut."""
        return self.r.readline().rstrip()
| 197 |
| 198 |
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that queries an atos child process for one binary."""

    def __init__(self, addr, binary):
        """Start atos for binary; the architecture is guessed from addr."""
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Launch atos with its stderr discarded and keep it in self.atos."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for any binary other than the one this instance was
        created for; otherwise a one-element list.  A reply that does not
        have the well-formed shape is passed through verbatim after the
        address.
        """
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip status lines that precede the actual answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        # Raw strings keep the backslashes without relying on Python leaving
        # unknown escape sequences untouched.
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line:  %s' % atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesised argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]
| 231 |
| 232 |
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order and returns the first result."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Falsy entries of the chain are skipped.  Returns the first truthy
        result, or None when no entry produces one.
        """
        for candidate in self.symbolizer_list:
            if not candidate:
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add symbolizer at the end of the chain."""
        self.symbolizer_list.append(symbolizer)
| 251 |
| 252 |
def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for binary, or None.

    The symbol file name is binary followed by the value of the
    BREAKPAD_SUFFIX environment variable.  None is returned when the variable
    is unset or empty, or when that file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        return None
    symbol_file = binary + suffix
    if not os.access(symbol_file, os.F_OK):
        return None
    return BreakpadSymbolizer(symbol_file)
| 260 |
| 261 |
def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform's own symbolizer for binary.

    'Darwin' yields a DarwinSymbolizer and 'Linux' an Addr2LineSymbolizer;
    any other system yields None.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    if system == 'Linux':
        return Addr2LineSymbolizer(binary)
    return None
| 267 |
| 268 |
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad symbol file loaded in memory."""

    def __init__(self, filename):
        """Read and index the symbol file at filename.

        The first line is the MODULE record; its third and fourth fields are
        kept as the architecture and debug id, and the remaining fields as
        the binary name.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Use a context manager so the file is closed once it has been read.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []
        self.symbols = {}
        self.address_list = []
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the records that follow the MODULE line.

        FILE records fill self.files, PUBLIC and FUNC records fill
        self.symbols, CFI/STACK records are ignored, and every other
        non-blank line is read as a line record starting with an address.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to index.
                continue
            if fragments[0] == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif fragments[0] == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif fragments[0] in ['CFI', 'STACK']:
                pass
            elif fragments[0] == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name already recorded for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering addr.

        Returns:
            (symbol, filename, line_no) when addr lies inside the nearest
            record starting at or before it, otherwise None.
        """
        key = None
        if addr in self.addresses:
            key = addr
        else:
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                # addr precedes every known record.
                return None
            else:
                key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        else:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a binary other than the one named in the symbol
        file, or when offset is not covered by any line record.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            result = ['%s in %s %s:%d' % (
                addr, function_name, file_name, line_no)]
            # Diagnostic output only; printing it unconditionally would mix
            # the raw list into the symbolized log.
            if DEBUG:
                print(result)
            return result
        else:
            return None
| 341 |
| 342 |
class SymbolizationLoop(object):
    """Reads the log line by line and replaces stack frames by symbolized ones."""

    # Matches a stack frame line such as
    #   #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    # Groups: frame prefix, frame number, address, binary, offset.
    _STACK_TRACE_LINE_RE = re.compile(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """Select the per-line handler for the current platform.

        Args:
            binary_name_filter: optional callable applied to each binary name
                found in the log before it is symbolized.
            dsym_hint_producer: optional callable returning the .dSYM hints
                for a binary; only consulted on Darwin.
        Raises:
            Exception: on a non-Windows system other than Linux, Darwin or
                FreeBSD.
        """
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done
            # here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set()
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one frame; return a list of frame strings or None.

        None is returned when neither the Breakpad/LLVM chain nor a system
        symbolizer produces a result, which includes systems for which
        SystemSymbolizerFactory has no symbolizer.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always
        # use a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        # NOTE(review): off Darwin use_new_symbolizer stays True, so a new
        # LLVMSymbolizer is created for every binary not seen before --
        # confirm this matches the intent stated above.
        if binary not in self.llvm_symbolizers:
            use_new_symbolizer = True
            if self.system == 'Darwin' and self.dsym_hint_producer:
                dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                use_new_symbolizer = bool(
                    dsym_hints_for_binary - self.dsym_hints)
                self.dsym_hints |= dsym_hints_for_binary
            if self.last_llvm_symbolizer and not use_new_symbolizer:
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            else:
                self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                    self.system, guess_arch(addr), self.dsym_hints)
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary),
                 self.llvm_symbolizers[binary]])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            system_symbolizer = SystemSymbolizerFactory(
                self.system, addr, binary)
            # The factory returns None where no system symbolizer exists;
            # in that case the frame is left unsymbolized instead of
            # failing an assertion.
            if system_symbolizer is not None:
                symbolizers[binary].append_symbolizer(system_symbolizer)
                result = symbolizers[binary].symbolize(addr, binary, offset)
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format symbolized frames, numbering them from self.frame_no.

        Returns the current input line unchanged, as a one-element list,
        when symbolized_lines is empty or None.
        """
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            # NOTE(review): the literal below starts with a single space as
            # seen in this copy of the file -- confirm that no leading
            # whitespace was lost.
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Process every line of the module-level logfile and print the result."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize line if it is a stack frame; otherwise echo it.

        Returns a list of output lines (one per inlined frame for a
        symbolized frame).
        """
        self.current_line = line.rstrip()
        match = self._STACK_TRACE_LINE_RE.match(line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line and original_binary != binary:
            # The filtered name gave nothing; retry with the name exactly as
            # it appeared in the log.
            symbolized_line = self.symbolize_address(
                addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)
| 444 |
| 445 |
if __name__ == '__main__':
    # Describe the command line.
    usage_example = (
        'Example of use:\n'
        'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
        '-s "$HOME/SymbolFiles" < asan.log')
    parser = argparse.ArgumentParser(
        description='ASan symbolization script',
        epilog=usage_example,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        'path_to_cut', nargs='*',
        help='pattern to be cut from the result file path ')
    parser.add_argument(
        '-d', '--demangle', action='store_true',
        help='demangle function names')
    parser.add_argument(
        '-s', metavar='SYSROOT',
        help='set path to sysroot for sanitized binaries')
    parser.add_argument(
        '-c', metavar='CROSS_COMPILE',
        help='set prefix for binutils')
    parser.add_argument(
        '-l', '--logfile', default=sys.stdin,
        type=argparse.FileType('r'),
        help='set log file name to parse, default is stdin')
    args = parser.parse_args()

    # Copy the options that were given into the module-level settings; the
    # ones that were not given keep their current values.
    if args.c:
        binutils_prefix = args.c
    if args.s:
        sysroot_path = args.s
        binary_name_filter = sysroot_path_filter
    if args.demangle:
        demangle = True
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    logfile = args.logfile if args.logfile else sys.stdin

    # Symbolize the whole log.
    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()
OLD | NEW |