OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# | 2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
3 # | 3 # |
4 # The LLVM Compiler Infrastructure | 4 # The LLVM Compiler Infrastructure |
5 # | 5 # |
6 # This file is distributed under the University of Illinois Open Source | 6 # This file is distributed under the University of Illinois Open Source |
7 # License. See LICENSE.TXT for details. | 7 # License. See LICENSE.TXT for details. |
8 # | 8 # |
9 #===------------------------------------------------------------------------===# | 9 #===------------------------------------------------------------------------===# |
| 10 import argparse |
10 import bisect | 11 import bisect |
11 import getopt | 12 import getopt |
12 import os | 13 import os |
13 import pty | 14 import pty |
14 import re | 15 import re |
15 import subprocess | 16 import subprocess |
16 import sys | 17 import sys |
17 import termios | 18 import termios |
18 | 19 |
19 symbolizers = {} | 20 symbolizers = {} |
20 DEBUG = False | 21 DEBUG = False |
21 demangle = False; | 22 demangle = False |
22 | 23 binutils_prefix = None |
| 24 sysroot_path = None |
| 25 binary_name_filter = None |
| 26 fix_filename_patterns = None |
| 27 logfile = sys.stdin |
23 | 28 |
24 # FIXME: merge the code that calls fix_filename(). | 29 # FIXME: merge the code that calls fix_filename(). |
25 def fix_filename(file_name): | 30 def fix_filename(file_name): |
26 for path_to_cut in sys.argv[1:]: | 31 if fix_filename_patterns: |
27 file_name = re.sub('.*' + path_to_cut, '', file_name) | 32 for path_to_cut in fix_filename_patterns: |
| 33 file_name = re.sub('.*' + path_to_cut, '', file_name) |
28 file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) | 34 file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) |
29 file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) | 35 file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) |
30 return file_name | 36 return file_name |
31 | 37 |
32 def GuessArch(addr): | 38 def sysroot_path_filter(binary_name): |
| 39 return sysroot_path + binary_name |
| 40 |
| 41 def guess_arch(addr): |
33 # Guess which arch we're running. 10 = len('0x') + 8 hex digits. | 42 # Guess which arch we're running. 10 = len('0x') + 8 hex digits. |
34 if len(addr) > 10: | 43 if len(addr) > 10: |
35 return 'x86_64' | 44 return 'x86_64' |
36 else: | 45 else: |
37 return 'i386' | 46 return 'i386' |
38 | 47 |
39 class Symbolizer(object): | 48 class Symbolizer(object): |
40 def __init__(self): | 49 def __init__(self): |
41 pass | 50 pass |
42 | 51 |
(...skipping 10 matching lines...) Expand all Loading... |
53 the code locations for this instruction (that is, function name, file | 62 the code locations for this instruction (that is, function name, file |
54 name, line and column numbers). | 63 name, line and column numbers). |
55 """ | 64 """ |
56 return None | 65 return None |
57 | 66 |
58 | 67 |
59 class LLVMSymbolizer(Symbolizer): | 68 class LLVMSymbolizer(Symbolizer): |
60 def __init__(self, symbolizer_path, addr): | 69 def __init__(self, symbolizer_path, addr): |
61 super(LLVMSymbolizer, self).__init__() | 70 super(LLVMSymbolizer, self).__init__() |
62 self.symbolizer_path = symbolizer_path | 71 self.symbolizer_path = symbolizer_path |
63 self.default_arch = GuessArch(addr) | 72 self.default_arch = guess_arch(addr) |
64 self.pipe = self.open_llvm_symbolizer() | 73 self.pipe = self.open_llvm_symbolizer() |
65 | 74 |
66 def open_llvm_symbolizer(self): | 75 def open_llvm_symbolizer(self): |
67 cmd = [self.symbolizer_path, | 76 cmd = [self.symbolizer_path, |
68 '--use-symbol-table=true', | 77 '--use-symbol-table=true', |
69 '--demangle=%s' % demangle, | 78 '--demangle=%s' % demangle, |
70 '--functions=short', | 79 '--functions=short', |
71 '--inlining=true', | 80 '--inlining=true', |
72 '--default-arch=%s' % self.default_arch] | 81 '--default-arch=%s' % self.default_arch] |
73 if DEBUG: | 82 if DEBUG: |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
117 return LLVMSymbolizer(symbolizer_path, addr) | 126 return LLVMSymbolizer(symbolizer_path, addr) |
118 | 127 |
119 | 128 |
120 class Addr2LineSymbolizer(Symbolizer): | 129 class Addr2LineSymbolizer(Symbolizer): |
121 def __init__(self, binary): | 130 def __init__(self, binary): |
122 super(Addr2LineSymbolizer, self).__init__() | 131 super(Addr2LineSymbolizer, self).__init__() |
123 self.binary = binary | 132 self.binary = binary |
124 self.pipe = self.open_addr2line() | 133 self.pipe = self.open_addr2line() |
125 | 134 |
126 def open_addr2line(self): | 135 def open_addr2line(self): |
127 cmd = ['addr2line', '-f'] | 136 addr2line_tool = 'addr2line' |
| 137 if binutils_prefix: |
| 138 addr2line_tool = binutils_prefix + addr2line_tool |
| 139 cmd = [addr2line_tool, '-f'] |
128 if demangle: | 140 if demangle: |
129 cmd += ['--demangle'] | 141 cmd += ['--demangle'] |
130 cmd += ['-e', self.binary] | 142 cmd += ['-e', self.binary] |
131 if DEBUG: | 143 if DEBUG: |
132 print ' '.join(cmd) | 144 print ' '.join(cmd) |
133 return subprocess.Popen(cmd, | 145 return subprocess.Popen(cmd, |
134 stdin=subprocess.PIPE, stdout=subprocess.PIPE) | 146 stdin=subprocess.PIPE, stdout=subprocess.PIPE) |
135 | 147 |
136 def symbolize(self, addr, binary, offset): | 148 def symbolize(self, addr, binary, offset): |
137 """Overrides Symbolizer.symbolize.""" | 149 """Overrides Symbolizer.symbolize.""" |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
175 return self.readline() | 187 return self.readline() |
176 | 188 |
177 def readline(self): | 189 def readline(self): |
178 return self.r.readline().rstrip() | 190 return self.r.readline().rstrip() |
179 | 191 |
180 | 192 |
181 class DarwinSymbolizer(Symbolizer): | 193 class DarwinSymbolizer(Symbolizer): |
182 def __init__(self, addr, binary): | 194 def __init__(self, addr, binary): |
183 super(DarwinSymbolizer, self).__init__() | 195 super(DarwinSymbolizer, self).__init__() |
184 self.binary = binary | 196 self.binary = binary |
185 self.arch = GuessArch(addr) | 197 self.arch = guess_arch(addr) |
186 self.open_atos() | 198 self.open_atos() |
187 | 199 |
188 def open_atos(self): | 200 def open_atos(self): |
189 if DEBUG: | 201 if DEBUG: |
190 print 'atos -o %s -arch %s' % (self.binary, self.arch) | 202 print 'atos -o %s -arch %s' % (self.binary, self.arch) |
191 cmdline = ['atos', '-o', self.binary, '-arch', self.arch] | 203 cmdline = ['atos', '-o', self.binary, '-arch', self.arch] |
192 self.atos = UnbufferedLineConverter(cmdline, close_stderr=True) | 204 self.atos = UnbufferedLineConverter(cmdline, close_stderr=True) |
193 | 205 |
194 def symbolize(self, addr, binary, offset): | 206 def symbolize(self, addr, binary, offset): |
195 """Overrides Symbolizer.symbolize.""" | 207 """Overrides Symbolizer.symbolize.""" |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
321 else: | 333 else: |
322 return None | 334 return None |
323 | 335 |
324 | 336 |
325 class SymbolizationLoop(object): | 337 class SymbolizationLoop(object): |
326 def __init__(self, binary_name_filter=None): | 338 def __init__(self, binary_name_filter=None): |
327 # Used by clients who may want to supply a different binary name. | 339 # Used by clients who may want to supply a different binary name. |
328 # E.g. in Chrome several binaries may share a single .dSYM. | 340 # E.g. in Chrome several binaries may share a single .dSYM. |
329 self.binary_name_filter = binary_name_filter | 341 self.binary_name_filter = binary_name_filter |
330 self.system = os.uname()[0] | 342 self.system = os.uname()[0] |
331 if self.system not in ['Linux', 'Darwin']: | 343 if self.system not in ['Linux', 'Darwin', 'FreeBSD']: |
332 raise Exception('Unknown system') | 344 raise Exception('Unknown system') |
333 self.llvm_symbolizer = None | 345 self.llvm_symbolizer = None |
334 self.frame_no = 0 | 346 self.frame_no = 0 |
335 | 347 |
336 def symbolize_address(self, addr, binary, offset): | 348 def symbolize_address(self, addr, binary, offset): |
337 # Initialize llvm-symbolizer lazily. | 349 # Initialize llvm-symbolizer lazily. |
338 if not self.llvm_symbolizer: | 350 if not self.llvm_symbolizer: |
339 self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr) | 351 self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr) |
340 # Use the chain of symbolizers: | 352 # Use the chain of symbolizers: |
341 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos | 353 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos |
(...skipping 14 matching lines...) Expand all Loading... |
356 def get_symbolized_lines(self, symbolized_lines): | 368 def get_symbolized_lines(self, symbolized_lines): |
357 if not symbolized_lines: | 369 if not symbolized_lines: |
358 return [self.current_line] | 370 return [self.current_line] |
359 else: | 371 else: |
360 result = [] | 372 result = [] |
361 for symbolized_frame in symbolized_lines: | 373 for symbolized_frame in symbolized_lines: |
362 result.append(' #%s %s' % (str(self.frame_no), symbolized_frame.rstrip())) | 374 result.append(' #%s %s' % (str(self.frame_no), symbolized_frame.rstrip())) |
363 self.frame_no += 1 | 375 self.frame_no += 1 |
364 return result | 376 return result |
365 | 377 |
366 def process_stdin(self): | 378 def process_logfile(self): |
367 self.frame_no = 0 | 379 self.frame_no = 0 |
368 while True: | 380 while True: |
369 line = sys.stdin.readline() | 381 line = logfile.readline() |
370 if not line: | 382 if not line: |
371 break | 383 break |
372 processed = self.process_line(line) | 384 processed = self.process_line(line) |
373 print ''.join(processed) | 385 print '\n'.join(processed) |
374 | 386 |
375 def process_line(self, line): | 387 def process_line(self, line): |
376 self.current_line = line.rstrip() | 388 self.current_line = line.rstrip() |
377 #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) | 389 #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) |
378 stack_trace_line_format = ( | 390 stack_trace_line_format = ( |
379 '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') | 391 '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') |
380 match = re.match(stack_trace_line_format, line) | 392 match = re.match(stack_trace_line_format, line) |
381 if not match: | 393 if not match: |
382 return [self.current_line] | 394 return [self.current_line] |
383 if DEBUG: | 395 if DEBUG: |
384 print line | 396 print line |
385 _, frameno_str, addr, binary, offset = match.groups() | 397 _, frameno_str, addr, binary, offset = match.groups() |
386 if frameno_str == '0': | 398 if frameno_str == '0': |
387 # Assume that frame #0 is the first frame of new stack trace. | 399 # Assume that frame #0 is the first frame of new stack trace. |
388 self.frame_no = 0 | 400 self.frame_no = 0 |
389 original_binary = binary | 401 original_binary = binary |
390 if self.binary_name_filter: | 402 if self.binary_name_filter: |
391 binary = self.binary_name_filter(binary) | 403 binary = self.binary_name_filter(binary) |
392 symbolized_line = self.symbolize_address(addr, binary, offset) | 404 symbolized_line = self.symbolize_address(addr, binary, offset) |
393 if not symbolized_line: | 405 if not symbolized_line: |
394 if original_binary != binary: | 406 if original_binary != binary: |
395 symbolized_line = self.symbolize_address(addr, binary, offset) | 407 symbolized_line = self.symbolize_address(addr, binary, offset) |
396 return self.get_symbolized_lines(symbolized_line) | 408 return self.get_symbolized_lines(symbolized_line) |
397 | 409 |
398 | 410 |
399 if __name__ == '__main__': | 411 if __name__ == '__main__': |
400 opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"]) | 412 parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, |
401 for o, a in opts: | 413 description='ASan symbolization script', |
402 if o in ("-d", "--demangle"): | 414 epilog='''Example of use: |
403 demangle = True; | 415 asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log''') |
404 loop = SymbolizationLoop() | 416 parser.add_argument('path_to_cut', nargs='*', |
405 loop.process_stdin() | 417 help='pattern to be cut from the result file path ') |
| 418 parser.add_argument('-d','--demangle', action='store_true', |
| 419 help='demangle function names') |
| 420 parser.add_argument('-s', metavar='SYSROOT', |
| 421 help='set path to sysroot for sanitized binaries') |
| 422 parser.add_argument('-c', metavar='CROSS_COMPILE', |
| 423 help='set prefix for binutils') |
| 424 parser.add_argument('-l','--logfile', default=sys.stdin, type=argparse.FileType('r'), |
| 425 help='set log file name to parse, default is stdin') |
| 426 args = parser.parse_args() |
| 427 if args.path_to_cut: |
| 428 fix_filename_patterns = args.path_to_cut |
| 429 if args.demangle: |
| 430 demangle = True |
| 431 if args.s: |
| 432 binary_name_filter = sysroot_path_filter |
| 433 sysroot_path = args.s |
| 434 if args.c: |
| 435 binutils_prefix = args.c |
| 436 if args.logfile: |
| 437 logfile = args.logfile |
| 438 else: |
| 439 logfile = sys.stdin |
| 440 loop = SymbolizationLoop(binary_name_filter) |
| 441 loop.process_logfile() |
OLD | NEW |