OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# | 2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
3 # | 3 # |
4 # The LLVM Compiler Infrastructure | 4 # The LLVM Compiler Infrastructure |
5 # | 5 # |
6 # This file is distributed under the University of Illinois Open Source | 6 # This file is distributed under the University of Illinois Open Source |
7 # License. See LICENSE.TXT for details. | 7 # License. See LICENSE.TXT for details. |
8 # | 8 # |
9 #===------------------------------------------------------------------------===# | 9 #===------------------------------------------------------------------------===# |
10 import argparse | |
11 import bisect | 10 import bisect |
12 import getopt | 11 import getopt |
13 import os | 12 import os |
14 import pty | 13 import pty |
15 import re | 14 import re |
16 import subprocess | 15 import subprocess |
17 import sys | 16 import sys |
18 import termios | 17 import termios |
19 | 18 |
20 symbolizers = {} | 19 symbolizers = {} |
21 DEBUG = False | 20 DEBUG = False |
22 demangle = False | 21 demangle = False; |
23 binutils_prefix = None | 22 |
24 sysroot_path = None | |
25 binary_name_filter = None | |
26 fix_filename_patterns = None | |
27 logfile = None | |
28 | 23 |
29 # FIXME: merge the code that calls fix_filename(). | 24 # FIXME: merge the code that calls fix_filename(). |
30 def fix_filename(file_name): | 25 def fix_filename(file_name): |
31 if fix_filename_patterns: | 26 for path_to_cut in sys.argv[1:]: |
32 for path_to_cut in fix_filename_patterns: | 27 file_name = re.sub('.*' + path_to_cut, '', file_name) |
33 file_name = re.sub('.*' + path_to_cut, '', file_name) | |
34 file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) | 28 file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) |
35 file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) | 29 file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) |
36 return file_name | 30 return file_name |
37 | 31 |
38 def sysroot_path_filter(binary_name): | 32 def GuessArch(addr): |
39 return sysroot_path + binary_name | |
40 | |
41 def guess_arch(addr): | |
42 # Guess which arch we're running. 10 = len('0x') + 8 hex digits. | 33 # Guess which arch we're running. 10 = len('0x') + 8 hex digits. |
43 if len(addr) > 10: | 34 if len(addr) > 10: |
44 return 'x86_64' | 35 return 'x86_64' |
45 else: | 36 else: |
46 return 'i386' | 37 return 'i386' |
47 | 38 |
48 class Symbolizer(object): | 39 class Symbolizer(object): |
49 def __init__(self): | 40 def __init__(self): |
50 pass | 41 pass |
51 | 42 |
(...skipping 10 matching lines...) Expand all Loading... |
62 the code locations for this instruction (that is, function name, file | 53 the code locations for this instruction (that is, function name, file |
63 name, line and column numbers). | 54 name, line and column numbers). |
64 """ | 55 """ |
65 return None | 56 return None |
66 | 57 |
67 | 58 |
68 class LLVMSymbolizer(Symbolizer): | 59 class LLVMSymbolizer(Symbolizer): |
69 def __init__(self, symbolizer_path, addr): | 60 def __init__(self, symbolizer_path, addr): |
70 super(LLVMSymbolizer, self).__init__() | 61 super(LLVMSymbolizer, self).__init__() |
71 self.symbolizer_path = symbolizer_path | 62 self.symbolizer_path = symbolizer_path |
72 self.default_arch = guess_arch(addr) | 63 self.default_arch = GuessArch(addr) |
73 self.pipe = self.open_llvm_symbolizer() | 64 self.pipe = self.open_llvm_symbolizer() |
74 | 65 |
75 def open_llvm_symbolizer(self): | 66 def open_llvm_symbolizer(self): |
76 cmd = [self.symbolizer_path, | 67 cmd = [self.symbolizer_path, |
77 '--use-symbol-table=true', | 68 '--use-symbol-table=true', |
78 '--demangle=%s' % demangle, | 69 '--demangle=%s' % demangle, |
79 '--functions=short', | 70 '--functions=short', |
80 '--inlining=true', | 71 '--inlining=true', |
81 '--default-arch=%s' % self.default_arch] | 72 '--default-arch=%s' % self.default_arch] |
82 if DEBUG: | 73 if DEBUG: |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
126 return LLVMSymbolizer(symbolizer_path, addr) | 117 return LLVMSymbolizer(symbolizer_path, addr) |
127 | 118 |
128 | 119 |
129 class Addr2LineSymbolizer(Symbolizer): | 120 class Addr2LineSymbolizer(Symbolizer): |
130 def __init__(self, binary): | 121 def __init__(self, binary): |
131 super(Addr2LineSymbolizer, self).__init__() | 122 super(Addr2LineSymbolizer, self).__init__() |
132 self.binary = binary | 123 self.binary = binary |
133 self.pipe = self.open_addr2line() | 124 self.pipe = self.open_addr2line() |
134 | 125 |
135 def open_addr2line(self): | 126 def open_addr2line(self): |
136 addr2line_tool = 'addr2line' | 127 cmd = ['addr2line', '-f'] |
137 if binutils_prefix: | |
138 addr2line_tool = binutils_prefix + addr2line_tool | |
139 cmd = [addr2line_tool, '-f'] | |
140 if demangle: | 128 if demangle: |
141 cmd += ['--demangle'] | 129 cmd += ['--demangle'] |
142 cmd += ['-e', self.binary] | 130 cmd += ['-e', self.binary] |
143 if DEBUG: | 131 if DEBUG: |
144 print ' '.join(cmd) | 132 print ' '.join(cmd) |
145 return subprocess.Popen(cmd, | 133 return subprocess.Popen(cmd, |
146 stdin=subprocess.PIPE, stdout=subprocess.PIPE) | 134 stdin=subprocess.PIPE, stdout=subprocess.PIPE) |
147 | 135 |
148 def symbolize(self, addr, binary, offset): | 136 def symbolize(self, addr, binary, offset): |
149 """Overrides Symbolizer.symbolize.""" | 137 """Overrides Symbolizer.symbolize.""" |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
187 return self.readline() | 175 return self.readline() |
188 | 176 |
189 def readline(self): | 177 def readline(self): |
190 return self.r.readline().rstrip() | 178 return self.r.readline().rstrip() |
191 | 179 |
192 | 180 |
193 class DarwinSymbolizer(Symbolizer): | 181 class DarwinSymbolizer(Symbolizer): |
194 def __init__(self, addr, binary): | 182 def __init__(self, addr, binary): |
195 super(DarwinSymbolizer, self).__init__() | 183 super(DarwinSymbolizer, self).__init__() |
196 self.binary = binary | 184 self.binary = binary |
197 self.arch = guess_arch(addr) | 185 self.arch = GuessArch(addr) |
198 self.open_atos() | 186 self.open_atos() |
199 | 187 |
200 def open_atos(self): | 188 def open_atos(self): |
201 if DEBUG: | 189 if DEBUG: |
202 print 'atos -o %s -arch %s' % (self.binary, self.arch) | 190 print 'atos -o %s -arch %s' % (self.binary, self.arch) |
203 cmdline = ['atos', '-o', self.binary, '-arch', self.arch] | 191 cmdline = ['atos', '-o', self.binary, '-arch', self.arch] |
204 self.atos = UnbufferedLineConverter(cmdline, close_stderr=True) | 192 self.atos = UnbufferedLineConverter(cmdline, close_stderr=True) |
205 | 193 |
206 def symbolize(self, addr, binary, offset): | 194 def symbolize(self, addr, binary, offset): |
207 """Overrides Symbolizer.symbolize.""" | 195 """Overrides Symbolizer.symbolize.""" |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
333 else: | 321 else: |
334 return None | 322 return None |
335 | 323 |
336 | 324 |
337 class SymbolizationLoop(object): | 325 class SymbolizationLoop(object): |
338 def __init__(self, binary_name_filter=None): | 326 def __init__(self, binary_name_filter=None): |
339 # Used by clients who may want to supply a different binary name. | 327 # Used by clients who may want to supply a different binary name. |
340 # E.g. in Chrome several binaries may share a single .dSYM. | 328 # E.g. in Chrome several binaries may share a single .dSYM. |
341 self.binary_name_filter = binary_name_filter | 329 self.binary_name_filter = binary_name_filter |
342 self.system = os.uname()[0] | 330 self.system = os.uname()[0] |
343 if self.system not in ['Linux', 'Darwin', 'FreeBSD']: | 331 if self.system not in ['Linux', 'Darwin']: |
344 raise Exception('Unknown system') | 332 raise Exception('Unknown system') |
345 self.llvm_symbolizer = None | 333 self.llvm_symbolizer = None |
346 self.frame_no = 0 | 334 self.frame_no = 0 |
347 | 335 |
348 def symbolize_address(self, addr, binary, offset): | 336 def symbolize_address(self, addr, binary, offset): |
349 # Initialize llvm-symbolizer lazily. | 337 # Initialize llvm-symbolizer lazily. |
350 if not self.llvm_symbolizer: | 338 if not self.llvm_symbolizer: |
351 self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr) | 339 self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr) |
352 # Use the chain of symbolizers: | 340 # Use the chain of symbolizers: |
353 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos | 341 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos |
(...skipping 14 matching lines...) Expand all Loading... |
368 def get_symbolized_lines(self, symbolized_lines): | 356 def get_symbolized_lines(self, symbolized_lines): |
369 if not symbolized_lines: | 357 if not symbolized_lines: |
370 return [self.current_line] | 358 return [self.current_line] |
371 else: | 359 else: |
372 result = [] | 360 result = [] |
373 for symbolized_frame in symbolized_lines: | 361 for symbolized_frame in symbolized_lines: |
374 result.append(' #%s %s' % (str(self.frame_no), symbolized_frame.rstri
p())) | 362 result.append(' #%s %s' % (str(self.frame_no), symbolized_frame.rstri
p())) |
375 self.frame_no += 1 | 363 self.frame_no += 1 |
376 return result | 364 return result |
377 | 365 |
378 def process_logfile(self): | 366 def process_stdin(self): |
379 self.frame_no = 0 | 367 self.frame_no = 0 |
380 while True: | 368 while True: |
381 line = logfile.readline() | 369 line = sys.stdin.readline() |
382 if not line: | 370 if not line: |
383 break | 371 break |
384 processed = self.process_line(line) | 372 processed = self.process_line(line) |
385 print '\n'.join(processed) | 373 print ''.join(processed) |
386 | 374 |
387 def process_line(self, line): | 375 def process_line(self, line): |
388 self.current_line = line.rstrip() | 376 self.current_line = line.rstrip() |
389 #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) | 377 #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) |
390 stack_trace_line_format = ( | 378 stack_trace_line_format = ( |
391 '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') | 379 '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') |
392 match = re.match(stack_trace_line_format, line) | 380 match = re.match(stack_trace_line_format, line) |
393 if not match: | 381 if not match: |
394 return [self.current_line] | 382 return [self.current_line] |
395 if DEBUG: | 383 if DEBUG: |
396 print line | 384 print line |
397 _, frameno_str, addr, binary, offset = match.groups() | 385 _, frameno_str, addr, binary, offset = match.groups() |
398 if frameno_str == '0': | 386 if frameno_str == '0': |
399 # Assume that frame #0 is the first frame of new stack trace. | 387 # Assume that frame #0 is the first frame of new stack trace. |
400 self.frame_no = 0 | 388 self.frame_no = 0 |
401 original_binary = binary | 389 original_binary = binary |
402 if self.binary_name_filter: | 390 if self.binary_name_filter: |
403 binary = self.binary_name_filter(binary) | 391 binary = self.binary_name_filter(binary) |
404 symbolized_line = self.symbolize_address(addr, binary, offset) | 392 symbolized_line = self.symbolize_address(addr, binary, offset) |
405 if not symbolized_line: | 393 if not symbolized_line: |
406 if original_binary != binary: | 394 if original_binary != binary: |
407 symbolized_line = self.symbolize_address(addr, binary, offset) | 395 symbolized_line = self.symbolize_address(addr, binary, offset) |
408 return self.get_symbolized_lines(symbolized_line) | 396 return self.get_symbolized_lines(symbolized_line) |
409 | 397 |
410 | 398 |
411 if __name__ == '__main__': | 399 if __name__ == '__main__': |
412 parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFo
rmatter, | 400 opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"]) |
413 description='ASan symbolization script', | 401 for o, a in opts: |
414 epilog='''Example of use: | 402 if o in ("-d", "--demangle"): |
415 asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/Symbol
Files" < asan.log''') | 403 demangle = True; |
416 parser.add_argument('path_to_cut', nargs='*', | 404 loop = SymbolizationLoop() |
417 help='pattern to be cut from the result file path ') | 405 loop.process_stdin() |
418 parser.add_argument('-d','--demangle', action='store_true', | |
419 help='demangle function names') | |
420 parser.add_argument('-s', metavar='SYSROOT', | |
421 help='set path to sysroot for sanitized binaries') | |
422 parser.add_argument('-c', metavar='CROSS_COMPILE', | |
423 help='set prefix for binutils') | |
424 parser.add_argument('-l','--logfile', default=sys.stdin, type=argparse.FileTyp
e('r'), | |
425 help='set log file name to parse, default is stdin') | |
426 args = parser.parse_args() | |
427 if args.path_to_cut: | |
428 fix_filename_patterns = args.path_to_cut | |
429 if args.demangle: | |
430 demangle = True | |
431 if args.s: | |
432 binary_name_filter = sysroot_path_filter | |
433 sysroot_path = args.s | |
434 if args.c: | |
435 binutils_prefix = args.c | |
436 if args.logfile: | |
437 logfile = args.logfile | |
438 else: | |
439 logfile = sys.stdin | |
440 loop = SymbolizationLoop(binary_name_filter) | |
441 loop.process_logfile() | |
OLD | NEW |