OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# | 2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
3 # | 3 # |
4 # The LLVM Compiler Infrastructure | 4 # The LLVM Compiler Infrastructure |
5 # | 5 # |
6 # This file is distributed under the University of Illinois Open Source | 6 # This file is distributed under the University of Illinois Open Source |
7 # License. See LICENSE.TXT for details. | 7 # License. See LICENSE.TXT for details. |
8 # | 8 # |
9 #===------------------------------------------------------------------------===# | 9 #===------------------------------------------------------------------------===# |
10 import bisect | 10 import bisect |
11 import getopt | 11 import getopt |
12 import os | 12 import os |
| 13 import pty |
13 import re | 14 import re |
14 import subprocess | 15 import subprocess |
15 import sys | 16 import sys |
| 17 import termios |
16 | 18 |
17 llvm_symbolizer = None | |
18 symbolizers = {} | 19 symbolizers = {} |
19 DEBUG = False | 20 DEBUG = False |
20 demangle = False; | 21 demangle = False; |
21 | 22 |
22 | 23 |
23 # FIXME: merge the code that calls fix_filename(). | 24 # FIXME: merge the code that calls fix_filename(). |
24 def fix_filename(file_name): | 25 def fix_filename(file_name): |
25 for path_to_cut in sys.argv[1:]: | 26 for path_to_cut in sys.argv[1:]: |
26 file_name = re.sub('.*' + path_to_cut, '', file_name) | 27 file_name = re.sub('.*' + path_to_cut, '', file_name) |
27 file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) | 28 file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) |
28 file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) | 29 file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) |
29 return file_name | 30 return file_name |
30 | 31 |
| 32 def GuessArch(addr): |
| 33 # Guess which arch we're running. 10 = len('0x') + 8 hex digits. |
| 34 if len(addr) > 10: |
| 35 return 'x86_64' |
| 36 else: |
| 37 return 'i386' |
31 | 38 |
32 class Symbolizer(object): | 39 class Symbolizer(object): |
33 def __init__(self): | 40 def __init__(self): |
34 pass | 41 pass |
35 | 42 |
36 def symbolize(self, addr, binary, offset): | 43 def symbolize(self, addr, binary, offset): |
37 """Symbolize the given address (pair of binary and offset). | 44 """Symbolize the given address (pair of binary and offset). |
38 | 45 |
39 Overridden in subclasses. | 46 Overridden in subclasses. |
40 Args: | 47 Args: |
41 addr: virtual address of an instruction. | 48 addr: virtual address of an instruction. |
42 binary: path to executable/shared object containing this instruction. | 49 binary: path to executable/shared object containing this instruction. |
43 offset: instruction offset in the @binary. | 50 offset: instruction offset in the @binary. |
44 Returns: | 51 Returns: |
45 list of strings (one string for each inlined frame) describing | 52 list of strings (one string for each inlined frame) describing |
46 the code locations for this instruction (that is, function name, file | 53 the code locations for this instruction (that is, function name, file |
47 name, line and column numbers). | 54 name, line and column numbers). |
48 """ | 55 """ |
49 return None | 56 return None |
50 | 57 |
51 | 58 |
52 class LLVMSymbolizer(Symbolizer): | 59 class LLVMSymbolizer(Symbolizer): |
53 def __init__(self, symbolizer_path): | 60 def __init__(self, symbolizer_path, addr): |
54 super(LLVMSymbolizer, self).__init__() | 61 super(LLVMSymbolizer, self).__init__() |
55 self.symbolizer_path = symbolizer_path | 62 self.symbolizer_path = symbolizer_path |
| 63 self.default_arch = GuessArch(addr) |
56 self.pipe = self.open_llvm_symbolizer() | 64 self.pipe = self.open_llvm_symbolizer() |
57 | 65 |
58 def open_llvm_symbolizer(self): | 66 def open_llvm_symbolizer(self): |
59 if not os.path.exists(self.symbolizer_path): | |
60 return None | |
61 cmd = [self.symbolizer_path, | 67 cmd = [self.symbolizer_path, |
62 '--use-symbol-table=true', | 68 '--use-symbol-table=true', |
63 '--demangle=%s' % demangle, | 69 '--demangle=%s' % demangle, |
64 '--functions=true', | 70 '--functions=short', |
65 '--inlining=true'] | 71 '--inlining=true', |
| 72 '--default-arch=%s' % self.default_arch] |
66 if DEBUG: | 73 if DEBUG: |
67 print ' '.join(cmd) | 74 print ' '.join(cmd) |
68 return subprocess.Popen(cmd, stdin=subprocess.PIPE, | 75 try: |
69 stdout=subprocess.PIPE) | 76 result = subprocess.Popen(cmd, stdin=subprocess.PIPE, |
| 77 stdout=subprocess.PIPE) |
| 78 except OSError: |
| 79 result = None |
| 80 return result |
70 | 81 |
71 def symbolize(self, addr, binary, offset): | 82 def symbolize(self, addr, binary, offset): |
72 """Overrides Symbolizer.symbolize.""" | 83 """Overrides Symbolizer.symbolize.""" |
73 if not self.pipe: | 84 if not self.pipe: |
74 return None | 85 return None |
75 result = [] | 86 result = [] |
76 try: | 87 try: |
77 symbolizer_input = '%s %s' % (binary, offset) | 88 symbolizer_input = '%s %s' % (binary, offset) |
78 if DEBUG: | 89 if DEBUG: |
79 print symbolizer_input | 90 print symbolizer_input |
80 print >> self.pipe.stdin, symbolizer_input | 91 print >> self.pipe.stdin, symbolizer_input |
81 while True: | 92 while True: |
82 function_name = self.pipe.stdout.readline().rstrip() | 93 function_name = self.pipe.stdout.readline().rstrip() |
83 if not function_name: | 94 if not function_name: |
84 break | 95 break |
85 file_name = self.pipe.stdout.readline().rstrip() | 96 file_name = self.pipe.stdout.readline().rstrip() |
86 file_name = fix_filename(file_name) | 97 file_name = fix_filename(file_name) |
87 if (not function_name.startswith('??') and | 98 if (not function_name.startswith('??') or |
88 not file_name.startswith('??')): | 99 not file_name.startswith('??')): |
89 # Append only valid frames. | 100 # Append only non-trivial frames. |
90 result.append('%s in %s %s' % (addr, function_name, | 101 result.append('%s in %s %s' % (addr, function_name, |
91 file_name)) | 102 file_name)) |
92 except Exception: | 103 except Exception: |
93 result = [] | 104 result = [] |
94 if not result: | 105 if not result: |
95 result = None | 106 result = None |
96 return result | 107 return result |
97 | 108 |
98 | 109 |
99 def LLVMSymbolizerFactory(system): | 110 def LLVMSymbolizerFactory(system, addr): |
100 symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH') | 111 symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH') |
101 if not symbolizer_path: | 112 if not symbolizer_path: |
102 # Assume llvm-symbolizer is in PATH. | 113 symbolizer_path = os.getenv('ASAN_SYMBOLIZER_PATH') |
103 symbolizer_path = 'llvm-symbolizer' | 114 if not symbolizer_path: |
104 return LLVMSymbolizer(symbolizer_path) | 115 # Assume llvm-symbolizer is in PATH. |
| 116 symbolizer_path = 'llvm-symbolizer' |
| 117 return LLVMSymbolizer(symbolizer_path, addr) |
105 | 118 |
106 | 119 |
107 class Addr2LineSymbolizer(Symbolizer): | 120 class Addr2LineSymbolizer(Symbolizer): |
108 def __init__(self, binary): | 121 def __init__(self, binary): |
109 super(Addr2LineSymbolizer, self).__init__() | 122 super(Addr2LineSymbolizer, self).__init__() |
110 self.binary = binary | 123 self.binary = binary |
111 self.pipe = self.open_addr2line() | 124 self.pipe = self.open_addr2line() |
112 | 125 |
113 def open_addr2line(self): | 126 def open_addr2line(self): |
114 cmd = ['addr2line', '-f'] | 127 cmd = ['addr2line', '-f'] |
(...skipping 13 matching lines...) |
128 print >> self.pipe.stdin, offset | 141 print >> self.pipe.stdin, offset |
129 function_name = self.pipe.stdout.readline().rstrip() | 142 function_name = self.pipe.stdout.readline().rstrip() |
130 file_name = self.pipe.stdout.readline().rstrip() | 143 file_name = self.pipe.stdout.readline().rstrip() |
131 except Exception: | 144 except Exception: |
132 function_name = '' | 145 function_name = '' |
133 file_name = '' | 146 file_name = '' |
134 file_name = fix_filename(file_name) | 147 file_name = fix_filename(file_name) |
135 return ['%s in %s %s' % (addr, function_name, file_name)] | 148 return ['%s in %s %s' % (addr, function_name, file_name)] |
136 | 149 |
137 | 150 |
| 151 class UnbufferedLineConverter(object): |
| 152 """ |
| 153 Wrap a child process that responds to each line of input with one line of |
| 154 output. Uses pty to trick the child into providing unbuffered output. |
| 155 """ |
| 156 def __init__(self, args, close_stderr=False): |
| 157 pid, fd = pty.fork() |
| 158 if pid == 0: |
| 159 # We're the child. Transfer control to command. |
| 160 if close_stderr: |
| 161 dev_null = os.open('/dev/null', 0) |
| 162 os.dup2(dev_null, 2) |
| 163 os.execvp(args[0], args) |
| 164 else: |
| 165 # Disable echoing. |
| 166 attr = termios.tcgetattr(fd) |
| 167 attr[3] = attr[3] & ~termios.ECHO |
| 168 termios.tcsetattr(fd, termios.TCSANOW, attr) |
| 169 # Set up a file()-like interface to the child process |
| 170 self.r = os.fdopen(fd, "r", 1) |
| 171 self.w = os.fdopen(os.dup(fd), "w", 1) |
| 172 |
| 173 def convert(self, line): |
| 174 self.w.write(line + "\n") |
| 175 return self.readline() |
| 176 |
| 177 def readline(self): |
| 178 return self.r.readline().rstrip() |
| 179 |
| 180 |
138 class DarwinSymbolizer(Symbolizer): | 181 class DarwinSymbolizer(Symbolizer): |
139 def __init__(self, addr, binary): | 182 def __init__(self, addr, binary): |
140 super(DarwinSymbolizer, self).__init__() | 183 super(DarwinSymbolizer, self).__init__() |
141 self.binary = binary | 184 self.binary = binary |
142 # Guess which arch we're running. 10 = len('0x') + 8 hex digits. | 185 self.arch = GuessArch(addr) |
143 if len(addr) > 10: | 186 self.open_atos() |
144 self.arch = 'x86_64' | |
145 else: | |
146 self.arch = 'i386' | |
147 self.pipe = None | |
148 | |
149 def write_addr_to_pipe(self, offset): | |
150 print >> self.pipe.stdin, '0x%x' % int(offset, 16) | |
151 | 187 |
152 def open_atos(self): | 188 def open_atos(self): |
153 if DEBUG: | 189 if DEBUG: |
154 print 'atos -o %s -arch %s' % (self.binary, self.arch) | 190 print 'atos -o %s -arch %s' % (self.binary, self.arch) |
155 cmdline = ['atos', '-o', self.binary, '-arch', self.arch] | 191 cmdline = ['atos', '-o', self.binary, '-arch', self.arch] |
156 self.pipe = subprocess.Popen(cmdline, | 192 self.atos = UnbufferedLineConverter(cmdline, close_stderr=True) |
157 stdin=subprocess.PIPE, | |
158 stdout=subprocess.PIPE, | |
159 stderr=subprocess.PIPE) | |
160 | 193 |
161 def symbolize(self, addr, binary, offset): | 194 def symbolize(self, addr, binary, offset): |
162 """Overrides Symbolizer.symbolize.""" | 195 """Overrides Symbolizer.symbolize.""" |
163 if self.binary != binary: | 196 if self.binary != binary: |
164 return None | 197 return None |
165 self.open_atos() | 198 atos_line = self.atos.convert('0x%x' % int(offset, 16)) |
166 self.write_addr_to_pipe(offset) | 199 while "got symbolicator for" in atos_line: |
167 self.pipe.stdin.close() | 200 atos_line = self.atos.readline() |
168 atos_line = self.pipe.stdout.readline().rstrip() | |
169 # A well-formed atos response looks like this: | 201 # A well-formed atos response looks like this: |
170 # foo(type1, type2) (in object.name) (filename.cc:80) | 202 # foo(type1, type2) (in object.name) (filename.cc:80) |
171 match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line) | 203 match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line) |
172 if DEBUG: | 204 if DEBUG: |
173 print 'atos_line: ', atos_line | 205 print 'atos_line: ', atos_line |
174 if match: | 206 if match: |
175 function_name = match.group(1) | 207 function_name = match.group(1) |
176 function_name = re.sub('\(.*?\)', '', function_name) | 208 function_name = re.sub('\(.*?\)', '', function_name) |
177 file_name = fix_filename(match.group(3)) | 209 file_name = fix_filename(match.group(3)) |
178 return ['%s in %s %s' % (addr, function_name, file_name)] | 210 return ['%s in %s %s' % (addr, function_name, file_name)] |
(...skipping 110 matching lines...) |
289 else: | 321 else: |
290 return None | 322 return None |
291 | 323 |
292 | 324 |
293 class SymbolizationLoop(object): | 325 class SymbolizationLoop(object): |
294 def __init__(self, binary_name_filter=None): | 326 def __init__(self, binary_name_filter=None): |
295 # Used by clients who may want to supply a different binary name. | 327 # Used by clients who may want to supply a different binary name. |
296 # E.g. in Chrome several binaries may share a single .dSYM. | 328 # E.g. in Chrome several binaries may share a single .dSYM. |
297 self.binary_name_filter = binary_name_filter | 329 self.binary_name_filter = binary_name_filter |
298 self.system = os.uname()[0] | 330 self.system = os.uname()[0] |
299 if self.system in ['Linux', 'Darwin']: | 331 if self.system not in ['Linux', 'Darwin']: |
300 self.llvm_symbolizer = LLVMSymbolizerFactory(self.system) | |
301 else: | |
302 raise Exception('Unknown system') | 332 raise Exception('Unknown system') |
| 333 self.llvm_symbolizer = None |
303 | 334 |
304 def symbolize_address(self, addr, binary, offset): | 335 def symbolize_address(self, addr, binary, offset): |
| 336 # Initialize llvm-symbolizer lazily. |
| 337 if not self.llvm_symbolizer: |
| 338 self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr) |
305 # Use the chain of symbolizers: | 339 # Use the chain of symbolizers: |
306 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos | 340 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos |
307 # (fall back to next symbolizer if the previous one fails). | 341 # (fall back to next symbolizer if the previous one fails). |
308 if not binary in symbolizers: | 342 if not binary in symbolizers: |
309 symbolizers[binary] = ChainSymbolizer( | 343 symbolizers[binary] = ChainSymbolizer( |
310 [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer]) | 344 [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer]) |
311 result = symbolizers[binary].symbolize(addr, binary, offset) | 345 result = symbolizers[binary].symbolize(addr, binary, offset) |
312 if result is None: | 346 if result is None: |
313 # Initialize system symbolizer only if other symbolizers failed. | 347 # Initialize system symbolizer only if other symbolizers failed. |
314 symbolizers[binary].append_symbolizer( | 348 symbolizers[binary].append_symbolizer( |
(...skipping 41 matching lines...) |
356 self.print_symbolized_lines(symbolized_line) | 390 self.print_symbolized_lines(symbolized_line) |
357 | 391 |
358 | 392 |
359 if __name__ == '__main__': | 393 if __name__ == '__main__': |
360 opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"]) | 394 opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"]) |
361 for o, a in opts: | 395 for o, a in opts: |
362 if o in ("-d", "--demangle"): | 396 if o in ("-d", "--demangle"): |
363 demangle = True; | 397 demangle = True; |
364 loop = SymbolizationLoop() | 398 loop = SymbolizationLoop() |
365 loop.process_stdin() | 399 loop.process_stdin() |
OLD | NEW |