| OLD | NEW |
| (Empty) | |
| 1 #!/usr/bin/env python |
| 2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
| 3 # |
| 4 # The LLVM Compiler Infrastructure |
| 5 # |
| 6 # This file is distributed under the University of Illinois Open Source |
| 7 # License. See LICENSE.TXT for details. |
| 8 # |
| 9 #===------------------------------------------------------------------------===# |
| 10 import bisect |
| 11 import os |
| 12 import re |
| 13 import subprocess |
| 14 import sys |
| 15 |
# Module-wide state.

# When true, helper command lines and raw tool input/output are echoed.
DEBUG = False

# Per-binary cache of symbolizer chains, keyed by the binary path.
symbolizers = {}

# NOTE(review): the three names below are not referenced by the code visible
# in this file; they are kept in case external code still reads them.
llvm_symbolizer = None
filetypes = {}
vmaddrs = {}
| 21 |
| 22 |
| 23 # FIXME: merge the code that calls fix_filename(). |
def fix_filename(file_name):
    """Shorten a source location string reported by a symbolizer.

    Args:
      file_name: location string, e.g. '/path/to/file.cc:12'.
    Returns:
      the string after the following rewrites have been applied in order:
      each command-line argument (sys.argv[1:]) is used as a regular
      expression and everything up to and including its match is removed;
      locations inside asan_*.cc files collapse to '_asan_rtl_'; and
      'crtstuff.c:0' locations become '???:0'.
    """
    shortened = file_name
    for path_to_cut in sys.argv[1:]:
        shortened = re.sub('.*' + path_to_cut, '', shortened)
    fixed_rewrites = (
        ('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_'),
        ('.*crtstuff.c:0', '???:0'),
    )
    for pattern, replacement in fixed_rewrites:
        shortened = re.sub(pattern, replacement, shortened)
    return shortened
| 30 |
| 31 |
class Symbolizer(object):
    """Common interface of every symbolizer backend."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses; the base implementation symbolizes nothing.

        Args:
          addr: virtual address of an instruction.
          binary: path to executable/shared object containing this
              instruction.
          offset: instruction offset in the @binary.
        Returns:
          list of strings (one string for each inlined frame) describing
          the code locations for this instruction (that is, function name,
          file name, line and column numbers). The base class always
          returns None.
        """
        return None
| 50 |
| 51 |
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to one long-lived llvm-symbolizer subprocess.

    Requests are written to the tool's stdin as '<binary> <offset>' lines and
    the answer is read back from its stdout as pairs of lines (function name,
    then file location) terminated by an empty line.
    """

    def __init__(self, symbolizer_path):
        """Remember the tool location and try to start it right away.

        Args:
          symbolizer_path: path or bare command name of llvm-symbolizer.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start the llvm-symbolizer process.

        Returns:
          the subprocess.Popen object, or None when the tool cannot be
          started (missing, not executable, not found on PATH).
        """
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=false',
               '--functions=true',
               '--inlining=true']
        if DEBUG:
            print(' '.join(cmd))
        # Do not pre-check with os.path.exists(): a bare command name such as
        # 'llvm-symbolizer' is resolved against PATH by Popen itself, while
        # exists() would only look in the current directory.
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        except OSError:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          list of '<addr> in <function> <file>' strings, one per (inlined)
          frame, or None when the tool is unavailable, fails, or reports
          only unknown ('??') frames.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '%s %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            self.pipe.stdin.flush()
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    # An empty line (or EOF) ends the answer.
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') and
                        not file_name.startswith('??')):
                    # Append only valid frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure talking to the tool means "no answer"
            # so that the caller can fall back to another symbolizer.
            result = []
        if not result:
            result = None
        return result
| 97 |
| 98 |
def LLVMSymbolizerFactory(system):
    """Create an LLVMSymbolizer for the given platform, if supported.

    Args:
      system: platform name as reported by os.uname()[0].
    Returns:
      an LLVMSymbolizer on 'Linux', None on every other system.
    """
    if system != 'Linux':
        return None
    # LLVM_SYMBOLIZER_PATH wins; when it is unset or empty use the bare
    # tool name.
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH') or 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path)
| 107 |
| 108 |
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that queries an 'addr2line -f -e <binary>' subprocess."""

    def __init__(self, binary):
        """Start addr2line for the given binary.

        Args:
          binary: path to the executable/shared object to symbolize.
        """
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Spawn addr2line with piped stdin/stdout and return the process."""
        command = ['addr2line', '-f', '-e', self.binary]
        if DEBUG:
            print(' '.join(command))
        return subprocess.Popen(command,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None for any binary other than the one this instance serves;
          otherwise a one-element list '<addr> in <function> <file>'. When
          talking to addr2line fails, both names are left empty.
        """
        if binary != self.binary:
            return None
        try:
            self.pipe.stdin.write('%s\n' % offset)
            # addr2line -f answers with two lines: function, then location.
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            function_name, file_name = '', ''
        location = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, location)]
| 135 |
| 136 |
class DarwinSymbolizer(Symbolizer):
    """Symbolizer for Mac OS X built on the 'otool' and 'atos' tools."""

    def __init__(self, addr, binary):
        """Record the binary and guess its architecture.

        Args:
          addr: a virtual address string ('0x...') taken from the report;
              only its length is used, to guess the architecture.
          binary: path to the executable/shared object to symbolize.
        """
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
        self.arch = 'x86_64' if len(addr) > 10 else 'i386'
        self.vmaddr = None
        self.pipe = None

    def get_binary_vmaddr(self):
        """Get the slide value to be added to the address.

        We're looking for the following piece in otool -l output:
          Load command 0
          cmd LC_SEGMENT
          cmdsize 736
          segname __TEXT
          vmaddr 0x00000000

        Returns:
          the vmaddr of the __TEXT segment as an int (0 if none was found).
          The value is computed once and cached on the instance.
        """
        # Compare against None: 0 is a perfectly valid cached vmaddr.
        if self.vmaddr is not None:
            return self.vmaddr
        cmdline = ['otool', '-l', self.binary]
        pipe = subprocess.Popen(cmdline,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        # communicate() drains the output, closes the pipes and reaps otool.
        output = pipe.communicate()[0]
        is_text = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                is_text = (line == 'segname __TEXT')
                continue
            if line.startswith('vmaddr') and is_text:
                sv = line.split(' ')
                vmaddr = int(sv[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Send the slid address for the hex string @offset to atos."""
        slide = self.get_binary_vmaddr()
        self.pipe.stdin.write('0x%x\n' % (int(offset, 16) + slide))

    def open_atos(self):
        """Spawn a fresh atos process for this binary into self.pipe."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)

    def _close_atos(self):
        """Close every pipe of the current atos process and reap it."""
        for stream in (self.pipe.stdin, self.pipe.stdout, self.pipe.stderr):
            stream.close()
        self.pipe.wait()

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        A new atos process is started for every request.

        Returns:
          None for any binary other than the one this instance serves;
          otherwise a one-element list: '<addr> in <function> <file>' for a
          well-formed atos answer, else '<addr> in <raw atos line>'.
        """
        if self.binary != binary:
            return None
        self.open_atos()
        try:
            self.write_addr_to_pipe(offset)
            # Closing stdin makes atos answer and terminate.
            self.pipe.stdin.close()
            atos_line = self.pipe.stdout.readline().rstrip()
        finally:
            # One process is spawned per address; without this every call
            # would leave a zombie process and open pipe descriptors behind.
            self._close_atos()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line:  %s' % atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]
| 212 |
| 213 |
| 214 # Chain several symbolizers so that if one symbolizer fails, we fall back |
| 215 # to the next symbolizer in chain. |
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order until one gives an answer."""

    def __init__(self, symbolizer_list):
        """Store the initial chain.

        Args:
          symbolizer_list: list of symbolizers in priority order; falsy
              entries (e.g. None) are allowed and skipped during lookup.
        """
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          the first truthy result produced by a chain member, or None when
          every member failed.
        """
        for candidate in self.symbolizer_list:
            if not candidate:
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add @symbolizer as the last fallback of the chain."""
        self.symbolizer_list.append(symbolizer)
| 232 |
| 233 |
def BreakpadSymbolizerFactory(binary):
    """Create a BreakpadSymbolizer for @binary when a symbol file exists.

    The symbol file name is @binary followed by the value of the
    BREAKPAD_SUFFIX environment variable.

    Args:
      binary: path to the executable/shared object.
    Returns:
      a BreakpadSymbolizer, or None when BREAKPAD_SUFFIX is unset/empty or
      the symbol file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        return None
    filename = binary + suffix
    if not os.access(filename, os.F_OK):
        return None
    return BreakpadSymbolizer(filename)
| 241 |
| 242 |
def SystemSymbolizerFactory(system, addr, binary):
    """Create the symbolizer built on the platform's native tools.

    Args:
      system: platform name as reported by os.uname()[0].
      addr: virtual address string; only the Darwin symbolizer uses it.
      binary: path to the executable/shared object.
    Returns:
      an Addr2LineSymbolizer on 'Linux', a DarwinSymbolizer on 'Darwin',
      None for any other system.
    """
    if system == 'Linux':
        return Addr2LineSymbolizer(binary)
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    return None
| 248 |
| 249 |
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad text symbol file.

    The whole file is parsed once in the constructor; lookups are then done
    in memory with a binary search over the sorted line-record addresses.
    """

    def __init__(self, filename):
        """Load and parse the symbol file.

        Args:
          filename: path to the Breakpad symbol file. Its first line must be
              a MODULE record.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Use a context manager so the file handle is closed deterministically.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []          # FILE records, indexed by file number.
        self.symbols = {}        # function/public address -> symbol name.
        self.address_list = []   # sorted start addresses of line records.
        self.addresses = {}      # line-record address -> record tuple.
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the records following MODULE.

        Args:
          lines: iterable of text lines (FILE, PUBLIC, FUNC, CFI/STACK and
              line records).
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Tolerate blank lines instead of failing on fragments[0].
                continue
            record_type = fragments[0]
            if record_type == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record_type == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record_type in ['CFI', 'STACK']:
                pass
            elif record_type == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name that is already known for this address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Find the line record covering @addr.

        Args:
          addr: integer offset inside the module.
        Returns:
          (symbol name, file name, line number), or None when no line record
          covers the address.
        """
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record that starts before addr.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        if addr >= key + size:
            # addr falls in the gap after the preceding record.
            return None
        return self.symbols[sym_id], self.files[file_no], line_no

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          a one-element list '<addr> in <function> <file>:<line>', or None
          when @binary is not the module described by the symbol file or
          the offset is not covered by it.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        # Debug output only: printing unconditionally would inject the raw
        # list into the symbolized report written to stdout.
        if DEBUG:
            print(result)
        return result
| 322 |
| 323 |
| 324 class SymbolizationLoop(object): |
| 325 def __init__(self, binary_name_filter=None): |
| 326 # Used by clients who may want to supply a different binary name. |
| 327 # E.g. in Chrome several binaries may share a single .dSYM. |
| 328 self.binary_name_filter = binary_name_filter |
| 329 self.system = os.uname()[0] |
| 330 if self.system in ['Linux', 'Darwin']: |
| 331 self.llvm_symbolizer = LLVMSymbolizerFactory(self.system) |
| 332 else: |
| 333 raise Exception('Unknown system') |
| 334 |
| 335 def symbolize_address(self, addr, binary, offset): |
| 336 # Use the chain of symbolizers: |
| 337 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos |
| 338 # (fall back to next symbolizer if the previous one fails). |
| 339 if not binary in symbolizers: |
| 340 symbolizers[binary] = ChainSymbolizer( |
| 341 [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer]) |
| 342 result = symbolizers[binary].symbolize(addr, binary, offset) |
| 343 if result is None: |
| 344 # Initialize system symbolizer only if other symbolizers failed. |
| 345 symbolizers[binary].append_symbolizer( |
| 346 SystemSymbolizerFactory(self.system, addr, binary)) |
| 347 result = symbolizers[binary].symbolize(addr, binary, offset) |
| 348 # The system symbolizer must produce some result. |
| 349 assert result |
| 350 return result |
| 351 |
| 352 def print_symbolized_lines(self, symbolized_lines): |
| 353 if not symbolized_lines: |
| 354 print self.current_line |
| 355 else: |
| 356 for symbolized_frame in symbolized_lines: |
| 357 print ' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip() |
| 358 self.frame_no += 1 |
| 359 |
| 360 def process_stdin(self): |
| 361 self.frame_no = 0 |
| 362 for line in sys.stdin: |
| 363 self.current_line = line.rstrip() |
| 364 #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) |
| 365 stack_trace_line_format = ( |
| 366 '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') |
| 367 match = re.match(stack_trace_line_format, line) |
| 368 if not match: |
| 369 print self.current_line |
| 370 continue |
| 371 if DEBUG: |
| 372 print line |
| 373 _, frameno_str, addr, binary, offset = match.groups() |
| 374 if frameno_str == '0': |
| 375 # Assume that frame #0 is the first frame of new stack trace. |
| 376 self.frame_no = 0 |
| 377 original_binary = binary |
| 378 if self.binary_name_filter: |
| 379 binary = self.binary_name_filter(binary) |
| 380 symbolized_line = self.symbolize_address(addr, binary, offset) |
| 381 if not symbolized_line: |
| 382 if original_binary != binary: |
| 383 symbolized_line = self.symbolize_address(addr, binary, offset) |
| 384 self.print_symbolized_lines(symbolized_line) |
| 385 |
| 386 |
if __name__ == '__main__':
    # Script mode: symbolize the report arriving on stdin and print it.
    SymbolizationLoop().process_stdin()
| OLD | NEW |