Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(194)

Side by Side Diff: tools/valgrind/asan/asan_symbolize_trunk.py

Issue 10987049: Move the symbolization code to tools/valgrind/asan/asan_symbolize_trunk.py (it's now a copy of the … (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
Property Changes:
Added: svn:executable
+ *
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
3 #
4 # The LLVM Compiler Infrastructure
5 #
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
8 #
9 #===------------------------------------------------------------------------===#
10 import bisect
11 import os
12 import re
13 import subprocess
14 import sys
15
# Set to True to echo child-process command lines and raw input lines.
DEBUG = False

# Per-binary cache of ChainSymbolizer objects, keyed by binary path.
symbolizers = {}

# The remaining globals are not referenced by the code visible in this file.
llvm_symbolizer = None
filetypes = {}
vmaddrs = {}
21
22
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
  """Shorten a source location string for display.

  Every command-line argument is used as a regular expression: everything up
  to and including its last match is cut from the front of the name. After
  that, locations inside asan_*.cc files collapse to '_asan_rtl_' and
  'crtstuff.c:0' locations collapse to '???:0'.

  Args:
    file_name: location string as reported by a symbolizer.
  Returns:
    the rewritten location string.
  """
  cleaned = file_name
  for prefix_pattern in sys.argv[1:]:
    cleaned = re.sub('.*' + prefix_pattern, '', cleaned)
  fixed_rewrites = (
      ('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_'),
      ('.*crtstuff.c:0', '???:0'),
  )
  for pattern, replacement in fixed_rewrites:
    cleaned = re.sub(pattern, replacement, cleaned)
  return cleaned
30
31
class Symbolizer(object):
  """Interface implemented by every symbolizer back end in this file."""

  def __init__(self):
    """Nothing to set up in the base class."""

  def symbolize(self, addr, binary, offset):
    """Symbolize the given address (pair of binary and offset).

    Overridden in subclasses; the base implementation always fails.

    Args:
        addr: virtual address of an instruction.
        binary: path to executable/shared object containing this instruction.
        offset: instruction offset in the @binary.
    Returns:
        list of strings (one string for each inlined frame) describing
        the code locations for this instruction (that is, function name, file
        name, line and column numbers), or None if nothing could be found.
    """
    return None
50
51
class LLVMSymbolizer(Symbolizer):
  """Symbolizer that talks to one long-lived llvm-symbolizer child process."""

  def __init__(self, symbolizer_path):
    """Remember the tool path and try to start the tool.

    Args:
      symbolizer_path: path or bare command name of llvm-symbolizer.
    """
    super(LLVMSymbolizer, self).__init__()
    self.symbolizer_path = symbolizer_path
    self.pipe = self.open_llvm_symbolizer()

  def open_llvm_symbolizer(self):
    """Start llvm-symbolizer with pipes attached to its stdin and stdout.

    Returns:
      the subprocess.Popen object, or None if the tool could not be started.
    """
    cmd = [self.symbolizer_path,
           '--use-symbol-table=true',
           '--demangle=false',
           '--functions=true',
           '--inlining=true']
    if DEBUG:
      print(' '.join(cmd))
    # Do not test os.path.exists() first: a bare command name such as
    # 'llvm-symbolizer' is looked up in PATH by Popen, while exists() would
    # only look in the current directory and wrongly report it as missing.
    try:
      return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)
    except OSError:
      return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Sends '<binary> <offset>' to the tool and reads back pairs of lines
    (function name, then file location) until an empty line is returned.

    Returns:
      list with one '<addr> in <function> <file>' string per frame whose
      function and file are both known, or None if there is no such frame,
      the tool is not running, or talking to it failed.
    """
    if not self.pipe:
      return None
    result = []
    try:
      symbolizer_input = '%s %s' % (binary, offset)
      if DEBUG:
        print(symbolizer_input)
      self.pipe.stdin.write(symbolizer_input + '\n')
      self.pipe.stdin.flush()
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        if not function_name:
          break
        file_name = fix_filename(self.pipe.stdout.readline().rstrip())
        if (not function_name.startswith('??') and
            not file_name.startswith('??')):
          # Append only valid frames.
          result.append('%s in %s %s' % (addr, function_name, file_name))
    except Exception:
      # Best effort: any failure means "no answer", so that the caller can
      # fall back to another symbolizer.
      result = []
    return result or None
97
98
def LLVMSymbolizerFactory(system):
  """Create an LLVMSymbolizer on Linux; return None on any other system.

  The tool path comes from the LLVM_SYMBOLIZER_PATH environment variable.

  Args:
    system: operating system name, e.g. 'Linux'.
  """
  if system != 'Linux':
    return None
  # Assume llvm-symbolizer is in PATH when the variable is unset or empty.
  return LLVMSymbolizer(os.getenv('LLVM_SYMBOLIZER_PATH') or 'llvm-symbolizer')
107
108
class Addr2LineSymbolizer(Symbolizer):
  """Symbolizer that keeps one addr2line child process per binary."""

  def __init__(self, binary):
    """Start addr2line for the given binary.

    Args:
      binary: path to the executable/shared object to symbolize.
    """
    super(Addr2LineSymbolizer, self).__init__()
    self.binary = binary
    self.pipe = self.open_addr2line()

  def open_addr2line(self):
    """Launch 'addr2line -f -e <binary>' with piped stdin and stdout."""
    argv = ['addr2line', '-f', '-e', self.binary]
    if DEBUG:
      print(' '.join(argv))
    return subprocess.Popen(argv,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns:
      None for a binary other than the one this object was created for;
      otherwise a one-element list '<addr> in <function> <file>'. If talking
      to addr2line fails, both names are empty strings.
    """
    if binary != self.binary:
      return None
    try:
      self.pipe.stdin.write('%s\n' % offset)
      # addr2line -f answers with two lines: function name, then location.
      names = (self.pipe.stdout.readline().rstrip(),
               self.pipe.stdout.readline().rstrip())
    except Exception:
      names = ('', '')
    function_name, file_name = names
    return ['%s in %s %s' % (addr, function_name, fix_filename(file_name))]
135
136
class DarwinSymbolizer(Symbolizer):
  """Symbolizer for a single binary that shells out to otool and atos."""

  def __init__(self, addr, binary):
    """Record the binary and guess its architecture from an address.

    Args:
      addr: a virtual address string such as '0x7f6e35cf2e45'; only its
        length is used.
      binary: path to the executable/shared object to symbolize.
    """
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
    self.arch = 'x86_64' if len(addr) > 10 else 'i386'
    self.vmaddr = None
    self.pipe = None

  def get_binary_vmaddr(self):
    """Get the slide value to be added to the address.

    We're looking for the following piece in otool -l output:
      Load command 0
      cmd LC_SEGMENT
      cmdsize 736
      segname __TEXT
      vmaddr 0x00000000

    Returns:
      the vmaddr of the __TEXT segment as an int, or 0 if none was found.
      The value is computed once and then cached.
    """
    # Compare against None: 0 is a legitimate cached value and must not
    # trigger another otool run.
    if self.vmaddr is not None:
      return self.vmaddr
    cmdline = ['otool', '-l', self.binary]
    pipe = subprocess.Popen(cmdline,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    is_text = False
    vmaddr = 0
    try:
      for line in pipe.stdout:
        line = line.strip()
        if line.startswith('segname'):
          is_text = (line == 'segname __TEXT')
          continue
        if line.startswith('vmaddr') and is_text:
          sv = line.split(' ')
          vmaddr = int(sv[-1], 16)
          break
    finally:
      # Release the pipes and reap the child so it does not linger.
      pipe.stdin.close()
      pipe.stdout.close()
      pipe.wait()
    self.vmaddr = vmaddr
    return self.vmaddr

  def write_addr_to_pipe(self, offset):
    """Send offset plus the binary's slide, in hex, to the atos process."""
    slide = self.get_binary_vmaddr()
    self.pipe.stdin.write('0x%x\n' % (int(offset, 16) + slide))

  def open_atos(self):
    """Start a fresh atos process for this binary and store it in self.pipe."""
    if DEBUG:
      print('atos -o %s -arch %s' % (self.binary, self.arch))
    cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
    self.pipe = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    A new atos process is started for every call.

    Returns:
      None for a binary other than the one this object was created for;
      otherwise a one-element list. It holds '<addr> in <function> <file>'
      when the atos answer is well-formed, else '<addr> in <raw atos line>'.
    """
    if self.binary != binary:
      return None
    self.open_atos()
    try:
      self.write_addr_to_pipe(offset)
      self.pipe.stdin.close()
      atos_line = self.pipe.stdout.readline().rstrip()
    finally:
      # One process is spawned per address, so close its pipes and reap it
      # here; otherwise descriptors and zombie processes pile up.
      self.pipe.stdin.close()
      self.pipe.stdout.close()
      self.pipe.stderr.close()
      self.pipe.wait()
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    if DEBUG:
      print(' '.join(['atos_line: ', atos_line]))
    if match:
      # Drop the parenthesized argument list from the function name.
      function_name = re.sub(r'\(.*?\)', '', match.group(1))
      file_name = fix_filename(match.group(3))
      return ['%s in %s %s' % (addr, function_name, file_name)]
    return ['%s in %s' % (addr, atos_line)]
212
213
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
  """Tries a list of symbolizers in order and returns the first answer."""

  def __init__(self, symbolizer_list):
    """Store the list; entries may be None and are skipped when symbolizing.

    Args:
      symbolizer_list: list of Symbolizer objects (or None placeholders).
    """
    super(ChainSymbolizer, self).__init__()
    self.symbolizer_list = symbolizer_list

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns:
      the first non-empty result produced by a symbolizer in the chain, or
      None if every one of them failed.
    """
    for candidate in self.symbolizer_list:
      if not candidate:
        continue
      frames = candidate.symbolize(addr, binary, offset)
      if frames:
        return frames
    return None

  def append_symbolizer(self, symbolizer):
    """Add a symbolizer to the end of the chain."""
    self.symbolizer_list.append(symbolizer)
232
233
def BreakpadSymbolizerFactory(binary):
  """Create a BreakpadSymbolizer for the binary if a symbol file exists.

  The symbol file name is the binary path with the value of the
  BREAKPAD_SUFFIX environment variable appended.

  Args:
    binary: path to the executable/shared object.
  Returns:
    a BreakpadSymbolizer, or None if the variable is unset or empty or the
    symbol file does not exist.
  """
  suffix = os.getenv('BREAKPAD_SUFFIX')
  if not suffix:
    return None
  candidate = binary + suffix
  if not os.access(candidate, os.F_OK):
    return None
  return BreakpadSymbolizer(candidate)
241
242
def SystemSymbolizerFactory(system, addr, binary):
  """Create the platform's native symbolizer for the given binary.

  Args:
    system: operating system name ('Darwin' or 'Linux').
    addr: virtual address string; only passed on to DarwinSymbolizer.
    binary: path to the executable/shared object.
  Returns:
    an Addr2LineSymbolizer on Linux, a DarwinSymbolizer on Darwin, and None
    for any other system name.
  """
  if system == 'Linux':
    return Addr2LineSymbolizer(binary)
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary)
  return None
248
249
class BreakpadSymbolizer(Symbolizer):
  """Symbolizer that answers from a Breakpad text symbol file read up front."""

  def __init__(self, filename):
    """Read and index the whole symbol file.

    Args:
      filename: path to the Breakpad symbol file. Its first line must be a
        MODULE record, e.g.
          MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    """
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # Close the file as soon as it has been read.
    with open(filename) as symbol_file:
      lines = symbol_file.readlines()
    self.files = []          # file names, indexed by FILE record number
    self.symbols = {}        # function/public symbol address -> name
    self.address_list = []   # sorted start addresses of all line records
    self.addresses = {}      # line record address -> (symbol, size, line, file)
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Fill the lookup tables from the records after the MODULE line.

    FILE, PUBLIC and FUNC records are indexed, CFI and STACK records are
    ignored, and any other non-blank line is treated as a line record
    '<address> <size> <line> <file number>' of the most recent FUNC.
    """
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Tolerate blank lines instead of failing on fragments[0].
        continue
      record_type = fragments[0]
      if record_type == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif record_type == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif record_type in ['CFI', 'STACK']:
        pass
      elif record_type == 'FUNC':
        cur_function_addr = int(fragments[1], 16)
        # A name already registered for this address is kept.
        if cur_function_addr not in self.symbols:
          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
      else:
        # Line starting with an address.
        addr = int(record_type, 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Find the line record covering addr.

    Args:
      addr: offset inside the binary, as an int.
    Returns:
      tuple (symbol name, file name, line number), or None if no line record
      covers the address.
    """
    if addr in self.addresses:
      key = addr
    else:
      # Pick the closest line record that starts below addr.
      index = bisect.bisect_left(self.address_list, addr)
      if index == 0:
        return None
      key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    symbol = self.symbols[sym_id]
    filename = self.files[file_no]
    if addr < key + size:
      return symbol, filename, line_no
    return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns:
      None for a binary other than the one named in the MODULE record or for
      an unknown offset; otherwise a one-element list
      '<addr> in <function> <file>:<line>'.
    """
    if self.binary != binary:
      return None
    res = self.get_sym_file_line(int(offset, 16))
    if not res:
      return None
    function_name, file_name, line_no = res
    result = ['%s in %s %s:%d' % (
        addr, function_name, file_name, line_no)]
    # Debug-only echo: printing unconditionally would mix the raw list into
    # the symbolized report written to stdout by the caller.
    if DEBUG:
      print(result)
    return result
322
323
class SymbolizationLoop(object):
  """Reads a report from stdin and prints it with stack frames symbolized."""

  def __init__(self, binary_name_filter=None):
    """Detect the operating system and set up the LLVM symbolizer.

    Args:
      binary_name_filter: optional callable mapping a binary path found in
        the report to the path that should be symbolized instead.
    Raises:
      Exception: if the system is neither Linux nor Darwin.
    """
    # Used by clients who may want to supply a different binary name.
    # E.g. in Chrome several binaries may share a single .dSYM.
    self.binary_name_filter = binary_name_filter
    self.system = os.uname()[0]
    if self.system not in ['Linux', 'Darwin']:
      raise Exception('Unknown system')
    self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)

  def symbolize_address(self, addr, binary, offset):
    """Symbolize one address using the per-binary chain of symbolizers.

    Returns:
      list of symbolized frame strings for the address.
    """
    # Use the chain of symbolizers:
    # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
    # (fall back to next symbolizer if the previous one fails).
    if binary not in symbolizers:
      symbolizers[binary] = ChainSymbolizer(
          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
    result = symbolizers[binary].symbolize(addr, binary, offset)
    if result is None:
      # Initialize system symbolizer only if other symbolizers failed.
      symbolizers[binary].append_symbolizer(
          SystemSymbolizerFactory(self.system, addr, binary))
      result = symbolizers[binary].symbolize(addr, binary, offset)
    # The system symbolizer must produce some result.
    assert result
    return result

  def print_symbolized_lines(self, symbolized_lines):
    """Print the frames for the current line, numbering them consecutively.

    With no frames, the current input line is printed unchanged.
    """
    if not symbolized_lines:
      print(self.current_line)
      return
    for symbolized_frame in symbolized_lines:
      # NOTE(review): the leading whitespace of this literal may have been
      # collapsed in the reviewed copy of the file -- confirm.
      print(' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip())
      self.frame_no += 1

  def process_stdin(self):
    """Copy stdin to stdout, replacing stack trace lines by symbolized ones."""
    self.frame_no = 0
    #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    stack_trace_line_format = re.compile(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
    for line in sys.stdin:
      self.current_line = line.rstrip()
      match = stack_trace_line_format.match(line)
      if not match:
        print(self.current_line)
        continue
      if DEBUG:
        print(line)
      _, frameno_str, addr, binary, offset = match.groups()
      if frameno_str == '0':
        # Assume that frame #0 is the first frame of new stack trace.
        self.frame_no = 0
      original_binary = binary
      if self.binary_name_filter:
        binary = self.binary_name_filter(binary)
      symbolized_line = self.symbolize_address(addr, binary, offset)
      if not symbolized_line and original_binary != binary:
        # The filtered name gave nothing: retry with the name that was
        # actually in the report (retrying the filtered name is pointless).
        symbolized_line = self.symbolize_address(addr, original_binary, offset)
      self.print_symbolized_lines(symbolized_line)
385
386
# Script entry point: symbolize whatever is piped into stdin.
if __name__ == '__main__':
  SymbolizationLoop().process_stdin()
OLDNEW
« tools/valgrind/asan/asan_symbolize.py ('K') | « tools/valgrind/asan/asan_symbolize.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698