Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(185)

Side by Side Diff: build/android/pylib/symbols/elf_symbolizer.py

Issue 339853004: binary_size_tool: fix for ambiguous addr2line output (Closed) Base URL: https://chromium.googlesource.com/chromium/src@master
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2014 The Chromium Authors. All rights reserved. 1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import collections 5 import collections
6 import datetime 6 import datetime
7 import logging 7 import logging
8 import multiprocessing 8 import multiprocessing
9 import os 9 import os
10 import posixpath 10 import posixpath
11 import Queue 11 import Queue
12 import re 12 import re
13 import subprocess 13 import subprocess
14 import sys 14 import sys
15 import threading 15 import threading
16 16 from sets import Set
17 17
18 # addr2line builds a possibly infinite memory cache that can exhaust 18 # addr2line builds a possibly infinite memory cache that can exhaust
19 # the computer's memory if allowed to grow for too long. This constant 19 # the computer's memory if allowed to grow for too long. This constant
20 # controls how many lookups we do before restarting the process. 4000 20 # controls how many lookups we do before restarting the process. 4000
21 # gives near peak performance without extreme memory usage. 21 # gives near peak performance without extreme memory usage.
22 ADDR2LINE_RECYCLE_LIMIT = 4000 22 ADDR2LINE_RECYCLE_LIMIT = 4000
23 23
24 24
25 class ELFSymbolizer(object): 25 class ELFSymbolizer(object):
26 """An uber-fast (multiprocessing, pipelined and asynchronous) ELF symbolizer. 26 """An uber-fast (multiprocessing, pipelined and asynchronous) ELF symbolizer.
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
68 |max_queue_size| bound, a new addr2line instance is kicked in. 68 |max_queue_size| bound, a new addr2line instance is kicked in.
69 In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances 69 In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances
70 have a backlog of |max_queue_size|), back-pressure is applied on the caller by 70 have a backlog of |max_queue_size|), back-pressure is applied on the caller by
71 blocking the SymbolizeAsync method. 71 blocking the SymbolizeAsync method.
72 72
73 This module has been deliberately designed to be dependency free (w.r.t. of 73 This module has been deliberately designed to be dependency free (w.r.t. of
74 other modules in this project), to allow easy reuse in external projects. 74 other modules in this project), to allow easy reuse in external projects.
75 """ 75 """
76 76
77 def __init__(self, elf_file_path, addr2line_path, callback, inlines=False, 77 def __init__(self, elf_file_path, addr2line_path, callback, inlines=False,
78 max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50): 78 max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50,
79 disambiguate=False, disambiguation_source_path=''):
79 """Args: 80 """Args:
80 elf_file_path: path of the elf file to be symbolized. 81 elf_file_path: path of the elf file to be symbolized.
81 addr2line_path: path of the toolchain's addr2line binary. 82 addr2line_path: path of the toolchain's addr2line binary.
82 callback: a callback which will be invoked for each resolved symbol with 83 callback: a callback which will be invoked for each resolved symbol with
83 the two args (sym_info, callback_arg). The former is an instance of 84 the two args (sym_info, callback_arg). The former is an instance of
84 |ELFSymbolInfo| and contains the symbol information. The latter is an 85 |ELFSymbolInfo| and contains the symbol information. The latter is an
85 embedder-provided argument which is passed to SymbolizeAsync(). 86 embedder-provided argument which is passed to SymbolizeAsync().
86 inlines: when True, the ELFSymbolInfo will contain also the details about 87 inlines: when True, the ELFSymbolInfo will contain also the details about
87 the outer inlining functions. When False, only the innermost function 88 the outer inlining functions. When False, only the innermost function
88 will be provided. 89 will be provided.
89 max_concurrent_jobs: Max number of addr2line instances spawned. 90 max_concurrent_jobs: Max number of addr2line instances spawned.
90 Parallelize responsibly, addr2line is a memory and I/O monster. 91 Parallelize responsibly, addr2line is a memory and I/O monster.
91 max_queue_size: Max number of outstanding requests per addr2line instance. 92 max_queue_size: Max number of outstanding requests per addr2line instance.
92 addr2line_timeout: Max time (in seconds) to wait for a addr2line response. 93 addr2line_timeout: Max time (in seconds) to wait for a addr2line response.
93 After the timeout, the instance will be considered hung and respawned. 94 After the timeout, the instance will be considered hung and respawned.
95 disambiguate: Whether to run a disambiguation process or not.
96 Disambiguation means to resolve ambigious source_paths,
Andrew Hayden (chromium.org) 2014/06/17 11:36:27 ambigious -> ambiguous (Spelling) You may also wa
97 example turn addr2line output "unicode.cc" into a full and absolute
Andrew Hayden (chromium.org) 2014/06/17 11:36:27 "for example" instead of "example"
98 path
99 disambiguation_source_path: The path of the source code that the
Andrew Hayden (chromium.org) 2014/06/17 11:36:27 I'd say: the path to the directory where source fi
100 disambiguation will lookup source files
94 """ 101 """
95 assert(os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path 102 assert(os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path
96 self.elf_file_path = elf_file_path 103 self.elf_file_path = elf_file_path
97 self.addr2line_path = addr2line_path 104 self.addr2line_path = addr2line_path
98 self.callback = callback 105 self.callback = callback
99 self.inlines = inlines 106 self.inlines = inlines
100 self.max_concurrent_jobs = (max_concurrent_jobs or 107 self.max_concurrent_jobs = (max_concurrent_jobs or
101 min(multiprocessing.cpu_count(), 4)) 108 min(multiprocessing.cpu_count(), 4))
102 self.max_queue_size = max_queue_size 109 self.max_queue_size = max_queue_size
103 self.addr2line_timeout = addr2line_timeout 110 self.addr2line_timeout = addr2line_timeout
104 self.requests_counter = 0 # For generating monotonic request IDs. 111 self.requests_counter = 0 # For generating monotonic request IDs.
105 self._a2l_instances = [] # Up to |max_concurrent_jobs| _Addr2Line inst. 112 self._a2l_instances = [] # Up to |max_concurrent_jobs| _Addr2Line inst.
106 113
114 # If necessary, create disambiguation lookup table
115 self.disambiguate = disambiguate
116 self.commonprefix = ''
117 self.lookup_table = {}
118 if(self.disambiguate):
119 self._CreateDisambiguationTable(disambiguation_source_path)
120
107 # Create one addr2line instance. More instances will be created on demand 121 # Create one addr2line instance. More instances will be created on demand
108 # (up to |max_concurrent_jobs|) depending on the rate of the requests. 122 # (up to |max_concurrent_jobs|) depending on the rate of the requests.
109 self._CreateNewA2LInstance() 123 self._CreateNewA2LInstance()
110 124
111 def SymbolizeAsync(self, addr, callback_arg=None): 125 def SymbolizeAsync(self, addr, callback_arg=None):
112 """Requests symbolization of a given address. 126 """Requests symbolization of a given address.
113 127
114 This method is not guaranteed to return immediately. It generally does, but 128 This method is not guaranteed to return immediately. It generally does, but
115 in some scenarios (e.g. all addr2line instances have full queues) it can 129 in some scenarios (e.g. all addr2line instances have full queues) it can
116 block to create back-pressure. 130 block to create back-pressure.
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
154 for a2l in self._a2l_instances: 168 for a2l in self._a2l_instances:
155 a2l.WaitForIdle() 169 a2l.WaitForIdle()
156 a2l.Terminate() 170 a2l.Terminate()
157 171
158 def _CreateNewA2LInstance(self): 172 def _CreateNewA2LInstance(self):
159 assert(len(self._a2l_instances) < self.max_concurrent_jobs) 173 assert(len(self._a2l_instances) < self.max_concurrent_jobs)
160 a2l = ELFSymbolizer.Addr2Line(self) 174 a2l = ELFSymbolizer.Addr2Line(self)
161 self._a2l_instances.append(a2l) 175 self._a2l_instances.append(a2l)
162 return a2l 176 return a2l
163 177
178 def _CreateDisambiguationTable(self, src_root_path):
179 """ Creates a table of files used for disambiguation later
180 Disambiguation:
181 addr2line sometimes returns an ambiguous file name rather than the
182 full path of the file where the symbol is located.
183
184 Adapted from andrewhayden's implementation in earlier commits. """
185 interesting_file_endings = { ".c", ".cc", ".h", ".cp", ".cpp", ".cxx",
186 ".c++", ".asm", ".inc", ".s", ".hxx" }
187 duplicates = Set()
188 self.lookup_table = {}
189 src_root_path = os.path.abspath(src_root_path)
190
191 for root, _, filenames in os.walk(src_root_path):
192 for f in filenames:
193 _, ext = os.path.splitext(f)
194 if not ext in interesting_file_endings:
195 continue
196
197 base = os.path.basename(f) # Just in case
198 if self.lookup_table.get(base) is None:
199 self.lookup_table[base] = "%s/%s" % (root, f)
200 else:
201 duplicates.add(base)
202
203 # Duplicates cannot be used for disambiguation, as we cannot determine
204 # the true source if we have more than one to choose from
205 for d in duplicates:
206 del self.lookup_table[d]
207
208 # Get the common prefix for the source paths
209 self.commonprefix = os.path.commonprefix(self.lookup_table.values())
164 210
165 class Addr2Line(object): 211 class Addr2Line(object):
166 """A python wrapper around an addr2line instance. 212 """A python wrapper around an addr2line instance.
167 213
168 The communication with the addr2line process looks as follows: 214 The communication with the addr2line process looks as follows:
169 [STDIN] [STDOUT] (from addr2line's viewpoint) 215 [STDIN] [STDOUT] (from addr2line's viewpoint)
170 > f001111 216 > f001111
171 > f002222 217 > f002222
172 < Symbol::Name(foo, bar) for f001111 218 < Symbol::Name(foo, bar) for f001111
173 < /path/to/source/file.c:line_number 219 < /path/to/source/file.c:line_number
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
299 innermost_sym_info = None 345 innermost_sym_info = None
300 sym_info = None 346 sym_info = None
301 for (line1, line2) in lines: 347 for (line1, line2) in lines:
302 prev_sym_info = sym_info 348 prev_sym_info = sym_info
303 name = line1 if not line1.startswith('?') else None 349 name = line1 if not line1.startswith('?') else None
304 source_path = None 350 source_path = None
305 source_line = None 351 source_line = None
306 m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2) 352 m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2)
307 if m: 353 if m:
308 if not m.group(1).startswith('?'): 354 if not m.group(1).startswith('?'):
309 source_path = m.group(1) 355 source_path = os.path.abspath(m.group(1))
310 if not m.group(2).startswith('?'): 356 if not m.group(2).startswith('?'):
311 source_line = int(m.group(2)) 357 source_line = int(m.group(2))
312 else: 358 else:
313 logging.warning('Got invalid symbol path from addr2line: %s' % line2) 359 logging.warning('Got invalid symbol path from addr2line: %s' % line2)
314 360
315 sym_info = ELFSymbolInfo(name, source_path, source_line) 361 # In case disambiguation is on, and needed
362 disambiguated = False
363 if self._symbolizer.disambiguate:
364 # Strip the common prefix to determine whether the source is ambigous
Andrew Hayden (chromium.org) 2014/06/17 11:36:27 "ambiguous"
365 # or not (we assume that there are no source files in the common
366 # prefix of the paths)
367 if not source_path is None:
368 common_prefix = self._symbolizer.commonprefix
369 if source_path.startswith(common_prefix):
370 path = source_path[len(common_prefix):]
371 # In case no '/' character is found in this "relative" path
372 # it is most likely that addr2line did not get the whole path
373 if not path.find('/') != -1:
Andrew Hayden (chromium.org) 2014/06/17 11:36:27 Double negation is hard to reason about. How about
374 source_path = self._symbolizer.lookup_table.get(path)
375 disambiguated = True
376
377 sym_info = ELFSymbolInfo(name, source_path, source_line, disambiguated)
316 if prev_sym_info: 378 if prev_sym_info:
317 prev_sym_info.inlined_by = sym_info 379 prev_sym_info.inlined_by = sym_info
318 if not innermost_sym_info: 380 if not innermost_sym_info:
319 innermost_sym_info = sym_info 381 innermost_sym_info = sym_info
320 382
321 self._processed_symbols_count += 1 383 self._processed_symbols_count += 1
322 self._symbolizer.callback(innermost_sym_info, callback_arg) 384 self._symbolizer.callback(innermost_sym_info, callback_arg)
323 385
324 def _RestartAddr2LineProcess(self): 386 def _RestartAddr2LineProcess(self):
325 if self._proc: 387 if self._proc:
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
386 448
387 @property 449 @property
388 def first_request_id(self): 450 def first_request_id(self):
389 """Returns the request_id of the oldest pending request in the queue.""" 451 """Returns the request_id of the oldest pending request in the queue."""
390 return self._request_queue[0][2] if self._request_queue else 0 452 return self._request_queue[0][2] if self._request_queue else 0
391 453
392 454
393 class ELFSymbolInfo(object): 455 class ELFSymbolInfo(object):
394 """The result of the symbolization passed as first arg. of each callback.""" 456 """The result of the symbolization passed as first arg. of each callback."""
395 457
396 def __init__(self, name, source_path, source_line): 458 def __init__(self, name, source_path, source_line, disambiguated=False):
397 """All the fields here can be None (if addr2line replies with '??').""" 459 """All the fields here can be None (if addr2line replies with '??')."""
398 self.name = name 460 self.name = name
399 self.source_path = source_path 461 self.source_path = source_path
400 self.source_line = source_line 462 self.source_line = source_line
401 # In the case of |inlines|=True, the |inlined_by| points to the outer 463 # In the case of |inlines|=True, the |inlined_by| points to the outer
402 # function inlining the current one (and so on, to form a chain). 464 # function inlining the current one (and so on, to form a chain).
403 self.inlined_by = None 465 self.inlined_by = None
466 self.disambiguated = disambiguated
404 467
405 def __str__(self): 468 def __str__(self):
406 return '%s [%s:%d]' % ( 469 return '%s [%s:%d]' % (
407 self.name or '??', self.source_path or '??', self.source_line or 0) 470 self.name or '??', self.source_path or '??', self.source_line or 0)
OLDNEW
« no previous file with comments | « no previous file | build/android/pylib/symbols/elf_symbolizer_unittest.py » ('j') | tools/binary_size/run_binary_size_analysis.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698