 Chromium Code Reviews
 Chromium Code Reviews Issue 339853004:
  binary_size_tool: fix for ambiguous addr2line output  (Closed) 
  Base URL: https://chromium.googlesource.com/chromium/src@master
    
  
    Issue 339853004:
  binary_size_tool: fix for ambiguous addr2line output  (Closed) 
  Base URL: https://chromium.googlesource.com/chromium/src@master| OLD | NEW | 
|---|---|
| 1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright 2014 The Chromium Authors. All rights reserved. | 
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be | 
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. | 
| 4 | 4 | 
| 5 import collections | 5 import collections | 
| 6 import datetime | 6 import datetime | 
| 7 import logging | 7 import logging | 
| 8 import multiprocessing | 8 import multiprocessing | 
| 9 import os | 9 import os | 
| 10 import posixpath | 10 import posixpath | 
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 68 |max_queue_size| bound, a new addr2line instance is kicked in. | 68 |max_queue_size| bound, a new addr2line instance is kicked in. | 
| 69 In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances | 69 In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances | 
| 70 have a backlog of |max_queue_size|), back-pressure is applied on the caller by | 70 have a backlog of |max_queue_size|), back-pressure is applied on the caller by | 
| 71 blocking the SymbolizeAsync method. | 71 blocking the SymbolizeAsync method. | 
| 72 | 72 | 
| 73 This module has been deliberately designed to be dependency free (w.r.t. of | 73 This module has been deliberately designed to be dependency free (w.r.t. of | 
| 74 other modules in this project), to allow easy reuse in external projects. | 74 other modules in this project), to allow easy reuse in external projects. | 
| 75 """ | 75 """ | 
| 76 | 76 | 
| 77 def __init__(self, elf_file_path, addr2line_path, callback, inlines=False, | 77 def __init__(self, elf_file_path, addr2line_path, callback, inlines=False, | 
| 78 max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50): | 78 max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50, | 
| 79 source_root_path=None, strip_base_path=None): | |
| 79 """Args: | 80 """Args: | 
| 80 elf_file_path: path of the elf file to be symbolized. | 81 elf_file_path: path of the elf file to be symbolized. | 
| 81 addr2line_path: path of the toolchain's addr2line binary. | 82 addr2line_path: path of the toolchain's addr2line binary. | 
| 82 callback: a callback which will be invoked for each resolved symbol with | 83 callback: a callback which will be invoked for each resolved symbol with | 
| 83 the two args (sym_info, callback_arg). The former is an instance of | 84 the two args (sym_info, callback_arg). The former is an instance of | 
| 84 |ELFSymbolInfo| and contains the symbol information. The latter is an | 85 |ELFSymbolInfo| and contains the symbol information. The latter is an | 
| 85 embedder-provided argument which is passed to SymbolizeAsync(). | 86 embedder-provided argument which is passed to SymbolizeAsync(). | 
| 86 inlines: when True, the ELFSymbolInfo will contain also the details about | 87 inlines: when True, the ELFSymbolInfo will contain also the details about | 
| 87 the outer inlining functions. When False, only the innermost function | 88 the outer inlining functions. When False, only the innermost function | 
| 88 will be provided. | 89 will be provided. | 
| 89 max_concurrent_jobs: Max number of addr2line instances spawned. | 90 max_concurrent_jobs: Max number of addr2line instances spawned. | 
| 90 Parallelize responsibly, addr2line is a memory and I/O monster. | 91 Parallelize responsibly, addr2line is a memory and I/O monster. | 
| 91 max_queue_size: Max number of outstanding requests per addr2line instance. | 92 max_queue_size: Max number of outstanding requests per addr2line instance. | 
| 92 addr2line_timeout: Max time (in seconds) to wait for a addr2line response. | 93 addr2line_timeout: Max time (in seconds) to wait for a addr2line response. | 
| 93 After the timeout, the instance will be considered hung and respawned. | 94 After the timeout, the instance will be considered hung and respawned. | 
| 95 source_root_path: The path to the library source code. Which is used in | |
| 
Primiano Tucci (use gerrit)
2014/06/25 09:02:58
Nit: s/. Which//
 | |
| 96 the disambiguation process. Disambiguation means to resolve ambiguous | |
| 97 source paths, for example turn addr2line output "unicode.cc" into a | |
| 98 full and absolute path. In some toolchains only the name of the source | |
| 
Primiano Tucci (use gerrit)
2014/06/25 09:02:59
Uh, what is the difference between a "full" and a
 | |
| 99 file is output, without any path information; disambiguation searches | |
| 100 through the source directory specified by |source_root_path| argument | |
| 101 for files whose name matches. If there are multiple files with the | |
| 102 same name, disambiguation will fail. | |
| 
Primiano Tucci (use gerrit)
2014/06/25 09:02:58
Can we reword this a bit and make it a bit more cl
 | |
| 103 strip_base_path: If this base path is found in the beginning of any | |
| 
Primiano Tucci (use gerrit)
2014/06/25 09:02:58
"rebases the symbols source paths onto |source_roo
 | |
| 104 result, we strip it and replace it with |source_root_path|. | |
| 94 """ | 105 """ | 
| 95 assert(os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path | 106 assert(os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path | 
| 96 self.elf_file_path = elf_file_path | 107 self.elf_file_path = elf_file_path | 
| 97 self.addr2line_path = addr2line_path | 108 self.addr2line_path = addr2line_path | 
| 98 self.callback = callback | 109 self.callback = callback | 
| 99 self.inlines = inlines | 110 self.inlines = inlines | 
| 100 self.max_concurrent_jobs = (max_concurrent_jobs or | 111 self.max_concurrent_jobs = (max_concurrent_jobs or | 
| 101 min(multiprocessing.cpu_count(), 4)) | 112 min(multiprocessing.cpu_count(), 4)) | 
| 102 self.max_queue_size = max_queue_size | 113 self.max_queue_size = max_queue_size | 
| 103 self.addr2line_timeout = addr2line_timeout | 114 self.addr2line_timeout = addr2line_timeout | 
| 104 self.requests_counter = 0 # For generating monotonic request IDs. | 115 self.requests_counter = 0 # For generating monotonic request IDs. | 
| 105 self._a2l_instances = [] # Up to |max_concurrent_jobs| _Addr2Line inst. | 116 self._a2l_instances = [] # Up to |max_concurrent_jobs| _Addr2Line inst. | 
| 106 | 117 | 
| 118 # If necessary, create disambiguation lookup table | |
| 119 self.disambiguate = source_root_path is not None | |
| 120 self.lookup_table = {} | |
| 
Primiano Tucci (use gerrit)
2014/06/25 09:02:59
What about:
self.disambiguation_table = {}  # 'foo
 | |
| 121 self.source_root_path = source_root_path | |
| 122 self.strip_base_path = strip_base_path | |
| 123 if(self.disambiguate): | |
| 124 self._CreateDisambiguationTable() | |
| 125 | |
| 107 # Create one addr2line instance. More instances will be created on demand | 126 # Create one addr2line instance. More instances will be created on demand | 
| 108 # (up to |max_concurrent_jobs|) depending on the rate of the requests. | 127 # (up to |max_concurrent_jobs|) depending on the rate of the requests. | 
| 109 self._CreateNewA2LInstance() | 128 self._CreateNewA2LInstance() | 
| 110 | 129 | 
| 111 def SymbolizeAsync(self, addr, callback_arg=None): | 130 def SymbolizeAsync(self, addr, callback_arg=None): | 
| 112 """Requests symbolization of a given address. | 131 """Requests symbolization of a given address. | 
| 113 | 132 | 
| 114 This method is not guaranteed to return immediately. It generally does, but | 133 This method is not guaranteed to return immediately. It generally does, but | 
| 115 in some scenarios (e.g. all addr2line instances have full queues) it can | 134 in some scenarios (e.g. all addr2line instances have full queues) it can | 
| 116 block to create back-pressure. | 135 block to create back-pressure. | 
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 154 for a2l in self._a2l_instances: | 173 for a2l in self._a2l_instances: | 
| 155 a2l.WaitForIdle() | 174 a2l.WaitForIdle() | 
| 156 a2l.Terminate() | 175 a2l.Terminate() | 
| 157 | 176 | 
| 158 def _CreateNewA2LInstance(self): | 177 def _CreateNewA2LInstance(self): | 
| 159 assert(len(self._a2l_instances) < self.max_concurrent_jobs) | 178 assert(len(self._a2l_instances) < self.max_concurrent_jobs) | 
| 160 a2l = ELFSymbolizer.Addr2Line(self) | 179 a2l = ELFSymbolizer.Addr2Line(self) | 
| 161 self._a2l_instances.append(a2l) | 180 self._a2l_instances.append(a2l) | 
| 162 return a2l | 181 return a2l | 
| 163 | 182 | 
| 183 def _CreateDisambiguationTable(self): | |
| 
Primiano Tucci (use gerrit)
2014/06/25 09:02:59
Just add a one line comment please:
"""Non-unique
 | |
| 184 self.lookup_table = {} | |
| 185 source_root_path = os.path.abspath(self.source_root_path) | |
| 
Primiano Tucci (use gerrit)
2014/06/25 09:02:59
I'd do this on line 121 and save one line:
self.so
 | |
| 186 | |
| 187 for root, _, filenames in os.walk(source_root_path): | |
| 188 for f in filenames: | |
| 189 self.lookup_table[f] = os.path.join(root, f) if (f not in | |
| 190 self.lookup_table) else None | |
| 191 | |
| 164 | 192 | 
| 165 class Addr2Line(object): | 193 class Addr2Line(object): | 
| 166 """A python wrapper around an addr2line instance. | 194 """A python wrapper around an addr2line instance. | 
| 167 | 195 | 
| 168 The communication with the addr2line process looks as follows: | 196 The communication with the addr2line process looks as follows: | 
| 169 [STDIN] [STDOUT] (from addr2line's viewpoint) | 197 [STDIN] [STDOUT] (from addr2line's viewpoint) | 
| 170 > f001111 | 198 > f001111 | 
| 171 > f002222 | 199 > f002222 | 
| 172 < Symbol::Name(foo, bar) for f001111 | 200 < Symbol::Name(foo, bar) for f001111 | 
| 173 < /path/to/source/file.c:line_number | 201 < /path/to/source/file.c:line_number | 
| (...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 305 source_line = None | 333 source_line = None | 
| 306 m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2) | 334 m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2) | 
| 307 if m: | 335 if m: | 
| 308 if not m.group(1).startswith('?'): | 336 if not m.group(1).startswith('?'): | 
| 309 source_path = m.group(1) | 337 source_path = m.group(1) | 
| 310 if not m.group(2).startswith('?'): | 338 if not m.group(2).startswith('?'): | 
| 311 source_line = int(m.group(2)) | 339 source_line = int(m.group(2)) | 
| 312 else: | 340 else: | 
| 313 logging.warning('Got invalid symbol path from addr2line: %s' % line2) | 341 logging.warning('Got invalid symbol path from addr2line: %s' % line2) | 
| 314 | 342 | 
| 315 sym_info = ELFSymbolInfo(name, source_path, source_line) | 343 # In case disambiguation is on, and needed | 
| 344 was_ambiguous = False | |
| 345 disambiguated = False | |
| 346 if self._symbolizer.disambiguate: | |
| 347 if source_path and not source_path.startswith('/'): | |
| 
Primiano Tucci (use gerrit)
2014/06/25 09:02:58
Probably you can just use: posixpath.isabs(source_
 | |
| 348 source_path = self._symbolizer.lookup_table.get(source_path) | |
| 349 was_ambiguous = True | |
| 350 disambiguated = source_path is not None | |
| 
Primiano Tucci (use gerrit)
2014/06/25 09:02:59
Just a doubt: at this point if a2l here returns 'f
 | |
| 351 | |
| 352 if source_path and self._symbolizer.strip_base_path: | |
| 353 # Strip the base path | |
| 354 source_path = re.sub('^' + self._symbolizer.strip_base_path, | |
| 355 self._symbolizer.source_root_path or '', source_path) | |
| 
Primiano Tucci (use gerrit)
2014/06/25 09:02:58
Nit: indentation should be 4 spaces here, or inlin
 | |
| 356 | |
| 357 sym_info = ELFSymbolInfo(name, source_path, source_line, was_ambiguous, | |
| 358 disambiguated) | |
| 316 if prev_sym_info: | 359 if prev_sym_info: | 
| 317 prev_sym_info.inlined_by = sym_info | 360 prev_sym_info.inlined_by = sym_info | 
| 318 if not innermost_sym_info: | 361 if not innermost_sym_info: | 
| 319 innermost_sym_info = sym_info | 362 innermost_sym_info = sym_info | 
| 320 | 363 | 
| 321 self._processed_symbols_count += 1 | 364 self._processed_symbols_count += 1 | 
| 322 self._symbolizer.callback(innermost_sym_info, callback_arg) | 365 self._symbolizer.callback(innermost_sym_info, callback_arg) | 
| 323 | 366 | 
| 324 def _RestartAddr2LineProcess(self): | 367 def _RestartAddr2LineProcess(self): | 
| 325 if self._proc: | 368 if self._proc: | 
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 386 | 429 | 
| 387 @property | 430 @property | 
| 388 def first_request_id(self): | 431 def first_request_id(self): | 
| 389 """Returns the request_id of the oldest pending request in the queue.""" | 432 """Returns the request_id of the oldest pending request in the queue.""" | 
| 390 return self._request_queue[0][2] if self._request_queue else 0 | 433 return self._request_queue[0][2] if self._request_queue else 0 | 
| 391 | 434 | 
| 392 | 435 | 
| 393 class ELFSymbolInfo(object): | 436 class ELFSymbolInfo(object): | 
| 394 """The result of the symbolization passed as first arg. of each callback.""" | 437 """The result of the symbolization passed as first arg. of each callback.""" | 
| 395 | 438 | 
| 396 def __init__(self, name, source_path, source_line): | 439 def __init__(self, name, source_path, source_line, was_ambiguous=False, | 
| 440 disambiguated=False): | |
| 397 """All the fields here can be None (if addr2line replies with '??').""" | 441 """All the fields here can be None (if addr2line replies with '??').""" | 
| 398 self.name = name | 442 self.name = name | 
| 399 self.source_path = source_path | 443 self.source_path = (None if source_path is None else | 
| 
Primiano Tucci (use gerrit)
2014/06/25 09:02:58
why do you need this logic (the abs path) here? Di
 | |
| 444 os.path.abspath(source_path)) | |
| 400 self.source_line = source_line | 445 self.source_line = source_line | 
| 401 # In the case of |inlines|=True, the |inlined_by| points to the outer | 446 # In the case of |inlines|=True, the |inlined_by| points to the outer | 
| 402 # function inlining the current one (and so on, to form a chain). | 447 # function inlining the current one (and so on, to form a chain). | 
| 403 self.inlined_by = None | 448 self.inlined_by = None | 
| 449 self.disambiguated = disambiguated | |
| 450 self.was_ambiguous = was_ambiguous | |
| 404 | 451 | 
| 405 def __str__(self): | 452 def __str__(self): | 
| 406 return '%s [%s:%d]' % ( | 453 return '%s [%s:%d]' % ( | 
| 407 self.name or '??', self.source_path or '??', self.source_line or 0) | 454 self.name or '??', self.source_path or '??', self.source_line or 0) | 
| OLD | NEW |