OLD | NEW |
---|---|
1 # Copyright 2014 The Chromium Authors. All rights reserved. | 1 # Copyright 2014 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import collections | 5 import collections |
6 import datetime | 6 import datetime |
7 import logging | 7 import logging |
8 import multiprocessing | 8 import multiprocessing |
9 import os | 9 import os |
10 import posixpath | 10 import posixpath |
11 import Queue | 11 import Queue |
12 import re | 12 import re |
13 import subprocess | 13 import subprocess |
14 import sys | 14 import sys |
15 import threading | 15 import threading |
16 | 16 |
17 | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:10
Nit: Keep the extra blankline here. Code styles pr
| |
18 # addr2line builds a possibly infinite memory cache that can exhaust | 17 # addr2line builds a possibly infinite memory cache that can exhaust |
19 # the computer's memory if allowed to grow for too long. This constant | 18 # the computer's memory if allowed to grow for too long. This constant |
20 # controls how many lookups we do before restarting the process. 4000 | 19 # controls how many lookups we do before restarting the process. 4000 |
21 # gives near peak performance without extreme memory usage. | 20 # gives near peak performance without extreme memory usage. |
22 ADDR2LINE_RECYCLE_LIMIT = 4000 | 21 ADDR2LINE_RECYCLE_LIMIT = 4000 |
23 | 22 |
24 | 23 |
25 class ELFSymbolizer(object): | 24 class ELFSymbolizer(object): |
26 """An uber-fast (multiprocessing, pipelined and asynchronous) ELF symbolizer. | 25 """An uber-fast (multiprocessing, pipelined and asynchronous) ELF symbolizer. |
27 | 26 |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
68 |max_queue_size| bound, a new addr2line instance is kicked in. | 67 |max_queue_size| bound, a new addr2line instance is kicked in. |
69 In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances | 68 In the case of a very eager producer (i.e. all |max_concurrent_jobs| instances |
70 have a backlog of |max_queue_size|), back-pressure is applied on the caller by | 69 have a backlog of |max_queue_size|), back-pressure is applied on the caller by |
71 blocking the SymbolizeAsync method. | 70 blocking the SymbolizeAsync method. |
72 | 71 |
73 This module has been deliberately designed to be dependency free (w.r.t. | 72 This module has been deliberately designed to be dependency free (w.r.t. |
74 other modules in this project), to allow easy reuse in external projects. | 73 other modules in this project), to allow easy reuse in external projects. |
75 """ | 74 """ |
76 | 75 |
77 def __init__(self, elf_file_path, addr2line_path, callback, inlines=False, | 76 def __init__(self, elf_file_path, addr2line_path, callback, inlines=False, |
78 max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50): | 77 max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50, |
78 source_root_path=None, strip_base_path=None): | |
79 """Args: | 79 """Args: |
80 elf_file_path: path of the elf file to be symbolized. | 80 elf_file_path: path of the elf file to be symbolized. |
81 addr2line_path: path of the toolchain's addr2line binary. | 81 addr2line_path: path of the toolchain's addr2line binary. |
82 callback: a callback which will be invoked for each resolved symbol with | 82 callback: a callback which will be invoked for each resolved symbol with |
83 the two args (sym_info, callback_arg). The former is an instance of | 83 the two args (sym_info, callback_arg). The former is an instance of |
84 |ELFSymbolInfo| and contains the symbol information. The latter is an | 84 |ELFSymbolInfo| and contains the symbol information. The latter is an |
85 embedder-provided argument which is passed to SymbolizeAsync(). | 85 embedder-provided argument which is passed to SymbolizeAsync(). |
86 inlines: when True, the ELFSymbolInfo will contain also the details about | 86 inlines: when True, the ELFSymbolInfo will contain also the details about |
87 the outer inlining functions. When False, only the innermost function | 87 the outer inlining functions. When False, only the innermost function |
88 will be provided. | 88 will be provided. |
89 max_concurrent_jobs: Max number of addr2line instances spawned. | 89 max_concurrent_jobs: Max number of addr2line instances spawned. |
90 Parallelize responsibly, addr2line is a memory and I/O monster. | 90 Parallelize responsibly, addr2line is a memory and I/O monster. |
91 max_queue_size: Max number of outstanding requests per addr2line instance. | 91 max_queue_size: Max number of outstanding requests per addr2line instance. |
92 addr2line_timeout: Max time (in seconds) to wait for an addr2line response. | 92 addr2line_timeout: Max time (in seconds) to wait for an addr2line response. |
93 After the timeout, the instance will be considered hung and respawned. | 93 After the timeout, the instance will be considered hung and respawned. |
94 source_root_path: If not None, enables disambiguation and names the | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:10
Please update the doc here with the renamed variab
| |
95 directory that holds the source files. Disambiguation means to | |
96 resolve ambiguous source_paths, for example turn addr2line output | |
97 "unicode.cc" into a full and absolute path. In some toolchains only | |
98 the name of the source file is output, without any path information; | |
99 disambiguation searches through |source_root_path| for files whose | |
100 name matches. If there are multiple files with the same name, | |
101 disambiguation will fail. | |
102 strip_base_path: If not None, a leading prefix that is removed from the | |
103 source path before it is re-rooted under |source_root_path|. | |
94 """ | 104 """ |
95 assert(os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path | 105 assert(os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path |
96 self.elf_file_path = elf_file_path | 106 self.elf_file_path = elf_file_path |
97 self.addr2line_path = addr2line_path | 107 self.addr2line_path = addr2line_path |
98 self.callback = callback | 108 self.callback = callback |
99 self.inlines = inlines | 109 self.inlines = inlines |
100 self.max_concurrent_jobs = (max_concurrent_jobs or | 110 self.max_concurrent_jobs = (max_concurrent_jobs or |
101 min(multiprocessing.cpu_count(), 4)) | 111 min(multiprocessing.cpu_count(), 4)) |
102 self.max_queue_size = max_queue_size | 112 self.max_queue_size = max_queue_size |
103 self.addr2line_timeout = addr2line_timeout | 113 self.addr2line_timeout = addr2line_timeout |
104 self.requests_counter = 0 # For generating monotonic request IDs. | 114 self.requests_counter = 0 # For generating monotonic request IDs. |
105 self._a2l_instances = [] # Up to |max_concurrent_jobs| _Addr2Line inst. | 115 self._a2l_instances = [] # Up to |max_concurrent_jobs| _Addr2Line inst. |
106 | 116 |
117 # If necessary, create disambiguation lookup table | |
118 self.disambiguate = source_root_path is not None | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:10
You don't seem to make any use of self.disambiguat
| |
119 self.lookup_table = {} | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:10
nit: _lookup_table (add _ prefix) as this is a pri
| |
120 self.source_root_path = source_root_path | |
121 self.strip_base_path = strip_base_path | |
122 if(self.disambiguate): | |
123 self._CreateDisambiguationTable(source_root_path) | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:10
Ehm, if you store source_root_path as a field into
| |
124 | |
107 # Create one addr2line instance. More instances will be created on demand | 125 # Create one addr2line instance. More instances will be created on demand |
108 # (up to |max_concurrent_jobs|) depending on the rate of the requests. | 126 # (up to |max_concurrent_jobs|) depending on the rate of the requests. |
109 self._CreateNewA2LInstance() | 127 self._CreateNewA2LInstance() |
110 | 128 |
111 def SymbolizeAsync(self, addr, callback_arg=None): | 129 def SymbolizeAsync(self, addr, callback_arg=None): |
112 """Requests symbolization of a given address. | 130 """Requests symbolization of a given address. |
113 | 131 |
114 This method is not guaranteed to return immediately. It generally does, but | 132 This method is not guaranteed to return immediately. It generally does, but |
115 in some scenarios (e.g. all addr2line instances have full queues) it can | 133 in some scenarios (e.g. all addr2line instances have full queues) it can |
116 block to create back-pressure. | 134 block to create back-pressure. |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
154 for a2l in self._a2l_instances: | 172 for a2l in self._a2l_instances: |
155 a2l.WaitForIdle() | 173 a2l.WaitForIdle() |
156 a2l.Terminate() | 174 a2l.Terminate() |
157 | 175 |
158 def _CreateNewA2LInstance(self): | 176 def _CreateNewA2LInstance(self): |
159 assert(len(self._a2l_instances) < self.max_concurrent_jobs) | 177 assert(len(self._a2l_instances) < self.max_concurrent_jobs) |
160 a2l = ELFSymbolizer.Addr2Line(self) | 178 a2l = ELFSymbolizer.Addr2Line(self) |
161 self._a2l_instances.append(a2l) | 179 self._a2l_instances.append(a2l) |
162 return a2l | 180 return a2l |
163 | 181 |
182 def _CreateDisambiguationTable(self, src_root_path): | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:10
Is there a reason why you couldn't use those 4 lin
| |
183 """ Creates a table of files used for disambiguation later | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:10
I think this comment is pleonastic.
It tells that
| |
184 adapted from andrewhayden's implementation in earlier commits """ | |
185 duplicates = set() | |
186 self.lookup_table = {} | |
187 src_root_path = os.path.abspath(src_root_path) | |
188 | |
189 for root, _, filenames in os.walk(src_root_path): | |
190 for f in filenames: | |
191 base = os.path.basename(f) # Just in case | |
192 if self.lookup_table.get(base) is None: | |
193 self.lookup_table[base] = os.path.join(root, f) | |
194 else: | |
195 duplicates.add(base) | |
196 | |
197 # Duplicates can not be used for disambiguation, as we can not determine | |
198 # the true source if we have more than one to choose from | |
199 for d in duplicates: | |
200 del self.lookup_table[d] | |
201 | |
164 | 202 |
165 class Addr2Line(object): | 203 class Addr2Line(object): |
166 """A python wrapper around an addr2line instance. | 204 """A python wrapper around an addr2line instance. |
167 | 205 |
168 The communication with the addr2line process looks as follows: | 206 The communication with the addr2line process looks as follows: |
169 [STDIN] [STDOUT] (from addr2line's viewpoint) | 207 [STDIN] [STDOUT] (from addr2line's viewpoint) |
170 > f001111 | 208 > f001111 |
171 > f002222 | 209 > f002222 |
172 < Symbol::Name(foo, bar) for f001111 | 210 < Symbol::Name(foo, bar) for f001111 |
173 < /path/to/source/file.c:line_number | 211 < /path/to/source/file.c:line_number |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
305 source_line = None | 343 source_line = None |
306 m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2) | 344 m = ELFSymbolizer.Addr2Line.SYM_ADDR_RE.match(line2) |
307 if m: | 345 if m: |
308 if not m.group(1).startswith('?'): | 346 if not m.group(1).startswith('?'): |
309 source_path = m.group(1) | 347 source_path = m.group(1) |
310 if not m.group(2).startswith('?'): | 348 if not m.group(2).startswith('?'): |
311 source_line = int(m.group(2)) | 349 source_line = int(m.group(2)) |
312 else: | 350 else: |
313 logging.warning('Got invalid symbol path from addr2line: %s' % line2) | 351 logging.warning('Got invalid symbol path from addr2line: %s' % line2) |
314 | 352 |
315 sym_info = ELFSymbolInfo(name, source_path, source_line) | 353 # In case disambiguation is on, and needed |
354 disambiguated = False | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:11
I think that Andrew suggested that the two bools a
| |
355 failed_disambiguation = False | |
356 if self._symbolizer.disambiguate: | |
357 if source_path and not source_path.startswith('/'): | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:10
I think this line is completely unnecessary.
Just
| |
358 source_path = self._symbolizer.lookup_table.get(source_path) | |
359 failed_disambiguation = source_path is None | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:10
I'm not sure that you need at all to keep failed_d
| |
360 disambiguated = not failed_disambiguation | |
361 | |
362 if source_path is not None: | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:10
nit: if source_path:
| |
363 # Strip the base path | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:10
I think that lines 363-368 can be expressed with
| |
364 strip = self._symbolizer.strip_base_path | |
365 if strip is not None and source_path.startswith(strip): | |
366 source_path = source_path[len(strip):] # Remove strip | |
367 source_path = os.path.join(self._symbolizer.source_root_path, | |
368 source_path) | |
369 # Use the absolute path | |
370 source_path = os.path.abspath(source_path) | |
Primiano Tucci (use gerrit)
2014/06/18 15:11:11
Good catch but move to line 120 in __init__ where
| |
371 | |
372 sym_info = ELFSymbolInfo(name, source_path, source_line, disambiguated, | |
373 failed_disambiguation) | |
316 if prev_sym_info: | 374 if prev_sym_info: |
317 prev_sym_info.inlined_by = sym_info | 375 prev_sym_info.inlined_by = sym_info |
318 if not innermost_sym_info: | 376 if not innermost_sym_info: |
319 innermost_sym_info = sym_info | 377 innermost_sym_info = sym_info |
320 | 378 |
321 self._processed_symbols_count += 1 | 379 self._processed_symbols_count += 1 |
322 self._symbolizer.callback(innermost_sym_info, callback_arg) | 380 self._symbolizer.callback(innermost_sym_info, callback_arg) |
323 | 381 |
324 def _RestartAddr2LineProcess(self): | 382 def _RestartAddr2LineProcess(self): |
325 if self._proc: | 383 if self._proc: |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
386 | 444 |
387 @property | 445 @property |
388 def first_request_id(self): | 446 def first_request_id(self): |
389 """Returns the request_id of the oldest pending request in the queue.""" | 447 """Returns the request_id of the oldest pending request in the queue.""" |
390 return self._request_queue[0][2] if self._request_queue else 0 | 448 return self._request_queue[0][2] if self._request_queue else 0 |
391 | 449 |
392 | 450 |
393 class ELFSymbolInfo(object): | 451 class ELFSymbolInfo(object): |
394 """The result of the symbolization passed as first arg. of each callback.""" | 452 """The result of the symbolization passed as first arg. of each callback.""" |
395 | 453 |
396 def __init__(self, name, source_path, source_line): | 454 def __init__(self, name, source_path, source_line, disambiguated=False, |
455 failed_disambiguation=False): | |
397 """All the fields here can be None (if addr2line replies with '??').""" | 456 """All the fields here can be None (if addr2line replies with '??').""" |
398 self.name = name | 457 self.name = name |
399 self.source_path = source_path | 458 self.source_path = source_path |
400 self.source_line = source_line | 459 self.source_line = source_line |
401 # In the case of |inlines|=True, the |inlined_by| points to the outer | 460 # In the case of |inlines|=True, the |inlined_by| points to the outer |
402 # function inlining the current one (and so on, to form a chain). | 461 # function inlining the current one (and so on, to form a chain). |
403 self.inlined_by = None | 462 self.inlined_by = None |
463 self.disambiguated = disambiguated | |
464 self.failed_disambiguation = failed_disambiguation | |
404 | 465 |
405 def __str__(self): | 466 def __str__(self): |
406 return '%s [%s:%d]' % ( | 467 return '%s [%s:%d]' % ( |
407 self.name or '??', self.source_path or '??', self.source_line or 0) | 468 self.name or '??', self.source_path or '??', self.source_line or 0) |
OLD | NEW |