Index: build/android/pylib/symbols/elf_symbolizer.py |
diff --git a/build/android/pylib/symbols/elf_symbolizer.py b/build/android/pylib/symbols/elf_symbolizer.py |
index b294654f20b1ee3c7f0cdb5c142789f3bc1b5933..cc0d3304243515dc1949533d3e99010d1af08699 100644 |
--- a/build/android/pylib/symbols/elf_symbolizer.py |
+++ b/build/android/pylib/symbols/elf_symbolizer.py |
@@ -13,7 +13,7 @@ import re |
import subprocess |
import sys |
import threading |
- |
+from sets import Set |
Primiano Tucci (use gerrit)
2014/06/18 09:49:11
just use the builtin set. We don't target python <
|
# addr2line builds a possibly infinite memory cache that can exhaust |
# the computer's memory if allowed to grow for too long. This constant |
@@ -75,7 +75,8 @@ class ELFSymbolizer(object): |
""" |
def __init__(self, elf_file_path, addr2line_path, callback, inlines=False, |
- max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50): |
+ max_concurrent_jobs=None, addr2line_timeout=30, max_queue_size=50, |
+ disambiguate=False, disambiguation_source_path=''): |
Primiano Tucci (use gerrit)
2014/06/18 10:48:53
I had a quick chat with Andrew and we feel the bes
|
"""Args: |
elf_file_path: path of the elf file to be symbolized. |
addr2line_path: path of the toolchain's addr2line binary. |
@@ -91,6 +92,16 @@ class ELFSymbolizer(object): |
max_queue_size: Max number of outstanding requests per addr2line instance. |
addr2line_timeout: Max time (in seconds) to wait for an addr2line response. |
After the timeout, the instance will be considered hung and respawned. |
+ disambiguate: Whether to run a disambiguation process or not. |
Primiano Tucci (use gerrit)
2014/06/18 09:49:11
Can we use just one variable (call it source_root_
|
+ Disambiguation means resolving ambiguous source paths, for |
+ example turning the addr2line output "unicode.cc" into a full, absolute |
+ path. In some toolchains only the name of the source file is output, |
Primiano Tucci (use gerrit)
2014/06/18 09:49:11
I'd love to know more about this btw. In which cas
|
+ without any path information; disambiguation searches through the |
+ source directory specified by the 'disambiguation_source_path' argument |
+ for files whose name matches. If there are multiple files with the |
+ same name, disambiguation will fail. |
+ disambiguation_source_path: The path to the directory where the source |
+ files are located, used for disambiguating paths. |
""" |
assert(os.path.isfile(addr2line_path)), 'Cannot find ' + addr2line_path |
self.elf_file_path = elf_file_path |
@@ -104,6 +115,13 @@ class ELFSymbolizer(object): |
self.requests_counter = 0 # For generating monotonic request IDs. |
self._a2l_instances = [] # Up to |max_concurrent_jobs| _Addr2Line inst. |
+ # If necessary, create disambiguation lookup table |
+ self.disambiguate = disambiguate |
+ self.commonprefix = '' |
+ self.lookup_table = {} |
+ if(self.disambiguate): |
+ self._CreateDisambiguationTable(disambiguation_source_path) |
+ |
# Create one addr2line instance. More instances will be created on demand |
# (up to |max_concurrent_jobs|) depending on the rate of the requests. |
self._CreateNewA2LInstance() |
@@ -161,6 +179,38 @@ class ELFSymbolizer(object): |
self._a2l_instances.append(a2l) |
return a2l |
+ def _CreateDisambiguationTable(self, src_root_path): |
Primiano Tucci (use gerrit)
2014/06/18 09:49:11
I don't really like this approach. This is trying
Primiano Tucci (use gerrit)
2014/06/18 10:48:53
After talking with Andrew, given your use case, I'
|
+ """ Creates a table of files used for disambiguation later |
+ Disambiguation: |
Primiano Tucci (use gerrit)
2014/06/18 09:49:11
This comment is redundant. You already explained t
|
+ addr2line sometimes returns an ambiguous file name rather than the |
+ full path of the file where the symbol is located. |
+ |
+ Adapted from andrewhayden's implementation in earlier commits. """ |
+ interesting_file_endings = { ".c", ".cc", ".h", ".cp", ".cpp", ".cxx", |
+ ".c++", ".asm", ".inc", ".s", ".hxx" } |
+ duplicates = Set() |
+ self.lookup_table = {} |
+ src_root_path = os.path.abspath(src_root_path) |
+ |
+ for root, _, filenames in os.walk(src_root_path): |
+ for f in filenames: |
+ _, ext = os.path.splitext(f) |
+ if not ext in interesting_file_endings: |
+ continue |
+ |
+ base = os.path.basename(f) # Just in case |
+ if self.lookup_table.get(base) is None: |
+ self.lookup_table[base] = "%s/%s" % (root, f) |
+ else: |
+ duplicates.add(base) |
+ |
+ # Duplicates cannot be used for disambiguation, as we cannot determine |
+ # the true source if we have more than one to choose from. |
+ for d in duplicates: |
+ del self.lookup_table[d] |
+ |
+ # Get the common prefix for the source paths |
+ self.commonprefix = os.path.commonprefix(self.lookup_table.values()) |
class Addr2Line(object): |
"""A python wrapper around an addr2line instance. |
@@ -312,7 +362,21 @@ class ELFSymbolizer(object): |
else: |
logging.warning('Got invalid symbol path from addr2line: %s' % line2) |
- sym_info = ELFSymbolInfo(name, source_path, source_line) |
+ # In case disambiguation is on, and needed |
+ disambiguated = False |
+ failed_disambiguation = False |
+ if self._symbolizer.disambiguate: |
+ if not source_path is None and not source_path.startswith('/'): |
Andrew Hayden (chromium.org)
2014/06/18 09:10:59
Again, let's avoid double-negation:
if source_path
|
+ source_path = self._symbolizer.lookup_table.get(source_path) |
+ failed_disambiguation = source_path is None |
+ disambiguated = not failed_disambiguation |
Andrew Hayden (chromium.org)
2014/06/18 09:10:59
As written, you don't need two booleans. They're j
|
+ |
+ # Use the absolute path |
+ if not source_path is None: |
Andrew Hayden (chromium.org)
2014/06/18 09:10:59
Again let's avoid double negation:
if source_path
|
+ source_path = os.path.abspath(source_path) |
Primiano Tucci (use gerrit)
2014/06/18 09:49:11
What is this for?
Looks like that even if you're n
Primiano Tucci (use gerrit)
2014/06/18 10:48:53
At this point you also check if source_path.starts
|
+ |
+ sym_info = ELFSymbolInfo(name, source_path, source_line, disambiguated, |
+ failed_disambiguation) |
if prev_sym_info: |
prev_sym_info.inlined_by = sym_info |
if not innermost_sym_info: |
@@ -393,7 +457,8 @@ class ELFSymbolizer(object): |
class ELFSymbolInfo(object): |
"""The result of the symbolization passed as first arg. of each callback.""" |
- def __init__(self, name, source_path, source_line): |
+ def __init__(self, name, source_path, source_line, disambiguated=False, |
Primiano Tucci (use gerrit)
2014/06/18 09:49:11
Do you need to pass these booleans at all? Can't y
|
+ failed_disambiguation=False): |
"""All the fields here can be None (if addr2line replies with '??').""" |
self.name = name |
self.source_path = source_path |
@@ -401,6 +466,8 @@ class ELFSymbolInfo(object): |
# In the case of |inlines|=True, the |inlined_by| points to the outer |
# function inlining the current one (and so on, to form a chain). |
self.inlined_by = None |
+ self.disambiguated = disambiguated |
+ self.failed_disambiguation = failed_disambiguation |
def __str__(self): |
return '%s [%s:%d]' % ( |