Index: tools/binary_size/run_binary_size_analysis.py |
diff --git a/tools/binary_size/run_binary_size_analysis.py b/tools/binary_size/run_binary_size_analysis.py |
index 2a75faf0ac585289abb15587a269a27fc8937835..26be022acd956e6178be824fa33f625dd4648fbc 100755 |
--- a/tools/binary_size/run_binary_size_analysis.py |
+++ b/tools/binary_size/run_binary_size_analysis.py |
@@ -13,6 +13,7 @@ you desire. |
import collections |
import fileinput |
import json |
+import multiprocessing |
import optparse |
import os |
import pprint |
@@ -21,6 +22,19 @@ import shutil |
import subprocess |
import sys |
import tempfile |
+import time |
+ |
+import binary_size_utils |
+ |
+elf_symbolizer_path = os.path.abspath(os.path.join( |
+ os.path.dirname(__file__), |
+ '..', |
+ '..', |
+ 'build', |
+ 'android', |
+ 'pylib')) |
+sys.path.append(elf_symbolizer_path) |
+import symbols.elf_symbolizer as elf_symbolizer |
# TODO(andrewhayden): Only used for legacy reports. Delete. |
@@ -46,66 +60,31 @@ def SymbolTypeToHuman(type): |
'v': 'weak symbol'}[type] |
-def ParseNm(input): |
- """Parse nm output. |
- |
- Argument: an iterable over lines of nm output. |
- |
- Yields: (symbol name, symbol type, symbol size, source file path). |
- Path may be None if nm couldn't figure out the source file. |
- """ |
- |
- # Match lines with size, symbol, optional location, optional discriminator |
- sym_re = re.compile(r'^[0-9a-f]{8} ' # address (8 hex digits) |
- '([0-9a-f]{8}) ' # size (8 hex digits) |
- '(.) ' # symbol type, one character |
- '([^\t]+)' # symbol name, separated from next by tab |
- '(?:\t(.*):[\d\?]+)?.*$') # location |
- # Match lines with addr but no size. |
- addr_re = re.compile(r'^[0-9a-f]{8} (.) ([^\t]+)(?:\t.*)?$') |
- # Match lines that don't have an address at all -- typically external symbols. |
- noaddr_re = re.compile(r'^ {8} (.) (.*)$') |
- |
- for line in input: |
- line = line.rstrip() |
- match = sym_re.match(line) |
- if match: |
- size, type, sym = match.groups()[0:3] |
- size = int(size, 16) |
- if type.lower() == 'b': |
- continue # skip all BSS for now |
- path = match.group(4) |
- yield sym, type, size, path |
- continue |
- match = addr_re.match(line) |
- if match: |
- type, sym = match.groups()[0:2] |
- # No size == we don't care. |
- continue |
- match = noaddr_re.match(line) |
- if match: |
- type, sym = match.groups() |
- if type in ('U', 'w'): |
- # external or weak symbol |
- continue |
- |
- print >>sys.stderr, 'unparsed:', repr(line) |
- |
- |
def _MkChild(node, name): |
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
_MkChild looks a "long"cut for
return node.setdefa
Daniel Bratell
2014/05/21 08:42:13
Hmm, yes. Border case. Using setdefault forces you
|
- child = None |
- for test in node['children']: |
- if test['n'] == name: |
- child = test |
- break |
- if not child: |
- child = {'n': name, 'children': []} |
- node['children'].append(child) |
+ child = node['children'].get(name) |
+ if child is None: |
+ child = {'n': name, 'children': {}} |
+ node['children'][name] = child |
return child |
+def MakeChildrenDictsIntoLists(node): |
+ largest_list_len = 0 |
+ if 'children' in node: |
+ largest_list_len = len(node['children']) |
+ child_list = [] |
+ for child in node['children'].itervalues(): |
+ child_largest_list_len = MakeChildrenDictsIntoLists(child) |
+ if child_largest_list_len > largest_list_len: |
+ largest_list_len = child_largest_list_len |
+ child_list.append(child) |
+ node['children'] = child_list |
+ |
+ return largest_list_len |
+ |
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
Nit: add extra newline
|
def MakeCompactTree(symbols): |
- result = {'n': '/', 'children': [], 'k': 'p', 'maxDepth': 0} |
+ result = {'n': '/', 'children': {}, 'k': 'p', 'maxDepth': 0} |
+ seen_symbol_with_path = False |
for symbol_name, symbol_type, symbol_size, file_path in symbols: |
if 'vtable for ' in symbol_name: |
@@ -113,6 +92,7 @@ def MakeCompactTree(symbols): |
# Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz'] |
if file_path: |
file_path = os.path.normpath(file_path) |
+ seen_symbol_with_path = True |
else: |
file_path = '(No Path)' |
@@ -129,18 +109,23 @@ def MakeCompactTree(symbols): |
continue |
depth += 1 |
node = _MkChild(node, path_part); |
+ assert not 'k' in node or node['k'] == 'p' |
node['k'] = 'p' # p for path |
# 'node' is now the file node. Find the symbol-type bucket. |
node['lastPathElement'] = True |
node = _MkChild(node, symbol_type) |
+ assert not 'k' in node or node['k'] == 'b' |
node['t'] = symbol_type |
node['k'] = 'b' # b for bucket |
depth += 1 |
# 'node' is now the symbol-type bucket. Make the child entry. |
node = _MkChild(node, symbol_name) |
- if 'children' in node: # Only possible if we're adding duplicate entries!!! |
+ if 'children' in node: |
+ if node['children']: |
+ print('A container node used as symbol for %s.' % symbol_name) |
+ # This is going to be used as a leaf so no use for child list. |
del node['children'] |
node['value'] = symbol_size |
node['t'] = symbol_type |
@@ -148,6 +133,14 @@ def MakeCompactTree(symbols): |
depth += 1 |
result['maxDepth'] = max(result['maxDepth'], depth); |
+ if not seen_symbol_with_path: |
+ print('Symbols lack paths. Data will not be structured.') |
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
shouldn't all these prints be logging.warning/erro
|
+ |
+ largest_list_len = MakeChildrenDictsIntoLists(result) |
+ |
+ if largest_list_len > 1000: |
+ print('There are sections with %d nodes. Results might be unusable.' % |
+ largest_list_len) |
return result |
@@ -285,12 +278,11 @@ def JsonifyTree(tree, name): |
'children': children } |
def DumpCompactTree(symbols, outfile): |
- out = open(outfile, 'w') |
- try: |
- out.write('var tree_data = ' + json.dumps(MakeCompactTree(symbols))) |
- finally: |
- out.flush() |
- out.close() |
+ tree_root = MakeCompactTree(symbols) |
+ json_string = json.dumps(tree_root) |
+ print('Writing %d bytes json' % len(json_string)) |
+ with open(outfile, 'w') as out: |
+ out.write('var tree_data = ' + json_string) |
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
I've no idea how big this string is going to be.
I
Daniel Bratell
2014/05/21 08:42:13
It can be big so this is a nice suggestion. Won't
|
# TODO(andrewhayden): Only used for legacy reports. Delete. |
@@ -397,61 +389,156 @@ def DumpLargestVTables(symbols, outfile, n): |
out.close() |
-# TODO(andrewhayden): Switch to Primiano's python-based version. |
-def RunParallelAddress2Line(outfile, library, arch, jobs, verbose): |
- """Run a parallel addr2line processing engine to dump and resolve symbols.""" |
- out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out') |
- build_type = os.getenv('BUILDTYPE', 'Release') |
- classpath = os.path.join(out_dir, build_type, 'lib.java', |
- 'binary_size_java.jar') |
- cmd = ['java', |
- '-classpath', classpath, |
- 'org.chromium.tools.binary_size.ParallelAddress2Line', |
- '--disambiguate', |
- '--outfile', outfile, |
- '--library', library, |
- '--threads', jobs] |
- if verbose is True: |
- cmd.append('--verbose') |
- prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains') |
- if arch == 'android-arm': |
- prefix = os.path.join(prefix, 'arm-linux-androideabi-4.8', 'prebuilt', |
- 'linux-x86_64', 'bin', 'arm-linux-androideabi-') |
- cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) |
- elif arch == 'android-mips': |
- prefix = os.path.join(prefix, 'mipsel-linux-android-4.8', 'prebuilt', |
- 'linux-x86_64', 'bin', 'mipsel-linux-android-') |
- cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) |
- elif arch == 'android-x86': |
- prefix = os.path.join(prefix, 'x86-4.8', 'prebuilt', |
- 'linux-x86_64', 'bin', 'i686-linux-android-') |
- cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) |
- # else, use whatever is in PATH (don't pass --nm or --addr2line) |
- |
- if verbose: |
- print cmd |
- |
- return_code = subprocess.call(cmd) |
- if return_code: |
- raise RuntimeError('Failed to run ParallelAddress2Line: returned ' + |
- str(return_code)) |
- |
- |
-def GetNmSymbols(infile, outfile, library, arch, jobs, verbose): |
- if infile is None: |
- if outfile is None: |
- infile = tempfile.NamedTemporaryFile(delete=False).name |
+# Regex for parsing "nm" output. A sample line looks like this: |
+# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95 |
+# |
+# The fields are: address, size, type, name, source location |
+# Regular expression explained ( see also: https://xkcd.com/208 ): |
+# ([0-9a-f]{8,}) The address |
+# [\s]+ Whitespace separator |
+# ([0-9a-f]{8,}) The size. From here on out it's all optional. |
+# [\s]* Whitespace separator |
+# (\S?) The symbol type, which is any non-whitespace char |
+# [\s*] Whitespace separator |
+# ([^\t]*) Symbol name, any non-tab character (spaces ok!) |
+# [\t]? Tab separator |
+# (.*) The location (filename[:linenum|?][ (discriminator n)]) |
+sNmPattern = re.compile( |
+ r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)') |
+ |
+class Progress(): |
+ def __init__(self): |
+ self.count = 0 |
+ self.skip_count = 0 |
+ self.collisions = 0 |
+ self.time_last_output = time.time() |
+ self.count_last_output = 0 |
+ |
+ |
+def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary): |
+ nm_output = run_nm(library, nm_binary) |
+ nm_output_lines = nm_output.splitlines() |
+ nm_output_lines_len = len(nm_output_lines) |
+ address_symbol = {} |
+ progress = Progress() |
+ def map_address_symbol(symbol, addr): |
+ progress.count += 1 |
+ if addr in address_symbol: |
+ # 'Collision between %s and %s.' % (str(symbol.name), |
+ # str(address_symbol[addr].name)) |
+ progress.collisions += 1 |
else: |
- infile = outfile |
+ address_symbol[addr] = symbol |
+ |
+ progress_chunk = 100 |
+ if progress.count % progress_chunk == 0: |
+ time_now = time.time() |
+ time_spent = time_now - progress.time_last_output |
+ if time_spent > 1.0: |
+ # Only output at most once per second. |
+ progress.time_last_output = time_now |
+ chunk_size = progress.count - progress.count_last_output |
+ progress.count_last_output = progress.count |
+ if time_spent > 0: |
+ speed = chunk_size / time_spent |
+ else: |
+ speed = 0 |
+ progress_percent = (100.0 * (progress.count + progress.skip_count) / |
+ nm_output_lines_len) |
+ print('%.1f%%: Looked up %d symbols (%d collisions) - %.1f lookups/s.' % |
+ (progress_percent, progress.count, progress.collisions, speed)) |
+ |
+ symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary, |
+ map_address_symbol, |
+ max_concurrent_jobs=2) |
+ for line in nm_output_lines: |
+ match = sNmPattern.match(line) |
+ if match: |
+ location = match.group(5) |
+ if not location: |
+ addr = int(match.group(1), 16) |
+ size = int(match.group(2), 16) |
+ if addr in address_symbol: # Already looked up, shortcut ELFSymbolizer. |
+ map_address_symbol(address_symbol[addr], addr) |
+ continue |
+ elif size == 0: |
+ # Save time by not looking up empty symbols (do they even exist?) |
+ print('Empty symbol: ' + line) |
+ else: |
+ symbolizer.SymbolizeAsync(addr, addr) |
+ continue |
+ |
+ progress.skip_count += 1 |
+ |
+ symbolizer.Join() |
+ |
+ with open(outfile, 'w') as out: |
+ for line in nm_output_lines: |
+ match = sNmPattern.match(line) |
+ if match: |
+ location = match.group(5) |
+ if not location: |
+ addr = int(match.group(1), 16) |
+ symbol = address_symbol[addr] |
+ path = '??' |
+ if symbol.source_path is not None: |
+ path = symbol.source_path |
+ line_number = 0 |
+ if symbol.source_line is not None: |
+ line_number = symbol.source_line |
+ out.write('%s\t%s:%d\n' % (line, path, line_number)) |
+ continue |
+ |
+ out.write('%s\n' % line) |
+ |
+ print('%d symbols in the results.' % len(address_symbol)) |
+ |
+ |
+def run_nm(binary, nm_binary): |
Primiano Tucci (use gerrit)
2014/05/20 15:22:57
nit: RunNm
|
+ print('Starting nm') |
+ cmd = [nm_binary, '-C', '--print-size', binary] |
+ nm_process = subprocess.Popen(cmd, |
+ stdout=subprocess.PIPE, |
+ stderr=subprocess.PIPE) |
+ (process_output, err_output) = nm_process.communicate() |
+ |
+ if nm_process.returncode != 0: |
+ if err_output: |
+ raise Exception, err_output |
+ else: |
+ raise Exception, process_output |
+ |
+ print('Finished nm') |
+ return process_output |
+ |
+ |
+def GetNmSymbols(nm_infile, outfile, library, jobs, verbose, |
+ addr2line_binary, nm_binary): |
+ if nm_infile is None: |
+ if outfile is None: |
+ outfile = tempfile.NamedTemporaryFile(delete=False).name |
if verbose: |
- print 'Running parallel addr2line, dumping symbols to ' + infile; |
- RunParallelAddress2Line(outfile=infile, library=library, arch=arch, |
- jobs=jobs, verbose=verbose) |
+ print 'Running parallel addr2line, dumping symbols to ' + outfile; |
+ RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary) |
+ |
+ nm_infile = outfile |
+ |
elif verbose: |
- print 'Using nm input from ' + infile |
- with file(infile, 'r') as infile: |
- return list(ParseNm(infile)) |
+ print 'Using nm input from ' + nm_infile |
+ with file(nm_infile, 'r') as infile: |
+ return list(binary_size_utils.ParseNm(infile)) |
+ |
+ |
+def _find_in_system_path(binary): |
+ """Locate the full path to binary in the system path or return None |
+ if not found.""" |
+ system_path = os.environ["PATH"].split(os.pathsep) |
+ for path in system_path: |
+ binary_path = os.path.join(path, binary) |
+ if os.path.isfile(binary_path): |
+ return binary_path |
+ return None |
def main(): |
@@ -486,17 +573,15 @@ def main(): |
parser.add_option('--library', metavar='PATH', |
help='if specified, process symbols in the library at ' |
'the specified path. Mutually exclusive with --nm-in.') |
- parser.add_option('--arch', |
- help='the architecture that the library is targeted to. ' |
- 'Determines which nm/addr2line binaries are used. When ' |
- '\'host-native\' is chosen, the program will use whichever ' |
- 'nm/addr2line binaries are on the PATH. This is ' |
- 'appropriate when you are analyzing a binary by and for ' |
- 'your computer. ' |
- 'This argument is only valid when using --library. ' |
- 'Default is \'host-native\'.', |
- choices=['host-native', 'android-arm', |
- 'android-mips', 'android-x86'],) |
+ parser.add_option('--nm-binary', |
+ help='use the specified nm binary to analyze library. ' |
+ 'This is to be used when the nm in the path is not for ' |
+ 'the right architecture or of the right version.') |
+ parser.add_option('--addr2line-binary', |
+ help='use the specified addr2line binary to analyze ' |
+ 'library. This is to be used when the addr2line in ' |
+ 'the path is not for the right architecture or ' |
+ 'of the right version.') |
parser.add_option('--jobs', |
help='number of jobs to use for the parallel ' |
'addr2line processing pool; defaults to 1. More ' |
@@ -524,18 +609,33 @@ def main(): |
if opts.jobs: |
print >> sys.stderr, ('WARNING: --jobs has no effect ' |
'when used with --nm-in') |
- if opts.arch: |
- print >> sys.stderr, ('WARNING: --arch has no effect ' |
- 'when used with --nm-in') |
if not opts.destdir: |
parser.error('--destdir is required argument') |
if not opts.jobs: |
- opts.jobs = '1' |
- if not opts.arch: |
- opts.arch = 'host-native' |
+ opts.jobs = str(multiprocessing.cpu_count()) |
+ |
+ if opts.addr2line_binary: |
+ assert os.path.isfile(opts.addr2line_binary) |
+ addr2line_binary = opts.addr2line_binary |
+ else: |
+ addr2line_binary = _find_in_system_path('addr2line') |
+ assert addr2line_binary, 'Unable to find addr2line in the path. '\ |
+ 'Use --addr2line-binary to specify location.' |
+ |
+ if opts.nm_binary: |
+ assert os.path.isfile(opts.nm_binary) |
+ nm_binary = opts.nm_binary |
+ else: |
+ nm_binary = _find_in_system_path('nm') |
+ assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\ |
+ 'to specify location.' |
+ |
+ print('nm: %s' % nm_binary) |
+ print('addr2line: %s' % addr2line_binary) |
- symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch, |
- opts.jobs, opts.verbose is True) |
+ symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, |
+ opts.jobs, opts.verbose is True, |
+ addr2line_binary, nm_binary) |
if not os.path.exists(opts.destdir): |
os.makedirs(opts.destdir, 0755) |
@@ -562,11 +662,15 @@ def main(): |
d3_out = os.path.join(opts.destdir, 'd3') |
if not os.path.exists(d3_out): |
os.makedirs(d3_out, 0755) |
- d3_src = os.path.join('third_party', 'd3', 'src') |
- template_src = os.path.join('tools', 'binary_size', |
+ d3_src = os.path.join(os.path.dirname(__file__), |
+ '..', |
+ '..', |
+ 'third_party', 'd3', 'src') |
+ template_src = os.path.join(os.path.dirname(__file__), |
'template') |
shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out) |
shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out) |
+ print('Copying index.html') |
shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir) |
shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir) |