Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(14)

Unified Diff: tools/binary_size/run_binary_size_analysis.py

Issue 258633003: Graphical version of the run_binary_size_analysis tool. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Using the python addr2line wrapper. Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/binary_size/run_binary_size_analysis.py
diff --git a/tools/binary_size/run_binary_size_analysis.py b/tools/binary_size/run_binary_size_analysis.py
index 2a75faf0ac585289abb15587a269a27fc8937835..26be022acd956e6178be824fa33f625dd4648fbc 100755
--- a/tools/binary_size/run_binary_size_analysis.py
+++ b/tools/binary_size/run_binary_size_analysis.py
@@ -13,6 +13,7 @@ you desire.
import collections
import fileinput
import json
+import multiprocessing
import optparse
import os
import pprint
@@ -21,6 +22,19 @@ import shutil
import subprocess
import sys
import tempfile
+import time
+
+import binary_size_utils
+
+elf_symbolizer_path = os.path.abspath(os.path.join(
+ os.path.dirname(__file__),
+ '..',
+ '..',
+ 'build',
+ 'android',
+ 'pylib'))
+sys.path.append(elf_symbolizer_path)
+import symbols.elf_symbolizer as elf_symbolizer
# TODO(andrewhayden): Only used for legacy reports. Delete.
@@ -46,66 +60,31 @@ def SymbolTypeToHuman(type):
'v': 'weak symbol'}[type]
-def ParseNm(input):
- """Parse nm output.
-
- Argument: an iterable over lines of nm output.
-
- Yields: (symbol name, symbol type, symbol size, source file path).
- Path may be None if nm couldn't figure out the source file.
- """
-
- # Match lines with size, symbol, optional location, optional discriminator
- sym_re = re.compile(r'^[0-9a-f]{8} ' # address (8 hex digits)
- '([0-9a-f]{8}) ' # size (8 hex digits)
- '(.) ' # symbol type, one character
- '([^\t]+)' # symbol name, separated from next by tab
- '(?:\t(.*):[\d\?]+)?.*$') # location
- # Match lines with addr but no size.
- addr_re = re.compile(r'^[0-9a-f]{8} (.) ([^\t]+)(?:\t.*)?$')
- # Match lines that don't have an address at all -- typically external symbols.
- noaddr_re = re.compile(r'^ {8} (.) (.*)$')
-
- for line in input:
- line = line.rstrip()
- match = sym_re.match(line)
- if match:
- size, type, sym = match.groups()[0:3]
- size = int(size, 16)
- if type.lower() == 'b':
- continue # skip all BSS for now
- path = match.group(4)
- yield sym, type, size, path
- continue
- match = addr_re.match(line)
- if match:
- type, sym = match.groups()[0:2]
- # No size == we don't care.
- continue
- match = noaddr_re.match(line)
- if match:
- type, sym = match.groups()
- if type in ('U', 'w'):
- # external or weak symbol
- continue
-
- print >>sys.stderr, 'unparsed:', repr(line)
-
-
def _MkChild(node, name):
Primiano Tucci (use gerrit) 2014/05/20 15:22:57 _MkChild looks a "long"cut for return node.setdefa
Daniel Bratell 2014/05/21 08:42:13 Hmm, yes. Border case. Using setdefault forces you
- child = None
- for test in node['children']:
- if test['n'] == name:
- child = test
- break
- if not child:
- child = {'n': name, 'children': []}
- node['children'].append(child)
+ child = node['children'].get(name)
+ if child is None:
+ child = {'n': name, 'children': {}}
+ node['children'][name] = child
return child
+def MakeChildrenDictsIntoLists(node):
+ largest_list_len = 0
+ if 'children' in node:
+ largest_list_len = len(node['children'])
+ child_list = []
+ for child in node['children'].itervalues():
+ child_largest_list_len = MakeChildrenDictsIntoLists(child)
+ if child_largest_list_len > largest_list_len:
+ largest_list_len = child_largest_list_len
+ child_list.append(child)
+ node['children'] = child_list
+
+ return largest_list_len
+
Primiano Tucci (use gerrit) 2014/05/20 15:22:57 Nit: add extra newline
def MakeCompactTree(symbols):
- result = {'n': '/', 'children': [], 'k': 'p', 'maxDepth': 0}
+ result = {'n': '/', 'children': {}, 'k': 'p', 'maxDepth': 0}
+ seen_symbol_with_path = False
for symbol_name, symbol_type, symbol_size, file_path in symbols:
if 'vtable for ' in symbol_name:
@@ -113,6 +92,7 @@ def MakeCompactTree(symbols):
# Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz']
if file_path:
file_path = os.path.normpath(file_path)
+ seen_symbol_with_path = True
else:
file_path = '(No Path)'
@@ -129,18 +109,23 @@ def MakeCompactTree(symbols):
continue
depth += 1
node = _MkChild(node, path_part);
+ assert not 'k' in node or node['k'] == 'p'
node['k'] = 'p' # p for path
# 'node' is now the file node. Find the symbol-type bucket.
node['lastPathElement'] = True
node = _MkChild(node, symbol_type)
+ assert not 'k' in node or node['k'] == 'b'
node['t'] = symbol_type
node['k'] = 'b' # b for bucket
depth += 1
# 'node' is now the symbol-type bucket. Make the child entry.
node = _MkChild(node, symbol_name)
- if 'children' in node: # Only possible if we're adding duplicate entries!!!
+ if 'children' in node:
+ if node['children']:
+ print('A container node used as symbol for %s.' % symbol_name)
+ # This node is going to be used as a leaf, so it has no use for a child list.
del node['children']
node['value'] = symbol_size
node['t'] = symbol_type
@@ -148,6 +133,14 @@ def MakeCompactTree(symbols):
depth += 1
result['maxDepth'] = max(result['maxDepth'], depth);
+ if not seen_symbol_with_path:
+ print('Symbols lack paths. Data will not be structured.')
Primiano Tucci (use gerrit) 2014/05/20 15:22:57 shouldn't all these prints be logging.warning/erro
+
+ largest_list_len = MakeChildrenDictsIntoLists(result)
+
+ if largest_list_len > 1000:
+ print('There are sections with %d nodes. Results might be unusable.' %
+ largest_list_len)
return result
@@ -285,12 +278,11 @@ def JsonifyTree(tree, name):
'children': children }
def DumpCompactTree(symbols, outfile):
- out = open(outfile, 'w')
- try:
- out.write('var tree_data = ' + json.dumps(MakeCompactTree(symbols)))
- finally:
- out.flush()
- out.close()
+ tree_root = MakeCompactTree(symbols)
+ json_string = json.dumps(tree_root)
+ print('Writing %d bytes json' % len(json_string))
+ with open(outfile, 'w') as out:
+ out.write('var tree_data = ' + json_string)
Primiano Tucci (use gerrit) 2014/05/20 15:22:57 I've no idea how big this string is going to be. I
Daniel Bratell 2014/05/21 08:42:13 It can be big so this is a nice suggestion. Won't
# TODO(andrewhayden): Only used for legacy reports. Delete.
@@ -397,61 +389,156 @@ def DumpLargestVTables(symbols, outfile, n):
out.close()
-# TODO(andrewhayden): Switch to Primiano's python-based version.
-def RunParallelAddress2Line(outfile, library, arch, jobs, verbose):
- """Run a parallel addr2line processing engine to dump and resolve symbols."""
- out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out')
- build_type = os.getenv('BUILDTYPE', 'Release')
- classpath = os.path.join(out_dir, build_type, 'lib.java',
- 'binary_size_java.jar')
- cmd = ['java',
- '-classpath', classpath,
- 'org.chromium.tools.binary_size.ParallelAddress2Line',
- '--disambiguate',
- '--outfile', outfile,
- '--library', library,
- '--threads', jobs]
- if verbose is True:
- cmd.append('--verbose')
- prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains')
- if arch == 'android-arm':
- prefix = os.path.join(prefix, 'arm-linux-androideabi-4.8', 'prebuilt',
- 'linux-x86_64', 'bin', 'arm-linux-androideabi-')
- cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line'])
- elif arch == 'android-mips':
- prefix = os.path.join(prefix, 'mipsel-linux-android-4.8', 'prebuilt',
- 'linux-x86_64', 'bin', 'mipsel-linux-android-')
- cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line'])
- elif arch == 'android-x86':
- prefix = os.path.join(prefix, 'x86-4.8', 'prebuilt',
- 'linux-x86_64', 'bin', 'i686-linux-android-')
- cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line'])
- # else, use whatever is in PATH (don't pass --nm or --addr2line)
-
- if verbose:
- print cmd
-
- return_code = subprocess.call(cmd)
- if return_code:
- raise RuntimeError('Failed to run ParallelAddress2Line: returned ' +
- str(return_code))
-
-
-def GetNmSymbols(infile, outfile, library, arch, jobs, verbose):
- if infile is None:
- if outfile is None:
- infile = tempfile.NamedTemporaryFile(delete=False).name
+# Regex for parsing "nm" output. A sample line looks like this:
+# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
+#
+# The fields are: address, size, type, name, source location
+# Regular expression explained ( see also: https://xkcd.com/208 ):
+# ([0-9a-f]{8,}) The address (8 or more hex digits)
+# [\s]+ Whitespace separator
+# ([0-9a-f]{8,}) The size. From here on out it's all optional.
+# [\s]* Whitespace separator
+# (\S?) The symbol type, which is any non-whitespace char
+# [\s*] Exactly one separator char (whitespace, or a literal '*')
+# ([^\t]*) Symbol name, any non-tab character (spaces ok!)
+# [\t]? Tab separator
+# (.*) The location (filename[:linenum|?][ (discriminator n)])
+sNmPattern = re.compile(
+ r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
+
+class Progress():
+ def __init__(self):
+ self.count = 0
+ self.skip_count = 0
+ self.collisions = 0
+ self.time_last_output = time.time()
+ self.count_last_output = 0
+
+
+def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary):
+ nm_output = run_nm(library, nm_binary)
+ nm_output_lines = nm_output.splitlines()
+ nm_output_lines_len = len(nm_output_lines)
+ address_symbol = {}
+ progress = Progress()
+ def map_address_symbol(symbol, addr):
+ progress.count += 1
+ if addr in address_symbol:
+ # 'Collision between %s and %s.' % (str(symbol.name),
+ # str(address_symbol[addr].name))
+ progress.collisions += 1
else:
- infile = outfile
+ address_symbol[addr] = symbol
+
+ progress_chunk = 100
+ if progress.count % progress_chunk == 0:
+ time_now = time.time()
+ time_spent = time_now - progress.time_last_output
+ if time_spent > 1.0:
+ # Only output at most once per second.
+ progress.time_last_output = time_now
+ chunk_size = progress.count - progress.count_last_output
+ progress.count_last_output = progress.count
+ if time_spent > 0:
+ speed = chunk_size / time_spent
+ else:
+ speed = 0
+ progress_percent = (100.0 * (progress.count + progress.skip_count) /
+ nm_output_lines_len)
+ print('%.1f%%: Looked up %d symbols (%d collisions) - %.1f lookups/s.' %
+ (progress_percent, progress.count, progress.collisions, speed))
+
+ symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
+ map_address_symbol,
+ max_concurrent_jobs=2)
+ for line in nm_output_lines:
+ match = sNmPattern.match(line)
+ if match:
+ location = match.group(5)
+ if not location:
+ addr = int(match.group(1), 16)
+ size = int(match.group(2), 16)
+ if addr in address_symbol: # Already looked up, shortcut ELFSymbolizer.
+ map_address_symbol(address_symbol[addr], addr)
+ continue
+ elif size == 0:
+ # Save time by not looking up empty symbols (do they even exist?)
+ print('Empty symbol: ' + line)
+ else:
+ symbolizer.SymbolizeAsync(addr, addr)
+ continue
+
+ progress.skip_count += 1
+
+ symbolizer.Join()
+
+ with open(outfile, 'w') as out:
+ for line in nm_output_lines:
+ match = sNmPattern.match(line)
+ if match:
+ location = match.group(5)
+ if not location:
+ addr = int(match.group(1), 16)
+ symbol = address_symbol[addr]
+ path = '??'
+ if symbol.source_path is not None:
+ path = symbol.source_path
+ line_number = 0
+ if symbol.source_line is not None:
+ line_number = symbol.source_line
+ out.write('%s\t%s:%d\n' % (line, path, line_number))
+ continue
+
+ out.write('%s\n' % line)
+
+ print('%d symbols in the results.' % len(address_symbol))
+
+
+def run_nm(binary, nm_binary):
Primiano Tucci (use gerrit) 2014/05/20 15:22:57 nit: RunNm
+ print('Starting nm')
+ cmd = [nm_binary, '-C', '--print-size', binary]
+ nm_process = subprocess.Popen(cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ (process_output, err_output) = nm_process.communicate()
+
+ if nm_process.returncode != 0:
+ if err_output:
+ raise Exception, err_output
+ else:
+ raise Exception, process_output
+
+ print('Finished nm')
+ return process_output
+
+
+def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
+ addr2line_binary, nm_binary):
+ if nm_infile is None:
+ if outfile is None:
+ outfile = tempfile.NamedTemporaryFile(delete=False).name
if verbose:
- print 'Running parallel addr2line, dumping symbols to ' + infile;
- RunParallelAddress2Line(outfile=infile, library=library, arch=arch,
- jobs=jobs, verbose=verbose)
+ print 'Running parallel addr2line, dumping symbols to ' + outfile;
+ RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary)
+
+ nm_infile = outfile
+
elif verbose:
- print 'Using nm input from ' + infile
- with file(infile, 'r') as infile:
- return list(ParseNm(infile))
+ print 'Using nm input from ' + nm_infile
+ with file(nm_infile, 'r') as infile:
+ return list(binary_size_utils.ParseNm(infile))
+
+
+def _find_in_system_path(binary):
+ """Locate the full path to binary in the system path or return None
+ if not found."""
+ system_path = os.environ["PATH"].split(os.pathsep)
+ for path in system_path:
+ binary_path = os.path.join(path, binary)
+ if os.path.isfile(binary_path):
+ return binary_path
+ return None
def main():
@@ -486,17 +573,15 @@ def main():
parser.add_option('--library', metavar='PATH',
help='if specified, process symbols in the library at '
'the specified path. Mutually exclusive with --nm-in.')
- parser.add_option('--arch',
- help='the architecture that the library is targeted to. '
- 'Determines which nm/addr2line binaries are used. When '
- '\'host-native\' is chosen, the program will use whichever '
- 'nm/addr2line binaries are on the PATH. This is '
- 'appropriate when you are analyzing a binary by and for '
- 'your computer. '
- 'This argument is only valid when using --library. '
- 'Default is \'host-native\'.',
- choices=['host-native', 'android-arm',
- 'android-mips', 'android-x86'],)
+ parser.add_option('--nm-binary',
+ help='use the specified nm binary to analyze library. '
+ 'This is to be used when the nm in the path is not for '
+ 'the right architecture or of the right version.')
+ parser.add_option('--addr2line-binary',
+ help='use the specified addr2line binary to analyze '
+ 'library. This is to be used when the addr2line in '
+ 'the path is not for the right architecture or '
+ 'of the right version.')
parser.add_option('--jobs',
help='number of jobs to use for the parallel '
'addr2line processing pool; defaults to 1. More '
@@ -524,18 +609,33 @@ def main():
if opts.jobs:
print >> sys.stderr, ('WARNING: --jobs has no effect '
'when used with --nm-in')
- if opts.arch:
- print >> sys.stderr, ('WARNING: --arch has no effect '
- 'when used with --nm-in')
if not opts.destdir:
parser.error('--destdir is required argument')
if not opts.jobs:
- opts.jobs = '1'
- if not opts.arch:
- opts.arch = 'host-native'
+ opts.jobs = str(multiprocessing.cpu_count())
+
+ if opts.addr2line_binary:
+ assert os.path.isfile(opts.addr2line_binary)
+ addr2line_binary = opts.addr2line_binary
+ else:
+ addr2line_binary = _find_in_system_path('addr2line')
+ assert addr2line_binary, 'Unable to find addr2line in the path. '\
+ 'Use --addr2line-binary to specify location.'
+
+ if opts.nm_binary:
+ assert os.path.isfile(opts.nm_binary)
+ nm_binary = opts.nm_binary
+ else:
+ nm_binary = _find_in_system_path('nm')
+ assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
+ 'to specify location.'
+
+ print('nm: %s' % nm_binary)
+ print('addr2line: %s' % addr2line_binary)
- symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch,
- opts.jobs, opts.verbose is True)
+ symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
+ opts.jobs, opts.verbose is True,
+ addr2line_binary, nm_binary)
if not os.path.exists(opts.destdir):
os.makedirs(opts.destdir, 0755)
@@ -562,11 +662,15 @@ def main():
d3_out = os.path.join(opts.destdir, 'd3')
if not os.path.exists(d3_out):
os.makedirs(d3_out, 0755)
- d3_src = os.path.join('third_party', 'd3', 'src')
- template_src = os.path.join('tools', 'binary_size',
+ d3_src = os.path.join(os.path.dirname(__file__),
+ '..',
+ '..',
+ 'third_party', 'd3', 'src')
+ template_src = os.path.join(os.path.dirname(__file__),
'template')
shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out)
shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out)
+ print('Copying index.html')
shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir)
shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir)

Powered by Google App Engine
This is Rietveld 408576698