OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/env python |
2 # Copyright 2014 The Chromium Authors. All rights reserved. | 2 # Copyright 2014 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Generate a spatial analysis against an arbitrary library. | 6 """Generate a spatial analysis against an arbitrary library. |
7 | 7 |
8 To use, build the 'binary_size_tool' target. Then run this tool, passing | 8 To use, build the 'binary_size_tool' target. Then run this tool, passing |
9 in the location of the library to be analyzed along with any other options | 9 in the location of the library to be analyzed along with any other options |
10 you desire. | 10 you desire. |
11 """ | 11 """ |
12 | 12 |
13 import collections | 13 import collections |
14 import fileinput | |
15 import json | 14 import json |
| 15 import logging |
| 16 import multiprocessing |
16 import optparse | 17 import optparse |
17 import os | 18 import os |
18 import pprint | |
19 import re | 19 import re |
20 import shutil | 20 import shutil |
21 import subprocess | 21 import subprocess |
22 import sys | 22 import sys |
23 import tempfile | 23 import tempfile |
| 24 import time |
| 25 |
| 26 import binary_size_utils |
| 27 |
 | 28 # This path change is not beautiful. Temporary (I hope) measure until |
| 29 # the chromium project has figured out a proper way to organize the |
| 30 # library of python tools. http://crbug.com/375725 |
| 31 elf_symbolizer_path = os.path.abspath(os.path.join( |
| 32 os.path.dirname(__file__), |
| 33 '..', |
| 34 '..', |
| 35 'build', |
| 36 'android', |
| 37 'pylib')) |
| 38 sys.path.append(elf_symbolizer_path) |
| 39 import symbols.elf_symbolizer as elf_symbolizer |
24 | 40 |
25 | 41 |
26 # TODO(andrewhayden): Only used for legacy reports. Delete. | 42 # TODO(andrewhayden): Only used for legacy reports. Delete. |
27 def FormatBytes(bytes): | 43 def FormatBytes(byte_count): |
28 """Pretty-print a number of bytes.""" | 44 """Pretty-print a number of bytes.""" |
29 if bytes > 1e6: | 45 if byte_count > 1e6: |
30 bytes = bytes / 1.0e6 | 46 byte_count = byte_count / 1.0e6 |
31 return '%.1fm' % bytes | 47 return '%.1fm' % byte_count |
32 if bytes > 1e3: | 48 if byte_count > 1e3: |
33 bytes = bytes / 1.0e3 | 49 byte_count = byte_count / 1.0e3 |
34 return '%.1fk' % bytes | 50 return '%.1fk' % byte_count |
35 return str(bytes) | 51 return str(byte_count) |
36 | 52 |
37 | 53 |
38 # TODO(andrewhayden): Only used for legacy reports. Delete. | 54 # TODO(andrewhayden): Only used for legacy reports. Delete. |
39 def SymbolTypeToHuman(type): | 55 def SymbolTypeToHuman(symbol_type): |
40 """Convert a symbol type as printed by nm into a human-readable name.""" | 56 """Convert a symbol type as printed by nm into a human-readable name.""" |
41 return {'b': 'bss', | 57 return {'b': 'bss', |
42 'd': 'data', | 58 'd': 'data', |
43 'r': 'read-only data', | 59 'r': 'read-only data', |
44 't': 'code', | 60 't': 'code', |
45 'w': 'weak symbol', | 61 'w': 'weak symbol', |
46 'v': 'weak symbol'}[type] | 62 'v': 'weak symbol'}[symbol_type] |
47 | |
48 | |
49 def ParseNm(input): | |
50 """Parse nm output. | |
51 | |
52 Argument: an iterable over lines of nm output. | |
53 | |
54 Yields: (symbol name, symbol type, symbol size, source file path). | |
55 Path may be None if nm couldn't figure out the source file. | |
56 """ | |
57 | |
58 # Match lines with size, symbol, optional location, optional discriminator | |
59 sym_re = re.compile(r'^[0-9a-f]{8} ' # address (8 hex digits) | |
60 '([0-9a-f]{8}) ' # size (8 hex digits) | |
61 '(.) ' # symbol type, one character | |
62 '([^\t]+)' # symbol name, separated from next by tab | |
63 '(?:\t(.*):[\d\?]+)?.*$') # location | |
64 # Match lines with addr but no size. | |
65 addr_re = re.compile(r'^[0-9a-f]{8} (.) ([^\t]+)(?:\t.*)?$') | |
66 # Match lines that don't have an address at all -- typically external symbols. | |
67 noaddr_re = re.compile(r'^ {8} (.) (.*)$') | |
68 | |
69 for line in input: | |
70 line = line.rstrip() | |
71 match = sym_re.match(line) | |
72 if match: | |
73 size, type, sym = match.groups()[0:3] | |
74 size = int(size, 16) | |
75 if type.lower() == 'b': | |
76 continue # skip all BSS for now | |
77 path = match.group(4) | |
78 yield sym, type, size, path | |
79 continue | |
80 match = addr_re.match(line) | |
81 if match: | |
82 type, sym = match.groups()[0:2] | |
83 # No size == we don't care. | |
84 continue | |
85 match = noaddr_re.match(line) | |
86 if match: | |
87 type, sym = match.groups() | |
88 if type in ('U', 'w'): | |
89 # external or weak symbol | |
90 continue | |
91 | |
92 print >>sys.stderr, 'unparsed:', repr(line) | |
93 | 63 |
94 | 64 |
95 def _MkChild(node, name): | 65 def _MkChild(node, name): |
96 child = None | 66 child = node['children'].get(name) |
97 for test in node['children']: | 67 if child is None: |
98 if test['n'] == name: | 68 child = {'n': name, 'children': {}} |
99 child = test | 69 node['children'][name] = child |
100 break | |
101 if not child: | |
102 child = {'n': name, 'children': []} | |
103 node['children'].append(child) | |
104 return child | 70 return child |
105 | 71 |
106 | 72 |
| 73 def MakeChildrenDictsIntoLists(node): |
| 74 largest_list_len = 0 |
| 75 if 'children' in node: |
| 76 largest_list_len = len(node['children']) |
| 77 child_list = [] |
| 78 for child in node['children'].itervalues(): |
| 79 child_largest_list_len = MakeChildrenDictsIntoLists(child) |
| 80 if child_largest_list_len > largest_list_len: |
| 81 largest_list_len = child_largest_list_len |
| 82 child_list.append(child) |
| 83 node['children'] = child_list |
| 84 |
| 85 return largest_list_len |
| 86 |
| 87 |
107 def MakeCompactTree(symbols): | 88 def MakeCompactTree(symbols): |
108 result = {'n': '/', 'children': [], 'k': 'p', 'maxDepth': 0} | 89 result = {'n': '/', 'children': {}, 'k': 'p', 'maxDepth': 0} |
| 90 seen_symbol_with_path = False |
109 for symbol_name, symbol_type, symbol_size, file_path in symbols: | 91 for symbol_name, symbol_type, symbol_size, file_path in symbols: |
110 | 92 |
111 if 'vtable for ' in symbol_name: | 93 if 'vtable for ' in symbol_name: |
112 symbol_type = '@' # hack to categorize these separately | 94 symbol_type = '@' # hack to categorize these separately |
113 # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz'] | 95 # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz'] |
114 if file_path: | 96 if file_path: |
115 file_path = os.path.normpath(file_path) | 97 file_path = os.path.normpath(file_path) |
| 98 seen_symbol_with_path = True |
116 else: | 99 else: |
117 file_path = '(No Path)' | 100 file_path = '(No Path)' |
118 | 101 |
119 if file_path.startswith('/'): | 102 if file_path.startswith('/'): |
120 file_path = file_path[1:] | 103 file_path = file_path[1:] |
121 path_parts = file_path.split('/') | 104 path_parts = file_path.split('/') |
122 | 105 |
123 # Find pre-existing node in tree, or update if it already exists | 106 # Find pre-existing node in tree, or update if it already exists |
124 node = result | 107 node = result |
125 depth = 0 | 108 depth = 0 |
126 while len(path_parts) > 0: | 109 while len(path_parts) > 0: |
127 path_part = path_parts.pop(0) | 110 path_part = path_parts.pop(0) |
128 if len(path_part) == 0: | 111 if len(path_part) == 0: |
129 continue | 112 continue |
130 depth += 1 | 113 depth += 1 |
131 node = _MkChild(node, path_part); | 114 node = _MkChild(node, path_part) |
| 115 assert not 'k' in node or node['k'] == 'p' |
132 node['k'] = 'p' # p for path | 116 node['k'] = 'p' # p for path |
133 | 117 |
134 # 'node' is now the file node. Find the symbol-type bucket. | 118 # 'node' is now the file node. Find the symbol-type bucket. |
135 node['lastPathElement'] = True | 119 node['lastPathElement'] = True |
136 node = _MkChild(node, symbol_type) | 120 node = _MkChild(node, symbol_type) |
| 121 assert not 'k' in node or node['k'] == 'b' |
137 node['t'] = symbol_type | 122 node['t'] = symbol_type |
138 node['k'] = 'b' # b for bucket | 123 node['k'] = 'b' # b for bucket |
139 depth += 1 | 124 depth += 1 |
140 | 125 |
141 # 'node' is now the symbol-type bucket. Make the child entry. | 126 # 'node' is now the symbol-type bucket. Make the child entry. |
142 node = _MkChild(node, symbol_name) | 127 node = _MkChild(node, symbol_name) |
143 if 'children' in node: # Only possible if we're adding duplicate entries!!! | 128 if 'children' in node: |
| 129 if node['children']: |
| 130 logging.warning('A container node used as symbol for %s.' % symbol_name) |
| 131 # This is going to be used as a leaf so no use for child list. |
144 del node['children'] | 132 del node['children'] |
145 node['value'] = symbol_size | 133 node['value'] = symbol_size |
146 node['t'] = symbol_type | 134 node['t'] = symbol_type |
147 node['k'] = 's' # s for symbol | 135 node['k'] = 's' # s for symbol |
148 depth += 1 | 136 depth += 1 |
149 result['maxDepth'] = max(result['maxDepth'], depth); | 137 result['maxDepth'] = max(result['maxDepth'], depth) |
150 | 138 |
| 139 if not seen_symbol_with_path: |
| 140 logging.warning('Symbols lack paths. Data will not be structured.') |
| 141 |
| 142 largest_list_len = MakeChildrenDictsIntoLists(result) |
| 143 |
| 144 if largest_list_len > 1000: |
| 145 logging.warning('There are sections with %d nodes. ' |
| 146 'Results might be unusable.' % largest_list_len) |
151 return result | 147 return result |
152 | 148 |
153 | 149 |
154 # TODO(andrewhayden): Only used for legacy reports. Delete. | 150 # TODO(andrewhayden): Only used for legacy reports. Delete. |
155 def TreeifySymbols(symbols): | 151 def TreeifySymbols(symbols): |
156 """Convert symbols into a path-based tree, calculating size information | 152 """Convert symbols into a path-based tree, calculating size information |
157 along the way. | 153 along the way. |
158 | 154 |
159 The result is a dictionary that contains two kinds of nodes: | 155 The result is a dictionary that contains two kinds of nodes: |
160 1. Leaf nodes, representing source code locations (e.g., c++ files) | 156 1. Leaf nodes, representing source code locations (e.g., c++ files) |
161 These nodes have the following dictionary entries: | 157 These nodes have the following dictionary entries: |
162 sizes: a dictionary whose keys are categories (such as code, data, | 158 sizes: a dictionary whose keys are categories (such as code, data, |
163 vtable, etceteras) and whose values are the size, in bytes, of | 159 vtable, etceteras) and whose values are the size, in bytes, of |
164 those categories; | 160 those categories; |
165 size: the total size, in bytes, of all the entries in the sizes dict | 161 size: the total size, in bytes, of all the entries in the sizes dict |
166 2. Non-leaf nodes, representing directories | 162 2. Non-leaf nodes, representing directories |
167 These nodes have the following dictionary entries: | 163 These nodes have the following dictionary entries: |
168 children: a dictionary whose keys are names (path entries; either | 164 children: a dictionary whose keys are names (path entries; either |
169 directory or file names) and whose values are other nodes; | 165 directory or file names) and whose values are other nodes; |
170 size: the total size, in bytes, of all the leaf nodes that are | 166 size: the total size, in bytes, of all the leaf nodes that are |
171 contained within the children dict (recursively expanded) | 167 contained within the children dict (recursively expanded) |
172 | 168 |
173 The result object is itself a dictionary that represents the common ancestor | 169 The result object is itself a dictionary that represents the common ancestor |
174 of all child nodes, e.g. a path to which all other nodes beneath it are | 170 of all child nodes, e.g. a path to which all other nodes beneath it are |
175 relative. The 'size' attribute of this dict yields the sum of the size of all | 171 relative. The 'size' attribute of this dict yields the sum of the size of all |
176 leaf nodes within the data structure. | 172 leaf nodes within the data structure. |
177 """ | 173 """ |
178 dirs = {'children': {}, 'size': 0} | 174 dirs = {'children': {}, 'size': 0} |
179 for sym, type, size, path in symbols: | 175 for sym, symbol_type, size, path in symbols: |
180 dirs['size'] += size | 176 dirs['size'] += size |
181 if path: | 177 if path: |
182 path = os.path.normpath(path) | 178 path = os.path.normpath(path) |
183 if path.startswith('/'): | 179 if path.startswith('/'): |
184 path = path[1:] | 180 path = path[1:] |
185 | 181 |
186 parts = None | 182 parts = None |
187 if path: | 183 if path: |
188 parts = path.split('/') | 184 parts = path.split('/') |
189 | 185 |
(...skipping 12 matching lines...) Expand all Loading... |
202 | 198 |
203 # Get (creating if necessary) the node for the file | 199 # Get (creating if necessary) the node for the file |
204 # This node doesn't have a 'children' attribute | 200 # This node doesn't have a 'children' attribute |
205 if file_key not in tree['children']: | 201 if file_key not in tree['children']: |
206 tree['children'][file_key] = {'sizes': collections.defaultdict(int), | 202 tree['children'][file_key] = {'sizes': collections.defaultdict(int), |
207 'size': 0} | 203 'size': 0} |
208 tree = tree['children'][file_key] | 204 tree = tree['children'][file_key] |
209 tree['size'] += size | 205 tree['size'] += size |
210 | 206 |
211 # Accumulate size into a bucket within the file | 207 # Accumulate size into a bucket within the file |
212 type = type.lower() | 208 symbol_type = symbol_type.lower() |
213 if 'vtable for ' in sym: | 209 if 'vtable for ' in sym: |
214 tree['sizes']['[vtable]'] += size | 210 tree['sizes']['[vtable]'] += size |
215 elif 'r' == type: | 211 elif 'r' == symbol_type: |
216 tree['sizes']['[rodata]'] += size | 212 tree['sizes']['[rodata]'] += size |
217 elif 'd' == type: | 213 elif 'd' == symbol_type: |
218 tree['sizes']['[data]'] += size | 214 tree['sizes']['[data]'] += size |
219 elif 'b' == type: | 215 elif 'b' == symbol_type: |
220 tree['sizes']['[bss]'] += size | 216 tree['sizes']['[bss]'] += size |
221 elif 't' == type: | 217 elif 't' == symbol_type: |
222 # 'text' in binary parlance means 'code'. | 218 # 'text' in binary parlance means 'code'. |
223 tree['sizes']['[code]'] += size | 219 tree['sizes']['[code]'] += size |
224 elif 'w' == type: | 220 elif 'w' == symbol_type: |
225 tree['sizes']['[weak]'] += size | 221 tree['sizes']['[weak]'] += size |
226 else: | 222 else: |
227 tree['sizes']['[other]'] += size | 223 tree['sizes']['[other]'] += size |
228 except: | 224 except: |
229 print >>sys.stderr, sym, parts, key | 225 print >> sys.stderr, sym, parts, file_key |
230 raise | 226 raise |
231 else: | 227 else: |
232 key = 'symbols without paths' | 228 key = 'symbols without paths' |
233 if key not in dirs['children']: | 229 if key not in dirs['children']: |
234 dirs['children'][key] = {'sizes': collections.defaultdict(int), | 230 dirs['children'][key] = {'sizes': collections.defaultdict(int), |
235 'size': 0} | 231 'size': 0} |
236 tree = dirs['children'][key] | 232 tree = dirs['children'][key] |
237 subkey = 'misc' | 233 subkey = 'misc' |
238 if (sym.endswith('::__FUNCTION__') or | 234 if (sym.endswith('::__FUNCTION__') or |
239 sym.endswith('::__PRETTY_FUNCTION__')): | 235 sym.endswith('::__PRETTY_FUNCTION__')): |
(...skipping 25 matching lines...) Expand all Loading... |
265 if 'children' in tree: | 261 if 'children' in tree: |
266 # Non-leaf node. Recurse. | 262 # Non-leaf node. Recurse. |
267 for child_name, child in tree['children'].iteritems(): | 263 for child_name, child in tree['children'].iteritems(): |
268 children.append(JsonifyTree(child, child_name)) | 264 children.append(JsonifyTree(child, child_name)) |
269 else: | 265 else: |
270 # Leaf node; dump per-file stats as entries in the treemap | 266 # Leaf node; dump per-file stats as entries in the treemap |
271 for kind, size in tree['sizes'].iteritems(): | 267 for kind, size in tree['sizes'].iteritems(): |
272 child_json = {'name': kind + ' (' + FormatBytes(size) + ')', | 268 child_json = {'name': kind + ' (' + FormatBytes(size) + ')', |
273 'data': { '$area': size }} | 269 'data': { '$area': size }} |
274 css_class = css_class_map.get(kind) | 270 css_class = css_class_map.get(kind) |
275 if css_class is not None: child_json['data']['$symbol'] = css_class | 271 if css_class is not None: |
| 272 child_json['data']['$symbol'] = css_class |
276 children.append(child_json) | 273 children.append(child_json) |
277 # Sort children by size, largest to smallest. | 274 # Sort children by size, largest to smallest. |
278 children.sort(key=lambda child: -child['data']['$area']) | 275 children.sort(key=lambda child: -child['data']['$area']) |
279 | 276 |
280 # For leaf nodes, the 'size' attribute is the size of the leaf; | 277 # For leaf nodes, the 'size' attribute is the size of the leaf; |
281 # Non-leaf nodes don't really have a size, but their 'size' attribute is | 278 # Non-leaf nodes don't really have a size, but their 'size' attribute is |
282 # the sum of the sizes of all their children. | 279 # the sum of the sizes of all their children. |
283 return {'name': name + ' (' + FormatBytes(tree['size']) + ')', | 280 return {'name': name + ' (' + FormatBytes(tree['size']) + ')', |
284 'data': { '$area': tree['size'] }, | 281 'data': { '$area': tree['size'] }, |
285 'children': children } | 282 'children': children } |
286 | 283 |
287 def DumpCompactTree(symbols, outfile): | 284 def DumpCompactTree(symbols, outfile): |
288 out = open(outfile, 'w') | 285 tree_root = MakeCompactTree(symbols) |
289 try: | 286 with open(outfile, 'w') as out: |
290 out.write('var tree_data = ' + json.dumps(MakeCompactTree(symbols))) | 287 out.write('var tree_data = ') |
291 finally: | 288 json.dump(tree_root, out) |
292 out.flush() | 289 print('Writing %d bytes json' % os.path.getsize(outfile)) |
293 out.close() | |
294 | 290 |
295 | 291 |
296 # TODO(andrewhayden): Only used for legacy reports. Delete. | 292 # TODO(andrewhayden): Only used for legacy reports. Delete. |
297 def DumpTreemap(symbols, outfile): | 293 def DumpTreemap(symbols, outfile): |
298 dirs = TreeifySymbols(symbols) | 294 dirs = TreeifySymbols(symbols) |
299 out = open(outfile, 'w') | 295 out = open(outfile, 'w') |
300 try: | 296 try: |
301 out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/'))) | 297 out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/'))) |
302 finally: | 298 finally: |
303 out.flush() | 299 out.flush() |
304 out.close() | 300 out.close() |
305 | 301 |
306 | 302 |
307 # TODO(andrewhayden): Only used for legacy reports. Delete. | 303 # TODO(andrewhayden): Only used for legacy reports. Delete. |
308 def DumpLargestSymbols(symbols, outfile, n): | 304 def DumpLargestSymbols(symbols, outfile, n): |
309 # a list of (sym, type, size, path); sort by size. | 305 # a list of (sym, symbol_type, size, path); sort by size. |
310 symbols = sorted(symbols, key=lambda x: -x[2]) | 306 symbols = sorted(symbols, key=lambda x: -x[2]) |
311 dumped = 0 | 307 dumped = 0 |
312 out = open(outfile, 'w') | 308 out = open(outfile, 'w') |
313 try: | 309 try: |
314 out.write('var largestSymbols = [\n') | 310 out.write('var largestSymbols = [\n') |
315 for sym, type, size, path in symbols: | 311 for sym, symbol_type, size, path in symbols: |
316 if type in ('b', 'w'): | 312 if symbol_type in ('b', 'w'): |
317 continue # skip bss and weak symbols | 313 continue # skip bss and weak symbols |
318 if path is None: | 314 if path is None: |
319 path = '' | 315 path = '' |
320 entry = {'size': FormatBytes(size), | 316 entry = {'size': FormatBytes(size), |
321 'symbol': sym, | 317 'symbol': sym, |
322 'type': SymbolTypeToHuman(type), | 318 'type': SymbolTypeToHuman(symbol_type), |
323 'location': path } | 319 'location': path } |
324 out.write(json.dumps(entry)) | 320 out.write(json.dumps(entry)) |
325 out.write(',\n') | 321 out.write(',\n') |
326 dumped += 1 | 322 dumped += 1 |
327 if dumped >= n: | 323 if dumped >= n: |
328 return | 324 return |
329 finally: | 325 finally: |
330 out.write('];\n') | 326 out.write('];\n') |
331 out.flush() | 327 out.flush() |
332 out.close() | 328 out.close() |
333 | 329 |
334 | 330 |
335 def MakeSourceMap(symbols): | 331 def MakeSourceMap(symbols): |
336 sources = {} | 332 sources = {} |
337 for sym, type, size, path in symbols: | 333 for _sym, _symbol_type, size, path in symbols: |
338 key = None | 334 key = None |
339 if path: | 335 if path: |
340 key = os.path.normpath(path) | 336 key = os.path.normpath(path) |
341 else: | 337 else: |
342 key = '[no path]' | 338 key = '[no path]' |
343 if key not in sources: | 339 if key not in sources: |
344 sources[key] = {'path': path, 'symbol_count': 0, 'size': 0} | 340 sources[key] = {'path': path, 'symbol_count': 0, 'size': 0} |
345 record = sources[key] | 341 record = sources[key] |
346 record['size'] += size | 342 record['size'] += size |
347 record['symbol_count'] += 1 | 343 record['symbol_count'] += 1 |
348 return sources | 344 return sources |
349 | 345 |
350 | 346 |
351 # TODO(andrewhayden): Only used for legacy reports. Delete. | 347 # TODO(andrewhayden): Only used for legacy reports. Delete. |
352 def DumpLargestSources(symbols, outfile, n): | 348 def DumpLargestSources(symbols, outfile, n): |
353 map = MakeSourceMap(symbols) | 349 source_map = MakeSourceMap(symbols) |
354 sources = sorted(map.values(), key=lambda x: -x['size']) | 350 sources = sorted(source_map.values(), key=lambda x: -x['size']) |
355 dumped = 0 | 351 dumped = 0 |
356 out = open(outfile, 'w') | 352 out = open(outfile, 'w') |
357 try: | 353 try: |
358 out.write('var largestSources = [\n') | 354 out.write('var largestSources = [\n') |
359 for record in sources: | 355 for record in sources: |
360 entry = {'size': FormatBytes(record['size']), | 356 entry = {'size': FormatBytes(record['size']), |
361 'symbol_count': str(record['symbol_count']), | 357 'symbol_count': str(record['symbol_count']), |
362 'location': record['path']} | 358 'location': record['path']} |
363 out.write(json.dumps(entry)) | 359 out.write(json.dumps(entry)) |
364 out.write(',\n') | 360 out.write(',\n') |
365 dumped += 1 | 361 dumped += 1 |
366 if dumped >= n: | 362 if dumped >= n: |
367 return | 363 return |
368 finally: | 364 finally: |
369 out.write('];\n') | 365 out.write('];\n') |
370 out.flush() | 366 out.flush() |
371 out.close() | 367 out.close() |
372 | 368 |
373 | 369 |
374 # TODO(andrewhayden): Only used for legacy reports. Delete. | 370 # TODO(andrewhayden): Only used for legacy reports. Delete. |
375 def DumpLargestVTables(symbols, outfile, n): | 371 def DumpLargestVTables(symbols, outfile, n): |
376 vtables = [] | 372 vtables = [] |
377 for symbol, type, size, path in symbols: | 373 for symbol, _type, size, path in symbols: |
378 if 'vtable for ' in symbol: | 374 if 'vtable for ' in symbol: |
379 vtables.append({'symbol': symbol, 'path': path, 'size': size}) | 375 vtables.append({'symbol': symbol, 'path': path, 'size': size}) |
380 vtables = sorted(vtables, key=lambda x: -x['size']) | 376 vtables = sorted(vtables, key=lambda x: -x['size']) |
381 dumped = 0 | 377 dumped = 0 |
382 out = open(outfile, 'w') | 378 out = open(outfile, 'w') |
383 try: | 379 try: |
384 out.write('var largestVTables = [\n') | 380 out.write('var largestVTables = [\n') |
385 for record in vtables: | 381 for record in vtables: |
386 entry = {'size': FormatBytes(record['size']), | 382 entry = {'size': FormatBytes(record['size']), |
387 'symbol': record['symbol'], | 383 'symbol': record['symbol'], |
388 'location': record['path']} | 384 'location': record['path']} |
389 out.write(json.dumps(entry)) | 385 out.write(json.dumps(entry)) |
390 out.write(',\n') | 386 out.write(',\n') |
391 dumped += 1 | 387 dumped += 1 |
392 if dumped >= n: | 388 if dumped >= n: |
393 return | 389 return |
394 finally: | 390 finally: |
395 out.write('];\n') | 391 out.write('];\n') |
396 out.flush() | 392 out.flush() |
397 out.close() | 393 out.close() |
398 | 394 |
399 | 395 |
400 # TODO(andrewhayden): Switch to Primiano's python-based version. | 396 # Regex for parsing "nm" output. A sample line looks like this: |
401 def RunParallelAddress2Line(outfile, library, arch, jobs, verbose): | 397 # 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95 |
402 """Run a parallel addr2line processing engine to dump and resolve symbols.""" | 398 # |
403 out_dir = os.getenv('CHROMIUM_OUT_DIR', 'out') | 399 # The fields are: address, size, type, name, source location |
404 build_type = os.getenv('BUILDTYPE', 'Release') | 400 # Regular expression explained ( see also: https://xkcd.com/208 ): |
405 classpath = os.path.join(out_dir, build_type, 'lib.java', | 401 # ([0-9a-f]{8,}+) The address |
406 'binary_size_java.jar') | 402 # [\s]+ Whitespace separator |
407 cmd = ['java', | 403 # ([0-9a-f]{8,}+) The size. From here on out it's all optional. |
408 '-classpath', classpath, | 404 # [\s]+ Whitespace separator |
409 'org.chromium.tools.binary_size.ParallelAddress2Line', | 405 # (\S?) The symbol type, which is any non-whitespace char |
410 '--disambiguate', | 406 # [\s*] Whitespace separator |
411 '--outfile', outfile, | 407 # ([^\t]*) Symbol name, any non-tab character (spaces ok!) |
412 '--library', library, | 408 # [\t]? Tab separator |
413 '--threads', jobs] | 409 # (.*) The location (filename[:linenum|?][ (discriminator n)] |
414 if verbose is True: | 410 sNmPattern = re.compile( |
415 cmd.append('--verbose') | 411 r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)') |
416 prefix = os.path.join('third_party', 'android_tools', 'ndk', 'toolchains') | |
417 if arch == 'android-arm': | |
418 prefix = os.path.join(prefix, 'arm-linux-androideabi-4.8', 'prebuilt', | |
419 'linux-x86_64', 'bin', 'arm-linux-androideabi-') | |
420 cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) | |
421 elif arch == 'android-mips': | |
422 prefix = os.path.join(prefix, 'mipsel-linux-android-4.8', 'prebuilt', | |
423 'linux-x86_64', 'bin', 'mipsel-linux-android-') | |
424 cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) | |
425 elif arch == 'android-x86': | |
426 prefix = os.path.join(prefix, 'x86-4.8', 'prebuilt', | |
427 'linux-x86_64', 'bin', 'i686-linux-android-') | |
428 cmd.extend(['--nm', prefix + 'nm', '--addr2line', prefix + 'addr2line']) | |
429 # else, use whatever is in PATH (don't pass --nm or --addr2line) | |
430 | 412 |
431 if verbose: | 413 class Progress(): |
432 print cmd | 414 def __init__(self): |
433 | 415 self.count = 0 |
434 return_code = subprocess.call(cmd) | 416 self.skip_count = 0 |
435 if return_code: | 417 self.collisions = 0 |
436 raise RuntimeError('Failed to run ParallelAddress2Line: returned ' + | 418 self.time_last_output = time.time() |
437 str(return_code)) | 419 self.count_last_output = 0 |
438 | 420 |
439 | 421 |
440 def GetNmSymbols(infile, outfile, library, arch, jobs, verbose): | 422 def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs): |
441 if infile is None: | 423 nm_output = RunNm(library, nm_binary) |
| 424 nm_output_lines = nm_output.splitlines() |
| 425 nm_output_lines_len = len(nm_output_lines) |
| 426 address_symbol = {} |
| 427 progress = Progress() |
| 428 def map_address_symbol(symbol, addr): |
| 429 progress.count += 1 |
| 430 if addr in address_symbol: |
| 431 # 'Collision between %s and %s.' % (str(symbol.name), |
| 432 # str(address_symbol[addr].name)) |
| 433 progress.collisions += 1 |
| 434 else: |
| 435 address_symbol[addr] = symbol |
| 436 |
| 437 progress_chunk = 100 |
| 438 if progress.count % progress_chunk == 0: |
| 439 time_now = time.time() |
| 440 time_spent = time_now - progress.time_last_output |
| 441 if time_spent > 1.0: |
| 442 # Only output at most once per second. |
| 443 progress.time_last_output = time_now |
| 444 chunk_size = progress.count - progress.count_last_output |
| 445 progress.count_last_output = progress.count |
| 446 if time_spent > 0: |
| 447 speed = chunk_size / time_spent |
| 448 else: |
| 449 speed = 0 |
| 450 progress_percent = (100.0 * (progress.count + progress.skip_count) / |
| 451 nm_output_lines_len) |
| 452 print('%.1f%%: Looked up %d symbols (%d collisions) - %.1f lookups/s.' % |
| 453 (progress_percent, progress.count, progress.collisions, speed)) |
| 454 |
| 455 symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary, |
| 456 map_address_symbol, |
| 457 max_concurrent_jobs=jobs) |
| 458 for line in nm_output_lines: |
| 459 match = sNmPattern.match(line) |
| 460 if match: |
| 461 location = match.group(5) |
| 462 if not location: |
| 463 addr = int(match.group(1), 16) |
| 464 size = int(match.group(2), 16) |
| 465 if addr in address_symbol: # Already looked up, shortcut ELFSymbolizer. |
| 466 map_address_symbol(address_symbol[addr], addr) |
| 467 continue |
| 468 elif size == 0: |
| 469 # Save time by not looking up empty symbols (do they even exist?) |
| 470 print('Empty symbol: ' + line) |
| 471 else: |
| 472 symbolizer.SymbolizeAsync(addr, addr) |
| 473 continue |
| 474 |
| 475 progress.skip_count += 1 |
| 476 |
| 477 symbolizer.Join() |
| 478 |
| 479 with open(outfile, 'w') as out: |
| 480 for line in nm_output_lines: |
| 481 match = sNmPattern.match(line) |
| 482 if match: |
| 483 location = match.group(5) |
| 484 if not location: |
| 485 addr = int(match.group(1), 16) |
| 486 symbol = address_symbol[addr] |
| 487 path = '??' |
| 488 if symbol.source_path is not None: |
| 489 path = symbol.source_path |
| 490 line_number = 0 |
| 491 if symbol.source_line is not None: |
| 492 line_number = symbol.source_line |
| 493 out.write('%s\t%s:%d\n' % (line, path, line_number)) |
| 494 continue |
| 495 |
| 496 out.write('%s\n' % line) |
| 497 |
| 498 print('%d symbols in the results.' % len(address_symbol)) |
| 499 |
| 500 |
| 501 def RunNm(binary, nm_binary): |
| 502 print('Starting nm') |
| 503 cmd = [nm_binary, '-C', '--print-size', binary] |
| 504 nm_process = subprocess.Popen(cmd, |
| 505 stdout=subprocess.PIPE, |
| 506 stderr=subprocess.PIPE) |
| 507 (process_output, err_output) = nm_process.communicate() |
| 508 |
| 509 if nm_process.returncode != 0: |
| 510 if err_output: |
| 511 raise Exception, err_output |
| 512 else: |
| 513 raise Exception, process_output |
| 514 |
| 515 print('Finished nm') |
| 516 return process_output |
| 517 |
| 518 |
| 519 def GetNmSymbols(nm_infile, outfile, library, jobs, verbose, |
| 520 addr2line_binary, nm_binary): |
| 521 if nm_infile is None: |
442 if outfile is None: | 522 if outfile is None: |
443 infile = tempfile.NamedTemporaryFile(delete=False).name | 523 outfile = tempfile.NamedTemporaryFile(delete=False).name |
444 else: | |
445 infile = outfile | |
446 | 524 |
447 if verbose: | 525 if verbose: |
448 print 'Running parallel addr2line, dumping symbols to ' + infile; | 526 print 'Running parallel addr2line, dumping symbols to ' + outfile |
449 RunParallelAddress2Line(outfile=infile, library=library, arch=arch, | 527 RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs) |
450 jobs=jobs, verbose=verbose) | 528 |
| 529 nm_infile = outfile |
| 530 |
451 elif verbose: | 531 elif verbose: |
452 print 'Using nm input from ' + infile | 532 print 'Using nm input from ' + nm_infile |
453 with file(infile, 'r') as infile: | 533 with file(nm_infile, 'r') as infile: |
454 return list(ParseNm(infile)) | 534 return list(binary_size_utils.ParseNm(infile)) |
| 535 |
| 536 |
| 537 def _find_in_system_path(binary): |
| 538 """Locate the full path to binary in the system path or return None |
| 539 if not found.""" |
| 540 system_path = os.environ["PATH"].split(os.pathsep) |
| 541 for path in system_path: |
| 542 binary_path = os.path.join(path, binary) |
| 543 if os.path.isfile(binary_path): |
| 544 return binary_path |
| 545 return None |
455 | 546 |
456 | 547 |
457 def main(): | 548 def main(): |
458 usage="""%prog [options] | 549 usage = """%prog [options] |
459 | 550 |
460 Runs a spatial analysis on a given library, looking up the source locations | 551 Runs a spatial analysis on a given library, looking up the source locations |
461 of its symbols and calculating how much space each directory, source file, | 552 of its symbols and calculating how much space each directory, source file, |
462 and so on is taking. The result is a report that can be used to pinpoint | 553 and so on is taking. The result is a report that can be used to pinpoint |
463 sources of large portions of the binary, etceteras. | 554 sources of large portions of the binary, etceteras. |
464 | 555 |
465 Under normal circumstances, you only need to pass two arguments, thusly: | 556 Under normal circumstances, you only need to pass two arguments, thusly: |
466 | 557 |
467 %prog --library /path/to/library --destdir /path/to/output | 558 %prog --library /path/to/library --destdir /path/to/output |
468 | 559 |
(...skipping 10 matching lines...) Expand all Loading... |
479 'present in the file; i.e., no addr2line symbol lookups ' | 570 'present in the file; i.e., no addr2line symbol lookups ' |
480 'will be performed when this option is specified. ' | 571 'will be performed when this option is specified. ' |
481 'Mutually exclusive with --library.') | 572 'Mutually exclusive with --library.') |
482 parser.add_option('--destdir', metavar='PATH', | 573 parser.add_option('--destdir', metavar='PATH', |
483 help='write output to the specified directory. An HTML ' | 574 help='write output to the specified directory. An HTML ' |
484 'report is generated here along with supporting files; ' | 575 'report is generated here along with supporting files; ' |
485 'any existing report will be overwritten.') | 576 'any existing report will be overwritten.') |
486 parser.add_option('--library', metavar='PATH', | 577 parser.add_option('--library', metavar='PATH', |
487 help='if specified, process symbols in the library at ' | 578 help='if specified, process symbols in the library at ' |
488 'the specified path. Mutually exclusive with --nm-in.') | 579 'the specified path. Mutually exclusive with --nm-in.') |
489 parser.add_option('--arch', | 580 parser.add_option('--nm-binary', |
490 help='the architecture that the library is targeted to. ' | 581 help='use the specified nm binary to analyze library. ' |
491 'Determines which nm/addr2line binaries are used. When ' | 582 'This is to be used when the nm in the path is not for ' |
492 '\'host-native\' is chosen, the program will use whichever ' | 583 'the right architecture or of the right version.') |
493 'nm/addr2line binaries are on the PATH. This is ' | 584 parser.add_option('--addr2line-binary', |
494 'appropriate when you are analyzing a binary by and for ' | 585 help='use the specified addr2line binary to analyze ' |
495 'your computer. ' | 586 'library. This is to be used when the addr2line in ' |
496 'This argument is only valid when using --library. ' | 587 'the path is not for the right architecture or ' |
497 'Default is \'host-native\'.', | 588 'of the right version.') |
498 choices=['host-native', 'android-arm', | |
499 'android-mips', 'android-x86'],) | |
500 parser.add_option('--jobs', | 589 parser.add_option('--jobs', |
501 help='number of jobs to use for the parallel ' | 590 help='number of jobs to use for the parallel ' |
502 'addr2line processing pool; defaults to 1. More ' | 591 'addr2line processing pool; defaults to 1. More ' |
503 'jobs greatly improve throughput but eat RAM like ' | 592 'jobs greatly improve throughput but eat RAM like ' |
504 'popcorn, and take several gigabytes each. Start low ' | 593 'popcorn, and take several gigabytes each. Start low ' |
505 'and ramp this number up until your machine begins to ' | 594 'and ramp this number up until your machine begins to ' |
506 'struggle with RAM. ' | 595 'struggle with RAM. ' |
507 'This argument is only valid when using --library.') | 596 'This argument is only valid when using --library.') |
508 parser.add_option('-v', dest='verbose', action='store_true', | 597 parser.add_option('-v', dest='verbose', action='store_true', |
509 help='be verbose, printing lots of status information.') | 598 help='be verbose, printing lots of status information.') |
510 parser.add_option('--nm-out', metavar='PATH', | 599 parser.add_option('--nm-out', metavar='PATH', |
511 help='keep the nm output file, and store it at the ' | 600 help='keep the nm output file, and store it at the ' |
512 'specified path. This is useful if you want to see the ' | 601 'specified path. This is useful if you want to see the ' |
513 'fully processed nm output after the symbols have been ' | 602 'fully processed nm output after the symbols have been ' |
514 'mapped to source locations. By default, a tempfile is ' | 603 'mapped to source locations. By default, a tempfile is ' |
515 'used and is deleted when the program terminates.' | 604 'used and is deleted when the program terminates.' |
516 'This argument is only valid when using --library.') | 605 'This argument is only valid when using --library.') |
517 parser.add_option('--legacy', action='store_true', | 606 parser.add_option('--legacy', action='store_true', |
518 help='emit legacy binary size report instead of modern') | 607 help='emit legacy binary size report instead of modern') |
519 opts, args = parser.parse_args() | 608 opts, _args = parser.parse_args() |
520 | 609 |
521 if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in): | 610 if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in): |
522 parser.error('exactly one of --library or --nm-in is required') | 611 parser.error('exactly one of --library or --nm-in is required') |
523 if (opts.nm_in): | 612 if (opts.nm_in): |
524 if opts.jobs: | 613 if opts.jobs: |
525 print >> sys.stderr, ('WARNING: --jobs has no effect ' | 614 print >> sys.stderr, ('WARNING: --jobs has no effect ' |
526 'when used with --nm-in') | 615 'when used with --nm-in') |
527 if opts.arch: | |
528 print >> sys.stderr, ('WARNING: --arch has no effect ' | |
529 'when used with --nm-in') | |
530 if not opts.destdir: | 616 if not opts.destdir: |
531 parser.error('--destdir is required argument') | 617 parser.error('--destdir is required argument') |
532 if not opts.jobs: | 618 if not opts.jobs: |
533 opts.jobs = '1' | 619 # Use the number of processors but cap between 2 and 4 since raw |
534 if not opts.arch: | 620 # CPU power isn't the limiting factor. It's I/O limited, memory |
535 opts.arch = 'host-native' | 621 # bus limited and available-memory-limited. Too many processes and |
| 622 # the computer will run out of memory and it will be slow. |
| 623 opts.jobs = max(2, min(4, str(multiprocessing.cpu_count()))) |
536 | 624 |
537 symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, opts.arch, | 625 if opts.addr2line_binary: |
538 opts.jobs, opts.verbose is True) | 626 assert os.path.isfile(opts.addr2line_binary) |
| 627 addr2line_binary = opts.addr2line_binary |
| 628 else: |
| 629 addr2line_binary = _find_in_system_path('addr2line') |
| 630 assert addr2line_binary, 'Unable to find addr2line in the path. '\ |
| 631 'Use --addr2line-binary to specify location.' |
| 632 |
| 633 if opts.nm_binary: |
| 634 assert os.path.isfile(opts.nm_binary) |
| 635 nm_binary = opts.nm_binary |
| 636 else: |
| 637 nm_binary = _find_in_system_path('nm') |
| 638 assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\ |
| 639 'to specify location.' |
| 640 |
| 641 print('nm: %s' % nm_binary) |
| 642 print('addr2line: %s' % addr2line_binary) |
| 643 |
| 644 symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library, |
| 645 opts.jobs, opts.verbose is True, |
| 646 addr2line_binary, nm_binary) |
539 if not os.path.exists(opts.destdir): | 647 if not os.path.exists(opts.destdir): |
540 os.makedirs(opts.destdir, 0755) | 648 os.makedirs(opts.destdir, 0755) |
541 | 649 |
542 | 650 |
543 if opts.legacy: # legacy report | 651 if opts.legacy: # legacy report |
544 DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js')) | 652 DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js')) |
545 DumpLargestSymbols(symbols, | 653 DumpLargestSymbols(symbols, |
546 os.path.join(opts.destdir, 'largest-symbols.js'), 100) | 654 os.path.join(opts.destdir, 'largest-symbols.js'), 100) |
547 DumpLargestSources(symbols, | 655 DumpLargestSources(symbols, |
548 os.path.join(opts.destdir, 'largest-sources.js'), 100) | 656 os.path.join(opts.destdir, 'largest-sources.js'), 100) |
549 DumpLargestVTables(symbols, | 657 DumpLargestVTables(symbols, |
550 os.path.join(opts.destdir, 'largest-vtables.js'), 100) | 658 os.path.join(opts.destdir, 'largest-vtables.js'), 100) |
551 treemap_out = os.path.join(opts.destdir, 'webtreemap') | 659 treemap_out = os.path.join(opts.destdir, 'webtreemap') |
552 if not os.path.exists(treemap_out): | 660 if not os.path.exists(treemap_out): |
553 os.makedirs(treemap_out, 0755) | 661 os.makedirs(treemap_out, 0755) |
554 treemap_src = os.path.join('third_party', 'webtreemap', 'src') | 662 treemap_src = os.path.join('third_party', 'webtreemap', 'src') |
555 shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out) | 663 shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out) |
556 shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out) | 664 shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out) |
557 shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out) | 665 shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out) |
558 shutil.copy(os.path.join('tools', 'binary_size', 'legacy_template', | 666 shutil.copy(os.path.join('tools', 'binary_size', 'legacy_template', |
559 'index.html'), opts.destdir) | 667 'index.html'), opts.destdir) |
560 else: # modern report | 668 else: # modern report |
561 DumpCompactTree(symbols, os.path.join(opts.destdir, 'data.js')) | 669 DumpCompactTree(symbols, os.path.join(opts.destdir, 'data.js')) |
562 d3_out = os.path.join(opts.destdir, 'd3') | 670 d3_out = os.path.join(opts.destdir, 'd3') |
563 if not os.path.exists(d3_out): | 671 if not os.path.exists(d3_out): |
564 os.makedirs(d3_out, 0755) | 672 os.makedirs(d3_out, 0755) |
565 d3_src = os.path.join('third_party', 'd3', 'src') | 673 d3_src = os.path.join(os.path.dirname(__file__), |
566 template_src = os.path.join('tools', 'binary_size', | 674 '..', |
| 675 '..', |
| 676 'third_party', 'd3', 'src') |
| 677 template_src = os.path.join(os.path.dirname(__file__), |
567 'template') | 678 'template') |
568 shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out) | 679 shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out) |
569 shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out) | 680 shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out) |
| 681 print('Copying index.html') |
570 shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir) | 682 shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir) |
571 shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir) | 683 shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir) |
572 | 684 |
573 if opts.verbose: | 685 if opts.verbose: |
574 print 'Report saved to ' + opts.destdir + '/index.html' | 686 print 'Report saved to ' + opts.destdir + '/index.html' |
575 | 687 |
576 | 688 |
# Standard script entry point: run the analysis and use main()'s result
# as the process exit status.
if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |