tools/binary_size/map2size.py - Issue 2769933002: V2 of //tools/binary_size rewrite (diffs).

Side by Side Diff: tools/binary_size/map2size.py

Issue 2769933002: V2 of //tools/binary_size rewrite (diffs). (Closed)

Patch Set: __add__, __sub__ Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright 2017 The Chromium Authors. All rights reserved.	2 # Copyright 2017 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """Main Python API for analyzing binary size."""	6 """Main Python API for analyzing binary size."""

7	7

8 import argparse	8 import argparse

9 import ast

10 import distutils.spawn	9 import distutils.spawn

11 import gzip

12 import logging	10 import logging

13 import os	11 import os

14 import re

15 import subprocess	12 import subprocess

	13 import sys

16	14

	15 import file_format

17 import function_signature	16 import function_signature

18 import helpers	17 import helpers

19 import mapfileparser	18 import linker_map_parser

20 import symbols	19 import models

21

22

23 # File format version for .size files.

24 _SERIALIZATION_VERSION = 1

25

26

27 def _OpenMaybeGz(path, mode=None):

28 """Calls `gzip.open()` if \|path\| ends in ".gz", otherwise calls `open()`."""

29 if path.endswith('.gz'):

30 if mode and 'w' in mode:

31 return gzip.GzipFile(path, mode, 1)

32 return gzip.open(path, mode)

33 return open(path, mode or 'r')

34

35

36 def _EndsWithMaybeGz(path, suffix):

37 return path.endswith(suffix) or path.endswith(suffix + '.gz')

38	20

39	21

40 def _IterLines(s):	22 def _IterLines(s):

41 prev_idx = -1	23 prev_idx = -1

42 while True:	24 while True:

43 idx = s.find('\n', prev_idx + 1)	25 idx = s.find('\n', prev_idx + 1)

44 if idx == -1:	26 if idx == -1:

45 return	27 return

46 yield s[prev_idx + 1:idx]	28 yield s[prev_idx + 1:idx]

47 prev_idx = idx	29 prev_idx = idx

48	30

49	31

50 def _UnmangleRemainingSymbols(symbol_group, tool_prefix):	32 def _UnmangleRemainingSymbols(symbol_group, tool_prefix):

51 """Uses c++filt to unmangle any symbols that need it."""	33 """Uses c++filt to unmangle any symbols that need it."""

52 to_process = [s for s in symbol_group if s.name and s.name.startswith('_Z')]	34 to_process = [s for s in symbol_group if s.name.startswith('_Z')]

53 if not to_process:	35 if not to_process:

54 return	36 return

55	37

56 logging.info('Unmangling %d names', len(to_process))	38 logging.info('Unmangling %d names', len(to_process))

57 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,	39 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,

58 stdout=subprocess.PIPE)	40 stdout=subprocess.PIPE)

59 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]	41 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]

60 assert proc.returncode == 0	42 assert proc.returncode == 0

61	43

62 for i, line in enumerate(_IterLines(stdout)):	44 for i, line in enumerate(_IterLines(stdout)):

63 to_process[i].name = line	45 to_process[i].name = line

64	46

65	47

66 def _NormalizeNames(symbol_group):	48 def _NormalizeNames(symbol_group):

67 """Ensures that all names are formatted in a useful way.	49 """Ensures that all names are formatted in a useful way.

68	50

69 This includes:	51 This includes:

70 - Assigning of \|function_signature\| (for functions).	52 - Assigning of \|function_signature\| (for functions).

71 - Stripping of return types in \|function_signature\| and \|name\|.	53 - Stripping of return types in \|function_signature\| and \|name\|.

72 - Stripping parameters from \|name\|.	54 - Stripping parameters from \|name\|.

73 - Moving "vtable for" and the like to be suffixes rather than prefixes.	55 - Moving "vtable for" and the like to be suffixes rather than prefixes.

74 """	56 """

75 found_prefixes = set()	57 found_prefixes = set()

76 for symbol in symbol_group:	58 for symbol in symbol_group:

77 if not symbol.name or symbol.name.startswith('*'):	59 if symbol.name.startswith('*'):

78 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this	60 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this

79 # can happen.	61 # can happen.

80 continue	62 continue

81	63

82 # E.g.: vtable for FOO	64 # E.g.: vtable for FOO

83 idx = symbol.name.find(' for ', 0, 30)	65 idx = symbol.name.find(' for ', 0, 30)

84 if idx != -1:	66 if idx != -1:

85 found_prefixes.add(symbol.name[:idx + 4])	67 found_prefixes.add(symbol.name[:idx + 4])

86 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'	68 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'

87	69

(...skipping 10 matching lines...) Expand all Loading...
98	80

99 # Remove anonymous namespaces (they just harm clustering).	81 # Remove anonymous namespaces (they just harm clustering).

100 symbol.name = symbol.name.replace('(anonymous namespace)::', '')	82 symbol.name = symbol.name.replace('(anonymous namespace)::', '')

101	83

102 logging.debug('Found name prefixes of: %r', found_prefixes)	84 logging.debug('Found name prefixes of: %r', found_prefixes)

103	85

104	86

105 def _NormalizeObjectPaths(symbol_group):	87 def _NormalizeObjectPaths(symbol_group):

106 """Ensures that all paths are formatted in a useful way."""	88 """Ensures that all paths are formatted in a useful way."""

107 for symbol in symbol_group:	89 for symbol in symbol_group:

108 if symbol.path:	90 if symbol.path.startswith('obj/'):

109 if symbol.path.startswith('obj/'):	91 # Convert obj/third_party/... -> third_party/...

110 # Convert obj/third_party/... -> third_party/...	92 symbol.path = symbol.path[4:]

111 symbol.path = symbol.path[4:]	93 elif symbol.path.startswith('../../'):

112 elif symbol.path.startswith('../../'):	94 # Convert ../../third_party/... -> third_party/...

113 # Convert ../../third_party/... -> third_party/...	95 symbol.path = symbol.path[6:]

114 symbol.path = symbol.path[6:]	96 if symbol.path.endswith(')'):

115 if symbol.path.endswith(')'):	97 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o

116 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o	98 start_idx = symbol.path.index('(')

117 start_idx = symbol.path.index('(')	99 paren_path = symbol.path[start_idx + 1:-1]

118 paren_path = symbol.path[start_idx + 1:-1]	100 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path

119 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path

120	101

121	102

122 def _RemoveDuplicatesAndCalculatePadding(symbol_group):	103 def _RemoveDuplicatesAndCalculatePadding(symbol_group):

123 """Removes symbols at the same address and calculates the \|padding\| field.	104 """Removes symbols at the same address and calculates the \|padding\| field.

124	105

125 Symbols must already be sorted by \|address\|.	106 Symbols must already be sorted by \|address\|.

126 """	107 """

127 i = 0	108 i = 0

128 to_remove = set()	109 to_remove = set()

129 all_symbols = symbol_group.symbols	110 all_symbols = symbol_group.symbols

(...skipping 29 matching lines...) Expand all Loading...
159 symbol.padding = padding	140 symbol.padding = padding

160 symbol.size += padding	141 symbol.size += padding

161 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol	142 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol

162 # Map files have no overlaps, so worth special-casing the no-op case.	143 # Map files have no overlaps, so worth special-casing the no-op case.

163 if to_remove:	144 if to_remove:

164 logging.info('Removing %d overlapping symbols', len(to_remove))	145 logging.info('Removing %d overlapping symbols', len(to_remove))

165 symbol_group.symbols = (	146 symbol_group.symbols = (

166 [s for i, s in enumerate(all_symbols) if i not in to_remove])	147 [s for i, s in enumerate(all_symbols) if i not in to_remove])

167	148

168	149

169 def _PrintStats(result, write_func):	150 def PrintStats(size_info, write_func):

170 """Prints out how accurate \|result\| is."""	151 """Prints out how accurate \|size_info\| is."""

171 for section in symbols.SECTION_TO_SECTION_NAME:	152 for section in models.SECTION_TO_SECTION_NAME:

172 if section == 'd':	153 if section == 'd':

173 expected_size = sum(v for k, v in result.section_sizes.iteritems()	154 expected_size = sum(v for k, v in size_info.section_sizes.iteritems()

174 if k.startswith('.data'))	155 if k.startswith('.data'))

175 else:	156 else:

176 expected_size = result.section_sizes[	157 expected_size = size_info.section_sizes[

177 symbols.SECTION_TO_SECTION_NAME[section]]	158 models.SECTION_TO_SECTION_NAME[section]]

178	159

179 def one_stat(group):	160 def one_stat(group):

180 template = ('Section %s has %.1f%% of %d bytes accounted for from '	161 template = ('Section %s has %.1f%% of %d bytes accounted for from '

181 '%d symbols. %d bytes are unaccounted for. Padding '	162 '%d symbols. %d bytes are unaccounted for. Padding '

182 'accounts for %d bytes\n')	163 'accounts for %d bytes\n')

183 actual_size = group.size	164 actual_size = group.size_without_padding if group.IsBss() else group.size

184 count = len(group)	165 count = len(group)

185 padding = group.padding	166 padding = group.padding

186 size_percent = 100.0 * actual_size / expected_size	167 size_percent = 100.0 * actual_size / expected_size

187 return (template % (section, size_percent, actual_size, count,	168 return (template % (section, size_percent, actual_size, count,

188 expected_size - actual_size, padding))	169 expected_size - actual_size, padding))

189	170

190 in_section = result.symbol_group.WhereInSection(section)	171 in_section = size_info.symbols.WhereInSection(section)

191 write_func(one_stat(in_section))	172 write_func(one_stat(in_section))

192	173

193 star_syms = in_section.WhereNameMatches(r'^\*')	174 star_syms = in_section.WhereNameMatches(r'^\*')

194 attributed_syms = star_syms.Inverted().WhereHasAnyAttribution()	175 attributed_syms = star_syms.Inverted().WhereHasAnyAttribution()

195 anonymous_syms = attributed_syms.Inverted()	176 anonymous_syms = attributed_syms.Inverted()

196 if star_syms or anonymous_syms:	177 if star_syms or anonymous_syms:

197 missing_size = star_syms.size + anonymous_syms.size	178 missing_size = star_syms.size + anonymous_syms.size

198 write_func(('+ Without %d merge sections and %d anonymous entries ('	179 write_func(('+ Without %d merge sections and %d anonymous entries ('

199 'accounting for %d bytes):\n') % (	180 'accounting for %d bytes):\n') % (

200 len(star_syms), len(anonymous_syms), missing_size))	181 len(star_syms), len(anonymous_syms), missing_size))

201 write_func('+ ' + one_stat(attributed_syms))	182 write_func('+ ' + one_stat(attributed_syms))

202	183

203	184

204 def _SaveResult(result, file_obj):

205 """Saves the result to the given file object."""

206 # Store one bucket per line.

207 file_obj.write('%d\n' % _SERIALIZATION_VERSION)

208 file_obj.write('%r\n' % result.section_sizes)

209 file_obj.write('%d\n' % len(result.symbol_group))

210 prev_section_name = None

211 # Store symbol fields as tab-separated.

212 # Store only non-derived fields.

213 for symbol in result.symbol_group:

214 if symbol.section_name != prev_section_name:

215 file_obj.write('%s\n' % symbol.section_name)

216 prev_section_name = symbol.section_name

217 # Don't write padding nor name since these are derived values.

218 file_obj.write('%x\t%x\t%s\t%s\n' % (

219 symbol.address, symbol.size_without_padding,

220 symbol.function_signature or symbol.name or '',

221 symbol.path or ''))

222

223

224 def _LoadResults(file_obj):

225 """Loads a result from the given file."""

226 lines = iter(file_obj)

227 actual_version = int(next(lines))

228 assert actual_version == _SERIALIZATION_VERSION, (

229 'Version mismatch. Need to write some upgrade code.')

230

231 section_sizes = ast.literal_eval(next(lines))

232 num_syms = int(next(lines))

233 symbol_list = [None] * num_syms

234 section_name = None

235 for i in xrange(num_syms):

236 line = next(lines)[:-1]

237 if '\t' not in line:

238 section_name = intern(line)

239 line = next(lines)[:-1]

240 new_sym = symbols.Symbol.__new__(symbols.Symbol)

241 parts = line.split('\t')

242 new_sym.section_name = section_name

243 new_sym.address = int(parts[0], 16)

244 new_sym.size = int(parts[1], 16)

245 new_sym.name = parts[2] or None

246 new_sym.path = parts[3] or None

247 new_sym.padding = 0 # Derived

248 new_sym.function_signature = None # Derived

249 symbol_list[i] = new_sym

250

251 # Recompute derived values (padding and function names).

252 result = mapfileparser.ParseResult(symbol_list, section_sizes)

253 logging.info('Calculating padding')

254 _RemoveDuplicatesAndCalculatePadding(result.symbol_group)

255 logging.info('Deriving signatures')

256 # Re-parse out function parameters.

257 _NormalizeNames(result.symbol_group.WhereInSection('t'))

258 return result

259

260

261 def AddOptions(parser):	185 def AddOptions(parser):

262 parser.add_argument('input_file',

263 help='Path to input file. Can be a linker .map file, an '

264 'unstripped binary, or a saved result from '

265 'analyze.py')

266 parser.add_argument('--tool-prefix', default='',	186 parser.add_argument('--tool-prefix', default='',

267 help='Path prefix for c++filt.')	187 help='Path prefix for c++filt.')

268 parser.add_argument('--output-directory',	188 parser.add_argument('--output-directory',

269 help='Path to the root build directory.')	189 help='Path to the root build directory.')

270	190

271	191

272 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):	192 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):

273 """Calls Analyze with values from args."""	193 """Calls Analyze with values from args."""

274 if not output_directory:	194 if not output_directory:

275 abs_path = os.path.abspath(input_file)	195 abs_path = os.path.abspath(input_file)

(...skipping 16 matching lines...) Expand all Loading...
292 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')	212 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')

293 else:	213 else:

294 full_path = tool_prefix + 'c++filt'	214 full_path = tool_prefix + 'c++filt'

295	215

296 if not os.path.isfile(full_path):	216 if not os.path.isfile(full_path):

297 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)	217 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)

298 logging.info('Using --tool-prefix=%s', tool_prefix)	218 logging.info('Using --tool-prefix=%s', tool_prefix)

299 return tool_prefix	219 return tool_prefix

300	220

301	221

302 def AnalyzeWithArgs(args):	222 def AnalyzeWithArgs(args, input_path):

303 return Analyze(args.input_file, args.output_directory, args.tool_prefix)	223 return Analyze(input_path, args.output_directory, args.tool_prefix)

304	224

305	225

306 def Analyze(path, output_directory=None, tool_prefix=''):	226 def Analyze(path, output_directory=None, tool_prefix=''):

307 if _EndsWithMaybeGz(path, '.size'):	227 if file_format.EndsWithMaybeGz(path, '.size'):

308 logging.info('Loading cached results.')	228 logging.debug('Loading from .map file.')

309 with _OpenMaybeGz(path) as f:	229 size_info = file_format.LoadSizeInfo(path)

310 result = _LoadResults(f)	230 # Recompute derived values (padding and function names).

311 elif not _EndsWithMaybeGz(path, '.map'):	231 logging.info('Calculating padding')

	232 _RemoveDuplicatesAndCalculatePadding(size_info.symbols)

	233 logging.info('Deriving signatures')

	234 # Re-parse out function parameters.

	235 _NormalizeNames(size_info.symbols.WhereInSection('t'))

	236 return size_info

	237 elif not file_format.EndsWithMaybeGz(path, '.map'):

312 raise Exception('Expected input to be a .map or a .size')	238 raise Exception('Expected input to be a .map or a .size')

313 else:	239 else:

314 # Verify tool_prefix early.	240 # Verify tool_prefix early.

315 tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory)	241 tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory)

316	242

317 with _OpenMaybeGz(path) as map_file:	243 with file_format.OpenMaybeGz(path) as map_file:

318 result = mapfileparser.MapFileParser().Parse(map_file)	244 size_info = linker_map_parser.MapFileParser().Parse(map_file)

319	245

320 # Map file for some reason doesn't unmangle all names.	246 # Map file for some reason doesn't unmangle all names.

321 logging.info('Calculating padding')	247 logging.info('Calculating padding')

322 _RemoveDuplicatesAndCalculatePadding(result.symbol_group)	248 _RemoveDuplicatesAndCalculatePadding(size_info.symbols)

323 # Unmangle prints its own log statement.	249 # Unmangle prints its own log statement.

324 _UnmangleRemainingSymbols(result.symbol_group, tool_prefix)	250 _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)

325 # Resolve paths prints its own log statement.	251 # Resolve paths prints its own log statement.

326 logging.info('Normalizing names')	252 logging.info('Normalizing names')

327 _NormalizeNames(result.symbol_group)	253 _NormalizeNames(size_info.symbols)

328 logging.info('Normalizing paths')	254 logging.info('Normalizing paths')

329 _NormalizeObjectPaths(result.symbol_group)	255 _NormalizeObjectPaths(size_info.symbols)

330	256

331 if logging.getLogger().isEnabledFor(logging.INFO):	257 if logging.getLogger().isEnabledFor(logging.INFO):

332 _PrintStats(result, lambda l: logging.info(l.rstrip()))	258 PrintStats(size_info, lambda l: logging.info(l.rstrip()))

333 logging.info('Finished analyzing %d symbols', len(result.symbol_group))	259 logging.info('Finished analyzing %d symbols', len(size_info.symbols))

334 return result	260 return size_info

335	261

336	262

337 def main():	263 def main(argv):

338 parser = argparse.ArgumentParser()	264 parser = argparse.ArgumentParser(argv)

339 parser.add_argument('--output', required=True,	265 parser.add_argument('input_file', help='Path to input .map file.')

340 help='Path to store results. Must end in .size or '	266 parser.add_argument('output_file', help='Path to output .size(.gz) file.')

341 '.size.gz')

342 AddOptions(parser)	267 AddOptions(parser)

343 args = helpers.AddCommonOptionsAndParseArgs(parser)	268 args = helpers.AddCommonOptionsAndParseArgs(parser, argv)

344 if not _EndsWithMaybeGz(args.output, '.size'):	269 if not file_format.EndsWithMaybeGz(args.output_file, '.size'):

345 raise Exception('--output must end with .size or .size.gz')	270 parser.error('output_file must end with .size or .size.gz')

346	271

347 result = AnalyzeWithArgs(args)	272 size_info = AnalyzeWithArgs(args, args.input_file)

348 logging.info('Saving result to %s', args.output)	273 logging.info('Saving result to %s', args.output_file)

349 with _OpenMaybeGz(args.output, 'wb') as f:	274 file_format.SaveSizeInfo(size_info, args.output_file)

350 _SaveResult(result, f)

351	275

352 logging.info('Done')	276 logging.info('Done')

353	277

354	278

355 if __name__ == '__main__':	279 if __name__ == '__main__':

356 main()	280 sys.exit(main(sys.argv))

OLD	NEW

« tools/binary_size/linker_map_parser.py ('K') | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/mapfileparser.py » ('j') | tools/binary_size/models.py » ('J')