Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: tools/binary_size/map2size.py

Issue 2785483002: Reland of V2 of //tools/binary_size rewrite (diffs). (Closed)
Patch Set: add missing name= Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/mapfileparser.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright 2017 The Chromium Authors. All rights reserved. 2 # Copyright 2017 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Main Python API for analyzing binary size.""" 6 """Main Python API for analyzing binary size."""
7 7
8 import argparse 8 import argparse
9 import ast
10 import distutils.spawn 9 import distutils.spawn
11 import gzip
12 import logging 10 import logging
13 import os 11 import os
14 import re
15 import subprocess 12 import subprocess
13 import sys
16 14
15 import describe
16 import file_format
17 import function_signature 17 import function_signature
18 import helpers 18 import helpers
19 import mapfileparser 19 import linker_map_parser
20 import symbols 20 import models
21
22
23 # File format version for .size files.
24 _SERIALIZATION_VERSION = 1
25
26
27 def _OpenMaybeGz(path, mode=None):
28 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
29 if path.endswith('.gz'):
30 if mode and 'w' in mode:
31 return gzip.GzipFile(path, mode, 1)
32 return gzip.open(path, mode)
33 return open(path, mode or 'r')
34
35
36 def _EndsWithMaybeGz(path, suffix):
37 return path.endswith(suffix) or path.endswith(suffix + '.gz')
38 21
39 22
40 def _IterLines(s): 23 def _IterLines(s):
41 prev_idx = -1 24 prev_idx = -1
42 while True: 25 while True:
43 idx = s.find('\n', prev_idx + 1) 26 idx = s.find('\n', prev_idx + 1)
44 if idx == -1: 27 if idx == -1:
45 return 28 return
46 yield s[prev_idx + 1:idx] 29 yield s[prev_idx + 1:idx]
47 prev_idx = idx 30 prev_idx = idx
48 31
49 32
def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
  """Uses c++filt to unmangle any symbols that need it."""
  mangled = [s for s in symbol_group if s.name.startswith('_Z')]
  if not mangled:
    return

  logging.info('Unmangling %d names', len(mangled))
  proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE)
  stdout = proc.communicate('\n'.join(s.name for s in mangled))[0]
  assert proc.returncode == 0

  # c++filt emits exactly one demangled name per input line, in order.
  for i, line in enumerate(_IterLines(stdout)):
    mangled[i].name = line
64 47
65 48
66 def _NormalizeNames(symbol_group): 49 def _NormalizeNames(symbol_group):
67 """Ensures that all names are formatted in a useful way. 50 """Ensures that all names are formatted in a useful way.
68 51
69 This includes: 52 This includes:
70 - Assigning of |function_signature| (for functions). 53 - Assigning of |function_signature| (for functions).
71 - Stripping of return types in |function_signature| and |name|. 54 - Stripping of return types in |function_signature| and |name|.
72 - Stripping parameters from |name|. 55 - Stripping parameters from |name|.
73 - Moving "vtable for" and the like to be suffixes rather than prefixes. 56 - Moving "vtable for" and the like to be suffixes rather than prefixes.
74 """ 57 """
75 found_prefixes = set() 58 found_prefixes = set()
76 for symbol in symbol_group: 59 for symbol in symbol_group:
77 if not symbol.name or symbol.name.startswith('*'): 60 if symbol.name.startswith('*'):
78 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this 61 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this
79 # can happen. 62 # can happen.
80 continue 63 continue
81 64
82 # E.g.: vtable for FOO 65 # E.g.: vtable for FOO
83 idx = symbol.name.find(' for ', 0, 30) 66 idx = symbol.name.find(' for ', 0, 30)
84 if idx != -1: 67 if idx != -1:
85 found_prefixes.add(symbol.name[:idx + 4]) 68 found_prefixes.add(symbol.name[:idx + 4])
86 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']' 69 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'
87 70
(...skipping 10 matching lines...) Expand all
98 81
99 # Remove anonymous namespaces (they just harm clustering). 82 # Remove anonymous namespaces (they just harm clustering).
100 symbol.name = symbol.name.replace('(anonymous namespace)::', '') 83 symbol.name = symbol.name.replace('(anonymous namespace)::', '')
101 84
102 logging.debug('Found name prefixes of: %r', found_prefixes) 85 logging.debug('Found name prefixes of: %r', found_prefixes)
103 86
104 87
105 def _NormalizeObjectPaths(symbol_group): 88 def _NormalizeObjectPaths(symbol_group):
106 """Ensures that all paths are formatted in a useful way.""" 89 """Ensures that all paths are formatted in a useful way."""
107 for symbol in symbol_group: 90 for symbol in symbol_group:
108 if symbol.path: 91 if symbol.path.startswith('obj/'):
109 if symbol.path.startswith('obj/'): 92 # Convert obj/third_party/... -> third_party/...
110 # Convert obj/third_party/... -> third_party/... 93 symbol.path = symbol.path[4:]
111 symbol.path = symbol.path[4:] 94 elif symbol.path.startswith('../../'):
112 elif symbol.path.startswith('../../'): 95 # Convert ../../third_party/... -> third_party/...
113 # Convert ../../third_party/... -> third_party/... 96 symbol.path = symbol.path[6:]
114 symbol.path = symbol.path[6:] 97 if symbol.path.endswith(')'):
115 if symbol.path.endswith(')'): 98 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o
116 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o 99 start_idx = symbol.path.index('(')
117 start_idx = symbol.path.index('(') 100 paren_path = symbol.path[start_idx + 1:-1]
118 paren_path = symbol.path[start_idx + 1:-1] 101 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path
119 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path
120 102
121 103
122 def _RemoveDuplicatesAndCalculatePadding(symbol_group): 104 def _RemoveDuplicatesAndCalculatePadding(symbol_group):
123 """Removes symbols at the same address and calculates the |padding| field. 105 """Removes symbols at the same address and calculates the |padding| field.
124 106
125 Symbols must already be sorted by |address|. 107 Symbols must already be sorted by |address|.
126 """ 108 """
127 i = 0 109 i = 0
128 to_remove = set() 110 to_remove = set()
129 all_symbols = symbol_group.symbols 111 all_symbols = symbol_group.symbols
130 for i in xrange(len(all_symbols)): 112 for i in xrange(len(all_symbols)):
131 prev_symbol = all_symbols[i - 1] 113 prev_symbol = all_symbols[i - 1]
132 symbol = all_symbols[i] 114 symbol = all_symbols[i]
133 if prev_symbol.section_name is not symbol.section_name: 115 if prev_symbol.section_name != symbol.section_name:
134 continue 116 continue
135 if symbol.address > 0 and prev_symbol.address > 0: 117 if symbol.address > 0 and prev_symbol.address > 0:
136 # Fold symbols that are at the same address (happens in nm output). 118 # Fold symbols that are at the same address (happens in nm output).
137 if symbol.address == prev_symbol.address: 119 if symbol.address == prev_symbol.address:
138 symbol.size = max(prev_symbol.size, symbol.size) 120 symbol.size = max(prev_symbol.size, symbol.size)
139 to_remove.add(i) 121 to_remove.add(i)
140 continue 122 continue
141 # Even with symbols at the same address removed, overlaps can still 123 # Even with symbols at the same address removed, overlaps can still
142 # happen. In this case, padding will be negative (and this is fine). 124 # happen. In this case, padding will be negative (and this is fine).
143 padding = symbol.address - prev_symbol.end_address 125 padding = symbol.address - prev_symbol.end_address
(...skipping 15 matching lines...) Expand all
159 symbol.padding = padding 141 symbol.padding = padding
160 symbol.size += padding 142 symbol.size += padding
161 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol 143 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol
162 # Map files have no overlaps, so worth special-casing the no-op case. 144 # Map files have no overlaps, so worth special-casing the no-op case.
163 if to_remove: 145 if to_remove:
164 logging.info('Removing %d overlapping symbols', len(to_remove)) 146 logging.info('Removing %d overlapping symbols', len(to_remove))
165 symbol_group.symbols = ( 147 symbol_group.symbols = (
166 [s for i, s in enumerate(all_symbols) if i not in to_remove]) 148 [s for i, s in enumerate(all_symbols) if i not in to_remove])
167 149
168 150
def _PrintStats(result, write_func):
  """Writes a coverage summary of |result| (one stat block per section).

  For each known section, reports how many of the section's bytes are
  accounted for by symbols, then repeats the stat with merge ('*') and
  anonymous symbols excluded when any exist.
  """
  for section in symbols.SECTION_TO_SECTION_NAME:
    if section == 'd':
      # The 'd' bucket aggregates every .data* section.
      expected_size = sum(v for k, v in result.section_sizes.iteritems()
                          if k.startswith('.data'))
    else:
      expected_size = result.section_sizes[
          symbols.SECTION_TO_SECTION_NAME[section]]

    def describe_group(group):
      pct = 100.0 * group.size / expected_size
      return ('Section %s has %.1f%% of %d bytes accounted for from '
              '%d symbols. %d bytes are unaccounted for. Padding '
              'accounts for %d bytes\n' % (
                  section, pct, group.size, len(group),
                  expected_size - group.size, group.padding))

    in_section = result.symbol_group.WhereInSection(section)
    write_func(describe_group(in_section))

    star_syms = in_section.WhereNameMatches(r'^\*')
    attributed_syms = star_syms.Inverted().WhereHasAnyAttribution()
    anonymous_syms = attributed_syms.Inverted()
    if star_syms or anonymous_syms:
      missing_size = star_syms.size + anonymous_syms.size
      write_func(('+ Without %d merge sections and %d anonymous entries ('
                  'accounting for %d bytes):\n') % (
                      len(star_syms), len(anonymous_syms), missing_size))
      write_func('+ ' + describe_group(attributed_syms))
203
def _SaveResult(result, file_obj):
  """Saves the result to the given file object."""
  # Store one bucket per line.
  file_obj.write('%d\n' % _SERIALIZATION_VERSION)
  file_obj.write('%r\n' % result.section_sizes)
  file_obj.write('%d\n' % len(result.symbol_group))
  # Symbols are grouped by section: a bare section-name line introduces each
  # run, followed by one tab-separated symbol per line. Only non-derived
  # fields are stored (padding and parsed names are recomputed on load).
  last_section = None
  for symbol in result.symbol_group:
    if symbol.section_name != last_section:
      file_obj.write('%s\n' % symbol.section_name)
      last_section = symbol.section_name
    file_obj.write('%x\t%x\t%s\t%s\n' % (
        symbol.address, symbol.size_without_padding,
        symbol.function_signature or symbol.name or '',
        symbol.path or ''))
222
223
def _LoadResults(file_obj):
  """Loads a result from the given file."""
  lines = iter(file_obj)
  version = int(next(lines))
  assert version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')

  section_sizes = ast.literal_eval(next(lines))
  num_syms = int(next(lines))
  symbol_list = [None] * num_syms
  section_name = None
  for i in xrange(num_syms):
    line = next(lines)[:-1]
    if '\t' not in line:
      # A tab-less line announces the section for the symbols that follow.
      section_name = intern(line)
      line = next(lines)[:-1]
    fields = line.split('\t')
    # Bypass __init__: only the stored (non-derived) fields are filled in.
    sym = symbols.Symbol.__new__(symbols.Symbol)
    sym.section_name = section_name
    sym.address = int(fields[0], 16)
    sym.size = int(fields[1], 16)
    sym.name = fields[2] or None
    sym.path = fields[3] or None
    sym.padding = 0  # Derived
    sym.function_signature = None  # Derived
    symbol_list[i] = sym

  # Recompute derived values (padding and function names).
  result = mapfileparser.ParseResult(symbol_list, section_sizes)
  logging.info('Calculating padding')
  _RemoveDuplicatesAndCalculatePadding(result.symbol_group)
  logging.info('Deriving signatures')
  # Re-parse out function parameters.
  _NormalizeNames(result.symbol_group.WhereInSection('t'))
  return result
259
260
def AddOptions(parser):
  """Registers the analysis-related command-line flags on |parser|."""
  parser.add_argument('--tool-prefix', default='',
                      help='Path prefix for c++filt.')
  parser.add_argument('--output-directory',
                      help='Path to the root build directory.')
270 156
271 157
272 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None): 158 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
273 """Calls Analyze with values from args.""" 159 """Calls Analyze with values from args."""
274 if not output_directory: 160 if not output_directory:
275 abs_path = os.path.abspath(input_file) 161 abs_path = os.path.abspath(input_file)
276 release_idx = abs_path.find('Release') 162 release_idx = abs_path.find('Release')
277 if release_idx != -1: 163 if release_idx != -1:
278 output_directory = abs_path[:release_idx] + 'Release' 164 output_directory = abs_path[:release_idx] + 'Release'
279 output_directory = os.path.relpath(abs_path[:release_idx] + '/Release') 165 output_directory = os.path.relpath(abs_path[:release_idx] + '/Release')
280 logging.debug('Detected --output-directory=%s', output_directory) 166 logging.debug('Detected --output-directory=%s', output_directory)
281 167
282 if not tool_prefix and output_directory: 168 if not tool_prefix and output_directory:
283 # Auto-detect from build_vars.txt 169 # Auto-detect from build_vars.txt
284 build_vars_path = os.path.join(output_directory, 'build_vars.txt') 170 build_vars_path = os.path.join(output_directory, 'build_vars.txt')
285 if os.path.exists(build_vars_path): 171 if os.path.exists(build_vars_path):
286 with open(build_vars_path) as f: 172 with open(build_vars_path) as f:
287 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l) 173 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
288 logging.debug('Found --tool-prefix from build_vars.txt') 174 logging.debug('Found --tool-prefix from build_vars.txt')
289 tool_prefix = build_vars['android_tool_prefix'] 175 tool_prefix = os.path.join(output_directory,
176 build_vars['android_tool_prefix'])
290 177
291 if os.path.sep not in tool_prefix: 178 if os.path.sep not in tool_prefix:
292 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt') 179 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
293 else: 180 else:
294 full_path = tool_prefix + 'c++filt' 181 full_path = tool_prefix + 'c++filt'
295 182
296 if not os.path.isfile(full_path): 183 if not os.path.isfile(full_path):
297 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path) 184 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
298 logging.info('Using --tool-prefix=%s', tool_prefix) 185 logging.info('Using --tool-prefix=%s', tool_prefix)
299 return tool_prefix 186 return tool_prefix
300 187
301 188
def AnalyzeWithArgs(args, input_path):
  """Convenience wrapper: runs Analyze() using parsed command-line |args|."""
  return Analyze(input_path, output_directory=args.output_directory,
                 tool_prefix=args.tool_prefix)
304 191
305 192
def Analyze(path, output_directory=None, tool_prefix=''):
  """Analyzes |path| and returns its size information.

  |path| may be a (possibly gzipped) .size file produced earlier, or a
  (possibly gzipped) linker .map file; anything else raises.
  """
  if file_format.EndsWithMaybeGz(path, '.size'):
    logging.debug('Loading results from: %s', path)
    size_info = file_format.LoadSizeInfo(path)
    # Recompute derived values (padding and function names).
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    logging.info('Deriving signatures')
    # Re-parse out function parameters.
    _NormalizeNames(size_info.symbols.WhereInSection('t'))
    return size_info

  if not file_format.EndsWithMaybeGz(path, '.map'):
    raise Exception('Expected input to be a .map or a .size')

  # Verify tool_prefix early.
  tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory)

  with file_format.OpenMaybeGz(path) as map_file:
    size_info = linker_map_parser.MapFileParser().Parse(map_file)

  # Map file for some reason doesn't unmangle all names.
  logging.info('Calculating padding')
  _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
  # Unmangle prints its own log statement.
  _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
  # Resolve paths prints its own log statement.
  logging.info('Normalizing names')
  _NormalizeNames(size_info.symbols)
  logging.info('Normalizing paths')
  _NormalizeObjectPaths(size_info.symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Finished analyzing %d symbols', len(size_info.symbols))
  return size_info
335 229
336 230
def main(argv):
  """Command-line entry point: parses a .map file and writes a .size file."""
  # Fix: ArgumentParser's first positional parameter is |prog|; passing the
  # whole argv list to the constructor is wrong. The args are handed to the
  # parser via AddCommonOptionsAndParseArgs() below instead.
  parser = argparse.ArgumentParser()
  parser.add_argument('input_file', help='Path to input .map file.')
  parser.add_argument('output_file', help='Path to output .size(.gz) file.')
  AddOptions(parser)
  args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
  if not file_format.EndsWithMaybeGz(args.output_file, '.size'):
    parser.error('output_file must end with .size or .size.gz')

  size_info = AnalyzeWithArgs(args, args.input_file)
  logging.info('Saving result to %s', args.output_file)
  file_format.SaveSizeInfo(size_info, args.output_file)

  logging.info('Done')


if __name__ == '__main__':
  sys.exit(main(sys.argv))
OLDNEW
« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/mapfileparser.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698