tools/binary_size/map2size.py - Issue 2775173005: FREEZE.unindexed

Side by Side Diff: tools/binary_size/map2size.py

Issue 2775173005: FREEZE.unindexed (Closed)

Patch Set: ps2 Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright 2017 The Chromium Authors. All rights reserved.	2 # Copyright 2017 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """Main Python API for analyzing binary size."""	6 """Main Python API for analyzing binary size."""

7	7

8 import argparse	8 import argparse

9 import distutils.spawn	9 import distutils.spawn

	10 import gzip

10 import logging	11 import logging

11 import os	12 import os

	13 import re

12 import subprocess	14 import subprocess

13 import sys	15 import sys

14	16

15 import describe	17 import describe

16 import file_format	18 import file_format

17 import function_signature	19 import function_signature

18 import helpers	20 import helpers

19 import linker_map_parser	21 import linker_map_parser

20 import models	22 import models

	23 import ninja_parser

21	24

22	25

23 def _IterLines(s):	26 def _OpenMaybeGz(path, mode=None):

24 prev_idx = -1	27 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""

25 while True:	28 if path.endswith('.gz'):

26 idx = s.find('\n', prev_idx + 1)	29 if mode and 'w' in mode:

27 if idx == -1:	30 return gzip.GzipFile(path, mode, 1)

28 return	31 return gzip.open(path, mode)

29 yield s[prev_idx + 1:idx]	32 return open(path, mode or 'r')

30 prev_idx = idx

31	33

32	34

33 def _UnmangleRemainingSymbols(symbol_group, tool_prefix):	35 def _UnmangleRemainingSymbols(symbol_group, tool_prefix):

34 """Uses c++filt to unmangle any symbols that need it."""	36 """Uses c++filt to unmangle any symbols that need it."""

35 to_process = [s for s in symbol_group if s.name.startswith('_Z')]	37 to_process = [s for s in symbol_group if s.name.startswith('_Z')]

36 if not to_process:	38 if not to_process:

37 return	39 return

38	40

39 logging.info('Unmangling %d names', len(to_process))	41 logging.info('Unmangling %d names', len(to_process))

40 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,	42 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,

41 stdout=subprocess.PIPE)	43 stdout=subprocess.PIPE)

42 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]	44 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]

43 assert proc.returncode == 0	45 assert proc.returncode == 0

44	46

45 for i, line in enumerate(_IterLines(stdout)):	47 for i, line in enumerate(stdout.splitlines()):

46 to_process[i].name = line	48 to_process[i].name = line

47	49

48	50

49 def _NormalizeNames(symbol_group):	51 def _NormalizeNames(symbol_group):

50 """Ensures that all names are formatted in a useful way.	52 """Ensures that all names are formatted in a useful way.

51	53

52 This includes:	54 This includes:

53 - Assigning of |function_signature| (for functions).	55 - Assigning of |full_name|.

54 - Stripping of return types in |function_signature| and |name|.	56 - Stripping of return types in |full_name| and |name| (for functions).

55 - Stripping parameters from |name|.	57 - Stripping parameters from |name|.

56 - Moving "vtable for" and the like to be suffixes rather than prefixes.	58 - Moving "vtable for" and the like to be suffixes rather than prefixes.

57 """	59 """

58 found_prefixes = set()	60 found_prefixes = set()

59 for symbol in symbol_group:	61 for symbol in symbol_group:

60 if symbol.name.startswith('*'):	62 if symbol.name.startswith('*'):

61 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this	63 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this

62 # can happen.	64 # can happen.

63 continue	65 continue

64	66

65 # E.g.: vtable for FOO	67 # E.g.: vtable for FOO

66 idx = symbol.name.find(' for ', 0, 30)	68 idx = symbol.name.find(' for ', 0, 30)

67 if idx != -1:	69 if idx != -1:

68 found_prefixes.add(symbol.name[:idx + 4])	70 found_prefixes.add(symbol.name[:idx + 4])

69 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'	71 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'

70	72

71 # E.g.: virtual thunk to FOO	73 # E.g.: virtual thunk to FOO

72 idx = symbol.name.find(' to ', 0, 30)	74 idx = symbol.name.find(' to ', 0, 30)

73 if idx != -1:	75 if idx != -1:

74 found_prefixes.add(symbol.name[:idx + 3])	76 found_prefixes.add(symbol.name[:idx + 3])

75 symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'	77 symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'

76	78

77 # Strip out return type, and identify where parameter list starts.	79 # Strip out return type, and identify where parameter list starts.

78 if symbol.section == 't':	80 if symbol.section == 't':

79 symbol.function_signature, symbol.name = (	81 symbol.full_name, symbol.name = function_signature.Parse(symbol.name)

80 function_signature.Parse(symbol.name))

81	82

82 # Remove anonymous namespaces (they just harm clustering).	83 # Remove anonymous namespaces (they just harm clustering).

83 symbol.name = symbol.name.replace('(anonymous namespace)::', '')	84 non_anonymous = symbol.name.replace('(anonymous namespace)::', '')

	85 if symbol.name != non_anonymous:

	86 symbol.is_anonymous = True

	87 symbol.name = non_anonymous

	88 symbol.full_name = symbol.full_name.replace(

	89 '(anonymous namespace)::', '')

	90

	91 if symbol.section != 't' and '(' in symbol.name:

	92 # Pretty rare. Example:

	93 # blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list

	94 symbol.full_name = symbol.name

	95 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)

84	96

85 logging.debug('Found name prefixes of: %r', found_prefixes)	97 logging.debug('Found name prefixes of: %r', found_prefixes)

86	98

87	99

88 def _NormalizeObjectPaths(symbol_group):	100 def _NormalizeObjectPaths(symbol_group):

89 """Ensures that all paths are formatted in a useful way."""	101 """Ensures that all paths are formatted in a useful way."""

90 for symbol in symbol_group:	102 for symbol in symbol_group:

91 if symbol.path.startswith('obj/'):	103 path = symbol.object_path

	104 if path.startswith('obj/'):

92 # Convert obj/third_party/... -> third_party/...	105 # Convert obj/third_party/... -> third_party/...

93 symbol.path = symbol.path[4:]	106 path = path[4:]

94 elif symbol.path.startswith('../../'):	107 elif path.startswith('../../'):

95 # Convert ../../third_party/... -> third_party/...	108 # Convert ../../third_party/... -> third_party/...

96 symbol.path = symbol.path[6:]	109 path = path[6:]

97 if symbol.path.endswith(')'):	110 if path.endswith(')'):

98 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o	111 # Convert foo/bar.a(baz.o) -> foo/bar.a/(baz.o)

99 start_idx = symbol.path.index('(')	112 start_idx = path.index('(')

100 paren_path = symbol.path[start_idx + 1:-1]	113 path = os.path.join(path[:start_idx], path[start_idx:])

101 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path	114 symbol.object_path = path

	115

	116

	117 def _NormalizeSourcePath(path):

	118 if path.startswith('gen/'):

	119 # Convert gen/third_party/... -> third_party/...

	120 return path[4:]

	121 if path.startswith('../../'):

	122 # Convert ../../third_party/... -> third_party/...

	123 return path[6:]

	124 return path

	125

	126

	127 def _ExtractSourcePaths(symbol_group, output_directory):

	128 """Fills in the .source_path attribute of all symbols."""

	129 mapper = ninja_parser.SourceFileMapper(output_directory)

	130

	131 for symbol in symbol_group:

	132 object_path = symbol.object_path

	133 if symbol.source_path or not object_path:

	134 continue

	135 # We don't have source info for prebuilt .a files.

	136 if not object_path.startswith('..'):

	137 source_path = mapper.FindSourceForPath(object_path)

	138 if source_path:

	139 symbol.source_path = _NormalizeSourcePath(source_path)

	140 else:

	141 logging.warning('Could not find source path for %s', object_path)

	142 logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())

102	143

103	144

104 def _RemoveDuplicatesAndCalculatePadding(symbol_group):	145 def _RemoveDuplicatesAndCalculatePadding(symbol_group):

105 """Removes symbols at the same address and calculates the |padding| field.	146 """Removes symbols at the same address and calculates the |padding| field.

106	147

107 Symbols must already be sorted by |address|.	148 Symbols must already be sorted by |address|.

108 """	149 """

109 i = 0

110 to_remove = set()	150 to_remove = set()

111 all_symbols = symbol_group.symbols	151 all_symbols = symbol_group.symbols

112 for i in xrange(len(all_symbols)):	152 for i, symbol in enumerate(all_symbols[1:]):

113 prev_symbol = all_symbols[i - 1]	153 prev_symbol = all_symbols[i]

114 symbol = all_symbols[i]

115 if prev_symbol.section_name != symbol.section_name:	154 if prev_symbol.section_name != symbol.section_name:

116 continue	155 continue

117 if symbol.address > 0 and prev_symbol.address > 0:	156 if symbol.address > 0 and prev_symbol.address > 0:

118 # Fold symbols that are at the same address (happens in nm output).	157 # Fold symbols that are at the same address (happens in nm output).

119 if symbol.address == prev_symbol.address:	158 if symbol.address == prev_symbol.address:

120 symbol.size = max(prev_symbol.size, symbol.size)	159 symbol.size = max(prev_symbol.size, symbol.size)

121 to_remove.add(i)	160 to_remove.add(i + 1)

122 continue	161 continue

123 # Even with symbols at the same address removed, overlaps can still	162 # Even with symbols at the same address removed, overlaps can still

124 # happen. In this case, padding will be negative (and this is fine).	163 # happen. In this case, padding will be negative (and this is fine).

125 padding = symbol.address - prev_symbol.end_address	164 padding = symbol.address - prev_symbol.end_address

126 # These thresholds were found by manually auditing arm32 Chrome.	165 # These thresholds were found by manually auditing arm32 Chrome.

127 # E.g.: Set them to 0 and see what warnings get logged.	166 # E.g.: Set them to 0 and see what warnings get logged.

128 # TODO(agrieve): See if these thresholds make sense for architectures	167 # TODO(agrieve): See if these thresholds make sense for architectures

129 # other than arm32.	168 # other than arm32.

130 if (symbol.section in 'rd' and padding >= 256 or	169 if (symbol.section in 'rd' and padding >= 256 or

131 symbol.section in 't' and padding >= 64):	170 symbol.section in 't' and padding >= 64):

132 # For nm data, this is caused by data that has no associated symbol.	171 # For nm data, this is caused by data that has no associated symbol.

133 # The linker map file lists them with no name, but with a file.	172 # The linker map file lists them with no name, but with a file.

134 # Example:	173 # Example:

135 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o	174 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o

136 # Where as most look like:	175 # Where as most look like:

137 # .data.MANGLED_NAME...	176 # .data.MANGLED_NAME...

138 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (	177 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (

139 padding, prev_symbol, symbol))	178 padding, prev_symbol, symbol))

140 continue	179 continue

141 symbol.padding = padding	180 symbol.padding = padding

142 symbol.size += padding	181 symbol.size += padding

143 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol	182 assert symbol.size >= 0, 'Symbol has negative size: ' + (

	183 '%r\nprev symbol: %r' % (symbol, prev_symbol))

144 # Map files have no overlaps, so worth special-casing the no-op case.	184 # Map files have no overlaps, so worth special-casing the no-op case.

145 if to_remove:	185 if to_remove:

146 logging.info('Removing %d overlapping symbols', len(to_remove))	186 logging.info('Removing %d overlapping symbols', len(to_remove))

147 symbol_group.symbols = (	187 symbol_group.symbols = (

148 [s for i, s in enumerate(all_symbols) if i not in to_remove])	188 [s for i, s in enumerate(all_symbols) if i not in to_remove])

149	189

150	190

151 def AddOptions(parser):	191 def AddOptions(parser):

152 parser.add_argument('--tool-prefix', default='',	192 parser.add_argument('--tool-prefix', default='',

153 help='Path prefix for c++filt.')	193 help='Path prefix for c++filt.')

(...skipping 19 matching lines...) Expand all Loading...
173 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)	213 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)

174 logging.debug('Found --tool-prefix from build_vars.txt')	214 logging.debug('Found --tool-prefix from build_vars.txt')

175 tool_prefix = os.path.join(output_directory,	215 tool_prefix = os.path.join(output_directory,

176 build_vars['android_tool_prefix'])	216 build_vars['android_tool_prefix'])

177	217

178 if os.path.sep not in tool_prefix:	218 if os.path.sep not in tool_prefix:

179 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')	219 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')

180 else:	220 else:

181 full_path = tool_prefix + 'c++filt'	221 full_path = tool_prefix + 'c++filt'

182	222

183 if not os.path.isfile(full_path):	223 if not full_path or not os.path.isfile(full_path):

184 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)	224 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)

	225 if not output_directory or not os.path.isdir(output_directory):

	226 raise Exception('Bad --output-directory. Path not found: %s' %

	227 output_directory)

	228 logging.info('Using --output-directory=%s', output_directory)

185 logging.info('Using --tool-prefix=%s', tool_prefix)	229 logging.info('Using --tool-prefix=%s', tool_prefix)

186 return tool_prefix	230 return output_directory, tool_prefix

187	231

188	232

189 def AnalyzeWithArgs(args, input_path):	233 def AnalyzeWithArgs(args, input_path):

190 return Analyze(input_path, args.output_directory, args.tool_prefix)	234 return Analyze(input_path, args.output_directory, args.tool_prefix)

191	235

192	236

193 def Analyze(path, output_directory=None, tool_prefix=''):	237 def Analyze(path, output_directory=None, tool_prefix=''):

194 if file_format.EndsWithMaybeGz(path, '.size'):	238 if path.endswith('.size'):

195 logging.debug('Loading results from: %s', path)	239 logging.debug('Loading results from: %s', path)

196 size_info = file_format.LoadSizeInfo(path)	240 size_info = file_format.LoadSizeInfo(path)

197 # Recompute derived values (padding and function names).	241 # Recompute derived values (padding and function names).

198 logging.info('Calculating padding')	242 logging.info('Calculating padding')

199 _RemoveDuplicatesAndCalculatePadding(size_info.symbols)	243 _RemoveDuplicatesAndCalculatePadding(size_info.symbols)

200 logging.info('Deriving signatures')	244 logging.info('Deriving signatures')

201 # Re-parse out function parameters.	245 # Re-parse out function parameters.

202 _NormalizeNames(size_info.symbols.WhereInSection('t'))	246 _NormalizeNames(size_info.symbols)

203 return size_info	247 return size_info

204 elif not file_format.EndsWithMaybeGz(path, '.map'):	248 elif not path.endswith('.map') and not path.endswith('.map.gz'):

205 raise Exception('Expected input to be a .map or a .size')	249 raise Exception('Expected input to be a .map or a .size')

206 else:	250 else:

207 # Verify tool_prefix early.	251 # Verify tool_prefix early.

208 tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory)	252 output_directory, tool_prefix = (

	253 _DetectToolPrefix(tool_prefix, path, output_directory))

209	254

210 with file_format.OpenMaybeGz(path) as map_file:	255 with _OpenMaybeGz(path) as map_file:

211 size_info = linker_map_parser.MapFileParser().Parse(map_file)	256 size_info = linker_map_parser.MapFileParser().Parse(map_file)

212	257

213 # Map file for some reason doesn't unmangle all names.	258 # Map file for some reason doesn't unmangle all names.

214 logging.info('Calculating padding')	259 logging.info('Calculating padding')

215 _RemoveDuplicatesAndCalculatePadding(size_info.symbols)	260 _RemoveDuplicatesAndCalculatePadding(size_info.symbols)

216 # Unmangle prints its own log statement.	261 # Unmangle prints its own log statement.

217 _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)	262 _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)

	263 logging.info('Extracting source paths from .ninja files')

	264 _ExtractSourcePaths(size_info.symbols, output_directory)

218 # Resolve paths prints its own log statement.	265 # Resolve paths prints its own log statement.

219 logging.info('Normalizing names')	266 logging.info('Normalizing names')

220 _NormalizeNames(size_info.symbols)	267 _NormalizeNames(size_info.symbols)

221 logging.info('Normalizing paths')	268 logging.info('Normalizing paths')

222 _NormalizeObjectPaths(size_info.symbols)	269 _NormalizeObjectPaths(size_info.symbols)

223	270

224 if logging.getLogger().isEnabledFor(logging.INFO):	271 if logging.getLogger().isEnabledFor(logging.INFO):

225 for line in describe.DescribeSizeInfoCoverage(size_info):	272 for line in describe.DescribeSizeInfoCoverage(size_info):

226 logging.info(line)	273 logging.info(line)

227 logging.info('Finished analyzing %d symbols', len(size_info.symbols))	274 logging.info('Finished analyzing %d symbols', len(size_info.symbols))

228 return size_info	275 return size_info

229	276

230	277

231 def main(argv):	278 def main(argv):

232 parser = argparse.ArgumentParser(argv)	279 parser = argparse.ArgumentParser(argv)

233 parser.add_argument('input_file', help='Path to input .map file.')	280 parser.add_argument('input_file', help='Path to input .map file.')

234 parser.add_argument('output_file', help='Path to output .size(.gz) file.')	281 parser.add_argument('output_file', help='Path to output .size(.gz) file.')

235 AddOptions(parser)	282 AddOptions(parser)

236 args = helpers.AddCommonOptionsAndParseArgs(parser, argv)	283 args = helpers.AddCommonOptionsAndParseArgs(parser, argv)

237 if not file_format.EndsWithMaybeGz(args.output_file, '.size'):	284 if not args.output_file.endswith('.size'):

238 parser.error('output_file must end with .size or .size.gz')	285 parser.error('output_file must end with .size')

239	286

240 size_info = AnalyzeWithArgs(args, args.input_file)	287 size_info = AnalyzeWithArgs(args, args.input_file)

241 logging.info('Saving result to %s', args.output_file)	288 logging.info('Saving result to %s', args.output_file)

242 file_format.SaveSizeInfo(size_info, args.output_file)	289 file_format.SaveSizeInfo(size_info, args.output_file)

243	290

244 logging.info('Done')	291 logging.info('Done')

245	292

246	293

247 if __name__ == '__main__':	294 if __name__ == '__main__':

248 sys.exit(main(sys.argv))	295 sys.exit(main(sys.argv))

OLD	NEW

« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/models.py » ('j') | no next file with comments »