Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(235)

Side by Side Diff: tools/binary_size/map2size.py

Issue 2791433004: //tools/binary_size: source_path information, change file format, fixes (Closed)
Patch Set: fix comment for _DetectToolPrefix Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/models.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright 2017 The Chromium Authors. All rights reserved. 2 # Copyright 2017 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Main Python API for analyzing binary size.""" 6 """Main Python API for analyzing binary size."""
7 7
8 import argparse 8 import argparse
9 import datetime
9 import distutils.spawn 10 import distutils.spawn
11 import gzip
10 import logging 12 import logging
11 import os 13 import os
14 import re
12 import subprocess 15 import subprocess
13 import sys 16 import sys
14 17
15 import describe 18 import describe
16 import file_format 19 import file_format
17 import function_signature 20 import function_signature
18 import helpers 21 import helpers
19 import linker_map_parser 22 import linker_map_parser
20 import models 23 import models
24 import ninja_parser
21 25
22 26
23 def _IterLines(s): 27 def _OpenMaybeGz(path, mode=None):
24 prev_idx = -1 28 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
25 while True: 29 if path.endswith('.gz'):
26 idx = s.find('\n', prev_idx + 1) 30 if mode and 'w' in mode:
27 if idx == -1: 31 return gzip.GzipFile(path, mode, 1)
28 return 32 return gzip.open(path, mode)
29 yield s[prev_idx + 1:idx] 33 return open(path, mode or 'r')
30 prev_idx = idx
31 34
32 35
def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
  """Uses c++filt to unmangle any symbols that need it."""
  # Itanium-ABI mangled names all start with "_Z".
  mangled = [s for s in symbol_group if s.name.startswith('_Z')]
  if not mangled:
    return

  logging.info('Unmangling %d names', len(mangled))
  cppfilt = subprocess.Popen([tool_prefix + 'c++filt'],
                             stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  # One mangled name per line in; c++filt echoes one demangled name per line.
  stdout = cppfilt.communicate('\n'.join(s.name for s in mangled))[0]
  assert cppfilt.returncode == 0

  for idx, demangled in enumerate(stdout.splitlines()):
    mangled[idx].name = demangled
47 50
48 51
49 def _NormalizeNames(symbol_group): 52 def _NormalizeNames(symbol_group):
50 """Ensures that all names are formatted in a useful way. 53 """Ensures that all names are formatted in a useful way.
51 54
52 This includes: 55 This includes:
53 - Assigning of |function_signature| (for functions). 56 - Assigning of |full_name|.
54 - Stripping of return types in |function_signature| and |name|. 57 - Stripping of return types in |full_name| and |name| (for functions).
55 - Stripping parameters from |name|. 58 - Stripping parameters from |name|.
56 - Moving "vtable for" and the like to be suffixes rather than prefixes. 59 - Moving "vtable for" and the like to be suffixes rather than prefixes.
57 """ 60 """
58 found_prefixes = set() 61 found_prefixes = set()
59 for symbol in symbol_group: 62 for symbol in symbol_group:
60 if symbol.name.startswith('*'): 63 if symbol.name.startswith('*'):
61 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this 64 # See comment in _RemoveDuplicatesAndCalculatePadding() about when this
62 # can happen. 65 # can happen.
63 continue 66 continue
64 67
65 # E.g.: vtable for FOO 68 # E.g.: vtable for FOO
66 idx = symbol.name.find(' for ', 0, 30) 69 idx = symbol.name.find(' for ', 0, 30)
67 if idx != -1: 70 if idx != -1:
68 found_prefixes.add(symbol.name[:idx + 4]) 71 found_prefixes.add(symbol.name[:idx + 4])
69 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']' 72 symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'
70 73
71 # E.g.: virtual thunk to FOO 74 # E.g.: virtual thunk to FOO
72 idx = symbol.name.find(' to ', 0, 30) 75 idx = symbol.name.find(' to ', 0, 30)
73 if idx != -1: 76 if idx != -1:
74 found_prefixes.add(symbol.name[:idx + 3]) 77 found_prefixes.add(symbol.name[:idx + 3])
75 symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']' 78 symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'
76 79
77 # Strip out return type, and identify where parameter list starts. 80 # Strip out return type, and identify where parameter list starts.
78 if symbol.section == 't': 81 if symbol.section == 't':
79 symbol.function_signature, symbol.name = ( 82 symbol.full_name, symbol.name = function_signature.Parse(symbol.name)
80 function_signature.Parse(symbol.name))
81 83
82 # Remove anonymous namespaces (they just harm clustering). 84 # Remove anonymous namespaces (they just harm clustering).
83 symbol.name = symbol.name.replace('(anonymous namespace)::', '') 85 non_anonymous = symbol.name.replace('(anonymous namespace)::', '')
86 if symbol.name != non_anonymous:
87 symbol.is_anonymous = True
88 symbol.name = non_anonymous
89 symbol.full_name = symbol.full_name.replace(
90 '(anonymous namespace)::', '')
91
92 if symbol.section != 't' and '(' in symbol.name:
93 # Pretty rare. Example:
94 # blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list
95 symbol.full_name = symbol.name
96 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)
84 97
85 logging.debug('Found name prefixes of: %r', found_prefixes) 98 logging.debug('Found name prefixes of: %r', found_prefixes)
86 99
87 100
88 def _NormalizeObjectPaths(symbol_group): 101 def _NormalizeObjectPaths(symbol_group):
89 """Ensures that all paths are formatted in a useful way.""" 102 """Ensures that all paths are formatted in a useful way."""
90 for symbol in symbol_group: 103 for symbol in symbol_group:
91 if symbol.path.startswith('obj/'): 104 path = symbol.object_path
105 if path.startswith('obj/'):
92 # Convert obj/third_party/... -> third_party/... 106 # Convert obj/third_party/... -> third_party/...
93 symbol.path = symbol.path[4:] 107 path = path[4:]
94 elif symbol.path.startswith('../../'): 108 elif path.startswith('../../'):
95 # Convert ../../third_party/... -> third_party/... 109 # Convert ../../third_party/... -> third_party/...
96 symbol.path = symbol.path[6:] 110 path = path[6:]
97 if symbol.path.endswith(')'): 111 if path.endswith(')'):
98 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o 112 # Convert foo/bar.a(baz.o) -> foo/bar.a/(baz.o)
99 start_idx = symbol.path.index('(') 113 start_idx = path.index('(')
100 paren_path = symbol.path[start_idx + 1:-1] 114 path = os.path.join(path[:start_idx], path[start_idx:])
101 symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path 115 symbol.object_path = path
116
117
118 def _NormalizeSourcePath(path):
119 if path.startswith('gen/'):
120 # Convert gen/third_party/... -> third_party/...
121 return path[4:]
122 if path.startswith('../../'):
123 # Convert ../../third_party/... -> third_party/...
124 return path[6:]
125 return path
126
127
def _ExtractSourcePaths(symbol_group, output_directory):
  """Fills in the .source_path attribute of all symbols."""
  mapper = ninja_parser.SourceFileMapper(output_directory)

  for symbol in symbol_group:
    obj_path = symbol.object_path
    if symbol.source_path or not obj_path:
      continue
    if obj_path.startswith('..'):
      # We don't have source info for prebuilt .a files.
      continue
    found = mapper.FindSourceForPath(obj_path)
    if found:
      symbol.source_path = _NormalizeSourcePath(found)
    else:
      logging.warning('Could not find source path for %s', obj_path)
  logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())
102 144
103 145
104 def _RemoveDuplicatesAndCalculatePadding(symbol_group): 146 def _RemoveDuplicatesAndCalculatePadding(symbol_group):
105 """Removes symbols at the same address and calculates the |padding| field. 147 """Removes symbols at the same address and calculates the |padding| field.
106 148
107 Symbols must already be sorted by |address|. 149 Symbols must already be sorted by |address|.
108 """ 150 """
109 i = 0
110 to_remove = set() 151 to_remove = set()
111 all_symbols = symbol_group.symbols 152 all_symbols = symbol_group.symbols
112 for i in xrange(len(all_symbols)): 153 for i, symbol in enumerate(all_symbols[1:]):
113 prev_symbol = all_symbols[i - 1] 154 prev_symbol = all_symbols[i]
114 symbol = all_symbols[i]
115 if prev_symbol.section_name != symbol.section_name: 155 if prev_symbol.section_name != symbol.section_name:
116 continue 156 continue
117 if symbol.address > 0 and prev_symbol.address > 0: 157 if symbol.address > 0 and prev_symbol.address > 0:
118 # Fold symbols that are at the same address (happens in nm output). 158 # Fold symbols that are at the same address (happens in nm output).
119 if symbol.address == prev_symbol.address: 159 if symbol.address == prev_symbol.address:
120 symbol.size = max(prev_symbol.size, symbol.size) 160 symbol.size = max(prev_symbol.size, symbol.size)
121 to_remove.add(i) 161 to_remove.add(i + 1)
122 continue 162 continue
123 # Even with symbols at the same address removed, overlaps can still 163 # Even with symbols at the same address removed, overlaps can still
124 # happen. In this case, padding will be negative (and this is fine). 164 # happen. In this case, padding will be negative (and this is fine).
125 padding = symbol.address - prev_symbol.end_address 165 padding = symbol.address - prev_symbol.end_address
126 # These thresholds were found by manually auditing arm32 Chrome. 166 # These thresholds were found by manually auditing arm32 Chrome.
127 # E.g.: Set them to 0 and see what warnings get logged. 167 # E.g.: Set them to 0 and see what warnings get logged.
128 # TODO(agrieve): See if these thresholds make sense for architectures 168 # TODO(agrieve): See if these thresholds make sense for architectures
129 # other than arm32. 169 # other than arm32.
130 if (symbol.section in 'rd' and padding >= 256 or 170 if (symbol.section in 'rd' and padding >= 256 or
131 symbol.section in 't' and padding >= 64): 171 symbol.section in 't' and padding >= 64):
132 # For nm data, this is caused by data that has no associated symbol. 172 # For nm data, this is caused by data that has no associated symbol.
133 # The linker map file lists them with no name, but with a file. 173 # The linker map file lists them with no name, but with a file.
134 # Example: 174 # Example:
135 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o 175 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
136 # Where as most look like: 176 # Where as most look like:
137 # .data.MANGLED_NAME... 177 # .data.MANGLED_NAME...
138 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % ( 178 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (
139 padding, prev_symbol, symbol)) 179 padding, prev_symbol, symbol))
140 continue 180 continue
141 symbol.padding = padding 181 symbol.padding = padding
142 symbol.size += padding 182 symbol.size += padding
143 assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol 183 assert symbol.size >= 0, 'Symbol has negative size: ' + (
184 '%r\nprev symbol: %r' % (symbol, prev_symbol))
144 # Map files have no overlaps, so worth special-casing the no-op case. 185 # Map files have no overlaps, so worth special-casing the no-op case.
145 if to_remove: 186 if to_remove:
146 logging.info('Removing %d overlapping symbols', len(to_remove)) 187 logging.info('Removing %d overlapping symbols', len(to_remove))
147 symbol_group.symbols = ( 188 symbol_group.symbols = (
148 [s for i, s in enumerate(all_symbols) if i not in to_remove]) 189 [s for i, s in enumerate(all_symbols) if i not in to_remove])
149 190
150 191
def AddOptions(parser):
  """Registers the analysis-related flags on |parser|."""
  parser.add_argument('--tool-prefix', default='',
                      help='Path prefix for c++filt.')
  parser.add_argument('--output-directory',
                      help='Path to the root build directory.')
156 197
157 198
158 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None): 199 def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
159 """Calls Analyze with values from args.""" 200 """Detects values for --tool-prefix and --output-directory."""
160 if not output_directory: 201 if not output_directory:
161 abs_path = os.path.abspath(input_file) 202 abs_path = os.path.abspath(input_file)
162 release_idx = abs_path.find('Release') 203 release_idx = abs_path.find('Release')
163 if release_idx != -1: 204 if release_idx != -1:
164 output_directory = abs_path[:release_idx] + 'Release' 205 output_directory = abs_path[:release_idx] + 'Release'
165 output_directory = os.path.relpath(abs_path[:release_idx] + '/Release') 206 output_directory = os.path.relpath(abs_path[:release_idx] + '/Release')
166 logging.debug('Detected --output-directory=%s', output_directory) 207 logging.debug('Detected --output-directory=%s', output_directory)
167 208
168 if not tool_prefix and output_directory: 209 if not tool_prefix and output_directory:
169 # Auto-detect from build_vars.txt 210 # Auto-detect from build_vars.txt
170 build_vars_path = os.path.join(output_directory, 'build_vars.txt') 211 build_vars_path = os.path.join(output_directory, 'build_vars.txt')
171 if os.path.exists(build_vars_path): 212 if os.path.exists(build_vars_path):
172 with open(build_vars_path) as f: 213 with open(build_vars_path) as f:
173 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l) 214 build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
174 logging.debug('Found --tool-prefix from build_vars.txt') 215 logging.debug('Found --tool-prefix from build_vars.txt')
175 tool_prefix = os.path.join(output_directory, 216 tool_prefix = os.path.join(output_directory,
176 build_vars['android_tool_prefix']) 217 build_vars['android_tool_prefix'])
177 218
178 if os.path.sep not in tool_prefix: 219 if os.path.sep not in tool_prefix:
179 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt') 220 full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
180 else: 221 else:
181 full_path = tool_prefix + 'c++filt' 222 full_path = tool_prefix + 'c++filt'
182 223
183 if not os.path.isfile(full_path): 224 if not full_path or not os.path.isfile(full_path):
184 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path) 225 raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
226 if not output_directory or not os.path.isdir(output_directory):
227 raise Exception('Bad --output-directory. Path not found: %s' %
228 output_directory)
229 logging.info('Using --output-directory=%s', output_directory)
185 logging.info('Using --tool-prefix=%s', tool_prefix) 230 logging.info('Using --tool-prefix=%s', tool_prefix)
186 return tool_prefix 231 return output_directory, tool_prefix
187 232
188 233
def AnalyzeWithArgs(args, input_path):
  """Runs Analyze() using values parsed from command-line |args|."""
  return Analyze(input_path, output_directory=args.output_directory,
                 tool_prefix=args.tool_prefix)
191 236
192 237
def Analyze(path, output_directory=None, tool_prefix=''):
  """Loads a .size file or parses a linker .map(.gz) into a SizeInfo.

  Args:
    path: Path to a ".size" results file, or a ".map"/".map.gz" linker map.
    output_directory: Root build directory; auto-detected when None.
    tool_prefix: Path prefix for c++filt; auto-detected when empty.

  Returns:
    A models.SizeInfo with derived fields (padding, names, source/object
    paths) populated.

  Raises:
    Exception: When |path| has an unsupported extension, or when tool
        prefix / output directory detection fails.
  """
  if path.endswith('.size'):
    logging.debug('Loading results from: %s', path)
    size_info = file_format.LoadSizeInfo(path)
    # Recompute derived values (padding and function names).
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    logging.info('Deriving signatures')
    # Re-parse out function parameters.
    _NormalizeNames(size_info.symbols)
    return size_info
  elif not path.endswith('.map') and not path.endswith('.map.gz'):
    raise Exception('Expected input to be a .map or a .size')
  else:
    # Verify tool_prefix early.
    output_directory, tool_prefix = (
        _DetectToolPrefix(tool_prefix, path, output_directory))

    with _OpenMaybeGz(path) as map_file:
      section_sizes, symbols = linker_map_parser.MapFileParser().Parse(map_file)
    # Record the map's mtime so diffs can tell which build came first.
    timestamp = datetime.datetime.utcfromtimestamp(os.path.getmtime(path))
    size_info = models.SizeInfo(section_sizes, models.SymbolGroup(symbols),
                                timestamp=timestamp)

    # Map file for some reason doesn't unmangle all names.
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
    logging.info('Extracting source paths from .ninja files')
    _ExtractSourcePaths(size_info.symbols, output_directory)
    # Resolve paths prints its own log statement.
    logging.info('Normalizing names')
    _NormalizeNames(size_info.symbols)
    logging.info('Normalizing paths')
    _NormalizeObjectPaths(size_info.symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Finished analyzing %d symbols', len(size_info.symbols))
  return size_info
229 280
230 281
282 def _DetectGitRevision(path):
283 try:
284 git_rev = subprocess.check_output(
285 ['git', '-C', os.path.dirname(path), 'rev-parse', 'HEAD'])
286 return git_rev.rstrip()
287 except Exception:
288 logging.warning('Failed to detect git revision for file metadata.')
289 return None
290
291
def main(argv):
  """Command-line entry point: parse args, analyze, and save a .size file."""
  parser = argparse.ArgumentParser(argv)
  parser.add_argument('input_file', help='Path to input .map file.')
  parser.add_argument('output_file', help='Path to output .size(.gz) file.')
  AddOptions(parser)
  args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
  if not args.output_file.endswith('.size'):
    parser.error('output_file must end with .size')

  size_info = AnalyzeWithArgs(args, args.input_file)
  if not args.input_file.endswith('.size'):
    # Fresh analysis: record provenance so results can identify their input.
    revision = _DetectGitRevision(args.input_file)
    basename = os.path.basename(args.input_file)
    size_info.tag = 'Filename=%s git_rev=%s' % (basename, revision)
    logging.info('Recording metadata: %s',
                 describe.DescribeSizeInfoMetadata(size_info))
  logging.info('Saving result to %s', args.output_file)
  file_format.SaveSizeInfo(size_info, args.output_file)

  logging.info('Done')
245 312
246 313
# Script entry point: propagate main()'s return value as the exit code.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
OLDNEW
« no previous file with comments | « tools/binary_size/linker_map_parser.py ('k') | tools/binary_size/models.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698