#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Main Python API for analyzing binary size."""

import argparse
import distutils.spawn
import logging
import os
import subprocess
import sys

import describe
import file_format
import function_signature
import helpers
import linker_map_parser
import models


def _IterLines(s):
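  # Illustrative note (not part of the original file): yields each
  # newline-terminated line without its trailing '\n'; a final chunk that
  # lacks a newline is not yielded.
  #   list(_IterLines('a\nb\n'))  ->  ['a', 'b']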
  prev_idx = -1
  while True:
    idx = s.find('\n', prev_idx + 1)
    if idx == -1:
      return
    yield s[prev_idx + 1:idx]
    prev_idx = idx


def _UnmangleRemainingSymbols(symbol_group, tool_prefix):
  """Uses c++filt to unmangle any symbols that need it."""
  to_process = [s for s in symbol_group if s.name.startswith('_Z')]
  if not to_process:
    return

  logging.info('Unmangling %d names', len(to_process))
  proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE)
  stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]
  assert proc.returncode == 0

  for i, line in enumerate(_IterLines(stdout)):
    to_process[i].name = line


def _NormalizeNames(symbol_group):
  """Ensures that all names are formatted in a useful way.

  This includes:
  - Assigning |function_signature| (for functions).
  - Stripping return types from |function_signature| and |name|.
  - Stripping parameters from |name|.
  - Moving "vtable for" and the like to be suffixes rather than prefixes.
  """
  found_prefixes = set()
  for symbol in symbol_group:
    if symbol.name.startswith('*'):
      # See comment in _RemoveDuplicatesAndCalculatePadding() about when this
      # can happen.
      continue

    # E.g.: vtable for FOO
    idx = symbol.name.find(' for ', 0, 30)
    if idx != -1:
      found_prefixes.add(symbol.name[:idx + 4])
      symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'

    # E.g.: virtual thunk to FOO
    idx = symbol.name.find(' to ', 0, 30)
    if idx != -1:
      found_prefixes.add(symbol.name[:idx + 3])
      symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'

    # Strip out return type, and identify where parameter list starts.
    if symbol.section == 't':
      symbol.function_signature, symbol.name = (
          function_signature.Parse(symbol.name))

    # Remove anonymous namespaces (they just harm clustering).
    symbol.name = symbol.name.replace('(anonymous namespace)::', '')

  logging.debug('Found name prefixes of: %r', found_prefixes)


def _NormalizeObjectPaths(symbol_group):
  """Ensures that all paths are formatted in a useful way."""
  for symbol in symbol_group:
    if symbol.path.startswith('obj/'):
      # Convert obj/third_party/... -> third_party/...
      symbol.path = symbol.path[4:]
    elif symbol.path.startswith('../../'):
      # Convert ../../third_party/... -> third_party/...
      symbol.path = symbol.path[6:]
    if symbol.path.endswith(')'):
      # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o
      start_idx = symbol.path.index('(')
      paren_path = symbol.path[start_idx + 1:-1]
      symbol.path = symbol.path[:start_idx] + os.path.sep + paren_path


def _RemoveDuplicatesAndCalculatePadding(symbol_group):
  """Removes symbols at the same address and calculates the |padding| field.

  Symbols must already be sorted by |address|.
  """
  to_remove = set()
  all_symbols = symbol_group.symbols
  # Start at 1 so each symbol is compared against the one before it.
  for i in xrange(1, len(all_symbols)):
    prev_symbol = all_symbols[i - 1]
    symbol = all_symbols[i]
    if prev_symbol.section_name != symbol.section_name:
      continue
    if symbol.address > 0 and prev_symbol.address > 0:
      # Fold symbols that are at the same address (happens in nm output).
      if symbol.address == prev_symbol.address:
        symbol.size = max(prev_symbol.size, symbol.size)
        to_remove.add(i)
        continue
      # Even with symbols at the same address removed, overlaps can still
      # happen. In this case, padding will be negative (and this is fine).
      padding = symbol.address - prev_symbol.end_address
      # These thresholds were found by manually auditing arm32 Chrome.
      # E.g.: Set them to 0 and see what warnings get logged.
      # TODO(agrieve): See if these thresholds make sense for architectures
      # other than arm32.
      if (symbol.section in 'rd' and padding >= 256 or
          symbol.section in 't' and padding >= 64):
        # For nm data, this is caused by data that has no associated symbol.
        # The linker map file lists them with no name, but with a file.
        # Example:
        #   .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
        # Whereas most look like:
        #   .data.MANGLED_NAME...
        logging.debug('Large padding of %d between:\n A) %r\n B) %r',
                      padding, prev_symbol, symbol)
        continue
      symbol.padding = padding
      symbol.size += padding
      assert symbol.size >= 0, 'Symbol has negative size: %r' % symbol
  # Map files have no overlaps, so worth special-casing the no-op case.
  if to_remove:
    logging.info('Removing %d overlapping symbols', len(to_remove))
    symbol_group.symbols = (
        [s for i, s in enumerate(all_symbols) if i not in to_remove])


def AddOptions(parser):
  parser.add_argument('--tool-prefix', default='',
                      help='Path prefix for c++filt.')
  parser.add_argument('--output-directory',
                      help='Path to the root build directory.')


def _DetectToolPrefix(tool_prefix, input_file, output_directory=None):
  """Detects and validates the tool prefix needed to run c++filt."""
  if not output_directory:
    abs_path = os.path.abspath(input_file)
    release_idx = abs_path.find('Release')
    if release_idx != -1:
      output_directory = os.path.relpath(abs_path[:release_idx] + 'Release')
      logging.debug('Detected --output-directory=%s', output_directory)

  if not tool_prefix and output_directory:
    # Auto-detect from build_vars.txt
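    # Assumed format (illustrative, not from the original): build_vars.txt has
    # one 'key=value' pair per line, including an 'android_tool_prefix' entry
    # whose value is joined onto |output_directory| below.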
    build_vars_path = os.path.join(output_directory, 'build_vars.txt')
    if os.path.exists(build_vars_path):
      with open(build_vars_path) as f:
        build_vars = dict(l.rstrip().split('=', 1) for l in f if '=' in l)
      logging.debug('Found --tool-prefix from build_vars.txt')
      tool_prefix = os.path.join(output_directory,
                                 build_vars['android_tool_prefix'])

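  # Illustrative note (not in the original): a prefix with no path separator
  # is resolved as <prefix>c++filt on $PATH; otherwise it is used as a
  # literal path prefix.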
  if os.path.sep not in tool_prefix:
    full_path = distutils.spawn.find_executable(tool_prefix + 'c++filt')
  else:
    full_path = tool_prefix + 'c++filt'

  if not full_path or not os.path.isfile(full_path):
    raise Exception('Bad --tool-prefix. Path not found: %s' % full_path)
  logging.info('Using --tool-prefix=%s', tool_prefix)
  return tool_prefix


def AnalyzeWithArgs(args, input_path):
  return Analyze(input_path, args.output_directory, args.tool_prefix)


def Analyze(path, output_directory=None, tool_prefix=''):
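  # Hypothetical usage (paths are placeholders, not from the original file):
  #   size_info = Analyze('out/Release/chrome.map', 'out/Release')
  #   size_info = Analyze('chrome.size.gz')  # Reload a previously saved file.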
  if file_format.EndsWithMaybeGz(path, '.size'):
    logging.debug('Loading results from: %s', path)
    size_info = file_format.LoadSizeInfo(path)
    # Recompute derived values (padding and function names).
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    logging.info('Deriving signatures')
    # Re-parse out function parameters.
    _NormalizeNames(size_info.symbols.WhereInSection('t'))
    return size_info
  elif not file_format.EndsWithMaybeGz(path, '.map'):
    raise Exception('Expected input to be a .map or a .size')
  else:
    # Verify tool_prefix early.
    tool_prefix = _DetectToolPrefix(tool_prefix, path, output_directory)

    with file_format.OpenMaybeGz(path) as map_file:
      size_info = linker_map_parser.MapFileParser().Parse(map_file)

    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    # For some reason the map file doesn't contain unmangled names for every
    # symbol; _UnmangleRemainingSymbols() logs its own progress message.
    _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
    logging.info('Normalizing names')
    _NormalizeNames(size_info.symbols)
    logging.info('Normalizing paths')
    _NormalizeObjectPaths(size_info.symbols)

    if logging.getLogger().isEnabledFor(logging.INFO):
      for line in describe.DescribeSizeInfoCoverage(size_info):
        logging.info(line)
    logging.info('Finished analyzing %d symbols', len(size_info.symbols))
    return size_info


def main(argv):
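  # Example invocation (illustrative; script and file names are placeholders):
  #   <this script> out/Release/chrome.map chrome.size.gz \
  #       --output-directory=out/Release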
  parser = argparse.ArgumentParser()
  parser.add_argument('input_file', help='Path to input .map file.')
  parser.add_argument('output_file', help='Path to output .size(.gz) file.')
  AddOptions(parser)
  args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
  if not file_format.EndsWithMaybeGz(args.output_file, '.size'):
    parser.error('output_file must end with .size or .size.gz')

  size_info = AnalyzeWithArgs(args, args.input_file)
  logging.info('Saving result to %s', args.output_file)
  file_format.SaveSizeInfo(size_info, args.output_file)

  logging.info('Done')


if __name__ == '__main__':
  sys.exit(main(sys.argv))