| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2017 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Main Python API for analyzing binary size.""" | |
| 7 | |
| 8 import argparse | |
| 9 import calendar | |
| 10 import collections | |
| 11 import datetime | |
| 12 import gzip | |
| 13 import logging | |
| 14 import os | |
| 15 import re | |
| 16 import subprocess | |
| 17 import sys | |
| 18 | |
| 19 import describe | |
| 20 import file_format | |
| 21 import function_signature | |
| 22 import helpers | |
| 23 import linker_map_parser | |
| 24 import models | |
| 25 import ninja_parser | |
| 26 import paths | |
| 27 | |
| 28 | |
| 29 def _OpenMaybeGz(path, mode=None): | |
| 30 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" | |
| 31 if path.endswith('.gz'): | |
| 32 if mode and 'w' in mode: | |
| 33 return gzip.GzipFile(path, mode, 1) | |
| 34 return gzip.open(path, mode) | |
| 35 return open(path, mode or 'r') | |
| 36 | |
| 37 | |
def _UnmangleRemainingSymbols(symbols, tool_prefix):
  """Uses c++filt to unmangle any symbols that need it."""
  # Only Itanium-ABI mangled names (leading "_Z") need demangling.
  mangled = [s for s in symbols if s.name.startswith('_Z')]
  if not mangled:
    return

  logging.info('Unmangling %d names', len(mangled))
  proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE)
  stdout = proc.communicate('\n'.join(s.name for s in mangled))[0]
  assert proc.returncode == 0

  # c++filt emits one demangled name per input line, in order.
  for sym, demangled in zip(mangled, stdout.splitlines()):
    sym.name = demangled
| 52 | |
| 53 | |
def _NormalizeNames(symbols):
  """Ensures that all names are formatted in a useful way.

  This includes:
    - Assigning of |full_name|.
    - Stripping of return types in |full_name| and |name| (for functions).
    - Stripping parameters from |name|.
    - Moving "vtable for" and the like to be suffixes rather than prefixes.

  Mutates each symbol's |name|, |full_name|, and |is_anonymous| in place.
  """
  found_prefixes = set()
  for symbol in symbols:
    if symbol.name.startswith('*'):
      # See comment in _CalculatePadding() about when this
      # can happen.
      continue

    # E.g.: "vtable for FOO" -> "FOO [vtable]".
    # The search is limited to the first 30 chars so only leading prefixes
    # match, not occurrences deep inside a long symbol name.
    idx = symbol.name.find(' for ', 0, 30)
    if idx != -1:
      found_prefixes.add(symbol.name[:idx + 4])
      symbol.name = symbol.name[idx + 5:] + ' [' + symbol.name[:idx] + ']'

    # E.g.: "virtual thunk to FOO" -> "FOO [virtual thunk]".
    # NOTE(review): this runs on the possibly-already-rewritten name from the
    # " for " case above; presumably the two prefixes never co-occur.
    idx = symbol.name.find(' to ', 0, 30)
    if idx != -1:
      found_prefixes.add(symbol.name[:idx + 3])
      symbol.name = symbol.name[idx + 4:] + ' [' + symbol.name[:idx] + ']'

    # Strip out return type, and identify where parameter list starts.
    # (Only done for 't' (code) symbols, which carry function signatures.)
    if symbol.section == 't':
      symbol.full_name, symbol.name = function_signature.Parse(symbol.name)

    # Remove anonymous namespaces (they just harm clustering).
    non_anonymous = symbol.name.replace('(anonymous namespace)::', '')
    if symbol.name != non_anonymous:
      symbol.is_anonymous = True
      symbol.name = non_anonymous
      symbol.full_name = symbol.full_name.replace(
          '(anonymous namespace)::', '')

    # Non-code symbols whose name still contains "(...)": keep the full form
    # in |full_name| and strip the parenthesized part from |name|.
    if symbol.section != 't' and '(' in symbol.name:
      # Pretty rare. Example:
      # blink::CSSValueKeywordsHash::findValueImpl(char const*)::value_word_list
      symbol.full_name = symbol.name
      symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)

    # Don't bother storing both if they are the same.
    if symbol.full_name == symbol.name:
      symbol.full_name = ''

  logging.debug('Found name prefixes of: %r', found_prefixes)
| 105 | |
| 106 | |
| 107 def _NormalizeObjectPaths(symbols): | |
| 108 """Ensures that all paths are formatted in a useful way.""" | |
| 109 for symbol in symbols: | |
| 110 path = symbol.object_path | |
| 111 if path.startswith('obj/'): | |
| 112 # Convert obj/third_party/... -> third_party/... | |
| 113 path = path[4:] | |
| 114 elif path.startswith('../../'): | |
| 115 # Convert ../../third_party/... -> third_party/... | |
| 116 path = path[6:] | |
| 117 if path.endswith(')'): | |
| 118 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o | |
| 119 start_idx = path.index('(') | |
| 120 path = os.path.join(path[:start_idx], path[start_idx + 1:-1]) | |
| 121 symbol.object_path = path | |
| 122 | |
| 123 | |
| 124 def _NormalizeSourcePath(path): | |
| 125 if path.startswith('gen/'): | |
| 126 # Convert gen/third_party/... -> third_party/... | |
| 127 return path[4:] | |
| 128 if path.startswith('../../'): | |
| 129 # Convert ../../third_party/... -> third_party/... | |
| 130 return path[6:] | |
| 131 return path | |
| 132 | |
| 133 | |
def _ExtractSourcePaths(symbols, output_directory):
  """Fills in the .source_path attribute of all symbols.

  Returns True if source paths were found.
  """
  mapper = ninja_parser.SourceFileMapper(output_directory)
  all_found = True
  for symbol in symbols:
    object_path = symbol.object_path
    # Skip symbols that are already mapped or have no object file at all.
    if symbol.source_path or not object_path:
      continue
    # We don't have source info for prebuilt .a files.
    if object_path.startswith('..'):
      continue
    source_path = mapper.FindSourceForPath(object_path)
    if source_path:
      symbol.source_path = _NormalizeSourcePath(source_path)
    else:
      all_found = False
      logging.warning('Could not find source path for %s', object_path)
  logging.debug('Parsed %d .ninja files.', mapper.GetParsedFileCount())
  return all_found
| 156 | |
| 157 | |
def _CalculatePadding(symbols):
  """Populates the |padding| field based on symbol addresses.

  Padding is the address gap between a symbol and the end of its predecessor
  within the same section; it is also folded into |symbol.size|.

  Symbols must already be sorted by |address|.
  """
  seen_sections = []
  # |symbol| is symbols[i + 1]; symbols[i] is its immediate predecessor.
  for i, symbol in enumerate(symbols[1:]):
    prev_symbol = symbols[i]
    if prev_symbol.section_name != symbol.section_name:
      # First symbol of a new section gets no padding; also verify that each
      # section appears as one contiguous run in the input.
      assert symbol.section_name not in seen_sections, (
          'Input symbols must be sorted by section, then address.')
      seen_sections.append(symbol.section_name)
      continue
    # Skip symbols without a meaningful (positive) address.
    if symbol.address <= 0 or prev_symbol.address <= 0:
      continue
    # Padding-only symbols happen for ** symbol gaps.
    prev_is_padding_only = prev_symbol.size_without_padding == 0
    if symbol.address == prev_symbol.address and not prev_is_padding_only:
      assert False, 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol)
    # Even with symbols at the same address removed, overlaps can still
    # happen. In this case, padding will be negative (and this is fine).
    padding = symbol.address - prev_symbol.end_address
    # These thresholds were found by manually auditing arm32 Chrome.
    # E.g.: Set them to 0 and see what warnings get logged.
    # TODO(agrieve): See if these thresholds make sense for architectures
    # other than arm32.
    if not symbol.name.startswith('*') and (
        symbol.section in 'rd' and padding >= 256 or
        symbol.section in 't' and padding >= 64):
      # Suspiciously large gap: log it and do NOT attribute the padding to
      # this symbol (its |padding| stays at its default).
      # For nm data, this is caused by data that has no associated symbol.
      # The linker map file lists them with no name, but with a file.
      # Example:
      #   .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
      # Where as most look like:
      #   .data.MANGLED_NAME...
      logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (
          padding, prev_symbol, symbol))
      continue
    symbol.padding = padding
    symbol.size += padding
    assert symbol.size >= 0, (
        'Symbol has negative size (likely not sorted propertly): '
        '%r\nprev symbol: %r' % (symbol, prev_symbol))
| 201 | |
| 202 | |
def _ClusterSymbols(symbols):
  """Returns a new list of symbols with some symbols moved into groups.

  The result preserves address order: each group sits at the position of its
  first member, and grouped symbols are removed from the top level.

  Groups include:
   * Symbols that have [clone] in their name (created by compiler
     optimization). The non-clone original, when present, joins the group.
   * Star symbols (such as "** merge strings", and "** symbol gap").

  Args:
    symbols: List of symbols, sorted by section then address.

  Returns:
    A new list containing ungrouped symbols and models.SymbolGroup entries.
  """
  # http://unix.stackexchange.com/questions/223013/function-symbol-gets-part-suffix-after-compilation
  # Example name suffixes:
  #     [clone .part.322]
  #     [clone .isra.322]
  #     [clone .constprop.1064]

  # Step 1: Create name map, find clones, collect star syms into replacements.
  logging.debug('Creating name -> symbol map')
  clone_indices = []
  indices_by_full_name = {}
  # (name, full_name) -> [(index, sym),...]
  replacements_by_name = collections.defaultdict(list)
  for i, symbol in enumerate(symbols):
    if symbol.name.startswith('**'):
      # "symbol gap 3" -> "symbol gaps"
      name = re.sub(r'\s+\d+$', 's', symbol.name)
      replacements_by_name[(name, None)].append((i, symbol))
    elif symbol.full_name:
      if symbol.full_name.endswith(']') and ' [clone ' in symbol.full_name:
        clone_indices.append(i)
      else:
        indices_by_full_name[symbol.full_name] = i

  # Step 2: Collect same-named clone symbols.
  logging.debug('Grouping all clones')
  for i in clone_indices:
    symbol = symbols[i]
    # Multiple attributes could exist, so search from left-to-right.
    stripped_name = symbol.name[:symbol.name.index(' [clone ')]
    stripped_full_name = symbol.full_name[:symbol.full_name.index(' [clone ')]
    name_tup = (stripped_name, stripped_full_name)
    replacement_list = replacements_by_name[name_tup]

    if not replacement_list:
      # First occurance, check for non-clone symbol.
      # Bug fix: |indices_by_full_name| is keyed by full_name, so the lookup
      # must use |stripped_full_name|. Using the parameter-less
      # |stripped_name| would (almost) never match a full_name key.
      non_clone_idx = indices_by_full_name.get(stripped_full_name)
      if non_clone_idx is not None:
        replacement_list.append((non_clone_idx, symbols[non_clone_idx]))

    replacement_list.append((i, symbol))

  # Step 3: Undo clustering when length=1.
  # Removing these groups means Diff() logic must know about [clone] suffix.
  to_clear = [name_tup for name_tup, replacement_list
              in replacements_by_name.items()
              if len(replacement_list) == 1]
  for name_tup in to_clear:
    del replacements_by_name[name_tup]

  # Step 4: Replace first symbol from each cluster with a SymbolGroup.
  # (Using items()/values() rather than Python-2-only iteritems()/itervalues()
  # keeps this portable; behavior is identical.)
  before_symbol_count = sum(len(x) for x in replacements_by_name.values())
  logging.debug('Creating %d symbol groups from %d symbols. %d clones had only '
                'one symbol.', len(replacements_by_name), before_symbol_count,
                len(to_clear))

  # Each group of k symbols collapses into 1 entry, so the output list is
  # shorter by (k - 1) per group.
  len_delta = len(replacements_by_name) - before_symbol_count
  grouped_symbols = [None] * (len(symbols) + len_delta)
  dest_index = 0
  src_index = 0
  seen_names = set()
  replacement_names_by_index = {}
  for name_tup, replacement_list in replacements_by_name.items():
    for tup in replacement_list:
      replacement_names_by_index[tup[0]] = name_tup

  sorted_items = sorted(replacement_names_by_index.items(),
                        key=lambda tup: tup[0])
  for index, name_tup in sorted_items:
    # Bulk-copy the ungrouped symbols that precede this grouped one.
    count = index - src_index
    grouped_symbols[dest_index:dest_index + count] = (
        symbols[src_index:src_index + count])
    src_index = index + 1
    dest_index += count
    if name_tup not in seen_names:
      # First member of the group: emit the SymbolGroup at this position.
      # Later members are skipped (they live inside the group).
      seen_names.add(name_tup)
      group_symbols = [tup[1] for tup in replacements_by_name[name_tup]]
      grouped_symbols[dest_index] = models.SymbolGroup(
          group_symbols, name=name_tup[0], full_name=name_tup[1],
          section_name=group_symbols[0].section_name)
      dest_index += 1

  # Copy the tail of ungrouped symbols.
  assert len(grouped_symbols[dest_index:]) == len(symbols[src_index:])
  grouped_symbols[dest_index:] = symbols[src_index:]
  logging.debug('Finished making groups.')
  return grouped_symbols
| 300 | |
| 301 | |
def LoadAndPostProcessSizeInfo(path):
  """Returns a SizeInfo for the given |path|.

  Loads a serialized size info via file_format, then applies the same
  post-processing (name normalization, padding, clustering) that
  _PostProcessSizeInfo() performs when creating one from scratch.
  """
  logging.debug('Loading results from: %s', path)
  size_info = file_format.LoadSizeInfo(path)
  _PostProcessSizeInfo(size_info)
  return size_info
| 308 | |
| 309 | |
def _PostProcessSizeInfo(size_info):
  """Derives |size_info.symbols| from |size_info.raw_symbols|.

  Runs, in order: name normalization, padding calculation (requires sorted
  symbols), and clustering of clones/star symbols into a SymbolGroup.
  """
  logging.info('Normalizing symbol names')
  _NormalizeNames(size_info.raw_symbols)
  logging.info('Calculating padding')
  _CalculatePadding(size_info.raw_symbols)
  logging.info('Grouping decomposed functions')
  size_info.symbols = models.SymbolGroup(
      _ClusterSymbols(size_info.raw_symbols))
  logging.info('Processed %d symbols', len(size_info.raw_symbols))
| 319 | |
| 320 | |
def CreateSizeInfo(map_path, lazy_paths=None, no_source_paths=False,
                   raw_only=False):
  """Creates a SizeInfo from the given map file.

  Args:
    map_path: Path to a linker .map file (possibly .gz-compressed).
    lazy_paths: paths.LazyPaths instance; required unless |no_source_paths|
        (supplies the output directory for .ninja parsing and the tool
        prefix for c++filt).
    no_source_paths: When True, skip mapping object_path -> source_path
        (and the output-directory / tool-prefix verification).
    raw_only: When True, only normalize names and skip the rest of
        _PostProcessSizeInfo() (padding, clustering).

  Returns:
    A models.SizeInfo.
  """
  if not no_source_paths:
    # output_directory needed for source file information.
    lazy_paths.VerifyOutputDirectory()
    # tool_prefix needed for c++filt.
    lazy_paths.VerifyToolPrefix()

  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if not no_source_paths:
    logging.info('Extracting source paths from .ninja files')
    all_found = _ExtractSourcePaths(raw_symbols, lazy_paths.output_directory)
    assert all_found, (
        'One or more source file paths could not be found. Likely caused by '
        '.ninja files being generated at a different time than the .map file.')
  # Map file for some reason doesn't unmangle all names.
  # Unmangle prints its own log statement.
  _UnmangleRemainingSymbols(raw_symbols, lazy_paths.tool_prefix)
  logging.info('Normalizing object paths')
  _NormalizeObjectPaths(raw_symbols)
  size_info = models.SizeInfo(section_sizes, raw_symbols)

  # Name normalization not strictly required, but makes for smaller files.
  if raw_only:
    logging.info('Normalizing symbol names')
    _NormalizeNames(size_info.raw_symbols)
  else:
    _PostProcessSizeInfo(size_info)

  if logging.getLogger().isEnabledFor(logging.DEBUG):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  return size_info
| 359 | |
| 360 | |
| 361 def _DetectGitRevision(directory): | |
| 362 try: | |
| 363 git_rev = subprocess.check_output( | |
| 364 ['git', '-C', directory, 'rev-parse', 'HEAD']) | |
| 365 return git_rev.rstrip() | |
| 366 except Exception: | |
| 367 logging.warning('Failed to detect git revision for file metadata.') | |
| 368 return None | |
| 369 | |
| 370 | |
def BuildIdFromElf(elf_path, tool_prefix):
  """Returns the GNU Build ID reported by `readelf -n` for |elf_path|."""
  readelf_cmd = [tool_prefix + 'readelf', '-n', elf_path]
  notes_output = subprocess.check_output(readelf_cmd)
  build_id_match = re.search(r'Build ID: (\w+)', notes_output)
  assert build_id_match, (
      'Build ID not found from running: ' + ' '.join(readelf_cmd))
  return build_id_match.group(1)
| 377 | |
| 378 | |
def _SectionSizesFromElf(elf_path, tool_prefix):
  """Returns a {section_name: size} dict parsed from `readelf -S` output."""
  readelf_cmd = [tool_prefix + 'readelf', '-S', '--wide', elf_path]
  output = subprocess.check_output(readelf_cmd)
  section_sizes = {}
  # Each section row looks like:
  # [ 2] .hash HASH 00000000006681f0 0001f0 003154 04 A 3 0 8
  # group(1) captures everything from the section name (starting with '.').
  for row_match in re.finditer(r'\[[\s\d]+\] (\..*)$', output, re.MULTILINE):
    fields = row_match.group(1).split()
    # fields[0] is the name; fields[4] is the hex size column.
    section_sizes[fields[0]] = int(fields[4], 16)
  return section_sizes
| 388 | |
| 389 | |
| 390 def _ParseGnArgs(args_path): | |
| 391 """Returns a list of normalized "key=value" strings.""" | |
| 392 args = {} | |
| 393 with open(args_path) as f: | |
| 394 for l in f: | |
| 395 # Strips #s even if within string literal. Not a problem in practice. | |
| 396 parts = l.split('#')[0].split('=') | |
| 397 if len(parts) != 2: | |
| 398 continue | |
| 399 args[parts[0].strip()] = parts[1].strip() | |
| 400 return ["%s=%s" % x for x in sorted(args.iteritems())] | |
| 401 | |
| 402 | |
def main(argv):
  """Command-line entry point: builds a .size file from an ELF + .map file.

  Args:
    argv: Full program argument list (argv[0] is the program name).

  Returns:
    None on success; exits via parser.error() on bad arguments.
  """
  # Bug fix: ArgumentParser()'s first positional parameter is |prog| (a
  # string), not the argument list. Passing |argv| rendered the whole list in
  # usage/help text. Argument parsing itself happens via
  # helpers.AddCommonOptionsAndParseArgs(parser, argv) below.
  parser = argparse.ArgumentParser()
  parser.add_argument('--elf-file', required=True,
                      help='Path to input ELF file. Currently used for '
                           'capturing metadata. Pass "" to skip metadata '
                           'collection.')
  parser.add_argument('--map-file',
                      help='Path to input .map(.gz) file. Defaults to '
                           '{{elf_file}}.map(.gz)?')
  parser.add_argument('--output-file', required=True,
                      help='Path to output .size file.')
  parser.add_argument('--no-source-paths', action='store_true',
                      help='Do not use .ninja files to map '
                           'object_path -> source_path')
  paths.AddOptions(parser)
  args = helpers.AddCommonOptionsAndParseArgs(parser, argv)
  if not args.output_file.endswith('.size'):
    parser.error('output_file must end with .size')

  # Resolve the .map path: explicit --map-file, else derive from the ELF.
  if args.map_file:
    if (not args.map_file.endswith('.map')
        and not args.map_file.endswith('.map.gz')):
      parser.error('Expected --map-file to end with .map or .map.gz')
    map_file_path = args.map_file
  else:
    map_file_path = args.elf_file + '.map'
    if not os.path.exists(map_file_path):
      map_file_path += '.gz'
    if not os.path.exists(map_file_path):
      parser.error('Could not find .map(.gz)? file. Use --map-file.')

  lazy_paths = paths.LazyPaths(args=args, input_file=args.elf_file)
  metadata = None
  if args.elf_file:
    logging.debug('Constructing metadata')
    git_rev = _DetectGitRevision(os.path.dirname(args.elf_file))
    build_id = BuildIdFromElf(args.elf_file, lazy_paths.tool_prefix)
    timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime(
        args.elf_file))
    timestamp = calendar.timegm(timestamp_obj.timetuple())
    gn_args = _ParseGnArgs(os.path.join(lazy_paths.output_directory, 'args.gn'))

    def relative_to_out(path):
      return os.path.relpath(path, lazy_paths.VerifyOutputDirectory())

    metadata = {
        models.METADATA_GIT_REVISION: git_rev,
        models.METADATA_MAP_FILENAME: relative_to_out(map_file_path),
        models.METADATA_ELF_FILENAME: relative_to_out(args.elf_file),
        models.METADATA_ELF_MTIME: timestamp,
        models.METADATA_ELF_BUILD_ID: build_id,
        models.METADATA_GN_ARGS: gn_args,
    }

  size_info = CreateSizeInfo(map_file_path, lazy_paths,
                             no_source_paths=args.no_source_paths,
                             raw_only=True)

  if metadata:
    size_info.metadata = metadata
    # Sanity check: section sizes from the .map file must agree with the
    # sizes readelf reports for the ELF.
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(args.elf_file,
                                             lazy_paths.tool_prefix)
    # items() rather than Python-2-only iteritems() for portability.
    for k, v in elf_section_sizes.items():
      assert v == size_info.section_sizes.get(k), (
          'ELF file and .map file do not match.')

  logging.info('Recording metadata: \n %s',
               '\n '.join(describe.DescribeMetadata(size_info.metadata)))
  logging.info('Saving result to %s', args.output_file)
  file_format.SaveSizeInfo(size_info, args.output_file)
  logging.info('Done')
| 476 | |
# Script entry point; exit status is main()'s return value.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
| OLD | NEW |