Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(217)

Side by Side Diff: tools/binary_size/libsupersize/archive.py

Issue 2851473003: supersize: Track symbol aliases and shared symbols (Closed)
Patch Set: tweak nm interface Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2017 The Chromium Authors. All rights reserved. 1 # Copyright 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """Main Python API for analyzing binary size.""" 5 """Main Python API for analyzing binary size."""
6 6
7 import argparse 7 import argparse
8 import calendar 8 import calendar
9 import collections 9 import collections
10 import datetime 10 import datetime
11 import gzip 11 import gzip
12 import logging 12 import logging
13 import os 13 import os
14 import posixpath 14 import posixpath
15 import re 15 import re
16 import subprocess 16 import subprocess
17 import sys 17 import sys
18 import tempfile 18 import tempfile
19 import zipfile 19 import zipfile
20 20
21 import concurrent
21 import describe 22 import describe
22 import file_format 23 import file_format
23 import function_signature 24 import function_signature
24 import helpers
25 import linker_map_parser 25 import linker_map_parser
26 import models 26 import models
27 import ninja_parser 27 import ninja_parser
28 import nm
28 import paths 29 import paths
29 30
30 31
31 def _OpenMaybeGz(path, mode=None): 32 def _OpenMaybeGz(path, mode=None):
32 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" 33 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`."""
33 if path.endswith('.gz'): 34 if path.endswith('.gz'):
34 if mode and 'w' in mode: 35 if mode and 'w' in mode:
35 return gzip.GzipFile(path, mode, 1) 36 return gzip.GzipFile(path, mode, 1)
36 return gzip.open(path, mode) 37 return gzip.open(path, mode)
37 return open(path, mode or 'r') 38 return open(path, mode or 'r')
(...skipping 13 matching lines...) Expand all
51 symbol.flags |= models.FLAG_UNLIKELY 52 symbol.flags |= models.FLAG_UNLIKELY
52 symbol.name = name[9:] 53 symbol.name = name[9:]
53 elif name.startswith('rel.local.'): 54 elif name.startswith('rel.local.'):
54 symbol.flags |= models.FLAG_REL_LOCAL 55 symbol.flags |= models.FLAG_REL_LOCAL
55 symbol.name = name[10:] 56 symbol.name = name[10:]
56 elif name.startswith('rel.'): 57 elif name.startswith('rel.'):
57 symbol.flags |= models.FLAG_REL 58 symbol.flags |= models.FLAG_REL
58 symbol.name = name[4:] 59 symbol.name = name[4:]
59 60
60 61
62 def _UnmangleNames(names, tool_prefix):
estevenson 2017/04/28 17:06:11 revert this part
agrieve 2017/04/28 19:26:58 Done.
63 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE,
64 stdout=subprocess.PIPE)
65 stdout = proc.communicate('\n'.join(names))[0]
66 assert proc.returncode == 0
67 return stdout.splitlines()
68
69
61 def _UnmangleRemainingSymbols(symbols, tool_prefix): 70 def _UnmangleRemainingSymbols(symbols, tool_prefix):
62 """Uses c++filt to unmangle any symbols that need it.""" 71 """Uses c++filt to unmangle any symbols that need it."""
63 to_process = [s for s in symbols if s.name.startswith('_Z')] 72 to_process = [s for s in symbols if s.name.startswith('_Z')]
64 if not to_process: 73 if not to_process:
65 return 74 return
66 75
67 logging.info('Unmangling %d names', len(to_process)) 76 logging.info('Unmangling %d names', len(to_process))
68 proc = subprocess.Popen([tool_prefix + 'c++filt'], stdin=subprocess.PIPE, 77 unmangled = _UnmangleNames((s.name for s in to_process), tool_prefix)
69 stdout=subprocess.PIPE) 78 for i, line in enumerate(unmangled):
70 stdout = proc.communicate('\n'.join(s.name for s in to_process))[0]
71 assert proc.returncode == 0
72
73 for i, line in enumerate(stdout.splitlines()):
74 to_process[i].name = line 79 to_process[i].name = line
75 80
76 81
77 def _NormalizeNames(symbols): 82 def _NormalizeNames(symbols):
78 """Ensures that all names are formatted in a useful way. 83 """Ensures that all names are formatted in a useful way.
79 84
80 This includes: 85 This includes:
81 - Assigning of |full_name|. 86 - Assigning of |full_name|.
82 - Stripping of return types in |full_name| and |name| (for functions). 87 - Stripping of return types in |full_name| and |name| (for functions).
83 - Stripping parameters from |name|. 88 - Stripping parameters from |name|.
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
120 symbol.full_name = symbol.name 125 symbol.full_name = symbol.name
121 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name) 126 symbol.name = re.sub(r'\(.*\)', '', symbol.full_name)
122 127
123 # Don't bother storing both if they are the same. 128 # Don't bother storing both if they are the same.
124 if symbol.full_name == symbol.name: 129 if symbol.full_name == symbol.name:
125 symbol.full_name = '' 130 symbol.full_name = ''
126 131
127 logging.debug('Found name prefixes of: %r', found_prefixes) 132 logging.debug('Found name prefixes of: %r', found_prefixes)
128 133
129 134
130 def _NormalizeObjectPaths(symbols): 135 def _NormalizeObjectPath(path):
131 """Ensures that all paths are formatted in a useful way.""" 136 if path.startswith('obj/'):
132 for symbol in symbols: 137 # Convert obj/third_party/... -> third_party/...
133 path = symbol.object_path 138 path = path[4:]
134 if path.startswith('obj/'): 139 elif path.startswith('../../'):
135 # Convert obj/third_party/... -> third_party/... 140 # Convert ../../third_party/... -> third_party/...
136 path = path[4:] 141 path = path[6:]
137 elif path.startswith('../../'): 142 if path.endswith(')'):
138 # Convert ../../third_party/... -> third_party/... 143 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o
139 path = path[6:] 144 start_idx = path.index('(')
140 if path.endswith(')'): 145 path = os.path.join(path[:start_idx], path[start_idx + 1:-1])
141 # Convert foo/bar.a(baz.o) -> foo/bar.a/baz.o 146 return path
142 start_idx = path.index('(')
143 path = os.path.join(path[:start_idx], path[start_idx + 1:-1])
144 symbol.object_path = path
145 147
146 148
147 def _NormalizeSourcePath(path): 149 def _NormalizeSourcePath(path):
148 if path.startswith('gen/'): 150 if path.startswith('gen/'):
149 # Convert gen/third_party/... -> third_party/... 151 # Convert gen/third_party/... -> third_party/...
150 return path[4:] 152 return path[4:]
151 if path.startswith('../../'): 153 if path.startswith('../../'):
152 # Convert ../../third_party/... -> third_party/... 154 # Convert ../../third_party/... -> third_party/...
153 return path[6:] 155 return path[6:]
154 return path 156 return path
155 157
156 158
159 def _SourcePathForObjectPath(object_path, source_mapper):
160 # We don't have source info for prebuilt .a files.
161 if not os.path.isabs(object_path) and not object_path.startswith('..'):
162 source_path = source_mapper.FindSourceForPath(object_path)
163 if source_path:
164 return _NormalizeSourcePath(source_path)
165 return ''
166
167
157 def _ExtractSourcePaths(symbols, source_mapper): 168 def _ExtractSourcePaths(symbols, source_mapper):
158 """Fills in the .source_path attribute of all symbols.""" 169 """Fills in the |source_path| attribute."""
159 logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count) 170 logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
160
161 for symbol in symbols: 171 for symbol in symbols:
162 object_path = symbol.object_path 172 object_path = symbol.object_path
163 if symbol.source_path or not object_path: 173 if object_path and not symbol.source_path:
174 symbol.source_path = _SourcePathForObjectPath(object_path, source_mapper)
175
176
177 def _ComputeAnscestorPath(path_list):
178 """Returns the common anscestor of the given paths."""
179 # Ignore missing paths.
180 path_list = [p for p in path_list if p]
181 prefix = os.path.commonprefix(path_list)
 182   # Put the path count as a subdirectory to allow for better grouping in
 183   # path-based breakdowns.
184 if not prefix:
185 if len(path_list) < 2:
186 return ''
187 return os.path.join('{shared}', str(len(path_list)))
188 if prefix == path_list[0]:
189 return prefix
190 assert len(path_list) > 1, 'path_list: ' + repr(path_list)
191 return os.path.join(os.path.dirname(prefix), '{shared}', str(len(path_list)))
192
193
194 # This must normalize object paths at the same time because normalization
195 # needs to occur before finding common ancestor.
196 def _ComputeAnscestorPathsAndNormalizeObjectPaths(
197 symbols, object_paths_by_name, source_mapper):
198 num_found_paths = 0
199 num_unknown_names = 0
200 num_path_mismatches = 0
201 num_unmatched_aliases = 0
202 for symbol in symbols:
203 name = symbol.name
204 if (symbol.IsBss() or
205 not name or
206 name[0] in '*.' or # e.g. ** merge symbols, .Lswitch.table
207 name == 'startup'):
208 symbol.object_path = _NormalizeObjectPath(symbol.object_path)
164 continue 209 continue
165 # We don't have source info for prebuilt .a files. 210
166 if not os.path.isabs(object_path) and not object_path.startswith('..'): 211 object_paths = object_paths_by_name.get(name)
167 source_path = source_mapper.FindSourceForPath(object_path) 212 if object_paths:
168 if source_path: 213 num_found_paths += 1
169 symbol.source_path = _NormalizeSourcePath(source_path) 214 else:
215 if not symbol.object_path and symbol.aliases:
216 # Happens when aliases are from object files where all symbols were
217 # pruned or de-duped as aliases. Since we are only scanning .o files
218 # referenced by included symbols, such files are missed.
219 # TODO(agrieve): This could be fixed by retrieving linker inputs from
220 # build.ninja, or by looking for paths within the .map file's
221 # discarded sections.
222 num_unmatched_aliases += 1
223 continue
224 if num_unknown_names < 10:
225 logging.warning('Symbol not found in any .o files: %r', symbol)
226 num_unknown_names += 1
227 symbol.object_path = _NormalizeObjectPath(symbol.object_path)
228 continue
229
230 if symbol.object_path and symbol.object_path not in object_paths:
231 if num_path_mismatches < 10:
232 logging.warning('Symbol path reported by .map not found by nm.')
233 logging.warning('sym=%r', symbol)
234 logging.warning('paths=%r', object_paths)
235 num_path_mismatches += 1
236
237 if source_mapper:
238 source_paths = [
239 _SourcePathForObjectPath(p, source_mapper) for p in object_paths]
240 symbol.source_path = _ComputeAnscestorPath(source_paths)
241
242 object_paths = [_NormalizeObjectPath(p) for p in object_paths]
243 symbol.object_path = _ComputeAnscestorPath(object_paths)
244
245 logging.debug('Cross-referenced %d symbols with nm output. '
246 'num_unknown_names=%d num_path_mismatches=%d '
247 'num_unused_aliases=%d', num_found_paths, num_unknown_names,
248 num_path_mismatches, num_unmatched_aliases)
249
250
251 def _DiscoverMissedObjectPaths(symbols, elf_object_paths):
252 # Missing object paths are caused by .a files added by -l flags, which are not
253 # listed as explicit inputs within .ninja rules.
254 parsed_inputs = set(elf_object_paths)
255 missed_inputs = set()
256 for symbol in symbols:
257 path = symbol.object_path
258 if path.endswith(')'):
259 # Convert foo/bar.a(baz.o) -> foo/bar.a
260 path = path[:path.index('(')]
261 if path and path not in parsed_inputs:
262 missed_inputs.add(path)
263 return missed_inputs
170 264
171 265
172 def _CalculatePadding(symbols): 266 def _CalculatePadding(symbols):
173 """Populates the |padding| field based on symbol addresses. 267 """Populates the |padding| field based on symbol addresses.
174 268
175 Symbols must already be sorted by |address|. 269 Symbols must already be sorted by |address|.
176 """ 270 """
177 seen_sections = [] 271 seen_sections = []
178 for i, symbol in enumerate(symbols[1:]): 272 for i, symbol in enumerate(symbols[1:]):
179 prev_symbol = symbols[i] 273 prev_symbol = symbols[i]
180 if prev_symbol.section_name != symbol.section_name: 274 if prev_symbol.section_name != symbol.section_name:
181 assert symbol.section_name not in seen_sections, ( 275 assert symbol.section_name not in seen_sections, (
182 'Input symbols must be sorted by section, then address.') 276 'Input symbols must be sorted by section, then address.')
183 seen_sections.append(symbol.section_name) 277 seen_sections.append(symbol.section_name)
184 continue 278 continue
185 if symbol.address <= 0 or prev_symbol.address <= 0: 279 if symbol.address <= 0 or prev_symbol.address <= 0:
186 continue 280 continue
187 # Padding-only symbols happen for ** symbol gaps. 281
188 prev_is_padding_only = prev_symbol.size_without_padding == 0 282 if symbol.address == prev_symbol.address:
189 if symbol.address == prev_symbol.address and not prev_is_padding_only: 283 # Padding-only symbols happen for ** symbol gaps.
190 assert False, 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol) 284 prev_is_padding_only = prev_symbol.size_without_padding == 0
estevenson 2017/04/28 17:06:11 This would be better to check that the symbol is i
agrieve 2017/04/28 19:26:58 Done.
191 # Even with symbols at the same address removed, overlaps can still 285 if not prev_is_padding_only:
192 # happen. In this case, padding will be negative (and this is fine). 286 # Must be an alias. Clone its padding.
287 assert symbol.aliases, (
288 'Found duplicate symbols:\n%r\n%r' % (prev_symbol, symbol))
289 symbol.padding = prev_symbol.padding
290 symbol.size = prev_symbol.size
291 continue
292
193 padding = symbol.address - prev_symbol.end_address 293 padding = symbol.address - prev_symbol.end_address
194 # These thresholds were found by manually auditing arm32 Chrome. 294 # These thresholds were found by experimenting with arm32 Chrome.
195 # E.g.: Set them to 0 and see what warnings get logged. 295 # E.g.: Set them to 0 and see what warnings get logged, then take max value.
196 # TODO(agrieve): See if these thresholds make sense for architectures 296 # TODO(agrieve): See if these thresholds make sense for architectures
197 # other than arm32. 297 # other than arm32.
198 if not symbol.name.startswith('*') and ( 298 if not symbol.name.startswith('*') and (
199 symbol.section in 'rd' and padding >= 256 or 299 symbol.section in 'rd' and padding >= 256 or
200 symbol.section in 't' and padding >= 64): 300 symbol.section in 't' and padding >= 64):
201 # For nm data, this is caused by data that has no associated symbol. 301 # Should not happen.
202 # The linker map file lists them with no name, but with a file. 302 logging.warning('Large padding of %d between:\n A) %r\n B) %r' % (
203 # Example: 303 padding, prev_symbol, symbol))
204 # .data 0x02d42764 0x120 .../V8SharedWorkerGlobalScope.o
205 # Where as most look like:
206 # .data.MANGLED_NAME...
207 logging.debug('Large padding of %d between:\n A) %r\n B) %r' % (
208 padding, prev_symbol, symbol))
209 continue
210 symbol.padding = padding 304 symbol.padding = padding
211 symbol.size += padding 305 symbol.size += padding
212 assert symbol.size >= 0, ( 306 assert symbol.size >= 0, (
 213         'Symbol has negative size (likely not sorted properly): ' 307         'Symbol has negative size (likely not sorted properly): '
214 '%r\nprev symbol: %r' % (symbol, prev_symbol)) 308 '%r\nprev symbol: %r' % (symbol, prev_symbol))
215 309
216 310
217 def _ClusterSymbols(symbols): 311 def _ClusterSymbols(symbols):
218 """Returns a new list of symbols with some symbols moved into groups. 312 """Returns a new list of symbols with some symbols moved into groups.
219 313
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
310 group_symbols, name=name_tup[0], full_name=name_tup[1], 404 group_symbols, name=name_tup[0], full_name=name_tup[1],
311 section_name=group_symbols[0].section_name) 405 section_name=group_symbols[0].section_name)
312 dest_index += 1 406 dest_index += 1
313 407
314 assert len(grouped_symbols[dest_index:None]) == len(symbols[src_index:None]) 408 assert len(grouped_symbols[dest_index:None]) == len(symbols[src_index:None])
315 grouped_symbols[dest_index:None] = symbols[src_index:None] 409 grouped_symbols[dest_index:None] = symbols[src_index:None]
316 logging.debug('Finished making groups.') 410 logging.debug('Finished making groups.')
317 return grouped_symbols 411 return grouped_symbols
318 412
319 413
414 def _AddSymbolAliases(symbols, aliases_by_address):
415 # Step 1: Create list of (index_of_symbol, name_list).
416 logging.debug('Creating alias list')
417 replacements = []
418 num_new_symbols = 0
419 for i, s in enumerate(symbols):
420 # Don't alias padding-only symbols (e.g. ** symbol gap)
421 if s.size_without_padding == 0:
422 continue
423 name_list = aliases_by_address.get(s.address)
424 if name_list:
425 if s.name not in name_list:
426 logging.warning('Name missing from aliases: %s %s', s.name, name_list)
427 continue
428 replacements.append((i, name_list))
429 num_new_symbols += len(name_list) - 1
430
431 # Step 2: Create new symbols as siblings to each existing one.
432 logging.debug('Creating %d aliases', num_new_symbols)
433 src_cursor_end = len(symbols)
434 symbols += [None] * num_new_symbols
435 dst_cursor_end = len(symbols)
436 for src_index, name_list in reversed(replacements):
437 # Copy over symbols that come after the current one.
438 chunk_size = src_cursor_end - src_index - 1
439 dst_cursor_end -= chunk_size
440 src_cursor_end -= chunk_size
441 symbols[dst_cursor_end:dst_cursor_end + chunk_size] = (
442 symbols[src_cursor_end:src_cursor_end + chunk_size])
443 sym = symbols[src_index]
444 src_cursor_end -= 1
445
446 # Create aliases (does not bother reusing the existing symbol).
447 aliases = [None] * len(name_list)
448 for i, name in enumerate(name_list):
449 aliases[i] = models.Symbol(
450 sym.section_name, sym.size, address=sym.address, name=name,
451 aliases=aliases)
452
453 dst_cursor_end -= len(aliases)
454 symbols[dst_cursor_end:dst_cursor_end + len(aliases)] = aliases
455
456 assert dst_cursor_end == src_cursor_end
457
458
320 def LoadAndPostProcessSizeInfo(path): 459 def LoadAndPostProcessSizeInfo(path):
321 """Returns a SizeInfo for the given |path|.""" 460 """Returns a SizeInfo for the given |path|."""
322 logging.debug('Loading results from: %s', path) 461 logging.debug('Loading results from: %s', path)
323 size_info = file_format.LoadSizeInfo(path) 462 size_info = file_format.LoadSizeInfo(path)
324 _PostProcessSizeInfo(size_info) 463 _PostProcessSizeInfo(size_info)
325 return size_info 464 return size_info
326 465
327 466
328 def _PostProcessSizeInfo(size_info): 467 def _PostProcessSizeInfo(size_info):
329 logging.info('Normalizing symbol names') 468 logging.info('Normalizing symbol names')
330 _NormalizeNames(size_info.raw_symbols) 469 _NormalizeNames(size_info.raw_symbols)
331 logging.info('Calculating padding') 470 logging.info('Calculating padding')
332 _CalculatePadding(size_info.raw_symbols) 471 _CalculatePadding(size_info.raw_symbols)
333 logging.info('Grouping decomposed functions') 472 logging.info('Grouping decomposed functions')
334 size_info.symbols = models.SymbolGroup( 473 size_info.symbols = models.SymbolGroup(
335 _ClusterSymbols(size_info.raw_symbols)) 474 _ClusterSymbols(size_info.raw_symbols))
336 logging.info('Processed %d symbols', len(size_info.raw_symbols)) 475 logging.info('Processed %d symbols', len(size_info.raw_symbols))
337 476
338 477
339 def CreateSizeInfo(map_path, lazy_paths=None, no_source_paths=False, 478 def CreateMetadata(map_path, elf_path, apk_path, tool_prefix, output_directory):
479 metadata = None
480 if elf_path:
481 logging.debug('Constructing metadata')
482 git_rev = _DetectGitRevision(os.path.dirname(elf_path))
483 architecture = _ArchFromElf(elf_path, tool_prefix)
484 build_id = BuildIdFromElf(elf_path, tool_prefix)
485 timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime(
486 elf_path))
487 timestamp = calendar.timegm(timestamp_obj.timetuple())
488
489 metadata = {
490 models.METADATA_GIT_REVISION: git_rev,
491 models.METADATA_ELF_ARCHITECTURE: architecture,
492 models.METADATA_ELF_MTIME: timestamp,
493 models.METADATA_ELF_BUILD_ID: build_id,
494 }
495
496 if output_directory:
497 relative_to_out = lambda path: os.path.relpath(path, output_directory)
498 gn_args = _ParseGnArgs(os.path.join(output_directory, 'args.gn'))
499 metadata[models.METADATA_MAP_FILENAME] = relative_to_out(map_path)
500 metadata[models.METADATA_ELF_FILENAME] = relative_to_out(elf_path)
501 metadata[models.METADATA_GN_ARGS] = gn_args
502
503 if apk_path:
504 metadata[models.METADATA_APK_FILENAME] = relative_to_out(apk_path)
505 return metadata
506
507
508 def CreateSizeInfo(map_path, elf_path, tool_prefix, output_directory,
340 raw_only=False): 509 raw_only=False):
341 """Creates a SizeInfo from the given map file.""" 510 """Creates a SizeInfo.
342 # tool_prefix needed for c++filt.
343 lazy_paths.VerifyToolPrefix()
344 511
345 if not no_source_paths: 512 Args:
346 # Parse .ninja files at the same time as parsing the .map file. 513 map_path: Path to the linker .map(.gz) file to parse.
347 source_mapper_result = helpers.ForkAndCall( 514 elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
348 ninja_parser.Parse, lazy_paths.VerifyOutputDirectory()) 515 aliases and inlined functions. Can be None.
516 tool_prefix: Prefix for c++filt & nm (required).
517 output_directory: Build output directory. If None, source_paths and symbol
518 alias information will not be recorded.
519 raw_only: Fill in just the information required for creating a .size file.
520 """
521 source_mapper = None
522 if output_directory:
523 # Start by finding the elf_object_paths, so that nm can run on them while
524 # the linker .map is being parsed.
525 logging.info('Parsing ninja files.')
526 source_mapper, elf_object_paths = ninja_parser.Parse(
527 output_directory, elf_path)
528 assert not elf_path or elf_object_paths, (
529 'Failed to find link command in ninja files for ' +
530 os.path.relpath(elf_path, output_directory))
531
532 if elf_path:
533 # Run nm on the elf file to retrieve the list of symbol names per-address.
534 # This list is required because the .map file contains only a single name
535 # for each address, yet multiple symbols are often coalesced when they are
536 # identical. This coalescing happens mainly for small symbols and for C++
537 # templates. Such symbols make up ~500kb of libchrome.so on Android.
538 elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)
539
540 # Run nm on all .o/.a files to retrieve the symbol names within them.
 541     # The list is used to detect when multiple .o files contain the same symbol
542 # (e.g. inline functions), and to update the object_path / source_path
543 # fields accordingly.
 544     # Looking in object files is required because the .map file chooses a
545 # single path for these symbols.
546 # Rather than record all paths for each symbol, set the paths to be the
547 # common ancestor of all paths.
548 if output_directory:
549 bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory)
550 bulk_analyzer.AnalyzePaths(elf_object_paths)
349 551
350 with _OpenMaybeGz(map_path) as map_file: 552 with _OpenMaybeGz(map_path) as map_file:
351 section_sizes, raw_symbols = ( 553 section_sizes, raw_symbols = (
352 linker_map_parser.MapFileParser().Parse(map_file)) 554 linker_map_parser.MapFileParser().Parse(map_file))
353 555
354 if not no_source_paths: 556 if elf_path:
355 logging.info('Extracting source paths from .ninja files') 557 logging.debug('Validating section sizes')
356 source_mapper = source_mapper_result.get() 558 elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
559 for k, v in elf_section_sizes.iteritems():
560 if v != section_sizes.get(k):
561 logging.error('ELF file and .map file do not agree on section sizes.')
562 logging.error('.map file: %r', section_sizes)
563 logging.error('readelf: %r', elf_section_sizes)
564 sys.exit(1)
565
566 if elf_path and output_directory:
567 missed_object_paths = _DiscoverMissedObjectPaths(
568 raw_symbols, elf_object_paths)
569 bulk_analyzer.AnalyzePaths(missed_object_paths)
570 bulk_analyzer.Close()
571
572 if source_mapper:
573 logging.info('Looking up source paths from ninja files')
357 _ExtractSourcePaths(raw_symbols, source_mapper) 574 _ExtractSourcePaths(raw_symbols, source_mapper)
358 assert source_mapper.unmatched_paths_count == 0, ( 575 assert source_mapper.unmatched_paths_count == 0, (
359 'One or more source file paths could not be found. Likely caused by ' 576 'One or more source file paths could not be found. Likely caused by '
360 '.ninja files being generated at a different time than the .map file.') 577 '.ninja files being generated at a different time than the .map file.')
361 578
362 logging.info('Stripping linker prefixes from symbol names') 579 logging.info('Stripping linker prefixes from symbol names')
363 _StripLinkerAddedSymbolPrefixes(raw_symbols) 580 _StripLinkerAddedSymbolPrefixes(raw_symbols)
364 # Map file for some reason doesn't unmangle all names. 581 # Map file for some reason doesn't unmangle all names.
365 # Unmangle prints its own log statement. 582 # Unmangle prints its own log statement.
366 _UnmangleRemainingSymbols(raw_symbols, lazy_paths.tool_prefix) 583 _UnmangleRemainingSymbols(raw_symbols, tool_prefix)
367 logging.info('Normalizing object paths') 584
368 _NormalizeObjectPaths(raw_symbols) 585 if elf_path:
586 logging.info('Adding aliased symbols, as reported by nm')
587 # This normally does not block (it's finished by this time).
588 aliases_by_address = elf_nm_result.get()
589 _AddSymbolAliases(raw_symbols, aliases_by_address)
590
591 if output_directory:
592 # For aliases, this provides path information where there wasn't any.
593 logging.info('Computing ancestor paths for inline functions and '
594 'normalizing object paths')
595
596 object_paths_by_name = bulk_analyzer.Get()
597 logging.debug('Fetched path information for %d symbols from %d files',
598 len(object_paths_by_name),
599 len(elf_object_paths) + len(missed_object_paths))
600 _ComputeAnscestorPathsAndNormalizeObjectPaths(
601 raw_symbols, object_paths_by_name, source_mapper)
602 else:
603 logging.info('Normalizing object paths.')
604 for symbol in raw_symbols:
605 symbol.object_path = _NormalizeObjectPath(symbol.object_path)
606
369 size_info = models.SizeInfo(section_sizes, raw_symbols) 607 size_info = models.SizeInfo(section_sizes, raw_symbols)
370 608
371 # Name normalization not strictly required, but makes for smaller files. 609 # Name normalization not strictly required, but makes for smaller files.
372 if raw_only: 610 if raw_only:
373 logging.info('Normalizing symbol names') 611 logging.info('Normalizing symbol names')
374 _NormalizeNames(size_info.raw_symbols) 612 _NormalizeNames(size_info.raw_symbols)
375 else: 613 else:
376 _PostProcessSizeInfo(size_info) 614 _PostProcessSizeInfo(size_info)
377 615
378 if logging.getLogger().isEnabledFor(logging.DEBUG): 616 if logging.getLogger().isEnabledFor(logging.DEBUG):
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
481 any_path_within_output_directory=any_input) 719 any_path_within_output_directory=any_input)
482 if apk_path: 720 if apk_path:
483 with zipfile.ZipFile(apk_path) as z: 721 with zipfile.ZipFile(apk_path) as z:
484 lib_infos = [f for f in z.infolist() 722 lib_infos = [f for f in z.infolist()
485 if f.filename.endswith('.so') and f.file_size > 0] 723 if f.filename.endswith('.so') and f.file_size > 0]
486 assert lib_infos, 'APK has no .so files.' 724 assert lib_infos, 'APK has no .so files.'
487 # TODO(agrieve): Add support for multiple .so files, and take into account 725 # TODO(agrieve): Add support for multiple .so files, and take into account
488 # secondary architectures. 726 # secondary architectures.
489 apk_so_path = max(lib_infos, key=lambda x:x.file_size).filename 727 apk_so_path = max(lib_infos, key=lambda x:x.file_size).filename
490 logging.debug('Sub-apk path=%s', apk_so_path) 728 logging.debug('Sub-apk path=%s', apk_so_path)
491 if not elf_path: 729 if not elf_path and lazy_paths.output_directory:
492 elf_path = os.path.join( 730 elf_path = os.path.join(
493 lazy_paths.output_directory, 'lib.unstripped', 731 lazy_paths.output_directory, 'lib.unstripped',
494 os.path.basename(apk_so_path.replace('crazy.', ''))) 732 os.path.basename(apk_so_path.replace('crazy.', '')))
495 logging.debug('Detected --elf-file=%s', elf_path) 733 logging.debug('Detected --elf-file=%s', elf_path)
496 734
497 if map_path: 735 if map_path:
498 if not map_path.endswith('.map') and not map_path.endswith('.map.gz'): 736 if not map_path.endswith('.map') and not map_path.endswith('.map.gz'):
499 parser.error('Expected --map-file to end with .map or .map.gz') 737 parser.error('Expected --map-file to end with .map or .map.gz')
500 else: 738 else:
501 map_path = elf_path + '.map' 739 map_path = elf_path + '.map'
502 if not os.path.exists(map_path): 740 if not os.path.exists(map_path):
503 map_path += '.gz' 741 map_path += '.gz'
504 if not os.path.exists(map_path): 742 if not os.path.exists(map_path):
505 parser.error('Could not find .map(.gz)? file. Use --map-file.') 743 parser.error('Could not find .map(.gz)? file. Use --map-file.')
506 744
507 metadata = None 745 tool_prefix = lazy_paths.VerifyToolPrefix()
508 if elf_path: 746 output_directory = None
509 logging.debug('Constructing metadata') 747 if not args.no_source_paths:
510 git_rev = _DetectGitRevision(os.path.dirname(elf_path)) 748 output_directory = lazy_paths.VerifyOutputDirectory()
511 architecture = _ArchFromElf(elf_path, lazy_paths.tool_prefix)
512 build_id = BuildIdFromElf(elf_path, lazy_paths.tool_prefix)
513 timestamp_obj = datetime.datetime.utcfromtimestamp(os.path.getmtime(
514 elf_path))
515 timestamp = calendar.timegm(timestamp_obj.timetuple())
516 gn_args = _ParseGnArgs(os.path.join(lazy_paths.output_directory, 'args.gn'))
517 749
518 def relative_to_out(path): 750 metadata = CreateMetadata(map_path, elf_path, apk_path, tool_prefix,
519 return os.path.relpath(path, lazy_paths.VerifyOutputDirectory()) 751 output_directory)
520 752 if apk_path and elf_path:
521 metadata = { 753 # Extraction takes around 1 second, so do it in parallel.
522 models.METADATA_GIT_REVISION: git_rev, 754 apk_elf_result = concurrent.ForkAndCall(
523 models.METADATA_MAP_FILENAME: relative_to_out(map_path), 755 _ElfInfoFromApk, (apk_path, apk_so_path, tool_prefix))
524 models.METADATA_ELF_ARCHITECTURE: architecture,
525 models.METADATA_ELF_FILENAME: relative_to_out(elf_path),
526 models.METADATA_ELF_MTIME: timestamp,
527 models.METADATA_ELF_BUILD_ID: build_id,
528 models.METADATA_GN_ARGS: gn_args,
529 }
530
531 if apk_path:
532 metadata[models.METADATA_APK_FILENAME] = relative_to_out(apk_path)
533 # Extraction takes around 1 second, so do it in parallel.
534 apk_elf_result = helpers.ForkAndCall(
535 _ElfInfoFromApk, apk_path, apk_so_path, lazy_paths.tool_prefix)
536 756
537 size_info = CreateSizeInfo( 757 size_info = CreateSizeInfo(
538 map_path, lazy_paths, no_source_paths=args.no_source_paths, raw_only=True) 758 map_path, elf_path, tool_prefix, output_directory, raw_only=True)
539 759
540 if metadata: 760 if metadata:
541 size_info.metadata = metadata 761 size_info.metadata = metadata
542 logging.debug('Validating section sizes')
543 elf_section_sizes = _SectionSizesFromElf(elf_path, lazy_paths.tool_prefix)
544 for k, v in elf_section_sizes.iteritems():
545 assert v == size_info.section_sizes.get(k), (
546 'ELF file and .map file do not match.')
547 762
548 if apk_path: 763 if apk_path:
549 logging.debug('Extracting section sizes from .so within .apk') 764 logging.debug('Extracting section sizes from .so within .apk')
550 unstripped_section_sizes = size_info.section_sizes 765 unstripped_section_sizes = size_info.section_sizes
551 apk_build_id, size_info.section_sizes = apk_elf_result.get() 766 apk_build_id, size_info.section_sizes = apk_elf_result.get()
552 assert apk_build_id == build_id, ( 767 assert apk_build_id == metadata[models.METADATA_ELF_BUILD_ID], (
553 'BuildID for %s within %s did not match the one at %s' % 768 'BuildID for %s within %s did not match the one at %s' %
554 (apk_so_path, apk_path, elf_path)) 769 (apk_so_path, apk_path, elf_path))
555 770
556 packed_section_name = None 771 packed_section_name = None
772 architecture = metadata[models.METADATA_ELF_ARCHITECTURE]
557 if architecture == 'ARM': 773 if architecture == 'ARM':
558 packed_section_name = '.rel.dyn' 774 packed_section_name = '.rel.dyn'
559 elif architecture == 'AArch64': 775 elif architecture == 'AArch64':
560 packed_section_name = '.rela.dyn' 776 packed_section_name = '.rela.dyn'
561 777
562 if packed_section_name: 778 if packed_section_name:
563 logging.debug('Recording size of unpacked relocations') 779 logging.debug('Recording size of unpacked relocations')
564 if packed_section_name not in size_info.section_sizes: 780 if packed_section_name not in size_info.section_sizes:
565 logging.warning('Packed section not present: %s', packed_section_name) 781 logging.warning('Packed section not present: %s', packed_section_name)
566 else: 782 else:
567 size_info.section_sizes['%s (unpacked)' % packed_section_name] = ( 783 size_info.section_sizes['%s (unpacked)' % packed_section_name] = (
568 unstripped_section_sizes.get(packed_section_name)) 784 unstripped_section_sizes.get(packed_section_name))
569 785
570 logging.info('Recording metadata: \n %s', 786 logging.info('Recording metadata: \n %s',
571 '\n '.join(describe.DescribeMetadata(size_info.metadata))) 787 '\n '.join(describe.DescribeMetadata(size_info.metadata)))
572 logging.info('Saving result to %s', args.size_file) 788 logging.info('Saving result to %s', args.size_file)
573 file_format.SaveSizeInfo(size_info, args.size_file) 789 file_format.SaveSizeInfo(size_info, args.size_file)
574 logging.info('Done') 790 logging.info('Done')
OLDNEW
« no previous file with comments | « no previous file | tools/binary_size/libsupersize/concurrent.py » ('j') | tools/binary_size/libsupersize/diff.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698