Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(145)

Side by Side Diff: tools/binary_size/file_format.py

Issue 2791433004: //tools/binary_size: source_path information, change file format, fixes (Closed)
Patch Set: Use json in header. gzip as separate step. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2017 The Chromium Authors. All rights reserved. 1 # Copyright 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """Deals with loading & saving .size files.""" 5 """Deals with loading & saving .size files."""
6 6
7 import ast 7 import cStringIO
8 import calendar
9 import collections
10 import datetime
8 import gzip 11 import gzip
12 import json
9 import models 13 import models
14 import logging
15 import os
16 import shutil
10 17
11 18
12 # File format version for .size files. 19 # File format version for .size files.
13 _SERIALIZATION_VERSION = 1 20 _SERIALIZATION_VERSION = 'Size File Format v1'
14 21
15 22
16 def EndsWithMaybeGz(path, suffix): 23 def _LogSize(file_obj, desc):
17 return path.endswith(suffix) or path.endswith(suffix + '.gz') 24 if not hasattr(file_obj, 'fileno'):
25 return
26 file_obj.flush()
27 size = os.fstat(file_obj.fileno()).st_size
28 logging.debug('File size with %s: %d' % (desc, size))
18 29
19 30
20 def OpenMaybeGz(path, mode=None): 31 def _SaveSizeInfoToFile(size_info, file_obj):
21 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" 32 file_obj.write('# Created by //tools/binary_size\n')
22 if path.endswith('.gz'): 33 file_obj.write('%s\n' % _SERIALIZATION_VERSION)
23 if mode and 'w' in mode: 34 headers = {
24 return gzip.GzipFile(path, mode, 1) 35 'tag': size_info.tag,
25 return gzip.open(path, mode) 36 'section_sizes': size_info.section_sizes,
26 return open(path, mode or 'r') 37 }
38 if size_info.timestamp:
39 headers['timestamp'] = calendar.timegm(size_info.timestamp.timetuple())
40 metadata_str = json.dumps(headers, file_obj, indent=2, sort_keys=True)
41 file_obj.write('%d\n' % len(metadata_str))
42 file_obj.write(metadata_str)
43 file_obj.write('\n')
44 _LogSize(file_obj, 'header') # For libchrome: 570 bytes.
27 45
46 # Store a single copy of all paths and have them referenced by index.
47 # Using an OrderedDict makes the indices more repetitive (better compression).
48 path_tuples = collections.OrderedDict.fromkeys(
49 (s.object_path, s.source_path) for s in size_info.symbols)
50 for i, key in enumerate(path_tuples):
51 path_tuples[key] = i
52 file_obj.write('%d\n' % len(path_tuples))
53 file_obj.writelines('%s\t%s\n' % pair for pair in path_tuples)
54 _LogSize(file_obj, 'paths') # For libchrome, adds 200kb.
28 55
29 def _SaveSizeInfoToFile(result, file_obj): 56 # Symbol counts by section.
30 """Saves the result to the given file object.""" 57 by_section = size_info.symbols.GroupBySectionName().SortedByName()
31 # Store one bucket per line. 58 file_obj.write('%s\n' % '\t'.join(g.name for g in by_section))
32 file_obj.write('%d\n' % _SERIALIZATION_VERSION) 59 file_obj.write('%s\n' % '\t'.join(str(len(g)) for g in by_section))
33 file_obj.write('%r\n' % result.section_sizes) 60
34 file_obj.write('%d\n' % len(result.symbols)) 61 def write_numeric(func, delta=False):
35 prev_section_name = None 62 for group in by_section:
36 # Store symbol fields as tab-separated. 63 prev_value = 0
37 # Store only non-derived fields. 64 last_sym = group[-1]
38 for symbol in result.symbols: 65 for symbol in group:
39 if symbol.section_name != prev_section_name: 66 value = func(symbol)
40 file_obj.write('%s\n' % symbol.section_name) 67 if delta:
41 prev_section_name = symbol.section_name 68 value, prev_value = value - prev_value, value
42 # Don't write padding nor name since these are derived values. 69 file_obj.write(str(value))
43 file_obj.write('%x\t%x\t%s\t%s\n' % ( 70 if symbol is not last_sym:
44 symbol.address, symbol.size_without_padding, 71 file_obj.write(' ')
45 symbol.function_signature or symbol.name, symbol.path)) 72 file_obj.write('\n')
73
74 write_numeric(lambda s: s.address, delta=True)
75 _LogSize(file_obj, 'addresses') # For libchrome, adds 300kb.
76 # Do not write padding, it will be recalcualted from addresses on load.
77 write_numeric(lambda s: s.size_without_padding)
78 _LogSize(file_obj, 'sizes') # For libchrome, adds 300kb
79 write_numeric(lambda s: path_tuples[(s.object_path, s.source_path)],
80 delta=True)
81 _LogSize(file_obj, 'path indices') # For libchrome: adds 125kb.
82
83 for group in by_section:
84 for symbol in group:
85 # Do not write name when full_name exists. It will be derived on load.
86 file_obj.write(symbol.full_name or symbol.name)
87 if symbol.is_anonymous:
88 file_obj.write('\t1')
89 file_obj.write('\n')
90 _LogSize(file_obj, 'names (final)') # For libchrome: adds 3.5mb.
46 91
47 92
def _LoadSizeInfoFromFile(file_obj):
  """Loads a size_info from the given file.

  Inverse of _SaveSizeInfoToFile(): parses the version line, the
  length-prefixed JSON header, the path table, the delta-encoded numeric
  rows, and the per-symbol name lines, then reconstructs models.Symbol
  objects. Derived fields (padding, full_name) are left unset/zero.
  """
  lines = iter(file_obj)
  next(lines)  # Comment line.
  actual_version = next(lines)[:-1]  # [:-1] strips the trailing '\n'.
  assert actual_version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')
  # The JSON header is prefixed by its byte length so it can be consumed
  # with a single read() instead of line-by-line parsing.
  json_len = int(next(lines))
  json_str = file_obj.read(json_len)
  metadata = json.loads(json_str)
  timestamp = metadata.get('timestamp')  # Optional; saved as UTC epoch secs.
  if timestamp is not None:
    timestamp = datetime.datetime.utcfromtimestamp(timestamp)
  tag = metadata['tag']
  section_sizes = metadata['section_sizes']

  # Re-create the line iterator after the raw read() above.
  # NOTE(review): relies on the file object tolerating mixed read()/iteration
  # (works for gzip file objects) -- confirm if the underlying type changes.
  lines = iter(file_obj)
  next(lines)  # newline after closing } of json.

  # Path table: one "object_path\tsource_path" line per entry; symbols refer
  # to entries by index (see path_indices below).
  num_path_tuples = int(next(lines))
  path_tuples = [None] * num_path_tuples
  for i in xrange(num_path_tuples):
    path_tuples[i] = next(lines)[:-1].split('\t')

  section_names = next(lines)[:-1].split('\t')
  section_counts = [int(c) for c in next(lines)[:-1].split('\t')]

  def read_numeric(delta=False):
    # Reads one space-separated row of ints per section. When |delta| is
    # True, values are accumulated to undo the save-time delta-encoding
    # (value * 0 + f keeps raw values; value * 1 + f accumulates).
    ret = []
    delta_multiplier = int(delta)
    for _ in section_counts:
      value = 0
      fields = next(lines).split(' ')
      for i, f in enumerate(fields):
        value = value * delta_multiplier + int(f)
        fields[i] = value
      ret.append(fields)
    return ret

  addresses = read_numeric(delta=True)
  sizes = read_numeric(delta=False)
  path_indices = read_numeric(delta=True)

  # One name line per symbol, grouped by section, in the same order as the
  # numeric rows above.
  symbol_list = [None] * sum(section_counts)
  symbol_idx = 0
  for section_index, cur_section_name in enumerate(section_names):
    for i in xrange(section_counts[section_index]):
      line = next(lines)[:-1]
      is_anonymous = line.endswith('\t1')  # '\t1' suffix marks anonymous.
      name = line[:-2] if is_anonymous else line

      # Bypass Symbol.__init__ since every field is assigned directly here.
      new_sym = models.Symbol.__new__(models.Symbol)
      new_sym.section_name = cur_section_name
      new_sym.address = addresses[section_index][i]
      new_sym.size = sizes[section_index][i]
      new_sym.name = name
      paths = path_tuples[path_indices[section_index][i]]
      new_sym.object_path = paths[0]
      new_sym.source_path = paths[1]
      new_sym.is_anonymous = is_anonymous
      new_sym.padding = 0  # Derived
      new_sym.full_name = None  # Derived
      symbol_list[symbol_idx] = new_sym
      symbol_idx += 1

  symbols = models.SymbolGroup(symbol_list)
  return models.SizeInfo(section_sizes, symbols, timestamp=timestamp, tag=tag)
76 160
77 161
def SaveSizeInfo(size_info, path):
  """Writes |size_info| to |path| as a gzipped .size file."""
  measuring_gzip = os.environ.get('MEASURE_GZIP') == '1'
  if not measuring_gzip:
    # It is seconds faster to do gzip in a separate step. 6s -> 3.5s.
    buf = cStringIO.StringIO()
    _SaveSizeInfoToFile(size_info, buf)

    logging.debug('Serialization complete. Gzipping...')
    buf.seek(0)
    with gzip.open(path, 'wb') as f:
      shutil.copyfileobj(buf, f)
  else:
    # MEASURE_GZIP=1: stream straight through gzip so that _LogSize()
    # reports post-compression sizes for each serialization stage.
    with gzip.open(path, 'wb') as f:
      _SaveSizeInfoToFile(size_info, f)
81 176
82 177
def LoadSizeInfo(path):
  """Reads and returns a SizeInfo from the gzipped .size file at |path|."""
  f = gzip.open(path)
  try:
    return _LoadSizeInfoFromFile(f)
  finally:
    f.close()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698