Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(145)

Side by Side Diff: tools/binary_size/file_format.py

Issue 2791433004: //tools/binary_size: source_path information, change file format, fixes (Closed)
Patch Set: Use json in header. gzip as separate step. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2017 The Chromium Authors. All rights reserved. 1 # Copyright 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 """Deals with loading & saving .size files.""" 5 """Deals with loading & saving .size files."""
6 6
7 import ast 7 import cStringIO
8 import calendar
9 import collections
10 import datetime
8 import gzip 11 import gzip
12 import json
9 import models 13 import models
14 import logging
15 import os
16 import shutil
10 17
11 18
12 # File format version for .size files. 19 # File format version for .size files.
13 _SERIALIZATION_VERSION = 1 20 _SERIALIZATION_VERSION = 'Size File Format v1'
14 21
15 22
16 def EndsWithMaybeGz(path, suffix): 23 def _LogSize(file_obj, desc):
17 return path.endswith(suffix) or path.endswith(suffix + '.gz') 24 if not hasattr(file_obj, 'fileno'):
25 return
26 file_obj.flush()
27 size = os.fstat(file_obj.fileno()).st_size
28 logging.debug('File size with %s: %d' % (desc, size))
18 29
19 30
20 def OpenMaybeGz(path, mode=None): 31 def _SaveSizeInfoToFile(size_info, file_obj):
21 """Calls `gzip.open()` if |path| ends in ".gz", otherwise calls `open()`.""" 32 file_obj.write('# Created by //tools/binary_size\n')
22 if path.endswith('.gz'): 33 file_obj.write('%s\n' % _SERIALIZATION_VERSION)
23 if mode and 'w' in mode: 34 headers = {
24 return gzip.GzipFile(path, mode, 1) 35 'tag': size_info.tag,
25 return gzip.open(path, mode) 36 'section_sizes': size_info.section_sizes,
26 return open(path, mode or 'r') 37 }
38 if size_info.timestamp:
39 headers['timestamp'] = calendar.timegm(size_info.timestamp.timetuple())
40 metadata_str = json.dumps(headers, file_obj, indent=2, sort_keys=True)
41 file_obj.write('%d\n' % len(metadata_str))
42 file_obj.write(metadata_str)
43 file_obj.write('\n')
44 _LogSize(file_obj, 'header') # For libchrome: 570 bytes.
27 45
46 # Store a single copy of all paths and have them referenced by index.
47 # Using an OrderedDict makes the indices more repetitive (better compression).
48 path_tuples = collections.OrderedDict.fromkeys(
49 (s.object_path, s.source_path) for s in size_info.symbols)
50 for i, key in enumerate(path_tuples):
51 path_tuples[key] = i
52 file_obj.write('%d\n' % len(path_tuples))
53 file_obj.writelines('%s\t%s\n' % pair for pair in path_tuples)
54 _LogSize(file_obj, 'paths') # For libchrome, adds 200kb.
28 55
29 def _SaveSizeInfoToFile(result, file_obj): 56 # Symbol counts by section.
30 """Saves the result to the given file object.""" 57 by_section = size_info.symbols.GroupBySectionName().SortedByName()
31 # Store one bucket per line. 58 file_obj.write('%s\n' % '\t'.join(g.name for g in by_section))
32 file_obj.write('%d\n' % _SERIALIZATION_VERSION) 59 file_obj.write('%s\n' % '\t'.join(str(len(g)) for g in by_section))
33 file_obj.write('%r\n' % result.section_sizes) 60
34 file_obj.write('%d\n' % len(result.symbols)) 61 def write_numeric(func, delta=False):
35 prev_section_name = None 62 for group in by_section:
36 # Store symbol fields as tab-separated. 63 prev_value = 0
37 # Store only non-derived fields. 64 last_sym = group[-1]
38 for symbol in result.symbols: 65 for symbol in group:
39 if symbol.section_name != prev_section_name: 66 value = func(symbol)
40 file_obj.write('%s\n' % symbol.section_name) 67 if delta:
41 prev_section_name = symbol.section_name 68 value, prev_value = value - prev_value, value
42 # Don't write padding nor name since these are derived values. 69 file_obj.write(str(value))
43 file_obj.write('%x\t%x\t%s\t%s\n' % ( 70 if symbol is not last_sym:
44 symbol.address, symbol.size_without_padding, 71 file_obj.write(' ')
45 symbol.function_signature or symbol.name, symbol.path)) 72 file_obj.write('\n')
73
74 write_numeric(lambda s: s.address, delta=True)
75 _LogSize(file_obj, 'addresses') # For libchrome, adds 300kb.
76 # Do not write padding, it will be recalcualted from addresses on load.
77 write_numeric(lambda s: s.size_without_padding)
78 _LogSize(file_obj, 'sizes') # For libchrome, adds 300kb
79 write_numeric(lambda s: path_tuples[(s.object_path, s.source_path)],
80 delta=True)
81 _LogSize(file_obj, 'path indices') # For libchrome: adds 125kb.
82
83 for group in by_section:
84 for symbol in group:
85 # Do not write name when full_name exists. It will be derived on load.
86 file_obj.write(symbol.full_name or symbol.name)
87 if symbol.is_anonymous:
88 file_obj.write('\t1')
89 file_obj.write('\n')
90 _LogSize(file_obj, 'names (final)') # For libchrome: adds 3.5mb.
46 91
47 92
def _LoadSizeInfoFromFile(file_obj):
  """Loads a size_info from the given file.

  Inverse of _SaveSizeInfoToFile(): parses the version line, the
  length-prefixed JSON header, the path table, the delta-encoded numeric
  rows, and the per-symbol name lines, then reconstructs models.Symbol
  objects. Derived fields (padding, full_name) are left unset/zero.
  """
  lines = iter(file_obj)
  next(lines)  # Comment line.
  actual_version = next(lines)[:-1]  # [:-1] strips the trailing '\n'.
  assert actual_version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')
  # The JSON header is prefixed by its byte length so it can be consumed
  # with a single read() instead of line-by-line parsing.
  json_len = int(next(lines))
  json_str = file_obj.read(json_len)
  metadata = json.loads(json_str)
  timestamp = metadata.get('timestamp')  # Optional; saved as UTC epoch secs.
  if timestamp is not None:
    timestamp = datetime.datetime.utcfromtimestamp(timestamp)
  tag = metadata['tag']
  section_sizes = metadata['section_sizes']

  # Re-create the line iterator after the raw read() above.
  # NOTE(review): relies on the file object tolerating mixed read()/iteration
  # (works for gzip file objects) -- confirm if the underlying type changes.
  lines = iter(file_obj)
  next(lines)  # newline after closing } of json.

  # Path table: one "object_path\tsource_path" line per entry; symbols refer
  # to entries by index (see path_indices below).
  num_path_tuples = int(next(lines))
  path_tuples = [None] * num_path_tuples
  for i in xrange(num_path_tuples):
    path_tuples[i] = next(lines)[:-1].split('\t')

  section_names = next(lines)[:-1].split('\t')
  section_counts = [int(c) for c in next(lines)[:-1].split('\t')]

  def read_numeric(delta=False):
    # Reads one space-separated row of ints per section. When |delta| is
    # True, values are accumulated to undo the save-time delta-encoding
    # (value * 0 + f keeps raw values; value * 1 + f accumulates).
    ret = []
    delta_multiplier = int(delta)
    for _ in section_counts:
      value = 0
      fields = next(lines).split(' ')
      for i, f in enumerate(fields):
        value = value * delta_multiplier + int(f)
        fields[i] = value
      ret.append(fields)
    return ret

  addresses = read_numeric(delta=True)
  sizes = read_numeric(delta=False)
  path_indices = read_numeric(delta=True)

  # One name line per symbol, grouped by section, in the same order as the
  # numeric rows above.
  symbol_list = [None] * sum(section_counts)
  symbol_idx = 0
  for section_index, cur_section_name in enumerate(section_names):
    for i in xrange(section_counts[section_index]):
      line = next(lines)[:-1]
      is_anonymous = line.endswith('\t1')  # '\t1' suffix marks anonymous.
      name = line[:-2] if is_anonymous else line

      # Bypass Symbol.__init__ since every field is assigned directly here.
      new_sym = models.Symbol.__new__(models.Symbol)
      new_sym.section_name = cur_section_name
      new_sym.address = addresses[section_index][i]
      new_sym.size = sizes[section_index][i]
      new_sym.name = name
      paths = path_tuples[path_indices[section_index][i]]
      new_sym.object_path = paths[0]
      new_sym.source_path = paths[1]
      new_sym.is_anonymous = is_anonymous
      new_sym.padding = 0  # Derived
      new_sym.full_name = None  # Derived
      symbol_list[symbol_idx] = new_sym
      symbol_idx += 1

  symbols = models.SymbolGroup(symbol_list)
  return models.SizeInfo(section_sizes, symbols, timestamp=timestamp, tag=tag)
76 160
77 161
def SaveSizeInfo(size_info, path):
  """Writes |size_info| to |path| as a gzipped .size file."""
  measuring_gzip = os.environ.get('MEASURE_GZIP') == '1'
  if not measuring_gzip:
    # It is seconds faster to do gzip in a separate step. 6s -> 3.5s.
    buf = cStringIO.StringIO()
    _SaveSizeInfoToFile(size_info, buf)

    logging.debug('Serialization complete. Gzipping...')
    buf.seek(0)
    with gzip.open(path, 'wb') as f:
      shutil.copyfileobj(buf, f)
  else:
    # MEASURE_GZIP=1: stream straight through gzip so that _LogSize()
    # reports post-compression sizes for each serialization stage.
    with gzip.open(path, 'wb') as f:
      _SaveSizeInfoToFile(size_info, f)
81 176
82 177
def LoadSizeInfo(path):
  """Reads and returns a SizeInfo from the gzipped .size file at |path|."""
  f = gzip.open(path)
  try:
    return _LoadSizeInfoFromFile(f)
  finally:
    f.close()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698