Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(20)

Side by Side Diff: tools/cygprofile/cyglog_to_orderfile.py

Issue 874683004: Refactor the symbolize step of the orderfile generation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Address comments. Change filenames. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Symbolize log file produced by cyprofile instrumentation.
7
8 Given a log file and the binary being profiled, create an order file.
9 """
10
11 import logging
12 import multiprocessing
13 import optparse
14 import os
15 import tempfile
16 import string
17 import sys
18
19 import symbol_extractor
20
21
22 def _ParseLogLines(log_file_lines):
23 """Parse a merge cyglog produced by mergetraces.py.
pasko 2015/01/30 19:31:33 s/Parse/Parses/ (and similarly below) (sorry forgo
Benoit L 2015/02/02 09:28:14 Ah, I was consistently doing the wrong thing. Done
24
25 Args:
26 log_file_lines: array of lines in log file produced by profiled run
27 lib_name: library or executable containing symbols
28
29 Below is an example of a small log file:
30 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so
31 secs usecs pid:threadid func
32 START
33 1314897086 795828 3587:1074648168 0x509e105c
34 1314897086 795874 3587:1074648168 0x509e0eb4
35 1314897086 796326 3587:1074648168 0x509e0e3c
36 1314897086 796552 3587:1074648168 0x509e07bc
37 END
38
39 Returns:
40 A list of tuples of the format (sec, usec, call id, function address called)
41 """
42 call_lines = []
43 vm_start = 0
44 line = log_file_lines[0]
45 assert 'r-xp' in line
46 end_index = line.find('-')
47 vm_start = int(line[:end_index], 16)
48 for line in log_file_lines[2:]:
49 fields = line.split()
50 if len(fields) == 4:
51 call_lines.append(fields)
pasko 2015/01/30 19:31:33 else: assert fields[0] == 'END' wdyt?
Benoit L 2015/02/02 09:28:14 There are lines at the start that don't follow thi
pasko-google - do not use 2015/02/02 10:35:49 But but .. you can replace the '2' with '3' in the
52
53 # Convert strings to int in fields.
54 call_info = []
55 for call_line in call_lines:
56 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2])
pasko 2015/01/30 19:31:33 Why are we doing all this? We only use the ordered
Benoit L 2015/02/02 09:28:14 Carried over from the previous version of the code
57 callee_id = call_line[2]
58 addr = int(call_line[3], 16)
59 if vm_start < addr:
60 addr -= vm_start
61 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr))
62
63 return call_info
64
65
66 def _GroupLibrarySymbolInfosByOffset(lib_filename):
67 """Return a dict {offset: [SymbolInfo]} from a library."""
68 symbol_infos = symbol_extractor.SymbolInfosFromBinary(lib_filename)
69 return symbol_extractor.GroupSymbolInfosByOffset(symbol_infos)
70
71
72 class SymbolNotFoundException(Exception):
73 def __init__(self, value):
74 super(SymbolNotFoundException, self).__init__(value)
75 self.value = value
76
77 def __str__(self):
78 return repr(self.value)
79
80
81 def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset):
pasko 2015/01/30 19:31:33 not sure how strongly I feel about this. Usually t
Benoit L 2015/02/02 09:28:14 Acknowledged.
82 """Find all SymbolInfo at a given offset.
83
84 Args:
85 offset_to_symbol_infos: {offset: [SymbolInfo]}
86 offset: offset to look the symbols at
87
88 Returns:
89 The list of SymbolInfo at the given offset
90
91 Raises:
92 SymbolNotFoundException if the offset doesn't match any symbol.
93 """
94 if offset in offset_to_symbol_infos:
95 return offset_to_symbol_infos[offset]
96 elif offset % 2 and (offset - 1) in offset_to_symbol_infos:
97 # On ARM, odd addresses are used to signal thumb instruction. They are
98 # generated by setting the LSB to 1 (see
99 # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfj hia.html).
100 # TODO(lizeb): Make sure this hack doesn't propagate to other archs.
101 return offset_to_symbol_infos[offset - 1]
102 else:
103 raise SymbolNotFoundException(offset)
104
105
106 class WarningCollector(object):
107 """Collect warnings, but limit the number printed to a set value."""
108 def __init__(self, max_warnings):
109 self._warnings = 0
110 self._max_warnings = max_warnings
111
112 def Write(self, message):
113 if self._warnings < self._max_warnings:
114 logging.warning(message)
115 self._warnings += 1
116
117 def WriteEnd(self, message):
118 if self._warnings > self._max_warnings:
119 logging.warning('%d more warnings for %s' % (
120 self._warnings - self._max_warnings, message))
121
122
123 def _GetObjectFileNames(obj_dir):
124 """Return the list of object files in a directory."""
125 obj_files = []
126 for (dirpath, _, filenames) in os.walk(obj_dir):
127 for file_name in filenames:
128 if file_name.endswith('.o'):
129 obj_files.append(os.path.join(dirpath, file_name))
130 return obj_files
131
132
133 def _AllSymbolInfos(object_filenames):
134 """Return a list of SymbolInfo from an iterable of filenames."""
135 pool = multiprocessing.Pool()
136 symbol_infos_nested = pool.map(
137 symbol_extractor.SymbolInfosFromBinary, object_filenames)
pasko 2015/01/30 19:31:32 maybe just write a comment saying that we expect e
Benoit L 2015/02/02 09:28:14 Done.
138 result = []
139 for symbol_infos in symbol_infos_nested:
140 result += symbol_infos
141 return result
142
143
144 def _GetSymbolToSectionMapFromObjectFiles(obj_dir):
145 """ Create a mapping from symbol to linker section name by scanning all
146 the object files.
147 """
148 object_files = _GetObjectFileNames(obj_dir)
149 symbol_to_section_map = {}
150 symbol_warnings = WarningCollector(300)
151 symbol_infos = _AllSymbolInfos(object_files)
152 symbol_infos = [s for s in symbol_infos if not s.name.startswith('.LTHUNK')]
pasko 2015/01/30 19:31:33 I think it would be more readable if it lives in t
Benoit L 2015/02/02 09:28:14 Done.
153 for symbol_info in symbol_infos:
154 symbol = symbol_info.name
155 section = symbol_info.section
156 if ((symbol in symbol_to_section_map) and
157 (symbol_to_section_map[symbol] != symbol_info.section)):
158 symbol_warnings.Write('Symbol ' + symbol +
159 ' in conflicting sections ' + section +
160 ' and ' + symbol_to_section_map[symbol])
161 elif not section.startswith('.text'):
162 symbol_warnings.Write('Symbol ' + symbol +
163 ' in incorrect section ' + section)
pasko 2015/01/30 19:31:32 I forgot why it would be incorrect for a symbol to
Benoit L 2015/02/02 09:28:15 It is the contrary, print a warning if the section
pasko-google - do not use 2015/02/02 10:35:49 ah, sorry, did not spot the 'not' in the condition
164 else:
165 symbol_to_section_map[symbol] = section
166 symbol_warnings.WriteEnd('bad sections')
167 return symbol_to_section_map
168
169
170 def _WarnAboutDuplicates(offsets):
171 """Warn about duplicate offsets.
172
173 Args:
174 offsets: list of offsets to check for duplicates
175
176 Returns:
177 True if there are no duplicates, False otherwise.
178 """
179 seen_offsets = set()
180 ok = True
181 for offset in offsets:
182 if offset not in seen_offsets:
183 seen_offsets.add(offset)
184 else:
185 ok = False
186 logging.warning('Duplicate offset: ' + hex(offset))
187 return ok
188
189
190 def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map,
191 output_file):
192 """Output the order file to output_file.
pasko 2015/01/30 19:31:33 s/order file/orderfile/
Benoit L 2015/02/02 09:28:14 Done.
193
194 Args:
195 offsets: Iterable of offsets to match to section names
196 offset_to_symbol_infos: {offset: [SymbolInfo]}
197 symbol_to_section_map: {name: section}
198 output_file: file-like object to write the results to
199 """
200 success = True
201 unknown_symbol_warnings = WarningCollector(300)
202 symbol_not_found_warnings = WarningCollector(300)
203 for offset in offsets:
204 try:
205 symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset)
206 for symbol_info in symbol_infos:
207 if symbol_info.name in symbol_to_section_map:
208 output_file.write(symbol_to_section_map[symbol_info.name] + '\n')
209 else:
210 unknown_symbol_warnings.Write(
211 'No known section for symbol ' + symbol_info.name)
212 except SymbolNotFoundException:
213 symbol_not_found_warnings.Write(
214 'Did not find function in binary. offset: ' + hex(offset))
215 success = False
216 unknown_symbol_warnings.WriteEnd('no known section for symbol')
217 symbol_not_found_warnings.WriteEnd('did not find function')
pasko 2015/01/30 19:31:32 "X more warnings for did not find function" sounds
Benoit L 2015/02/02 09:28:14 Done.
218 return success
219
220
221 def main():
222 if len(sys.argv) != 4:
223 logging.error('Usage: cyglog_to_orderfile.py <merged_cyglog> '
224 '<library> <output_filename>')
225 return 1
226 (log_file, lib_file, output_filename) = sys.argv[1:]
pasko 2015/01/30 19:31:33 nit: they are all file names, but two of them are
Benoit L 2015/02/02 09:28:15 Done.
227
228 obj_dir = os.path.abspath(os.path.join(os.path.dirname(lib_file), '../obj'))
229
230 log_file_lines = map(string.rstrip, open(log_file).readlines())
231 call_info = _ParseLogLines(log_file_lines)
pasko 2015/01/30 19:31:33 s/call_info/call_infos/
Benoit L 2015/02/02 09:28:15 Done.
232 offsets = [call[3] for call in call_info]
233 offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_file)
234
235 _WarnAboutDuplicates(offsets)
236
237 symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir)
238 success = False
239 temp_filename = None
240 output_file = None
241 try:
242 (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename))
243 output_file = os.fdopen(fd, 'w')
244 ok = _OutputOrderfile(offsets, offset_to_symbol_infos,
245 symbol_to_section_map, output_file)
pasko 2015/01/30 19:31:33 nit: less whitespace
Benoit L 2015/02/02 09:28:14 Done.
246 output_file.close()
247 os.rename(temp_filename, output_filename)
248 temp_filename = None
249 success = ok
250 finally:
251 if output_file:
252 output_file.close()
253 if temp_filename:
254 os.remove(temp_filename)
255
256 return 0 if success else 1
257
258
259 if __name__ == '__main__':
260 logging.basicConfig(level=logging.INFO)
261 sys.exit(main())
pasko 2015/01/30 19:31:32 so, symbol_extractor.CreateNameToSymbolInfo() is n
Benoit L 2015/02/02 09:28:15 Used later on. :-)
OLDNEW
« no previous file with comments | « no previous file | tools/cygprofile/cyglog_to_orderfile_unittest.py » ('j') | tools/cygprofile/cyglog_to_orderfile_unittest.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698