Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(311)

Side by Side Diff: tools/cygprofile/cyglog_to_orderfile.py

Issue 874683004: Refactor the symbolize step of the orderfile generation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Address comments. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tools/cygprofile/cyglog_to_orderfile_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/python
2 # Copyright 2015 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Symbolizes a log file produced by cygprofile instrumentation.
7
8 Given a log file and the binary being profiled, creates an orderfile.
9 """
10
11 import logging
12 import multiprocessing
13 import optparse
14 import os
15 import tempfile
16 import string
17 import sys
18
19 import symbol_extractor
20
21
22 def _ParseLogLines(log_file_lines):
23 """Parses a merged cyglog produced by mergetraces.py.
24
25 Args:
26 log_file_lines: array of lines in log file produced by profiled run
27 lib_name: library or executable containing symbols
28
29 Below is an example of a small log file:
30 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so
31 secs usecs pid:threadid func
32 START
33 1314897086 795828 3587:1074648168 0x509e105c
34 1314897086 795874 3587:1074648168 0x509e0eb4
35 1314897086 796326 3587:1074648168 0x509e0e3c
36 1314897086 796552 3587:1074648168 0x509e07bc
37 END
38
39 Returns:
40 An ordered list of callee offsets.
41 """
42 call_lines = []
43 vm_start = 0
44 line = log_file_lines[0]
45 assert 'r-xp' in line
46 end_index = line.find('-')
47 vm_start = int(line[:end_index], 16)
48 for line in log_file_lines[3:]:
49 fields = line.split()
50 if len(fields) == 4:
51 call_lines.append(fields)
52 else:
53 assert fields[0] == 'END'
54 # Convert strings to int in fields.
55 call_info = []
56 for call_line in call_lines:
57 addr = int(call_line[3], 16)
58 if vm_start < addr:
59 addr -= vm_start
60 call_info.append(addr)
61 return call_info
62
63
def _GroupLibrarySymbolInfosByOffset(lib_filename):
  """Builds the {offset: [SymbolInfo]} dict for a library.

  Args:
    lib_filename: path of the library to read the symbols from

  Returns:
    The library's SymbolInfo, as extracted by symbol_extractor, grouped by
    offset.
  """
  return symbol_extractor.GroupSymbolInfosByOffset(
      symbol_extractor.SymbolInfosFromBinary(lib_filename))
68
69
class SymbolNotFoundException(Exception):
  """Signals that a lookup did not match any symbol.

  The value passed to the constructor is kept in the `value` attribute and
  its repr() is used as the string form of the exception.
  """

  def __init__(self, value):
    Exception.__init__(self, value)
    self.value = value

  def __str__(self):
    return '%r' % (self.value,)
77
78
79 def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset):
80 """Finds all SymbolInfo at a given offset.
81
82 Args:
83 offset_to_symbol_infos: {offset: [SymbolInfo]}
84 offset: offset to look the symbols at
85
86 Returns:
87 The list of SymbolInfo at the given offset
88
89 Raises:
90 SymbolNotFoundException if the offset doesn't match any symbol.
91 """
92 if offset in offset_to_symbol_infos:
93 return offset_to_symbol_infos[offset]
94 elif offset % 2 and (offset - 1) in offset_to_symbol_infos:
95 # On ARM, odd addresses are used to signal thumb instruction. They are
96 # generated by setting the LSB to 1 (see
97 # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfj hia.html).
98 # TODO(lizeb): Make sure this hack doesn't propagate to other archs.
99 return offset_to_symbol_infos[offset - 1]
100 else:
101 raise SymbolNotFoundException(offset)
102
103
class WarningCollector(object):
  """Counts every warning it is given but logs at most max_warnings of them."""

  def __init__(self, max_warnings):
    self._max_warnings = max_warnings
    self._warnings = 0

  def Write(self, message):
    """Counts the warning, logging it while the limit is not yet reached."""
    should_log = self._warnings < self._max_warnings
    if should_log:
      logging.warning(message)
    self._warnings += 1

  def WriteEnd(self, message):
    """Logs how many warnings went over the limit, if any did."""
    suppressed = self._warnings - self._max_warnings
    if suppressed > 0:
      logging.warning('%d more warnings for: %s' % (suppressed, message))
119
120
121 def _GetObjectFileNames(obj_dir):
122 """Returns the list of object files in a directory."""
123 obj_files = []
124 for (dirpath, _, filenames) in os.walk(obj_dir):
125 for file_name in filenames:
126 if file_name.endswith('.o'):
127 obj_files.append(os.path.join(dirpath, file_name))
128 return obj_files
129
130
def _AllSymbolInfos(object_filenames):
  """Returns a list of SymbolInfo from an iterable of filenames.

  The files are processed in parallel by a multiprocessing pool, each one
  through symbol_extractor.SymbolInfosFromBinary.

  Args:
    object_filenames: iterable of filenames to extract the symbols from

  Returns:
    A single flat list with the SymbolInfo of all the files, in the order of
    the filenames.
  """
  pool = multiprocessing.Pool()
  try:
    # Hopefully the object files are in the page cache at this step, so IO
    # should not be a problem (hence no concurrency limit on the pool).
    symbol_infos_nested = pool.map(
        symbol_extractor.SymbolInfosFromBinary, object_filenames)
  finally:
    # Shut the workers down explicitly rather than leaving them around until
    # the pool happens to be garbage collected.
    pool.close()
    pool.join()
  result = []
  for symbol_infos in symbol_infos_nested:
    result.extend(symbol_infos)
  return result
142
143
def _GetSymbolToSectionMapFromObjectFiles(obj_dir):
  """Maps symbol names to linker section names using the object files.

  Symbols whose name starts with '.LTHUNK' are ignored. A symbol is only
  recorded when its section starts with '.text'; other sections, and symbols
  seen again with a different section than the recorded one, produce a
  warning (at most 300 warnings are logged).

  Args:
    obj_dir: directory scanned for object files

  Returns:
    A dict {symbol name: section name}.
  """
  section_warnings = WarningCollector(300)
  section_by_symbol = {}
  for info in _AllSymbolInfos(_GetObjectFileNames(obj_dir)):
    name = info.name
    if name.startswith('.LTHUNK'):
      continue
    section = info.section
    recorded_section = section_by_symbol.get(name)
    if recorded_section is not None and recorded_section != section:
      section_warnings.Write(''.join((
          'Symbol ', name,
          ' in conflicting sections ', section,
          ' and ', recorded_section)))
      continue
    if section.startswith('.text'):
      section_by_symbol[name] = section
    else:
      section_warnings.Write(''.join((
          'Symbol ', name, ' in incorrect section ', section)))
  section_warnings.WriteEnd('bad sections')
  return section_by_symbol
169
170
171 def _WarnAboutDuplicates(offsets):
172 """Warns about duplicate offsets.
173
174 Args:
175 offsets: list of offsets to check for duplicates
176
177 Returns:
178 True if there are no duplicates, False otherwise.
179 """
180 seen_offsets = set()
181 ok = True
182 for offset in offsets:
183 if offset not in seen_offsets:
184 seen_offsets.add(offset)
185 else:
186 ok = False
187 logging.warning('Duplicate offset: ' + hex(offset))
188 return ok
189
190
def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map,
                     output_file):
  """Writes the orderfile, one section name per line, to output_file.

  For each offset, the section of every symbol found at that offset is
  written. Symbols without a known section are only warned about; offsets
  that match no symbol are warned about and make the function return False.
  At most 300 warnings of each kind are logged.

  Args:
    offsets: Iterable of offsets to match to section names
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    symbol_to_section_map: {name: section}
    output_file: file-like object to write the results to

  Returns:
    True if every offset matched at least one symbol, False otherwise.
  """
  all_offsets_found = True
  no_section_warnings = WarningCollector(300)
  not_found_warnings = WarningCollector(300)
  for offset in offsets:
    try:
      for info in _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset):
        name = info.name
        if name not in symbol_to_section_map:
          no_section_warnings.Write('No known section for symbol ' + name)
          continue
        output_file.write(symbol_to_section_map[name] + '\n')
    except SymbolNotFoundException:
      not_found_warnings.Write(
          'Did not find function in binary. offset: ' + hex(offset))
      all_offsets_found = False
  no_section_warnings.WriteEnd('no known section for symbol.')
  not_found_warnings.WriteEnd('symbol not found in the binary.')
  return all_offsets_found
220
221
def main():
  """Generates an orderfile from a merged cyglog and the profiled library.

  Expects exactly three command line arguments: the merged cyglog, the
  library and the output filename. Object files are looked for in the 'obj'
  directory that is a sibling of the library's directory.

  The orderfile is first written to a temporary file in the output directory
  and renamed to the output filename once complete; the temporary file is
  removed if anything fails before the rename.

  Returns:
    0 if the orderfile was written and every offset was matched to a symbol,
    1 otherwise (including when the arguments are wrong).
  """
  if len(sys.argv) != 4:
    logging.error('Usage: cyglog_to_orderfile.py <merged_cyglog> '
                  '<library> <output_filename>')
    return 1
  (log_filename, lib_filename, output_filename) = sys.argv[1:]

  obj_dir = os.path.abspath(os.path.join(
      os.path.dirname(lib_filename), '../obj'))

  # Read the log through a context manager so the file is closed right away,
  # and strip trailing whitespace with the str method, which is available on
  # both Python 2 and Python 3 (unlike the string.rstrip function).
  with open(log_filename) as log_file:
    log_file_lines = [line.rstrip() for line in log_file]
  offsets = _ParseLogLines(log_file_lines)
  _WarnAboutDuplicates(offsets)

  offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename)
  symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir)

  success = False
  temp_filename = None
  output_file = None
  try:
    (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename))
    output_file = os.fdopen(fd, 'w')
    ok = _OutputOrderfile(
        offsets, offset_to_symbol_infos, symbol_to_section_map, output_file)
    # Close before renaming so the content is flushed to the temporary file.
    output_file.close()
    os.rename(temp_filename, output_filename)
    # The temporary file no longer exists under that name; nothing to remove.
    temp_filename = None
    success = ok
  finally:
    if output_file:
      # Closing an already closed file is a no-op.
      output_file.close()
    if temp_filename:
      os.remove(temp_filename)

  return 0 if success else 1
258
259
if __name__ == '__main__':
  # Log at INFO level and above when run as a script, and use the value
  # returned by main() as the process exit status.
  logging.basicConfig(level=logging.INFO)
  exit_code = main()
  sys.exit(exit_code)
OLDNEW
« no previous file with comments | « no previous file | tools/cygprofile/cyglog_to_orderfile_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698