Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(179)

Side by Side Diff: tools/cygprofile/symbolize.py

Issue 796423003: Check that all symbolized methods in the output orderfile are in their own linker section in the or… (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 # Copyright 2013 The Chromium Authors. All rights reserved. 2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Symbolize log file produced by cygprofile instrumentation. 6 """Symbolize log file produced by cygprofile instrumentation.
7 7
8 Given a log file and the binary being profiled (e.g. executable, shared 8 Given a log file and the binary being profiled (e.g. executable, shared
9 library), the script can produce three different outputs: 1) symbols for the 9 library), the script can produce three different outputs: 1) symbols for the
10 addresses, 2) function and line numbers for the addresses, or 3) an order file. 10 addresses, 2) function and line numbers for the addresses, or 3) an order file.
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
55 for call_line in call_lines: 55 for call_line in call_lines:
56 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2]) 56 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2])
57 callee_id = call_line[2] 57 callee_id = call_line[2]
58 addr = int(call_line[3], 16) 58 addr = int(call_line[3], 16)
59 if vm_start < addr: 59 if vm_start < addr:
60 addr -= vm_start 60 addr -= vm_start
61 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr)) 61 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr))
62 62
63 return call_info 63 return call_info
64 64
65 def GetStdOutputLines(cmd):
66 p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
67 output = p.communicate()[0]
68 return output.split('\n')
65 69
66 def ParseLibSymbols(lib_file): 70 def ParseLibSymbols(lib_file):
67 """Get output from running nm and grepping for text symbols. 71 """Get output from running nm and grepping for text symbols.
68 72
69 Args: 73 Args:
70 lib_file: the library or executable that contains the profiled code 74 lib_file: the library or executable that contains the profiled code
71 75
72 Returns: 76 Returns:
73 list of sorted unique addresses and corresponding size of function symbols 77 list of sorted unique addresses and corresponding size of function symbols
74 in lib_file and map of addresses to all symbols at a particular address 78 in lib_file and map of addresses to all symbols at a particular address
75 """ 79 """
76 cmd = ['nm', '-S', '-n', lib_file] 80 cmd = ['nm', '-S', '-n', lib_file]
77 nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE) 81 nm_lines = GetStdOutputLines(cmd)
78 output = nm_p.communicate()[0]
79 nm_lines = output.split('\n')
80 82
81 nm_symbols = [] 83 nm_symbols = []
82 for nm_line in nm_lines: 84 for nm_line in nm_lines:
83 if any(str in nm_line for str in (' t ', ' W ', ' T ')): 85 if any(str in nm_line for str in (' t ', ' W ', ' T ')):
84 nm_symbols.append(nm_line) 86 nm_symbols.append(nm_line)
85 87
86 nm_index = 0 88 nm_index = 0
87 unique_addrs = [] 89 unique_addrs = []
88 address_map = {} 90 address_map = {}
89 while nm_index < len(nm_symbols): 91 while nm_index < len(nm_symbols):
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
163 165
164 def FindFunctions(addr, unique_addrs, address_map): 166 def FindFunctions(addr, unique_addrs, address_map):
165 """Find function symbol names at address addr.""" 167 """Find function symbol names at address addr."""
166 return address_map[BinarySearchAddresses(addr, 0, len(unique_addrs) - 1, 168 return address_map[BinarySearchAddresses(addr, 0, len(unique_addrs) - 1,
167 unique_addrs)] 169 unique_addrs)]
168 170
169 171
170 def AddrToLine(addr, lib_file): 172 def AddrToLine(addr, lib_file):
171 """Use addr2line to determine line info of a particular address.""" 173 """Use addr2line to determine line info of a particular address."""
172 cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)] 174 cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]
173 p = subprocess.Popen(cmd, stdout=subprocess.PIPE) 175 output = GetStdOutputLines(cmd)
174 output = (p.communicate()[0]).split('\n') 176 assert(len(output) == 2)
175 line = output[0] 177 return ':'.join(output)
176 index = 1
177 while index < len(output):
178 line = line + ':' + output[index]
179 index += 1
180 return line
181 178
179 def GetObjectFileNames(obj_dir):
180 """ Gets the list of object files in the output directory. """
181 obj_files = []
182 for (dirpath, _, filenames) in os.walk(obj_dir):
183 for file_name in filenames:
184 if file_name.endswith('.o'):
185 obj_files.append(os.path.join(dirpath, file_name))
186 return obj_files
187
188 class WarningCollector(object):
189 def __init__(self, max_warnings):
190 self._warnings = 0
191 self._max_warnings = max_warnings
192
193 def Write(self, message):
194 if self._warnings < self._max_warnings:
195 sys.stderr.write(message + '\n')
196 self._warnings += 1
197
198 def WriteEnd(self, message):
199 if self._warnings > self._max_warnings:
200 sys.stderr.write(str(self._warnings - self._max_warnings) +
201 ' more warnings for: ' + message + '\n')
202
203 def SymbolToSection(obj_dir):
204 """ Gets a mapping from symbol to linker section name by scanning all
205 of the object files. """
206 object_files = GetObjectFileNames(obj_dir)
207 symbol_to_section_map = {}
208 symbol_warnings = WarningCollector(300)
209 for obj_file in object_files:
210 cmd = ['objdump', '-w', '-t', obj_file]
211 symbol_lines = GetStdOutputLines(cmd)
212 for symbol_line in symbol_lines:
213 items = symbol_line.split()
214 # All of the symbol lines we care about are in the form
215 # 0000000000 g F .text.foo 000000000 [.hidden] foo
216 # where g (global) might also be l (local) or w (weak).
217 if len(items) > 4 and items[2] == 'F':
218 # This symbol is a function
219 symbol = items[len(items) - 1]
220 if symbol.startswith('.LTHUNK'):
221 continue
222 section = items[3]
223 if ((symbol in symbol_to_section_map) and
224 (symbol_to_section_map[symbol] != section)):
225 symbol_warnings.Write('WARNING: Symbol ' + symbol +
226 ' in conflicting sections ' + section +
227 ' and ' + symbol_to_section_map[symbol])
228 elif not section.startswith('.text.'):
229 symbol_warnings.Write('WARNING: Symbol ' + symbol +
230 ' in incorrect section ' + section)
231 else:
232 symbol_to_section_map[symbol] = section
233 symbol_warnings.WriteEnd('bad sections')
234 return symbol_to_section_map
182 235
183 def main(): 236 def main():
184 """Write output for profiled run to standard out. 237 """Write output for profiled run to standard out.
185 238
186 The format of the output depends on the output type specified as the third 239 The format of the output depends on the output type specified as the third
187 command line argument. The default output type is to symbolize the addresses 240 command line argument. The default output type is to symbolize the addresses
188 of the functions called. 241 of the functions called.
189 """ 242 """
190 parser = optparse.OptionParser('usage: %prog [options] log_file lib_file') 243 parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
191 parser.add_option('-t', '--outputType', dest='output_type', 244 parser.add_option('-t', '--outputType', dest='output_type',
192 default='symbolize', type='string', 245 default='symbolize', type='string',
193 help='lineize or symbolize or orderfile') 246 help='lineize or symbolize or orderfile')
194 247
195 # Option for output type. The log file and lib file arguments are required 248 # Option for output type. The log file and lib file arguments are required
196 # by the script and therefore are not options. 249 # by the script and therefore are not options.
197 (options, args) = parser.parse_args() 250 (options, args) = parser.parse_args()
198 if len(args) != 2: 251 if len(args) != 2:
199 parser.error('expected 2 args: log_file lib_file') 252 parser.error('expected 2 args: log_file lib_file')
200 253
201 (log_file, lib_file) = args 254 (log_file, lib_file) = args
202 output_type = options.output_type 255 output_type = options.output_type
203 256
257 obj_dir = os.path.abspath(os.path.join(os.path.dirname(lib_file), '../obj'))
258
204 log_file_lines = map(string.rstrip, open(log_file).readlines()) 259 log_file_lines = map(string.rstrip, open(log_file).readlines())
205 call_info = ParseLogLines(log_file_lines) 260 call_info = ParseLogLines(log_file_lines)
206 (unique_addrs, address_map) = ParseLibSymbols(lib_file) 261 (unique_addrs, address_map) = ParseLibSymbols(lib_file)
207 262
208 # Check for duplicate addresses in the log file, and print a warning if 263 # Check for duplicate addresses in the log file, and print a warning if
209 # duplicates are found. The instrumentation that produces the log file 264 # duplicates are found. The instrumentation that produces the log file
210 # should only print the first time a function is entered. 265 # should only print the first time a function is entered.
211 addr_list = [] 266 addr_list = []
212 for call in call_info: 267 for call in call_info:
213 addr = call[3] 268 addr = call[3]
214 if addr not in addr_list: 269 if addr not in addr_list:
215 addr_list.append(addr) 270 addr_list.append(addr)
216 else: 271 else:
217 print('WARNING: Address ' + hex(addr) + ' (line= ' + 272 print('WARNING: Address ' + hex(addr) + ' (line= ' +
218 AddrToLine(addr, lib_file) + ') already profiled.') 273 AddrToLine(addr, lib_file) + ') already profiled.')
219 274
275 symbol_to_section_map = SymbolToSection(obj_dir)
276
277 unknown_symbol_warnings = WarningCollector(300)
278 symbol_not_found_warnings = WarningCollector(300)
220 for call in call_info: 279 for call in call_info:
280 addr = call[3]
221 if output_type == 'lineize': 281 if output_type == 'lineize':
222 symbol = AddrToLine(call[3], lib_file) 282 symbol = AddrToLine(addr, lib_file)
223 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' 283 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t'
224 + symbol) 284 + symbol)
225 elif output_type == 'orderfile': 285 elif output_type == 'orderfile':
226 try: 286 try:
227 symbols = FindFunctions(call[3], unique_addrs, address_map) 287 symbols = FindFunctions(addr, unique_addrs, address_map)
228 for symbol in symbols: 288 for symbol in symbols:
229 print '.text.' + symbol 289 if symbol in symbol_to_section_map:
290 print symbol_to_section_map[symbol]
291 else:
292 unknown_symbol_warnings.Write(
293 'WARNING: No known section for symbol ' + symbol)
230 print '' 294 print ''
231 except SymbolNotFoundException: 295 except SymbolNotFoundException:
232 sys.stderr.write('WARNING: Did not find function in binary. addr: ' 296 symbol_not_found_warnings.Write(
233 + hex(addr) + '\n') 297 'WARNING: Did not find function in binary. addr: '
298 + hex(addr))
234 else: 299 else:
235 try: 300 try:
236 symbols = FindFunctions(call[3], unique_addrs, address_map) 301 symbols = FindFunctions(addr, unique_addrs, address_map)
237 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' 302 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t'
238 + symbols[0]) 303 + symbols[0])
239 first_symbol = True 304 first_symbol = True
240 for symbol in symbols: 305 for symbol in symbols:
241 if not first_symbol: 306 if not first_symbol:
242 print '\t\t\t\t\t' + symbol 307 print '\t\t\t\t\t' + symbol
243 else: 308 else:
244 first_symbol = False 309 first_symbol = False
245 except SymbolNotFoundException: 310 except SymbolNotFoundException:
246 sys.stderr.write('WARNING: Did not find function in binary. addr: ' 311 symbol_not_found_warnings.Write(
247 + hex(addr) + '\n') 312 'WARNING: Did not find function in binary. addr: '
313 + hex(addr))
314 unknown_symbol_warnings.WriteEnd('no known section for symbol')
315 symbol_not_found_warnings.WriteEnd('did not find function')
248 316
249 if __name__ == '__main__': 317 if __name__ == '__main__':
250 main() 318 main()
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698