Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(89)

Side by Side Diff: tools/cygprofile/symbolize.py

Issue 784333002: Check that all symbolized methods in the output orderfile are in their own linker section in the or… (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 # Copyright 2013 The Chromium Authors. All rights reserved. 2 # Copyright 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Symbolize log file produced by cypgofile instrumentation. 6 """Symbolize log file produced by cypgofile instrumentation.
7 7
8 Given a log file and the binary being profiled (e.g. executable, shared 8 Given a log file and the binary being profiled (e.g. executable, shared
9 library), the script can produce three different outputs: 1) symbols for the 9 library), the script can produce three different outputs: 1) symbols for the
10 addresses, 2) function and line numbers for the addresses, or 3) an order file. 10 addresses, 2) function and line numbers for the addresses, or 3) an order file.
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
56 for call_line in call_lines: 56 for call_line in call_lines:
57 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2]) 57 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2])
58 callee_id = call_line[2] 58 callee_id = call_line[2]
59 addr = int(call_line[3], 16) 59 addr = int(call_line[3], 16)
60 if vm_start < addr: 60 if vm_start < addr:
61 addr -= vm_start 61 addr -= vm_start
62 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr)) 62 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr))
63 63
64 return call_info 64 return call_info
65 65
66 def GetOutputLines(cmd):
pasko 2014/12/10 12:07:05 we are ignoring stderr (which is arguably also an
azarchs 2014/12/10 17:13:33 Done.
67 p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
68 output = p.communicate()[0]
69 return output.split('\n')
66 70
67 def ParseLibSymbols(lib_file): 71 def ParseLibSymbols(lib_file):
68 """Get output from running nm and greping for text symbols. 72 """Get output from running nm and greping for text symbols.
69 73
70 Args: 74 Args:
71 lib_file: the library or executable that contains the profiled code 75 lib_file: the library or executable that contains the profiled code
72 76
73 Returns: 77 Returns:
74 list of sorted unique addresses and corresponding size of function symbols 78 list of sorted unique addresses and corresponding size of function symbols
75 in lib_file and map of addresses to all symbols at a particular address 79 in lib_file and map of addresses to all symbols at a particular address
76 """ 80 """
77 cmd = ['nm', '-S', '-n', lib_file] 81 cmd = ['nm', '-S', '-n', lib_file]
78 nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE) 82 nm_lines = GetOutputLines(cmd)
79 output = nm_p.communicate()[0]
80 nm_lines = output.split('\n')
81 83
82 nm_symbols = [] 84 nm_symbols = []
83 for nm_line in nm_lines: 85 for nm_line in nm_lines:
84 if any(str in nm_line for str in (' t ', ' W ', ' T ')): 86 if any(str in nm_line for str in (' t ', ' W ', ' T ')):
85 nm_symbols.append(nm_line) 87 nm_symbols.append(nm_line)
86 88
87 nm_index = 0 89 nm_index = 0
88 unique_addrs = [] 90 unique_addrs = []
89 address_map = {} 91 address_map = {}
90 while nm_index < len(nm_symbols): 92 while nm_index < len(nm_symbols):
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
163 165
164 def FindFunctions(addr, unique_addrs, address_map): 166 def FindFunctions(addr, unique_addrs, address_map):
165 """Find function symbol names at address addr.""" 167 """Find function symbol names at address addr."""
166 return address_map[BinarySearchAddresses(addr, 0, len(unique_addrs) - 1, 168 return address_map[BinarySearchAddresses(addr, 0, len(unique_addrs) - 1,
167 unique_addrs)] 169 unique_addrs)]
168 170
169 171
170 def AddrToLine(addr, lib_file): 172 def AddrToLine(addr, lib_file):
171 """Use addr2line to determine line info of a particular address.""" 173 """Use addr2line to determine line info of a particular address."""
172 cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)] 174 cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]
173 p = subprocess.Popen(cmd, stdout=subprocess.PIPE) 175 output = GetOutputLines(cmd)
174 output = (p.communicate()[0]).split('\n')
175 line = output[0] 176 line = output[0]
176 index = 1 177 index = 1
177 while index < len(output): 178 while index < len(output):
pasko 2014/12/10 12:07:05 with the given arguments addr2line always outputs
azarchs 2014/12/10 17:13:33 Done.
178 line = line + ':' + output[index] 179 line = line + ':' + output[index]
179 index += 1 180 index += 1
180 return line 181 return line
181 182
183 def ObjectFiles(obj_dir):
pasko 2014/12/10 12:07:05 GetObjectFiles
azarchs 2014/12/10 17:13:33 Done.
184 """ Gets the list of object files in the output folder """
pasko 2014/12/10 12:07:05 this is on linux, so s/folder/directory./ :)
azarchs 2014/12/10 17:13:33 Done.
185 obj_files = []
186 for (dirpath, dirnames, filenames) in os.walk(obj_dir):
187 for file_name in filenames:
188 if file_name.endswith('.o'):
189 obj_files.append(os.path.join(dirpath, file_name))
190 return obj_files
191
192 def SymbolToSection(obj_dir):
193 """ Gets a mapping from symbol to linker section name by scanning all
194 of the object files """
pasko 2014/12/10 12:07:05 s/files/files./
195 object_files = ObjectFiles(obj_dir)
196 symbol_map = {}
197 for obj_file in object_files:
198 cmd = ['objdump', '-w', '-t', obj_file]
199 objects = GetOutputLines(cmd)
pasko 2014/12/10 12:07:05 in linux/binutils land 'object' and 'object file'
azarchs 2014/12/10 17:13:33 Done.
200 for object_line in objects:
201 items = object_line.split()
pasko 2014/12/10 12:07:05 it is non-obvious that simple splitting like this
azarchs 2014/12/10 17:13:33 Done.
202 if len(items) > 4 and items[2] == 'F':
203 # This symbol is a function
204 symbol = items[len(items) - 1]
pasko 2014/12/10 12:07:05 more pythonic, but arguably less readable: symbol
azarchs 2014/12/10 17:13:33 Style guide says nothing on the subject. I prefer
pasko 2014/12/10 17:56:09 Acknowledged.
205 if symbol.startswith('.LTHUNK'):
206 continue
207 section = items[3]
208 if symbol in symbol_map and symbol_map[symbol] != section:
209 sys.stderr.write('WARNING: Symbol ' + symbol +
210 ' in conflicting sections ' + section +
pasko 2014/12/10 12:07:05 I really don't see a problem with having a single
azarchs 2014/12/10 17:13:33 It doesn't currently happen. If it did, which sec
211 ' and ' + symbol_map[symbol] + '\n')
212 elif not section.startswith('.text.'):
213 sys.stderr.write('WARNING: Symbol ' + symbol +
pasko 2014/12/10 12:07:04 should we limit the number of messages like this t
azarchs 2014/12/10 17:13:33 Done.
214 ' in incorrect section ' + section + '\n')
215 else:
216 symbol_map[symbol] = section
pasko 2014/12/10 12:07:05 section_map or symbol_to_section_map would be bett
azarchs 2014/12/10 17:13:33 Done.
217 return symbol_map
182 218
183 def main(): 219 def main():
184 """Write output for profiled run to standard out. 220 """Write output for profiled run to standard out.
185 221
186 The format of the output depends on the output type specified as the third 222 The format of the output depends on the output type specified as the third
187 command line argument. The default output type is to symbolize the addresses 223 command line argument. The default output type is to symbolize the addresses
188 of the functions called. 224 of the functions called.
189 """ 225 """
190 parser = optparse.OptionParser('usage: %prog [options] log_file lib_file') 226 parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
191 parser.add_option('-t', '--outputType', dest='output_type', 227 parser.add_option('-t', '--outputType', dest='output_type',
192 default='symbolize', type='string', 228 default='symbolize', type='string',
193 help='lineize or symbolize or orderfile') 229 help='lineize or symbolize or orderfile')
194 230
195 # Option for output type. The log file and lib file arguments are required 231 # Option for output type. The log file and lib file arguments are required
196 # by the script and therefore are not options. 232 # by the script and therefore are not options.
197 (options, args) = parser.parse_args() 233 (options, args) = parser.parse_args()
198 if len(args) != 2: 234 if len(args) != 2:
199 parser.error('expected 2 args: log_file lib_file') 235 parser.error('expected 2 args: log_file lib_file')
200 236
201 (log_file, lib_file) = args 237 (log_file, lib_file) = args
202 output_type = options.output_type 238 output_type = options.output_type
203 239
240 obj_dir = os.path.abspath(os.path.join(os.path.dirname(lib_file), '../obj'))
241
204 lib_name = lib_file.split('/')[-1].strip() 242 lib_name = lib_file.split('/')[-1].strip()
205 log_file_lines = map(string.rstrip, open(log_file).readlines()) 243 log_file_lines = map(string.rstrip, open(log_file).readlines())
206 call_info = ParseLogLines(log_file_lines) 244 call_info = ParseLogLines(log_file_lines)
207 (unique_addrs, address_map) = ParseLibSymbols(lib_file) 245 (unique_addrs, address_map) = ParseLibSymbols(lib_file)
208 246
209 # Check for duplicate addresses in the log file, and print a warning if 247 # Check for duplicate addresses in the log file, and print a warning if
210 # duplicates are found. The instrumentation that produces the log file 248 # duplicates are found. The instrumentation that produces the log file
211 # should only print the first time a function is entered. 249 # should only print the first time a function is entered.
212 addr_list = [] 250 addr_list = []
213 for call in call_info: 251 for call in call_info:
214 addr = call[3] 252 addr = call[3]
215 if addr not in addr_list: 253 if addr not in addr_list:
216 addr_list.append(addr) 254 addr_list.append(addr)
217 else: 255 else:
218 print('WARNING: Address ' + hex(addr) + ' (line= ' + 256 print('WARNING: Address ' + hex(addr) + ' (line= ' +
219 AddrToLine(addr, lib_file) + ') already profiled.') 257 AddrToLine(addr, lib_file) + ') already profiled.')
220 258
259 symbol_map = SymbolToSection(obj_dir)
260
221 for call in call_info: 261 for call in call_info:
222 if output_type == 'lineize': 262 if output_type == 'lineize':
223 symbol = AddrToLine(call[3], lib_file) 263 symbol = AddrToLine(call[3], lib_file)
224 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' 264 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t'
225 + symbol) 265 + symbol)
226 elif output_type == 'orderfile': 266 elif output_type == 'orderfile':
227 try: 267 try:
228 symbols = FindFunctions(call[3], unique_addrs, address_map) 268 symbols = FindFunctions(call[3], unique_addrs, address_map)
229 for symbol in symbols: 269 for symbol in symbols:
230 print '.text.' + symbol 270 if symbol in symbol_map:
271 print symbol_map[symbol]
272 else:
273 sys.stderr.write('WARNING: Unknown symbol ' + symbol + '\n')
pasko 2014/12/10 12:07:05 so this happens when a symbol is in the final libr
azarchs 2014/12/10 17:13:33 Done.
231 print '' 274 print ''
232 except SymbolNotFoundException as e: 275 except SymbolNotFoundException as e:
233 sys.stderr.write('WARNING: Did not find function in binary. addr: ' 276 sys.stderr.write('WARNING: Did not find function in binary. addr: '
234 + hex(addr) + '\n') 277 + hex(addr) + '\n')
235 else: 278 else:
236 try: 279 try:
237 symbols = FindFunctions(call[3], unique_addrs, address_map) 280 symbols = FindFunctions(call[3], unique_addrs, address_map)
238 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' 281 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t'
239 + symbols[0]) 282 + symbols[0])
240 first_symbol = True 283 first_symbol = True
241 for symbol in symbols: 284 for symbol in symbols:
242 if not first_symbol: 285 if not first_symbol:
243 print '\t\t\t\t\t' + symbol 286 print '\t\t\t\t\t' + symbol
244 else: 287 else:
245 first_symbol = False 288 first_symbol = False
246 except SymbolNotFoundException as e: 289 except SymbolNotFoundException as e:
247 sys.stderr.write('WARNING: Did not find function in binary. addr: ' 290 sys.stderr.write('WARNING: Did not find function in binary. addr: '
248 + hex(addr) + '\n') 291 + hex(addr) + '\n')
249 292
250 if __name__ == '__main__': 293 if __name__ == '__main__':
251 main() 294 main()
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698