OLD | NEW |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 # Copyright 2013 The Chromium Authors. All rights reserved. | 2 # Copyright 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Symbolize log file produced by cygprofile instrumentation. | 6 """Symbolize log file produced by cygprofile instrumentation. |
7 | 7 |
8 Given a log file and the binary being profiled (e.g. executable, shared | 8 Given a log file and the binary being profiled (e.g. executable, shared |
9 library), the script can produce three different outputs: 1) symbols for the | 9 library), the script can produce three different outputs: 1) symbols for the |
10 addresses, 2) function and line numbers for the addresses, or 3) an order file. | 10 addresses, 2) function and line numbers for the addresses, or 3) an order file. |
(...skipping 45 matching lines...) | |
56 for call_line in call_lines: | 56 for call_line in call_lines: |
57 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2]) | 57 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2]) |
58 callee_id = call_line[2] | 58 callee_id = call_line[2] |
59 addr = int(call_line[3], 16) | 59 addr = int(call_line[3], 16) |
60 if vm_start < addr: | 60 if vm_start < addr: |
61 addr -= vm_start | 61 addr -= vm_start |
62 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr)) | 62 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr)) |
63 | 63 |
64 return call_info | 64 return call_info |
65 | 65 |
66 def GetStdOutputLines(cmd): | |
67 p = subprocess.Popen(cmd, stdout=subprocess.PIPE) | |
68 output = p.communicate()[0] | |
69 return output.split('\n') | |
66 | 70 |
67 def ParseLibSymbols(lib_file): | 71 def ParseLibSymbols(lib_file): |
68 """Get output from running nm and grepping for text symbols. | 72 """Get output from running nm and grepping for text symbols. |
69 | 73 |
70 Args: | 74 Args: |
71 lib_file: the library or executable that contains the profiled code | 75 lib_file: the library or executable that contains the profiled code |
72 | 76 |
73 Returns: | 77 Returns: |
74 list of sorted unique addresses and corresponding size of function symbols | 78 list of sorted unique addresses and corresponding size of function symbols |
75 in lib_file and map of addresses to all symbols at a particular address | 79 in lib_file and map of addresses to all symbols at a particular address |
76 """ | 80 """ |
77 cmd = ['nm', '-S', '-n', lib_file] | 81 cmd = ['nm', '-S', '-n', lib_file] |
78 nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE) | 82 nm_lines = GetStdOutputLines(cmd) |
79 output = nm_p.communicate()[0] | |
80 nm_lines = output.split('\n') | |
81 | 83 |
82 nm_symbols = [] | 84 nm_symbols = [] |
83 for nm_line in nm_lines: | 85 for nm_line in nm_lines: |
84 if any(str in nm_line for str in (' t ', ' W ', ' T ')): | 86 if any(str in nm_line for str in (' t ', ' W ', ' T ')): |
85 nm_symbols.append(nm_line) | 87 nm_symbols.append(nm_line) |
86 | 88 |
87 nm_index = 0 | 89 nm_index = 0 |
88 unique_addrs = [] | 90 unique_addrs = [] |
89 address_map = {} | 91 address_map = {} |
90 while nm_index < len(nm_symbols): | 92 while nm_index < len(nm_symbols): |
(...skipping 72 matching lines...) | |
163 | 165 |
164 def FindFunctions(addr, unique_addrs, address_map): | 166 def FindFunctions(addr, unique_addrs, address_map): |
165 """Find function symbol names at address addr.""" | 167 """Find function symbol names at address addr.""" |
166 return address_map[BinarySearchAddresses(addr, 0, len(unique_addrs) - 1, | 168 return address_map[BinarySearchAddresses(addr, 0, len(unique_addrs) - 1, |
167 unique_addrs)] | 169 unique_addrs)] |
168 | 170 |
169 | 171 |
170 def AddrToLine(addr, lib_file): | 172 def AddrToLine(addr, lib_file): |
171 """Use addr2line to determine line info of a particular address.""" | 173 """Use addr2line to determine line info of a particular address.""" |
172 cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)] | 174 cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)] |
173 p = subprocess.Popen(cmd, stdout=subprocess.PIPE) | 175 output = GetStdOutputLines(cmd) |
174 output = (p.communicate()[0]).split('\n') | 176 assert(len(output) == 2) |
175 line = output[0] | 177 return ':'.join(output) |
176 index = 1 | |
177 while index < len(output): | |
178 line = line + ':' + output[index] | |
179 index += 1 | |
180 return line | |
181 | 178 |
179 def GetObjectFileNames(obj_dir): | |
180 """ Gets the list of object files in the output directory. """ | |
181 obj_files = [] | |
182 for (dirpath, dirnames, filenames) in os.walk(obj_dir): | |
183 for file_name in filenames: | |
184 if file_name.endswith('.o'): | |
185 obj_files.append(os.path.join(dirpath, file_name)) | |
186 return obj_files | |
187 | |
188 class WarningCollector(object): | |
189 def __init__(self, max_warnings): | |
190 self._warnings = 0 | |
191 self._max_warnings = max_warnings | |
192 | |
193 def Write(self, message): | |
194 if self._warnings < self._max_warnings: | |
195 sys.stderr.write(message) | |
196 self._warnings += 1 | |
197 | |
198 def WriteEnd(self, message): | |
199 if self._warnings > self._max_warnings: | |
200 sys.stderr.write(str(self._warnings - self._max_warnings) + | |
201 ' more warnings for: ' + message) | |
pasko
2014/12/11 10:25:30
does not have a '\n'
azarchs
2014/12/11 17:01:15
Done.
| |
202 | |
203 def SymbolToSection(obj_dir): | |
204 """ Gets a mapping from symbol to linker section name by scanning all | |
205 of the object files. """ | |
206 object_files = GetObjectFileNames(obj_dir) | |
207 symbol_to_section_map = {} | |
208 symbol_warnings = WarningCollector(300) | |
209 for obj_file in object_files: | |
210 cmd = ['objdump', '-w', '-t', obj_file] | |
211 symbol_lines = GetStdOutputLines(cmd) | |
212 for symbol_line in symbol_lines: | |
213 items = symbol_line.split() | |
214 # All of the symbol lines we care about are in the form | |
215 # 0000000000 g F .text.foo 000000000 [.hidden] foo | |
216 # where g (global) might also be l (local) or w (weak). | |
217 if len(items) > 4 and items[2] == 'F': | |
218 # This symbol is a function | |
219 symbol = items[len(items) - 1] | |
220 if symbol.startswith('.LTHUNK'): | |
221 continue | |
222 section = items[3] | |
223 if ((symbol in symbol_to_section_map) and | |
224 (symbol_to_section_map[symbol] != section)): | |
225 symbol_warnings.Write('WARNING: Symbol ' + symbol + | |
226 ' in conflicting sections ' + section + | |
227 ' and ' + symbol_to_section_map[symbol] + '\n') | |
pasko
2014/12/11 10:25:30
please move the '\n' to WarningCollector.Write, le
azarchs
2014/12/11 17:01:14
Done.
| |
228 elif not section.startswith('.text.'): | |
229 symbol_warnings.Write('WARNING: Symbol ' + symbol + | |
230 ' in incorrect section ' + section + '\n') | |
231 else: | |
232 symbol_to_section_map[symbol] = section | |
233 symbol_warnings.WriteEnd('bad sections') | |
234 return symbol_to_section_map | |
182 | 235 |
183 def main(): | 236 def main(): |
184 """Write output for profiled run to standard out. | 237 """Write output for profiled run to standard out. |
185 | 238 |
186 The format of the output depends on the output type specified as the third | 239 The format of the output depends on the output type specified as the third |
187 command line argument. The default output type is to symbolize the addresses | 240 command line argument. The default output type is to symbolize the addresses |
188 of the functions called. | 241 of the functions called. |
189 """ | 242 """ |
190 parser = optparse.OptionParser('usage: %prog [options] log_file lib_file') | 243 parser = optparse.OptionParser('usage: %prog [options] log_file lib_file') |
191 parser.add_option('-t', '--outputType', dest='output_type', | 244 parser.add_option('-t', '--outputType', dest='output_type', |
192 default='symbolize', type='string', | 245 default='symbolize', type='string', |
193 help='lineize or symbolize or orderfile') | 246 help='lineize or symbolize or orderfile') |
194 | 247 |
195 # Option for output type. The log file and lib file arguments are required | 248 # Option for output type. The log file and lib file arguments are required |
196 # by the script and therefore are not options. | 249 # by the script and therefore are not options. |
197 (options, args) = parser.parse_args() | 250 (options, args) = parser.parse_args() |
198 if len(args) != 2: | 251 if len(args) != 2: |
199 parser.error('expected 2 args: log_file lib_file') | 252 parser.error('expected 2 args: log_file lib_file') |
200 | 253 |
201 (log_file, lib_file) = args | 254 (log_file, lib_file) = args |
202 output_type = options.output_type | 255 output_type = options.output_type |
203 | 256 |
257 obj_dir = os.path.abspath(os.path.join(os.path.dirname(lib_file), '../obj')) | |
258 | |
204 lib_name = lib_file.split('/')[-1].strip() | 259 lib_name = lib_file.split('/')[-1].strip() |
205 log_file_lines = map(string.rstrip, open(log_file).readlines()) | 260 log_file_lines = map(string.rstrip, open(log_file).readlines()) |
206 call_info = ParseLogLines(log_file_lines) | 261 call_info = ParseLogLines(log_file_lines) |
207 (unique_addrs, address_map) = ParseLibSymbols(lib_file) | 262 (unique_addrs, address_map) = ParseLibSymbols(lib_file) |
208 | 263 |
209 # Check for duplicate addresses in the log file, and print a warning if | 264 # Check for duplicate addresses in the log file, and print a warning if |
210 # duplicates are found. The instrumentation that produces the log file | 265 # duplicates are found. The instrumentation that produces the log file |
211 # should only print the first time a function is entered. | 266 # should only print the first time a function is entered. |
212 addr_list = [] | 267 addr_list = [] |
213 for call in call_info: | 268 for call in call_info: |
214 addr = call[3] | 269 addr = call[3] |
215 if addr not in addr_list: | 270 if addr not in addr_list: |
216 addr_list.append(addr) | 271 addr_list.append(addr) |
217 else: | 272 else: |
218 print('WARNING: Address ' + hex(addr) + ' (line= ' + | 273 print('WARNING: Address ' + hex(addr) + ' (line= ' + |
219 AddrToLine(addr, lib_file) + ') already profiled.') | 274 AddrToLine(addr, lib_file) + ') already profiled.') |
220 | 275 |
276 symbol_to_section_map = SymbolToSection(obj_dir) | |
277 | |
278 unknown_symbol_warnings = WarningCollector(300) | |
279 symbol_not_found_warnings = WarningCollector(300) | |
221 for call in call_info: | 280 for call in call_info: |
281 addr = call[3] | |
222 if output_type == 'lineize': | 282 if output_type == 'lineize': |
223 symbol = AddrToLine(call[3], lib_file) | 283 symbol = AddrToLine(addr, lib_file) |
224 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' | 284 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' |
225 + symbol) | 285 + symbol) |
226 elif output_type == 'orderfile': | 286 elif output_type == 'orderfile': |
227 try: | 287 try: |
228 symbols = FindFunctions(call[3], unique_addrs, address_map) | 288 symbols = FindFunctions(addr, unique_addrs, address_map) |
229 for symbol in symbols: | 289 for symbol in symbols: |
230 print '.text.' + symbol | 290 if symbol in symbol_to_section_map: |
291 print symbol_to_section_map[symbol] | |
292 else: | |
293 unknown_symbol_warnings.write( | |
pasko
2014/12/11 10:25:30
s/write/Write/ here and below
azarchs
2014/12/11 17:01:14
Done.
| |
294 'WARNING: No known section for symbol ' + symbol + '\n') | |
231 print '' | 295 print '' |
232 except SymbolNotFoundException as e: | 296 except SymbolNotFoundException as e: |
233 sys.stderr.write('WARNING: Did not find function in binary. addr: ' | 297 symbol_not_found_warnings.write( |
234 + hex(addr) + '\n') | 298 'WARNING: Did not find function in binary. addr: ' |
299 + hex(addr) + '\n') | |
235 else: | 300 else: |
236 try: | 301 try: |
237 symbols = FindFunctions(call[3], unique_addrs, address_map) | 302 symbols = FindFunctions(addr, unique_addrs, address_map) |
238 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' | 303 print(str(call[0]) + ' ' + str(call[1]) + '\t' + str(call[2]) + '\t' |
239 + symbols[0]) | 304 + symbols[0]) |
240 first_symbol = True | 305 first_symbol = True |
241 for symbol in symbols: | 306 for symbol in symbols: |
242 if not first_symbol: | 307 if not first_symbol: |
243 print '\t\t\t\t\t' + symbol | 308 print '\t\t\t\t\t' + symbol |
244 else: | 309 else: |
245 first_symbol = False | 310 first_symbol = False |
246 except SymbolNotFoundException as e: | 311 except SymbolNotFoundException as e: |
247 sys.stderr.write('WARNING: Did not find function in binary. addr: ' | 312 symbol_not_found_warnings.write( |
248 + hex(addr) + '\n') | 313 'WARNING: Did not find function in binary. addr: ' |
314 + hex(addr) + '\n') | |
315 unknown_symbol_warnings.WriteEnd('no known section for symbol') | |
316 symbol_not_found_warnings.WriteEnd('did not find function') | |
249 | 317 |
250 if __name__ == '__main__': | 318 if __name__ == '__main__': |
251 main() | 319 main() |
OLD | NEW |