OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 # Copyright 2013 The Chromium Authors. All rights reserved. | 2 # Copyright 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Symbolize log file produced by cygprofile instrumentation. | 6 """Symbolize log file produced by cygprofile instrumentation. |
7 | 7 |
8 Given a log file and the binary being profiled (e.g. executable, shared | 8 Given a log file and the binary being profiled (e.g. executable, shared |
9 library), the script can produce three different outputs: 1) symbols for the | 9 library), the script can produce three different outputs: 1) symbols for the |
10 addresses, 2) function and line numbers for the addresses, or 3) an order file. | 10 addresses, 2) function and line numbers for the addresses, or 3) an order file. |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
55 for call_line in call_lines: | 55 for call_line in call_lines: |
56 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2]) | 56 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2]) |
57 callee_id = call_line[2] | 57 callee_id = call_line[2] |
58 addr = int(call_line[3], 16) | 58 addr = int(call_line[3], 16) |
59 if vm_start < addr: | 59 if vm_start < addr: |
60 addr -= vm_start | 60 addr -= vm_start |
61 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr)) | 61 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr)) |
62 | 62 |
63 return call_info | 63 return call_info |
64 | 64 |
def GetStdOutputLines(cmd):
  """Run a command and return the lines it wrote to standard output.

  Args:
    cmd: the command to run, as an argument list handed to subprocess.Popen

  Returns:
    list of output lines without their newline terminators. The empty string
    that splitting on a final newline would leave behind is dropped, so
    output made of N newline-terminated lines yields exactly N entries and
    empty output yields an empty list. Blank lines inside the output are
    kept.
  """
  # universal_newlines makes the pipe yield text instead of bytes, so the
  # string split below also works on Python 3; on Python 2 it only
  # normalizes line endings.
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
  output = p.communicate()[0]
  lines = output.split('\n')
  # str.split always returns at least one element, so indexing is safe.  A
  # trailing '' is just the artifact of the final newline (or of empty
  # output), not a real line.
  if lines[-1] == '':
    lines.pop()
  return lines
65 | 69 |
66 def ParseLibSymbols(lib_file): | 70 def ParseLibSymbols(lib_file): |
67 """Get output from running nm and greping for text symbols. | 71 """Get output from running nm and greping for text symbols. |
68 | 72 |
69 Args: | 73 Args: |
70 lib_file: the library or executable that contains the profiled code | 74 lib_file: the library or executable that contains the profiled code |
71 | 75 |
72 Returns: | 76 Returns: |
73 list of sorted unique addresses and corresponding size of function symbols | 77 list of sorted unique addresses and corresponding size of function symbols |
74 in lib_file and map of addresses to all symbols at a particular address | 78 in lib_file and map of addresses to all symbols at a particular address |
75 """ | 79 """ |
76 cmd = ['nm', '-S', '-n', lib_file] | 80 cmd = ['nm', '-S', '-n', lib_file] |
77 nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE) | 81 nm_lines = GetStdOutputLines(cmd) |
78 output = nm_p.communicate()[0] | |
79 nm_lines = output.split('\n') | |
80 | 82 |
81 nm_symbols = [] | 83 nm_symbols = [] |
82 for nm_line in nm_lines: | 84 for nm_line in nm_lines: |
83 if any(str in nm_line for str in (' t ', ' W ', ' T ')): | 85 if any(str in nm_line for str in (' t ', ' W ', ' T ')): |
84 nm_symbols.append(nm_line) | 86 nm_symbols.append(nm_line) |
85 | 87 |
86 nm_index = 0 | 88 nm_index = 0 |
87 unique_addrs = [] | 89 unique_addrs = [] |
88 address_map = {} | 90 address_map = {} |
89 while nm_index < len(nm_symbols): | 91 while nm_index < len(nm_symbols): |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
163 | 165 |
def FindFunctions(addr, unique_addrs, address_map):
  """Look up the function symbol names recorded for address addr.

  Args:
    addr: address to resolve
    unique_addrs: address list searched by BinarySearchAddresses
    address_map: map whose keys are the values BinarySearchAddresses returns

  Returns:
    the address_map entry for the key that BinarySearchAddresses selects
    when searching the whole of unique_addrs
  """
  last_index = len(unique_addrs) - 1
  map_key = BinarySearchAddresses(addr, 0, last_index, unique_addrs)
  return address_map[map_key]
168 | 170 |
169 | 171 |
def AddrToLine(addr, lib_file):
  """Use addr2line to determine line info of a particular address.

  Args:
    addr: integer address to look up; passed to addr2line in hex form
    lib_file: the library or executable given to addr2line through -e

  Returns:
    the two non-empty lines printed by 'addr2line -f' (the function name
    and the source location, per addr2line's documentation), joined by ':'

  Raises:
    AssertionError: if addr2line did not print exactly two non-empty lines
  """
  cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]
  # Splitting newline-terminated output can leave an empty trailing entry;
  # drop blank entries so they are neither counted nor joined into the
  # result as a dangling ':'.
  output = [line for line in GetStdOutputLines(cmd) if line]
  assert len(output) == 2, (
      'unexpected addr2line output for ' + hex(addr) + ': ' + repr(output))
  return ':'.join(output)
181 | 178 |
def GetObjectFileNames(obj_dir):
  """Collect the paths of all '.o' files found beneath obj_dir.

  Args:
    obj_dir: root directory, walked recursively with os.walk

  Returns:
    list of paths, each made by joining the containing directory with the
    file name, in the order os.walk reports them
  """
  return [os.path.join(parent_dir, name)
          for (parent_dir, _, names) in os.walk(obj_dir)
          for name in names
          if name.endswith('.o')]
| 187 |
class WarningCollector(object):
  """Writes warnings to stderr, muting those beyond a fixed limit.

  Every warning is counted, but only the first max_warnings of them are
  written; WriteEnd reports how many were muted.
  """

  def __init__(self, max_warnings):
    # Largest number of warnings that will actually be written.
    self._max_warnings = max_warnings
    # Total number of warnings seen so far, written or not.
    self._warnings = 0

  def Write(self, message):
    """Count one warning and write it unless the limit was already hit."""
    limit_reached = self._warnings >= self._max_warnings
    self._warnings += 1
    if limit_reached:
      return
    sys.stderr.write(message + '\n')

  def WriteEnd(self, message):
    """Write a summary of the muted warnings, if there were any."""
    muted = self._warnings - self._max_warnings
    if muted <= 0:
      return
    sys.stderr.write(str(muted) + ' more warnings for: ' + message + '\n')
| 202 |
def SymbolToSection(obj_dir):
  """Map function symbols to linker section names using the object files.

  Runs 'objdump -w -t' on every object file that GetObjectFileNames finds
  under obj_dir and records the section of each function symbol.  Symbols
  seen in two different sections, or in a section whose name does not start
  with '.text.', are reported through a WarningCollector capped at 300
  messages and are not added to (or updated in) the map.

  Args:
    obj_dir: directory that is scanned for object files

  Returns:
    dict from symbol name to section name
  """
  section_by_symbol = {}
  section_warnings = WarningCollector(300)
  for obj_file in GetObjectFileNames(obj_dir):
    for line in GetStdOutputLines(['objdump', '-w', '-t', obj_file]):
      fields = line.split()
      # All of the symbol lines we care about are in the form
      # 0000000000 g F .text.foo 000000000 [.hidden] foo
      # where g (global) might also be l (local) or w (weak).
      # Anything shorter, or not flagged F, is not a function symbol.
      if len(fields) <= 4 or fields[2] != 'F':
        continue
      symbol = fields[-1]
      if symbol.startswith('.LTHUNK'):
        continue
      section = fields[3]
      # Defaulting to the current section makes a first sighting compare
      # equal, so only a genuinely different earlier section conflicts.
      earlier_section = section_by_symbol.get(symbol, section)
      if earlier_section != section:
        section_warnings.Write('WARNING: Symbol ' + symbol +
                               ' in conflicting sections ' + section +
                               ' and ' + earlier_section)
      elif not section.startswith('.text.'):
        section_warnings.Write('WARNING: Symbol ' + symbol +
                               ' in incorrect section ' + section)
      else:
        section_by_symbol[symbol] = section
  section_warnings.WriteEnd('bad sections')
  return section_by_symbol
182 | 235 |
def main():
  """Write output for profiled run to standard out.

  Expects two positional arguments, log_file and lib_file.  The format of the
  output depends on the output type given with -t/--outputType:

    lineize:   timestamps, callee id and the addr2line result per call
    orderfile: the linker section of every symbol at each called address,
               with an empty line after each call
    anything else (default 'symbolize'): timestamps, callee id and the
               symbol names found at each called address

  Warnings about unresolved addresses and symbols without a known section
  go to stderr and are capped at 300 messages per category.
  """
  parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
  parser.add_option('-t', '--outputType', dest='output_type',
                    default='symbolize', type='string',
                    help='lineize or symbolize or orderfile')

  # Option for output type.  The log file and lib file arguments are required
  # by the script and therefore are not options.
  (options, args) = parser.parse_args()
  if len(args) != 2:
    parser.error('expected 2 args: log_file lib_file')

  (log_file, lib_file) = args
  output_type = options.output_type

  # Read the log inside a with-block so the handle is closed promptly.
  with open(log_file) as log:
    log_file_lines = [line.rstrip() for line in log]
  call_info = ParseLogLines(log_file_lines)
  (unique_addrs, address_map) = ParseLibSymbols(lib_file)

  # Check for duplicate addresses in the log file, and print a warning if
  # duplicates are found.  The instrumentation that produces the log file
  # should only print the first time a function is entered.
  # NOTE(review): unlike the other warnings this one goes to stdout, where it
  # is interleaved with the real output - confirm whether that is intended.
  seen_addrs = set()  # A set keeps the membership test O(1) per call.
  for call in call_info:
    addr = call[3]
    if addr not in seen_addrs:
      seen_addrs.add(addr)
    else:
      print('WARNING: Address ' + hex(addr) + ' (line= ' +
            AddrToLine(addr, lib_file) + ') already profiled.')

  # Only the orderfile output consults the symbol-to-section map, so skip
  # running objdump over every object file for the other output types.
  symbol_to_section_map = {}
  if output_type == 'orderfile':
    obj_dir = os.path.abspath(
        os.path.join(os.path.dirname(lib_file), '../obj'))
    symbol_to_section_map = SymbolToSection(obj_dir)

  unknown_symbol_warnings = WarningCollector(300)
  symbol_not_found_warnings = WarningCollector(300)
  for call in call_info:
    (sec_timestamp, usec_timestamp, callee_id, addr) = call
    line_prefix = (str(sec_timestamp) + ' ' + str(usec_timestamp) + '\t' +
                   str(callee_id) + '\t')

    if output_type == 'lineize':
      print(line_prefix + AddrToLine(addr, lib_file))
      continue

    # Both remaining output types need the symbols at this address.
    try:
      symbols = FindFunctions(addr, unique_addrs, address_map)
    except SymbolNotFoundException:
      symbol_not_found_warnings.Write(
          'WARNING: Did not find function in binary. addr: ' + hex(addr))
      continue

    if output_type == 'orderfile':
      for symbol in symbols:
        if symbol in symbol_to_section_map:
          print(symbol_to_section_map[symbol])
        else:
          unknown_symbol_warnings.Write(
              'WARNING: No known section for symbol ' + symbol)
      print('')
    else:
      # The first symbol shares the line with the call data; any further
      # symbols at the same address go on tab-indented continuation lines.
      print(line_prefix + symbols[0])
      for symbol in symbols[1:]:
        print('\t\t\t\t\t' + symbol)

  unknown_symbol_warnings.WriteEnd('no known section for symbol')
  symbol_not_found_warnings.WriteEnd('did not find function')
248 | 316 |
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
  main()
OLD | NEW |