OLD | NEW |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 # Copyright 2013 The Chromium Authors. All rights reserved. | 2 # Copyright 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Symbolize log file produced by cypgofile instrumentation. | 6 """Symbolize log file produced by cypgofile instrumentation. |
7 | 7 |
8 Given a log file and the binary being profiled (e.g. executable, shared | 8 Given a log file and the binary being profiled (e.g. executable, shared |
9 library), the script can produce three different outputs: 1) symbols for the | 9 library), the script can produce three different outputs: 1) symbols for the |
10 addresses, 2) function and line numbers for the addresses, or 3) an order file. | 10 addresses, 2) function and line numbers for the addresses, or 3) an order file. |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
56 for call_line in call_lines: | 56 for call_line in call_lines: |
57 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2]) | 57 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2]) |
58 callee_id = call_line[2] | 58 callee_id = call_line[2] |
59 addr = int(call_line[3], 16) | 59 addr = int(call_line[3], 16) |
60 if vm_start < addr: | 60 if vm_start < addr: |
61 addr -= vm_start | 61 addr -= vm_start |
62 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr)) | 62 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr)) |
63 | 63 |
64 return call_info | 64 return call_info |
65 | 65 |
def GetOutputLines(cmd):
  """Runs a command and returns its standard output split into lines.

  Only stdout is captured; the child's stderr is not redirected by this
  function. A non-zero exit status does not raise: a warning is written to
  stderr and whatever was printed to stdout is still returned.

  Args:
    cmd: the command and its arguments as a list of strings, as accepted by
         subprocess.Popen

  Returns:
    list of the lines written to stdout. The output is split on newline
    characters, so output that ends with a newline yields a trailing empty
    string, and empty output yields a list holding one empty string.
  """
  # universal_newlines=True makes communicate() return text instead of bytes,
  # so splitting on a str separator works under both Python 2 and Python 3.
  p = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
  output = p.communicate()[0]
  if p.returncode != 0:
    # Do not fail hard: callers treat the tools as best effort, but a failing
    # tool should not go unnoticed either.
    sys.stderr.write('WARNING: Command exited with status ' +
                     str(p.returncode) + ': ' + ' '.join(cmd) + '\n')
  return output.split('\n')
66 | 70 |
67 def ParseLibSymbols(lib_file): | 71 def ParseLibSymbols(lib_file): |
68 """Get output from running nm and greping for text symbols. | 72 """Get output from running nm and greping for text symbols. |
69 | 73 |
70 Args: | 74 Args: |
71 lib_file: the library or executable that contains the profiled code | 75 lib_file: the library or executable that contains the profiled code |
72 | 76 |
73 Returns: | 77 Returns: |
74 list of sorted unique addresses and corresponding size of function symbols | 78 list of sorted unique addresses and corresponding size of function symbols |
75 in lib_file and map of addresses to all symbols at a particular address | 79 in lib_file and map of addresses to all symbols at a particular address |
76 """ | 80 """ |
77 cmd = ['nm', '-S', '-n', lib_file] | 81 cmd = ['nm', '-S', '-n', lib_file] |
78 nm_p = subprocess.Popen(cmd, stdout=subprocess.PIPE) | 82 nm_lines = GetOutputLines(cmd) |
79 output = nm_p.communicate()[0] | |
80 nm_lines = output.split('\n') | |
81 | 83 |
82 nm_symbols = [] | 84 nm_symbols = [] |
83 for nm_line in nm_lines: | 85 for nm_line in nm_lines: |
84 if any(str in nm_line for str in (' t ', ' W ', ' T ')): | 86 if any(str in nm_line for str in (' t ', ' W ', ' T ')): |
85 nm_symbols.append(nm_line) | 87 nm_symbols.append(nm_line) |
86 | 88 |
87 nm_index = 0 | 89 nm_index = 0 |
88 unique_addrs = [] | 90 unique_addrs = [] |
89 address_map = {} | 91 address_map = {} |
90 while nm_index < len(nm_symbols): | 92 while nm_index < len(nm_symbols): |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
163 | 165 |
def FindFunctions(addr, unique_addrs, address_map):
  """Look up the function symbol names recorded for address addr.

  Args:
    addr: the address to look up
    unique_addrs: address list handed to BinarySearchAddresses, searched
                  over its whole index range (0 to len - 1)
    address_map: map keyed by the value BinarySearchAddresses returns

  Returns:
    the address_map entry for the result of the binary search.
  """
  last_index = len(unique_addrs) - 1
  matched_key = BinarySearchAddresses(addr, 0, last_index, unique_addrs)
  return address_map[matched_key]
168 | 170 |
169 | 171 |
def AddrToLine(addr, lib_file):
  """Use addr2line to determine line info of a particular address.

  Args:
    addr: integer address to resolve; passed to addr2line as hex(addr)
    lib_file: the library or executable handed to addr2line via -e

  Returns:
    every line that GetOutputLines returns for the addr2line invocation,
    joined with ':' into a single string.
  """
  cmd = ['addr2line', '-f', '-e', lib_file, hex(addr)]
  # str.join replaces the manual index loop that concatenated one line at a
  # time; the resulting string is the same.
  return ':'.join(GetOutputLines(cmd))
181 | 182 |
def ObjectFiles(obj_dir):
  """Gets the list of object files in the output directory.

  Args:
    obj_dir: root directory, walked recursively with os.walk

  Returns:
    list with the path of every file below obj_dir whose name ends in '.o',
    each built by joining the file name onto the directory it was found in.
  """
  found = []
  for (parent, _, names) in os.walk(obj_dir):
    found.extend(os.path.join(parent, name)
                 for name in names if name.endswith('.o'))
  return found
191 | |
def SymbolToSection(obj_dir):
  """Gets a mapping from symbol to linker section name.

  Runs 'objdump -w -t' on every object file returned by ObjectFiles(obj_dir)
  and inspects each output line after splitting it on whitespace.

  Args:
    obj_dir: directory that is scanned for object files

  Returns:
    dict mapping a function symbol name to the name of its section. Only
    sections whose name starts with '.text.' are recorded; a symbol seen in
    any other section, or seen again with a different section than the one
    already recorded, only produces a warning on stderr.
  """
  section_by_symbol = {}
  for obj_path in ObjectFiles(obj_dir):
    for row in GetOutputLines(['objdump', '-w', '-t', obj_path]):
      fields = row.split()
      # Rows of interest have more than four fields, with 'F' (the symbol is
      # a function) as the third one.
      if len(fields) <= 4 or fields[2] != 'F':
        continue
      # The symbol name is the last field; the section name is the fourth.
      name = fields[-1]
      if name.startswith('.LTHUNK'):
        continue
      section = fields[3]
      recorded = section_by_symbol.get(name)
      if recorded is not None and recorded != section:
        sys.stderr.write('WARNING: Symbol ' + name +
                         ' in conflicting sections ' + section +
                         ' and ' + recorded + '\n')
      elif not section.startswith('.text.'):
        sys.stderr.write('WARNING: Symbol ' + name +
                         ' in incorrect section ' + section + '\n')
      else:
        section_by_symbol[name] = section
  return section_by_symbol
182 | 218 |
def main():
  """Write output for profiled run to standard out.

  The format of the output depends on the output type given with the
  -t/--outputType option: 'lineize' prints addr2line information for each
  call, 'orderfile' prints the section name of each called function, and any
  other value (the default is 'symbolize') prints the symbols found at the
  address of each call.
  """
  parser = optparse.OptionParser('usage: %prog [options] log_file lib_file')
  parser.add_option('-t', '--outputType', dest='output_type',
                    default='symbolize', type='string',
                    help='lineize or symbolize or orderfile')

  # Option for output type.  The log file and lib file arguments are required
  # by the script and therefore are not options.
  (options, args) = parser.parse_args()
  if len(args) != 2:
    parser.error('expected 2 args: log_file lib_file')

  (log_file, lib_file) = args
  output_type = options.output_type

  # Read the log with a context manager so the file handle is closed.
  with open(log_file) as log:
    log_file_lines = [line.rstrip() for line in log]
  call_info = ParseLogLines(log_file_lines)
  (unique_addrs, address_map) = ParseLibSymbols(lib_file)

  # Check for duplicate addresses in the log file, and print a warning if
  # duplicates are found.  The instrumentation that produces the log file
  # should only print the first time a function is entered.
  # NOTE(review): unlike the other warnings this one goes to stdout; kept
  # as is so existing output does not change.
  seen_addrs = set()
  for call in call_info:
    addr = call[3]
    if addr in seen_addrs:
      print('WARNING: Address ' + hex(addr) + ' (line= ' +
            AddrToLine(addr, lib_file) + ') already profiled.')
    else:
      seen_addrs.add(addr)

  # The symbol to section map is only consulted for order files, so the
  # object files are only scanned for that output type.
  symbol_map = {}
  if output_type == 'orderfile':
    obj_dir = os.path.abspath(os.path.join(os.path.dirname(lib_file),
                                           '../obj'))
    symbol_map = SymbolToSection(obj_dir)

  for (sec_timestamp, usec_timestamp, callee_id, addr) in call_info:
    prefix = (str(sec_timestamp) + ' ' + str(usec_timestamp) + '\t' +
              str(callee_id) + '\t')
    if output_type == 'lineize':
      print(prefix + AddrToLine(addr, lib_file))
      continue

    try:
      symbols = FindFunctions(addr, unique_addrs, address_map)
    except SymbolNotFoundException:
      # Report the address of the call being processed, not a value left
      # over from the duplicate check above.
      sys.stderr.write('WARNING: Did not find function in binary. addr: '
                       + hex(addr) + '\n')
      continue

    if output_type == 'orderfile':
      for symbol in symbols:
        if symbol in symbol_map:
          print(symbol_map[symbol])
        else:
          sys.stderr.write('WARNING: Unknown symbol ' + symbol + '\n')
      print('')
    else:
      # The first symbol shares a line with the timestamps and callee id;
      # any further symbols at the same address follow on their own lines.
      print(prefix + symbols[0])
      for symbol in symbols[1:]:
        print('\t\t\t\t\t' + symbol)
249 | 292 |
250 if __name__ == '__main__': | 293 if __name__ == '__main__': |
251 main() | 294 main() |
OLD | NEW |