Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/python | |
| 2 # Copyright 2015 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Symbolize log file produced by cyprofile instrumentation. | |
| 7 | |
| 8 Given a log file and the binary being profiled, create an order file. | |
| 9 """ | |
| 10 | |
| 11 import logging | |
| 12 import multiprocessing | |
| 13 import optparse | |
| 14 import os | |
| 15 import tempfile | |
| 16 import string | |
| 17 import sys | |
| 18 | |
| 19 import symbol_extractor | |
| 20 | |
| 21 | |
| 22 def _ParseLogLines(log_file_lines): | |
| 23 """Parse a merge cyglog produced by mergetraces.py. | |
|
pasko
2015/01/30 19:31:33
s/Parse/Parses/ (and similarly below)
(sorry forgo
Benoit L
2015/02/02 09:28:14
Ah, I was consistently doing the wrong thing.
Done
| |
| 24 | |
| 25 Args: | |
| 26 log_file_lines: array of lines in log file produced by profiled run | |
| 27 lib_name: library or executable containing symbols | |
| 28 | |
| 29 Below is an example of a small log file: | |
| 30 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so | |
| 31 secs usecs pid:threadid func | |
| 32 START | |
| 33 1314897086 795828 3587:1074648168 0x509e105c | |
| 34 1314897086 795874 3587:1074648168 0x509e0eb4 | |
| 35 1314897086 796326 3587:1074648168 0x509e0e3c | |
| 36 1314897086 796552 3587:1074648168 0x509e07bc | |
| 37 END | |
| 38 | |
| 39 Returns: | |
| 40 A list of tuples of the format (sec, usec, call id, function address called) | |
| 41 """ | |
| 42 call_lines = [] | |
| 43 vm_start = 0 | |
| 44 line = log_file_lines[0] | |
| 45 assert 'r-xp' in line | |
| 46 end_index = line.find('-') | |
| 47 vm_start = int(line[:end_index], 16) | |
| 48 for line in log_file_lines[2:]: | |
| 49 fields = line.split() | |
| 50 if len(fields) == 4: | |
| 51 call_lines.append(fields) | |
|
pasko
2015/01/30 19:31:33
else:
assert fields[0] == 'END'
wdyt?
Benoit L
2015/02/02 09:28:14
There are lines at the start that don't follow thi
pasko-google - do not use
2015/02/02 10:35:49
But but .. you can replace the '2' with '3' in the
| |
| 52 | |
| 53 # Convert strings to int in fields. | |
| 54 call_info = [] | |
| 55 for call_line in call_lines: | |
| 56 (sec_timestamp, usec_timestamp) = map(int, call_line[0:2]) | |
|
pasko
2015/01/30 19:31:33
Why are we doing all this? We only use the ordered
Benoit L
2015/02/02 09:28:14
Carried over from the previous version of the code
| |
| 57 callee_id = call_line[2] | |
| 58 addr = int(call_line[3], 16) | |
| 59 if vm_start < addr: | |
| 60 addr -= vm_start | |
| 61 call_info.append((sec_timestamp, usec_timestamp, callee_id, addr)) | |
| 62 | |
| 63 return call_info | |
| 64 | |
| 65 | |
| 66 def _GroupLibrarySymbolInfosByOffset(lib_filename): | |
| 67 """Return a dict {offset: [SymbolInfo]} from a library.""" | |
| 68 symbol_infos = symbol_extractor.SymbolInfosFromBinary(lib_filename) | |
| 69 return symbol_extractor.GroupSymbolInfosByOffset(symbol_infos) | |
| 70 | |
| 71 | |
| 72 class SymbolNotFoundException(Exception): | |
| 73 def __init__(self, value): | |
| 74 super(SymbolNotFoundException, self).__init__(value) | |
| 75 self.value = value | |
| 76 | |
| 77 def __str__(self): | |
| 78 return repr(self.value) | |
| 79 | |
| 80 | |
| 81 def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset): | |
|
pasko
2015/01/30 19:31:33
not sure how strongly I feel about this. Usually t
Benoit L
2015/02/02 09:28:14
Acknowledged.
| |
| 82 """Find all SymbolInfo at a given offset. | |
| 83 | |
| 84 Args: | |
| 85 offset_to_symbol_infos: {offset: [SymbolInfo]} | |
| 86 offset: offset to look the symbols at | |
| 87 | |
| 88 Returns: | |
| 89 The list of SymbolInfo at the given offset | |
| 90 | |
| 91 Raises: | |
| 92 SymbolNotFoundException if the offset doesn't match any symbol. | |
| 93 """ | |
| 94 if offset in offset_to_symbol_infos: | |
| 95 return offset_to_symbol_infos[offset] | |
| 96 elif offset % 2 and (offset - 1) in offset_to_symbol_infos: | |
| 97 # On ARM, odd addresses are used to signal thumb instruction. They are | |
| 98 # generated by setting the LSB to 1 (see | |
| 99 # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfj hia.html). | |
| 100 # TODO(lizeb): Make sure this hack doesn't propagate to other archs. | |
| 101 return offset_to_symbol_infos[offset - 1] | |
| 102 else: | |
| 103 raise SymbolNotFoundException(offset) | |
| 104 | |
| 105 | |
| 106 class WarningCollector(object): | |
| 107 """Collect warnings, but limit the number printed to a set value.""" | |
| 108 def __init__(self, max_warnings): | |
| 109 self._warnings = 0 | |
| 110 self._max_warnings = max_warnings | |
| 111 | |
| 112 def Write(self, message): | |
| 113 if self._warnings < self._max_warnings: | |
| 114 logging.warning(message) | |
| 115 self._warnings += 1 | |
| 116 | |
| 117 def WriteEnd(self, message): | |
| 118 if self._warnings > self._max_warnings: | |
| 119 logging.warning('%d more warnings for %s' % ( | |
| 120 self._warnings - self._max_warnings, message)) | |
| 121 | |
| 122 | |
| 123 def _GetObjectFileNames(obj_dir): | |
| 124 """Return the list of object files in a directory.""" | |
| 125 obj_files = [] | |
| 126 for (dirpath, _, filenames) in os.walk(obj_dir): | |
| 127 for file_name in filenames: | |
| 128 if file_name.endswith('.o'): | |
| 129 obj_files.append(os.path.join(dirpath, file_name)) | |
| 130 return obj_files | |
| 131 | |
| 132 | |
| 133 def _AllSymbolInfos(object_filenames): | |
| 134 """Return a list of SymbolInfo from an iterable of filenames.""" | |
| 135 pool = multiprocessing.Pool() | |
| 136 symbol_infos_nested = pool.map( | |
| 137 symbol_extractor.SymbolInfosFromBinary, object_filenames) | |
|
pasko
2015/01/30 19:31:32
maybe just write a comment saying that we expect e
Benoit L
2015/02/02 09:28:14
Done.
| |
| 138 result = [] | |
| 139 for symbol_infos in symbol_infos_nested: | |
| 140 result += symbol_infos | |
| 141 return result | |
| 142 | |
| 143 | |
| 144 def _GetSymbolToSectionMapFromObjectFiles(obj_dir): | |
| 145 """ Create a mapping from symbol to linker section name by scanning all | |
| 146 the object files. | |
| 147 """ | |
| 148 object_files = _GetObjectFileNames(obj_dir) | |
| 149 symbol_to_section_map = {} | |
| 150 symbol_warnings = WarningCollector(300) | |
| 151 symbol_infos = _AllSymbolInfos(object_files) | |
| 152 symbol_infos = [s for s in symbol_infos if not s.name.startswith('.LTHUNK')] | |
|
pasko
2015/01/30 19:31:33
I think it would be more readable if it lives in t
Benoit L
2015/02/02 09:28:14
Done.
| |
| 153 for symbol_info in symbol_infos: | |
| 154 symbol = symbol_info.name | |
| 155 section = symbol_info.section | |
| 156 if ((symbol in symbol_to_section_map) and | |
| 157 (symbol_to_section_map[symbol] != symbol_info.section)): | |
| 158 symbol_warnings.Write('Symbol ' + symbol + | |
| 159 ' in conflicting sections ' + section + | |
| 160 ' and ' + symbol_to_section_map[symbol]) | |
| 161 elif not section.startswith('.text'): | |
| 162 symbol_warnings.Write('Symbol ' + symbol + | |
| 163 ' in incorrect section ' + section) | |
|
pasko
2015/01/30 19:31:32
I forgot why it would be incorrect for a symbol to
Benoit L
2015/02/02 09:28:15
It is the contrary, print a warning if the section
pasko-google - do not use
2015/02/02 10:35:49
ah, sorry, did not spot the 'not' in the condition
| |
| 164 else: | |
| 165 symbol_to_section_map[symbol] = section | |
| 166 symbol_warnings.WriteEnd('bad sections') | |
| 167 return symbol_to_section_map | |
| 168 | |
| 169 | |
| 170 def _WarnAboutDuplicates(offsets): | |
| 171 """Warn about duplicate offsets. | |
| 172 | |
| 173 Args: | |
| 174 offsets: list of offsets to check for duplicates | |
| 175 | |
| 176 Returns: | |
| 177 True if there are no duplicates, False otherwise. | |
| 178 """ | |
| 179 seen_offsets = set() | |
| 180 ok = True | |
| 181 for offset in offsets: | |
| 182 if offset not in seen_offsets: | |
| 183 seen_offsets.add(offset) | |
| 184 else: | |
| 185 ok = False | |
| 186 logging.warning('Duplicate offset: ' + hex(offset)) | |
| 187 return ok | |
| 188 | |
| 189 | |
| 190 def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map, | |
| 191 output_file): | |
| 192 """Output the order file to output_file. | |
|
pasko
2015/01/30 19:31:33
s/order file/orderfile/
Benoit L
2015/02/02 09:28:14
Done.
| |
| 193 | |
| 194 Args: | |
| 195 offsets: Iterable of offsets to match to section names | |
| 196 offset_to_symbol_infos: {offset: [SymbolInfo]} | |
| 197 symbol_to_section_map: {name: section} | |
| 198 output_file: file-like object to write the results to | |
| 199 """ | |
| 200 success = True | |
| 201 unknown_symbol_warnings = WarningCollector(300) | |
| 202 symbol_not_found_warnings = WarningCollector(300) | |
| 203 for offset in offsets: | |
| 204 try: | |
| 205 symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset) | |
| 206 for symbol_info in symbol_infos: | |
| 207 if symbol_info.name in symbol_to_section_map: | |
| 208 output_file.write(symbol_to_section_map[symbol_info.name] + '\n') | |
| 209 else: | |
| 210 unknown_symbol_warnings.Write( | |
| 211 'No known section for symbol ' + symbol_info.name) | |
| 212 except SymbolNotFoundException: | |
| 213 symbol_not_found_warnings.Write( | |
| 214 'Did not find function in binary. offset: ' + hex(offset)) | |
| 215 success = False | |
| 216 unknown_symbol_warnings.WriteEnd('no known section for symbol') | |
| 217 symbol_not_found_warnings.WriteEnd('did not find function') | |
|
pasko
2015/01/30 19:31:32
"X more warnings for did not find function" sounds
Benoit L
2015/02/02 09:28:14
Done.
| |
| 218 return success | |
| 219 | |
| 220 | |
| 221 def main(): | |
| 222 if len(sys.argv) != 4: | |
| 223 logging.error('Usage: cyglog_to_orderfile.py <merged_cyglog> ' | |
| 224 '<library> <output_filename>') | |
| 225 return 1 | |
| 226 (log_file, lib_file, output_filename) = sys.argv[1:] | |
|
pasko
2015/01/30 19:31:33
nit: they are all file names, but two of them are
Benoit L
2015/02/02 09:28:15
Done.
| |
| 227 | |
| 228 obj_dir = os.path.abspath(os.path.join(os.path.dirname(lib_file), '../obj')) | |
| 229 | |
| 230 log_file_lines = map(string.rstrip, open(log_file).readlines()) | |
| 231 call_info = _ParseLogLines(log_file_lines) | |
|
pasko
2015/01/30 19:31:33
s/call_info/call_infos/
Benoit L
2015/02/02 09:28:15
Done.
| |
| 232 offsets = [call[3] for call in call_info] | |
| 233 offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_file) | |
| 234 | |
| 235 _WarnAboutDuplicates(offsets) | |
| 236 | |
| 237 symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir) | |
| 238 success = False | |
| 239 temp_filename = None | |
| 240 output_file = None | |
| 241 try: | |
| 242 (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename)) | |
| 243 output_file = os.fdopen(fd, 'w') | |
| 244 ok = _OutputOrderfile(offsets, offset_to_symbol_infos, | |
| 245 symbol_to_section_map, output_file) | |
|
pasko
2015/01/30 19:31:33
nit: less whitespace
Benoit L
2015/02/02 09:28:14
Done.
| |
| 246 output_file.close() | |
| 247 os.rename(temp_filename, output_filename) | |
| 248 temp_filename = None | |
| 249 success = ok | |
| 250 finally: | |
| 251 if output_file: | |
| 252 output_file.close() | |
| 253 if temp_filename: | |
| 254 os.remove(temp_filename) | |
| 255 | |
| 256 return 0 if success else 1 | |
| 257 | |
| 258 | |
| 259 if __name__ == '__main__': | |
| 260 logging.basicConfig(level=logging.INFO) | |
| 261 sys.exit(main()) | |
|
pasko
2015/01/30 19:31:32
so, symbol_extractor.CreateNameToSymbolInfo() is n
Benoit L
2015/02/02 09:28:15
Used later on. :-)
| |
| OLD | NEW |