OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """Symbolizes a log file produced by cygprofile instrumentation. |
| 7 |
| 8 Given a log file and the binary being profiled, creates an orderfile. |
| 9 """ |
| 10 |
| 11 import logging |
| 12 import multiprocessing |
| 13 import optparse |
| 14 import os |
| 15 import tempfile |
| 16 import string |
| 17 import sys |
| 18 |
| 19 import symbol_extractor |
| 20 |
| 21 |
| 22 def _ParseLogLines(log_file_lines): |
| 23 """Parses a merged cyglog produced by mergetraces.py. |
| 24 |
| 25 Args: |
| 26 log_file_lines: array of lines in log file produced by profiled run |
| 27 lib_name: library or executable containing symbols |
| 28 |
| 29 Below is an example of a small log file: |
| 30 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so |
| 31 secs usecs pid:threadid func |
| 32 START |
| 33 1314897086 795828 3587:1074648168 0x509e105c |
| 34 1314897086 795874 3587:1074648168 0x509e0eb4 |
| 35 1314897086 796326 3587:1074648168 0x509e0e3c |
| 36 1314897086 796552 3587:1074648168 0x509e07bc |
| 37 END |
| 38 |
| 39 Returns: |
| 40 An ordered list of callee offsets. |
| 41 """ |
| 42 call_lines = [] |
| 43 vm_start = 0 |
| 44 line = log_file_lines[0] |
| 45 assert 'r-xp' in line |
| 46 end_index = line.find('-') |
| 47 vm_start = int(line[:end_index], 16) |
| 48 for line in log_file_lines[3:]: |
| 49 fields = line.split() |
| 50 if len(fields) == 4: |
| 51 call_lines.append(fields) |
| 52 else: |
| 53 assert fields[0] == 'END' |
| 54 # Convert strings to int in fields. |
| 55 call_info = [] |
| 56 for call_line in call_lines: |
| 57 addr = int(call_line[3], 16) |
| 58 if vm_start < addr: |
| 59 addr -= vm_start |
| 60 call_info.append(addr) |
| 61 return call_info |
| 62 |
| 63 |
def _GroupLibrarySymbolInfosByOffset(lib_filename):
  """Extracts the symbols of a library and groups them by offset.

  Args:
    lib_filename: path of the library to read the symbols from

  Returns:
    A dict {offset: [SymbolInfo]}.
  """
  return symbol_extractor.GroupSymbolInfosByOffset(
      symbol_extractor.SymbolInfosFromBinary(lib_filename))
| 68 |
| 69 |
class SymbolNotFoundException(Exception):
  """Exception carrying the value (e.g. an offset) that was not found."""

  def __init__(self, value):
    Exception.__init__(self, value)
    # Kept as an attribute so that handlers can read it back directly.
    self.value = value

  def __str__(self):
    # Wrapped in a 1-tuple so that a tuple value is formatted as a whole.
    return '%r' % (self.value,)
| 77 |
| 78 |
| 79 def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset): |
| 80 """Finds all SymbolInfo at a given offset. |
| 81 |
| 82 Args: |
| 83 offset_to_symbol_infos: {offset: [SymbolInfo]} |
| 84 offset: offset to look the symbols at |
| 85 |
| 86 Returns: |
| 87 The list of SymbolInfo at the given offset |
| 88 |
| 89 Raises: |
| 90 SymbolNotFoundException if the offset doesn't match any symbol. |
| 91 """ |
| 92 if offset in offset_to_symbol_infos: |
| 93 return offset_to_symbol_infos[offset] |
| 94 elif offset % 2 and (offset - 1) in offset_to_symbol_infos: |
| 95 # On ARM, odd addresses are used to signal thumb instruction. They are |
| 96 # generated by setting the LSB to 1 (see |
| 97 # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfj
hia.html). |
| 98 # TODO(lizeb): Make sure this hack doesn't propagate to other archs. |
| 99 return offset_to_symbol_infos[offset - 1] |
| 100 else: |
| 101 raise SymbolNotFoundException(offset) |
| 102 |
| 103 |
class WarningCollector(object):
  """Logs warnings up to a fixed limit and counts the ones beyond it."""

  def __init__(self, max_warnings):
    """Creates a collector logging at most max_warnings messages."""
    self._max_warnings = max_warnings
    self._warnings = 0

  def Write(self, message):
    """Logs message unless the limit is reached; always counts it."""
    below_limit = self._warnings < self._max_warnings
    if below_limit:
      logging.warning(message)
    self._warnings += 1

  def WriteEnd(self, message):
    """Logs how many warnings were suppressed, if any.

    Args:
      message: description appended to the summary line
    """
    suppressed = self._warnings - self._max_warnings
    if suppressed <= 0:
      return
    logging.warning('%d more warnings for: %s' % (suppressed, message))
| 119 |
| 120 |
| 121 def _GetObjectFileNames(obj_dir): |
| 122 """Returns the list of object files in a directory.""" |
| 123 obj_files = [] |
| 124 for (dirpath, _, filenames) in os.walk(obj_dir): |
| 125 for file_name in filenames: |
| 126 if file_name.endswith('.o'): |
| 127 obj_files.append(os.path.join(dirpath, file_name)) |
| 128 return obj_files |
| 129 |
| 130 |
def _AllSymbolInfos(object_filenames):
  """Returns a list of SymbolInfo from an iterable of filenames.

  The files are processed in parallel by a pool of worker processes.

  Args:
    object_filenames: iterable of paths to extract the symbols from

  Returns:
    A flat list with the SymbolInfo of every file, in input order.
  """
  pool = multiprocessing.Pool()
  try:
    # Hopefully the object files are in the page cache at this step, so IO
    # should not be a problem (hence no concurrency limit on the pool).
    symbol_infos_nested = pool.map(
        symbol_extractor.SymbolInfosFromBinary, object_filenames)
  finally:
    # Release the worker processes, even when the extraction fails.
    pool.close()
    pool.join()
  result = []
  for symbol_infos in symbol_infos_nested:
    result.extend(symbol_infos)
  return result
| 142 |
| 143 |
def _GetSymbolToSectionMapFromObjectFiles(obj_dir):
  """Builds a {symbol name: linker section name} map from object files.

  All the object files found under obj_dir are scanned. Symbols whose name
  starts with '.LTHUNK' are ignored. A symbol seen in two different sections,
  or in a section not starting with '.text', only produces a warning.

  Args:
    obj_dir: directory containing the object files

  Returns:
    A dict {symbol name: section name}.
  """
  section_by_symbol = {}
  section_warnings = WarningCollector(300)
  for info in _AllSymbolInfos(_GetObjectFileNames(obj_dir)):
    name = info.name
    if name.startswith('.LTHUNK'):
      continue
    section = info.section
    # Defaults to the current section, so an unseen symbol never conflicts.
    recorded_section = section_by_symbol.get(name, section)
    if recorded_section != section:
      section_warnings.Write(''.join([
          'Symbol ', name,
          ' in conflicting sections ', section,
          ' and ', recorded_section]))
      continue
    if not section.startswith('.text'):
      section_warnings.Write(''.join([
          'Symbol ', name,
          ' in incorrect section ', section]))
      continue
    section_by_symbol[name] = section
  section_warnings.WriteEnd('bad sections')
  return section_by_symbol
| 169 |
| 170 |
| 171 def _WarnAboutDuplicates(offsets): |
| 172 """Warns about duplicate offsets. |
| 173 |
| 174 Args: |
| 175 offsets: list of offsets to check for duplicates |
| 176 |
| 177 Returns: |
| 178 True if there are no duplicates, False otherwise. |
| 179 """ |
| 180 seen_offsets = set() |
| 181 ok = True |
| 182 for offset in offsets: |
| 183 if offset not in seen_offsets: |
| 184 seen_offsets.add(offset) |
| 185 else: |
| 186 ok = False |
| 187 logging.warning('Duplicate offset: ' + hex(offset)) |
| 188 return ok |
| 189 |
| 190 |
def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map,
                     output_file):
  """Writes the orderfile, one section name per line, to output_file.

  Args:
    offsets: Iterable of offsets to match to section names
    offset_to_symbol_infos: {offset: [SymbolInfo]}
    symbol_to_section_map: {name: section}
    output_file: file-like object to write the results to

  Returns:
    True if every offset matched at least one symbol, False otherwise.
    A symbol without a known section is only reported as a warning.
  """
  all_offsets_found = True
  no_section_warnings = WarningCollector(300)
  not_found_warnings = WarningCollector(300)
  for offset in offsets:
    try:
      matching_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset)
    except SymbolNotFoundException:
      not_found_warnings.Write(
          'Did not find function in binary. offset: ' + hex(offset))
      all_offsets_found = False
      continue
    for info in matching_infos:
      if info.name not in symbol_to_section_map:
        no_section_warnings.Write(
            'No known section for symbol ' + info.name)
        continue
      output_file.write(symbol_to_section_map[info.name] + '\n')
  no_section_warnings.WriteEnd('no known section for symbol.')
  not_found_warnings.WriteEnd('symbol not found in the binary.')
  return all_offsets_found
| 220 |
| 221 |
def main():
  """Creates an orderfile from a merged cyglog and the profiled library.

  Reads <merged_cyglog>, <library> and <output_filename> from sys.argv. The
  object files are looked up in the '../obj' directory relative to the
  directory of the library. The orderfile is first written to a temporary file
  in the directory of the output file, then renamed to its final name.

  Returns:
    0 on success. 1 if the arguments are invalid or if at least one offset of
    the log was not found in the library (the orderfile is still written in
    the latter case).
  """
  if len(sys.argv) != 4:
    logging.error('Usage: cyglog_to_orderfile.py <merged_cyglog> '
                  '<library> <output_filename>')
    return 1
  (log_filename, lib_filename, output_filename) = sys.argv[1:]

  obj_dir = os.path.abspath(os.path.join(
      os.path.dirname(lib_filename), '../obj'))

  # Close the log file as soon as it is read, and strip trailing whitespace
  # with str.rstrip() so that this works on both Python 2 and Python 3.
  with open(log_filename) as log_file:
    log_file_lines = [line.rstrip() for line in log_file]
  offsets = _ParseLogLines(log_file_lines)
  _WarnAboutDuplicates(offsets)

  offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename)
  symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir)

  success = False
  temp_filename = None
  output_file = None
  try:
    (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename))
    output_file = os.fdopen(fd, 'w')
    ok = _OutputOrderfile(
        offsets, offset_to_symbol_infos, symbol_to_section_map, output_file)
    # The file has to be closed (hence flushed) before being renamed.
    output_file.close()
    output_file = None
    os.rename(temp_filename, output_filename)
    # The temporary file no longer exists under this name: nothing to remove.
    temp_filename = None
    success = ok
  finally:
    if output_file:
      output_file.close()
    if temp_filename:
      os.remove(temp_filename)

  return 0 if success else 1
| 258 |
| 259 |
# Script entry point: set up logging, then exit with the status of main().
if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  exit_status = main()
  sys.exit(exit_status)
OLD | NEW |