Chromium Code Reviews| Index: tools/cygprofile/cyglog_to_orderfile.py |
| diff --git a/tools/cygprofile/cyglog_to_orderfile.py b/tools/cygprofile/cyglog_to_orderfile.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..f3e04accf7997d0b51a35c7397d7f6900fd0d708 |
| --- /dev/null |
| +++ b/tools/cygprofile/cyglog_to_orderfile.py |
| @@ -0,0 +1,261 @@ |
| +#!/usr/bin/python |
| +# Copyright 2015 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Symbolize log file produced by cyprofile instrumentation. |
| + |
| +Given a log file and the binary being profiled, create an order file. |
| +""" |
| + |
| +import logging |
| +import multiprocessing |
| +import optparse |
| +import os |
| +import tempfile |
| +import string |
| +import sys |
| + |
| +import symbol_extractor |
| + |
| + |
| +def _ParseLogLines(log_file_lines): |
| + """Parse a merge cyglog produced by mergetraces.py. |
|
pasko
2015/01/30 19:31:33
s/Parse/Parses/ (and similarly below)
(sorry forgo
Benoit L
2015/02/02 09:28:14
Ah, I was consistently doing the wrong thing.
Done
|
| + |
| + Args: |
| + log_file_lines: array of lines in log file produced by profiled run |
| + lib_name: library or executable containing symbols |
| + |
| + Below is an example of a small log file: |
| + 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so |
| + secs usecs pid:threadid func |
| + START |
| + 1314897086 795828 3587:1074648168 0x509e105c |
| + 1314897086 795874 3587:1074648168 0x509e0eb4 |
| + 1314897086 796326 3587:1074648168 0x509e0e3c |
| + 1314897086 796552 3587:1074648168 0x509e07bc |
| + END |
| + |
| + Returns: |
| + A list of tuples of the format (sec, usec, call id, function address called) |
| + """ |
| + call_lines = [] |
| + vm_start = 0 |
| + line = log_file_lines[0] |
| + assert 'r-xp' in line |
| + end_index = line.find('-') |
| + vm_start = int(line[:end_index], 16) |
| + for line in log_file_lines[2:]: |
| + fields = line.split() |
| + if len(fields) == 4: |
| + call_lines.append(fields) |
|
pasko
2015/01/30 19:31:33
else:
assert fields[0] == 'END'
wdyt?
Benoit L
2015/02/02 09:28:14
There are lines at the start that don't follow thi
pasko-google - do not use
2015/02/02 10:35:49
But but .. you can replace the '2' with '3' in the
|
| + |
| + # Convert strings to int in fields. |
| + call_info = [] |
| + for call_line in call_lines: |
| + (sec_timestamp, usec_timestamp) = map(int, call_line[0:2]) |
|
pasko
2015/01/30 19:31:33
Why are we doing all this? We only use the ordered
Benoit L
2015/02/02 09:28:14
Carried over from the previous version of the code
|
| + callee_id = call_line[2] |
| + addr = int(call_line[3], 16) |
| + if vm_start < addr: |
| + addr -= vm_start |
| + call_info.append((sec_timestamp, usec_timestamp, callee_id, addr)) |
| + |
| + return call_info |
| + |
| + |
| +def _GroupLibrarySymbolInfosByOffset(lib_filename): |
| + """Return a dict {offset: [SymbolInfo]} from a library.""" |
| + symbol_infos = symbol_extractor.SymbolInfosFromBinary(lib_filename) |
| + return symbol_extractor.GroupSymbolInfosByOffset(symbol_infos) |
| + |
| + |
| +class SymbolNotFoundException(Exception): |
| + def __init__(self, value): |
| + super(SymbolNotFoundException, self).__init__(value) |
| + self.value = value |
| + |
| + def __str__(self): |
| + return repr(self.value) |
| + |
| + |
| +def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset): |
|
pasko
2015/01/30 19:31:33
not sure how strongly I feel about this. Usually t
Benoit L
2015/02/02 09:28:14
Acknowledged.
|
| + """Find all SymbolInfo at a given offset. |
| + |
| + Args: |
| + offset_to_symbol_infos: {offset: [SymbolInfo]} |
| + offset: offset to look the symbols at |
| + |
| + Returns: |
| + The list of SymbolInfo at the given offset |
| + |
| + Raises: |
| + SymbolNotFoundException if the offset doesn't match any symbol. |
| + """ |
| + if offset in offset_to_symbol_infos: |
| + return offset_to_symbol_infos[offset] |
| + elif offset % 2 and (offset - 1) in offset_to_symbol_infos: |
| + # On ARM, odd addresses are used to signal thumb instruction. They are |
| + # generated by setting the LSB to 1 (see |
| + # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfjhia.html). |
| + # TODO(lizeb): Make sure this hack doesn't propagate to other archs. |
| + return offset_to_symbol_infos[offset - 1] |
| + else: |
| + raise SymbolNotFoundException(offset) |
| + |
| + |
| +class WarningCollector(object): |
| + """Collect warnings, but limit the number printed to a set value.""" |
| + def __init__(self, max_warnings): |
| + self._warnings = 0 |
| + self._max_warnings = max_warnings |
| + |
| + def Write(self, message): |
| + if self._warnings < self._max_warnings: |
| + logging.warning(message) |
| + self._warnings += 1 |
| + |
| + def WriteEnd(self, message): |
| + if self._warnings > self._max_warnings: |
| + logging.warning('%d more warnings for %s' % ( |
| + self._warnings - self._max_warnings, message)) |
| + |
| + |
| +def _GetObjectFileNames(obj_dir): |
| + """Return the list of object files in a directory.""" |
| + obj_files = [] |
| + for (dirpath, _, filenames) in os.walk(obj_dir): |
| + for file_name in filenames: |
| + if file_name.endswith('.o'): |
| + obj_files.append(os.path.join(dirpath, file_name)) |
| + return obj_files |
| + |
| + |
| +def _AllSymbolInfos(object_filenames): |
| + """Return a list of SymbolInfo from an iterable of filenames.""" |
| + pool = multiprocessing.Pool() |
| + symbol_infos_nested = pool.map( |
| + symbol_extractor.SymbolInfosFromBinary, object_filenames) |
|
pasko
2015/01/30 19:31:32
maybe just write a comment saying that we expect e
Benoit L
2015/02/02 09:28:14
Done.
|
| + result = [] |
| + for symbol_infos in symbol_infos_nested: |
| + result += symbol_infos |
| + return result |
| + |
| + |
| +def _GetSymbolToSectionMapFromObjectFiles(obj_dir): |
| + """ Create a mapping from symbol to linker section name by scanning all |
| + the object files. |
| + """ |
| + object_files = _GetObjectFileNames(obj_dir) |
| + symbol_to_section_map = {} |
| + symbol_warnings = WarningCollector(300) |
| + symbol_infos = _AllSymbolInfos(object_files) |
| + symbol_infos = [s for s in symbol_infos if not s.name.startswith('.LTHUNK')] |
|
pasko
2015/01/30 19:31:33
I think it would be more readable if it lives in t
Benoit L
2015/02/02 09:28:14
Done.
|
| + for symbol_info in symbol_infos: |
| + symbol = symbol_info.name |
| + section = symbol_info.section |
| + if ((symbol in symbol_to_section_map) and |
| + (symbol_to_section_map[symbol] != symbol_info.section)): |
| + symbol_warnings.Write('Symbol ' + symbol + |
| + ' in conflicting sections ' + section + |
| + ' and ' + symbol_to_section_map[symbol]) |
| + elif not section.startswith('.text'): |
| + symbol_warnings.Write('Symbol ' + symbol + |
| + ' in incorrect section ' + section) |
|
pasko
2015/01/30 19:31:32
I forgot why it would be incorrect for a symbol to
Benoit L
2015/02/02 09:28:15
It is the contrary, print a warning if the section
pasko-google - do not use
2015/02/02 10:35:49
ah, sorry, did not spot the 'not' in the condition
|
| + else: |
| + symbol_to_section_map[symbol] = section |
| + symbol_warnings.WriteEnd('bad sections') |
| + return symbol_to_section_map |
| + |
| + |
| +def _WarnAboutDuplicates(offsets): |
| + """Warn about duplicate offsets. |
| + |
| + Args: |
| + offsets: list of offsets to check for duplicates |
| + |
| + Returns: |
| + True if there are no duplicates, False otherwise. |
| + """ |
| + seen_offsets = set() |
| + ok = True |
| + for offset in offsets: |
| + if offset not in seen_offsets: |
| + seen_offsets.add(offset) |
| + else: |
| + ok = False |
| + logging.warning('Duplicate offset: ' + hex(offset)) |
| + return ok |
| + |
| + |
| +def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map, |
| + output_file): |
| + """Output the order file to output_file. |
|
pasko
2015/01/30 19:31:33
s/order file/orderfile/
Benoit L
2015/02/02 09:28:14
Done.
|
| + |
| + Args: |
| + offsets: Iterable of offsets to match to section names |
| + offset_to_symbol_infos: {offset: [SymbolInfo]} |
| + symbol_to_section_map: {name: section} |
| + output_file: file-like object to write the results to |
| + """ |
| + success = True |
| + unknown_symbol_warnings = WarningCollector(300) |
| + symbol_not_found_warnings = WarningCollector(300) |
| + for offset in offsets: |
| + try: |
| + symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset) |
| + for symbol_info in symbol_infos: |
| + if symbol_info.name in symbol_to_section_map: |
| + output_file.write(symbol_to_section_map[symbol_info.name] + '\n') |
| + else: |
| + unknown_symbol_warnings.Write( |
| + 'No known section for symbol ' + symbol_info.name) |
| + except SymbolNotFoundException: |
| + symbol_not_found_warnings.Write( |
| + 'Did not find function in binary. offset: ' + hex(offset)) |
| + success = False |
| + unknown_symbol_warnings.WriteEnd('no known section for symbol') |
| + symbol_not_found_warnings.WriteEnd('did not find function') |
|
pasko
2015/01/30 19:31:32
"X more warnings for did not find function" sounds
Benoit L
2015/02/02 09:28:14
Done.
|
| + return success |
| + |
| + |
| +def main(): |
| + if len(sys.argv) != 4: |
| + logging.error('Usage: cyglog_to_orderfile.py <merged_cyglog> ' |
| + '<library> <output_filename>') |
| + return 1 |
| + (log_file, lib_file, output_filename) = sys.argv[1:] |
|
pasko
2015/01/30 19:31:33
nit: they are all file names, but two of them are
Benoit L
2015/02/02 09:28:15
Done.
|
| + |
| + obj_dir = os.path.abspath(os.path.join(os.path.dirname(lib_file), '../obj')) |
| + |
| + log_file_lines = map(string.rstrip, open(log_file).readlines()) |
| + call_info = _ParseLogLines(log_file_lines) |
|
pasko
2015/01/30 19:31:33
s/call_info/call_infos/
Benoit L
2015/02/02 09:28:15
Done.
|
| + offsets = [call[3] for call in call_info] |
| + offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_file) |
| + |
| + _WarnAboutDuplicates(offsets) |
| + |
| + symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir) |
| + success = False |
| + temp_filename = None |
| + output_file = None |
| + try: |
| + (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename)) |
| + output_file = os.fdopen(fd, 'w') |
| + ok = _OutputOrderfile(offsets, offset_to_symbol_infos, |
| + symbol_to_section_map, output_file) |
|
pasko
2015/01/30 19:31:33
nit: less whitespace
Benoit L
2015/02/02 09:28:14
Done.
|
| + output_file.close() |
| + os.rename(temp_filename, output_filename) |
| + temp_filename = None |
| + success = ok |
| + finally: |
| + if output_file: |
| + output_file.close() |
| + if temp_filename: |
| + os.remove(temp_filename) |
| + |
| + return 0 if success else 1 |
| + |
| + |
| +if __name__ == '__main__': |
| + logging.basicConfig(level=logging.INFO) |
| + sys.exit(main()) |
|
pasko
2015/01/30 19:31:32
so, symbol_extractor.CreateNameToSymbolInfo() is n
Benoit L
2015/02/02 09:28:15
Used later on. :-)
|