tools/cygprofile/cyglog_to_orderfile.py - Issue 874683004: Refactor the symbolize step of the orderfile generation.

Unified Diff: tools/cygprofile/cyglog_to_orderfile.py

Issue 874683004: Refactor the symbolize step of the orderfile generation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Address comments. Change filenames. Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/cygprofile/cyglog_to_orderfile.py

diff --git a/tools/cygprofile/cyglog_to_orderfile.py b/tools/cygprofile/cyglog_to_orderfile.py

new file mode 100755

index 0000000000000000000000000000000000000000..f3e04accf7997d0b51a35c7397d7f6900fd0d708

--- /dev/null

+++ b/tools/cygprofile/cyglog_to_orderfile.py

@@ -0,0 +1,261 @@

+#!/usr/bin/python

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""Symbolize log file produced by cyprofile instrumentation.

+Given a log file and the binary being profiled, create an order file.

+"""

+import logging

+import multiprocessing

+import optparse

+import os

+import tempfile

+import string

+import sys

+import symbol_extractor

+def _ParseLogLines(log_file_lines):

+ """Parse a merge cyglog produced by mergetraces.py.

pasko 2015/01/30 19:31:33 s/Parse/Parses/ (and similarly below) (sorry forgo

Benoit L 2015/02/02 09:28:14 Ah, I was consistently doing the wrong thing. Done

+ Args:

+ log_file_lines: array of lines in log file produced by profiled run

+ lib_name: library or executable containing symbols

+ Below is an example of a small log file:

+ 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so

+ secs usecs pid:threadid func

+ START

+ 1314897086 795828 3587:1074648168 0x509e105c

+ 1314897086 795874 3587:1074648168 0x509e0eb4

+ 1314897086 796326 3587:1074648168 0x509e0e3c

+ 1314897086 796552 3587:1074648168 0x509e07bc

+ END

+ Returns:

+ A list of tuples of the format (sec, usec, call id, function address called)

+ """

+ call_lines = []

+ vm_start = 0

+ line = log_file_lines[0]

+ assert 'r-xp' in line

+ end_index = line.find('-')

+ vm_start = int(line[:end_index], 16)

+ for line in log_file_lines[2:]:

+ fields = line.split()

+ if len(fields) == 4:

+ call_lines.append(fields)

pasko 2015/01/30 19:31:33 else: assert fields[0] == 'END' wdyt?

Benoit L 2015/02/02 09:28:14 There are lines at the start that don't follow thi

pasko-google - do not use 2015/02/02 10:35:49 But but .. you can replace the '2' with '3' in the

+ # Convert strings to int in fields.

+ call_info = []

+ for call_line in call_lines:

+ (sec_timestamp, usec_timestamp) = map(int, call_line[0:2])

pasko 2015/01/30 19:31:33 Why are we doing all this? We only use the ordered

Benoit L 2015/02/02 09:28:14 Carried over from the previous version of the code

+ callee_id = call_line[2]

+ addr = int(call_line[3], 16)

+ if vm_start < addr:

+ addr -= vm_start

+ call_info.append((sec_timestamp, usec_timestamp, callee_id, addr))

+ return call_info

+def _GroupLibrarySymbolInfosByOffset(lib_filename):

+ """Return a dict {offset: [SymbolInfo]} from a library."""

+ symbol_infos = symbol_extractor.SymbolInfosFromBinary(lib_filename)

+ return symbol_extractor.GroupSymbolInfosByOffset(symbol_infos)

+class SymbolNotFoundException(Exception):

+ def __init__(self, value):

+ super(SymbolNotFoundException, self).__init__(value)

+ self.value = value

+ def __str__(self):

+ return repr(self.value)

+def _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset):

pasko 2015/01/30 19:31:33 not sure how strongly I feel about this. Usually t

Benoit L 2015/02/02 09:28:14 Acknowledged.

+ """Find all SymbolInfo at a given offset.

+ Args:

+ offset_to_symbol_infos: {offset: [SymbolInfo]}

+ offset: offset to look the symbols at

+ Returns:

+ The list of SymbolInfo at the given offset

+ Raises:

+ SymbolNotFoundException if the offset doesn't match any symbol.

+ """

+ if offset in offset_to_symbol_infos:

+ return offset_to_symbol_infos[offset]

+ elif offset % 2 and (offset - 1) in offset_to_symbol_infos:

+ # On ARM, odd addresses are used to signal thumb instruction. They are

+ # generated by setting the LSB to 1 (see

+ # http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfjhia.html).

+ # TODO(lizeb): Make sure this hack doesn't propagate to other archs.

+ return offset_to_symbol_infos[offset - 1]

+ else:

+ raise SymbolNotFoundException(offset)

+class WarningCollector(object):

+ """Collect warnings, but limit the number printed to a set value."""

+ def __init__(self, max_warnings):

+ self._warnings = 0

+ self._max_warnings = max_warnings

+ def Write(self, message):

+ if self._warnings < self._max_warnings:

+ logging.warning(message)

+ self._warnings += 1

+ def WriteEnd(self, message):

+ if self._warnings > self._max_warnings:

+ logging.warning('%d more warnings for %s' % (

+ self._warnings - self._max_warnings, message))

+def _GetObjectFileNames(obj_dir):

+ """Return the list of object files in a directory."""

+ obj_files = []

+ for (dirpath, _, filenames) in os.walk(obj_dir):

+ for file_name in filenames:

+ if file_name.endswith('.o'):

+ obj_files.append(os.path.join(dirpath, file_name))

+ return obj_files

+def _AllSymbolInfos(object_filenames):

+ """Return a list of SymbolInfo from an iterable of filenames."""

+ pool = multiprocessing.Pool()

+ symbol_infos_nested = pool.map(

+ symbol_extractor.SymbolInfosFromBinary, object_filenames)

pasko 2015/01/30 19:31:32 maybe just write a comment saying that we expect e

Benoit L 2015/02/02 09:28:14 Done.

+ result = []

+ for symbol_infos in symbol_infos_nested:

+ result += symbol_infos

+ return result

+def _GetSymbolToSectionMapFromObjectFiles(obj_dir):

+ """ Create a mapping from symbol to linker section name by scanning all

+ the object files.

+ """

+ object_files = _GetObjectFileNames(obj_dir)

+ symbol_to_section_map = {}

+ symbol_warnings = WarningCollector(300)

+ symbol_infos = _AllSymbolInfos(object_files)

+ symbol_infos = [s for s in symbol_infos if not s.name.startswith('.LTHUNK')]

pasko 2015/01/30 19:31:33 I think it would be more readable if it lives in t

Benoit L 2015/02/02 09:28:14 Done.

+ for symbol_info in symbol_infos:

+ symbol = symbol_info.name

+ section = symbol_info.section

+ if ((symbol in symbol_to_section_map) and

+ (symbol_to_section_map[symbol] != symbol_info.section)):

+ symbol_warnings.Write('Symbol ' + symbol +

+ ' in conflicting sections ' + section +

+ ' and ' + symbol_to_section_map[symbol])

+ elif not section.startswith('.text'):

+ symbol_warnings.Write('Symbol ' + symbol +

+ ' in incorrect section ' + section)

pasko 2015/01/30 19:31:32 I forgot why it would be incorrect for a symbol to

Benoit L 2015/02/02 09:28:15 It is the contrary, print a warning if the section

pasko-google - do not use 2015/02/02 10:35:49 ah, sorry, did not spot the 'not' in the condition

+ else:

+ symbol_to_section_map[symbol] = section

+ symbol_warnings.WriteEnd('bad sections')

+ return symbol_to_section_map

+def _WarnAboutDuplicates(offsets):

+ """Warn about duplicate offsets.

+ Args:

+ offsets: list of offsets to check for duplicates

+ Returns:

+ True if there are no duplicates, False otherwise.

+ """

+ seen_offsets = set()

+ ok = True

+ for offset in offsets:

+ if offset not in seen_offsets:

+ seen_offsets.add(offset)

+ else:

+ ok = False

+ logging.warning('Duplicate offset: ' + hex(offset))

+ return ok

+def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map,

+ output_file):

+ """Output the order file to output_file.

pasko 2015/01/30 19:31:33 s/order file/orderfile/

Benoit L 2015/02/02 09:28:14 Done.

+ Args:

+ offsets: Iterable of offsets to match to section names

+ offset_to_symbol_infos: {offset: [SymbolInfo]}

+ symbol_to_section_map: {name: section}

+ output_file: file-like object to write the results to

+ """

+ success = True

+ unknown_symbol_warnings = WarningCollector(300)

+ symbol_not_found_warnings = WarningCollector(300)

+ for offset in offsets:

+ try:

+ symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset)

+ for symbol_info in symbol_infos:

+ if symbol_info.name in symbol_to_section_map:

+ output_file.write(symbol_to_section_map[symbol_info.name] + '\n')

+ else:

+ unknown_symbol_warnings.Write(

+ 'No known section for symbol ' + symbol_info.name)

+ except SymbolNotFoundException:

+ symbol_not_found_warnings.Write(

+ 'Did not find function in binary. offset: ' + hex(offset))

+ success = False

+ unknown_symbol_warnings.WriteEnd('no known section for symbol')

+ symbol_not_found_warnings.WriteEnd('did not find function')

pasko 2015/01/30 19:31:32 "X more warnings for did not find function" sounds

Benoit L 2015/02/02 09:28:14 Done.

+ return success

+def main():

+ if len(sys.argv) != 4:

+ logging.error('Usage: cyglog_to_orderfile.py <merged_cyglog> '

+ '<library> <output_filename>')

+ return 1

+ (log_file, lib_file, output_filename) = sys.argv[1:]

pasko 2015/01/30 19:31:33 nit: they are all file names, but two of them are

Benoit L 2015/02/02 09:28:15 Done.

+ obj_dir = os.path.abspath(os.path.join(os.path.dirname(lib_file), '../obj'))

+ log_file_lines = map(string.rstrip, open(log_file).readlines())

+ call_info = _ParseLogLines(log_file_lines)

pasko 2015/01/30 19:31:33 s/call_info/call_infos/

Benoit L 2015/02/02 09:28:15 Done.

+ offsets = [call[3] for call in call_info]

+ offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_file)

+ _WarnAboutDuplicates(offsets)

+ symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir)

+ success = False

+ temp_filename = None

+ output_file = None

+ try:

+ (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename))

+ output_file = os.fdopen(fd, 'w')

+ ok = _OutputOrderfile(offsets, offset_to_symbol_infos,

+ symbol_to_section_map, output_file)

pasko 2015/01/30 19:31:33 nit: less whitespace

Benoit L 2015/02/02 09:28:14 Done.

+ output_file.close()

+ os.rename(temp_filename, output_filename)

+ temp_filename = None

+ success = ok

+ finally:

+ if output_file:

+ output_file.close()

+ if temp_filename:

+ os.remove(temp_filename)

+ return 0 if success else 1

+if __name__ == '__main__':

+ logging.basicConfig(level=logging.INFO)

+ sys.exit(main())

pasko 2015/01/30 19:31:32 so, symbol_extractor.CreateNameToSymbolInfo() is n

Benoit L 2015/02/02 09:28:15 Used later on. :-)

« no previous file with comments | « no previous file | tools/cygprofile/cyglog_to_orderfile_unittest.py » ('j') | tools/cygprofile/cyglog_to_orderfile_unittest.py » ('J')