tools/find_runtime_symbols/procedure_boundaries.py - Issue 10795028: A tool for mapping runtime addresses to symbol names.

Unified Diff: tools/find_runtime_symbols/procedure_boundaries.py

Issue 10795028: A tool for mapping runtime addresses to symbol names. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: updated Created 8 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/find_runtime_symbols/procedure_boundaries.py

diff --git a/tools/find_runtime_symbols/procedure_boundaries.py b/tools/find_runtime_symbols/procedure_boundaries.py

new file mode 100644

index 0000000000000000000000000000000000000000..be1d76cc3345ba983f6860f60218532a299c27a1

--- /dev/null

+++ b/tools/find_runtime_symbols/procedure_boundaries.py

@@ -0,0 +1,148 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+import bisect

+import os

+import re

+import sys

# Patterns used to shorten C++ function names.  They are written as raw
# strings so that the regex escapes (\(, \s, \w) are not treated as invalid
# string escapes by the Python compiler.

# An innermost parenthesized argument list, with an optional trailing "const".
_ARGUMENT_TYPE_PATTERN = re.compile(r'\([^()]*\)(\s*const)?')

# An innermost <...> template argument list.
_TEMPLATE_ARGUMENT_PATTERN = re.compile(r'<[^<>]*>')

# Any text up to the whitespace preceding a "Name::" qualifier; the qualifier
# itself is captured as group 1.
_LEADING_TYPE_PATTERN = re.compile(r'^.*\s+(\w+::)')

class ParsingException(Exception):
    """Error raised when (eu-)nm output cannot be parsed.

    The string form is the repr() of the first constructor argument, or an
    empty string when the exception was created without arguments.
    """

    def __str__(self):
        # Guard against ParsingException() with no message: indexing
        # self.args[0] unconditionally would raise IndexError while the
        # exception is being formatted.
        if not self.args:
            return ''
        return repr(self.args[0])

class ProcedureBoundary(object):
    """A procedure symbol together with the address range it covers.

    Attributes are plain values stored as given:
      start: first address of the range.
      end: address at which the range ends.
      name: symbol name for the range.
    """

    def __init__(self, start, end, name):
        self.start, self.end, self.name = start, end, name

class ProcedureBoundaryTable(object):
    """A set of ProcedureBoundary entries that can be looked up by address.

    Entries are keyed by their start address.  Start addresses are kept in a
    list that is sorted lazily, on the first lookup after an out-of-order
    append.
    """

    def __init__(self):
        # Start addresses of every stored entry (sorted when self.sorted).
        self.sorted_value_list = []
        # Maps a start address to the entry that begins there.
        self.dictionary = {}
        # False while sorted_value_list needs sorting before the next lookup.
        self.sorted = True

    def append(self, entry):
        """Adds an entry unless one with the same start address exists.

        The first entry appended for a given start address is kept; later
        entries with that start are ignored, whether or not they arrive in
        address order.

        Args:
          entry: An object with a `start` attribute (e.g. ProcedureBoundary).
        """
        if entry.start in self.dictionary:
            return
        if self.sorted_value_list and self.sorted_value_list[-1] > entry.start:
            self.sorted = False
        self.sorted_value_list.append(entry.start)
        self.dictionary[entry.start] = entry

    def find_procedure(self, address):
        """Returns the entry with the greatest start address below `address`.

        NOTE(review): the search uses bisect_left, so an address exactly
        equal to an entry's start resolves to the preceding entry -- confirm
        this is the intended treatment of addresses on a boundary.

        Args:
          address: The address to look up.

        Returns:
          The matching entry, or None when the table is empty or no entry
          starts below `address`.
        """
        if not self.sorted:
            self.sorted_value_list.sort()
            self.sorted = True
        found_index = bisect.bisect_left(self.sorted_value_list, address)
        if found_index == 0:
            # Nothing starts below the address.  Indexing with -1 here would
            # silently wrap around to the highest entry (or raise IndexError
            # on an empty table).
            return None
        return self.dictionary[self.sorted_value_list[found_index - 1]]

def _get_short_function_name(function):
    """Returns a shortened form of a (demangled) function name.

    Parenthesized argument lists (with any trailing "const") and <...>
    template argument lists are removed, then any text that precedes a
    whitespace-separated "Name::" qualifier is dropped.

    Args:
      function: The function name string to shorten.

    Returns:
      The shortened name string.
    """
    # Each pattern only matches an innermost group, so nested groups are
    # peeled off one level per pass; repeat until a pass removes nothing.
    for pattern in (_ARGUMENT_TYPE_PATTERN, _TEMPLATE_ARGUMENT_PATTERN):
        number = 1
        while number:
            function, number = pattern.subn('', function)
    # Raw string: "\g" is not a valid string escape, so a plain literal
    # triggers an invalid-escape warning on newer Python versions.
    return _LEADING_TYPE_PATTERN.sub(r'\g<1>', function)

def get_procedure_boundaries_from_nm_bsd(f, mangled=False):
    """Builds a ProcedureBoundaryTable from "nm -n --format bsd" output.

    Every new address seen closes the range of the previously queued symbol
    and queues the new one.  The table therefore starts with an unnamed
    entry beginning at address 0 and finishes with an entry whose start and
    end are both the last address seen.

    Args:
      f: An iterable of lines (e.g. a file object) containing a result of
          nm.  It must be sorted and in BSD-style.
          (Use "[eu-]nm -n --format bsd")
      mangled: If false (the default), every name is passed through
          _get_short_function_name before it is stored.  If true, names are
          stored without shortening.

    Returns:
      A result ProcedureBoundaryTable object.

    Raises:
      ParsingException: A line does not split into two or three fields, or
          a two-field line starts with a field that is not 1, 8 or 16
          characters long.
    """
    table = ProcedureBoundaryTable()

    def stored_name(name):
        # Applies the shortening requested through `mangled`.
        if mangled:
            return name
        return _get_short_function_name(name)

    range_start = 0
    pending_name = ''

    for raw_line in f:
        fields = raw_line.rstrip().split(None, 2)
        if len(fields) not in (2, 3):
            raise ParsingException('Invalid output 2 from (eu-)nm.')

        first_width = len(fields[0])
        if first_width == 1:
            # The line starts with a one-character symbol type, i.e. it has
            # no address column; there is nothing to record for it.
            continue

        if len(fields) == 3:
            address_text, kind, name = fields
        elif first_width in (8, 16):
            # An address and a name, but no symbol type.
            address_text, name = fields
            kind = ' '
        else:
            raise ParsingException('Invalid output 1 from (eu-)nm.')

        address = int(address_text, 16)

        # Two symbols can share an address, e.g. when one is a zero-length
        # variable (like __start_google_malloc) or a weak alias of another
        # (like __libc_malloc).  Only one name is kept per address.  The
        # first symbol read for an address is queued, and it is not written
        # to the table until a line with a different address arrives.
        # While the address stays the same, a symbol of type 't' or 'T'
        # replaces the queued name and anything else is ignored.  If no
        # such symbol shows up, the first queued name is the one recorded.
        if address == range_start:
            if kind in ('t', 'T'):
                pending_name = name
            continue

        table.append(
            ProcedureBoundary(range_start, address, stored_name(pending_name)))
        range_start = address
        # Tag the name with its starting address in case the image has
        # multiple occurrences of this routine.  The tag is written like a
        # template parameter list so that _get_short_function_name strips
        # it again.
        pending_name = name + '<%016x>' % address

    table.append(
        ProcedureBoundary(range_start, range_start, stored_name(pending_name)))
    return table

« no previous file with comments | « tools/find_runtime_symbols/prepare_symbol_info.py ('k') | tools/find_runtime_symbols/util.py » ('j') | no next file with comments »