| Index: tools/find_runtime_symbols/procedure_boundaries.py
|
| diff --git a/tools/find_runtime_symbols/procedure_boundaries.py b/tools/find_runtime_symbols/procedure_boundaries.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..be1d76cc3345ba983f6860f60218532a299c27a1
|
| --- /dev/null
|
| +++ b/tools/find_runtime_symbols/procedure_boundaries.py
|
| @@ -0,0 +1,148 @@
|
| +# Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +import bisect
|
| +import os
|
| +import re
|
| +import sys
|
| +
|
| +
|
# Regular expressions used to shorten demangled C++ symbol names.  They are
# written as raw strings so that regex escapes such as \( and \s are not
# treated as (invalid) Python string escapes.

# A parenthesised group with no nested parentheses, plus an optional
# trailing "const" qualifier.
_ARGUMENT_TYPE_PATTERN = re.compile(r'\([^()]*\)(\s*const)?')
# An angle-bracketed group with no nested angle brackets.
_TEMPLATE_ARGUMENT_PATTERN = re.compile(r'<[^<>]*>')
# Everything from the start of the string up to whitespace that is followed
# by a "name::" qualifier; the qualifier itself is captured as group 1.
_LEADING_TYPE_PATTERN = re.compile(r'^.*\s+(\w+::)')
|
| +
|
| +
|
class ParsingException(Exception):
    """Raised when a line of (eu-)nm output cannot be parsed.

    The string form is the repr() of the first constructor argument, so a
    message passed as a string is shown quoted.
    """

    def __str__(self):
        # An exception constructed without arguments has an empty args
        # tuple; indexing it would raise IndexError while formatting the
        # error and hide the original failure.
        if not self.args:
            return ''
        return repr(self.args[0])
|
| +
|
| +
|
class ProcedureBoundary(object):
    """Associates a procedure symbol name with an address range.

    Attributes:
        start: The value given as the beginning of the range.
        end: The value given as the end of the range.
        name: The symbol name for the range.
    """

    def __init__(self, start, end, name):
        # The values are stored exactly as given; nothing is validated.
        self.start, self.end, self.name = start, end, name
|
| +
|
| +
|
class ProcedureBoundaryTable(object):
    """A collection of ProcedureBoundary entries looked up by address.

    Attributes:
        sorted_value_list: Start addresses of the appended entries.  It is
            only guaranteed to be ordered after find_procedure() has run.
        dictionary: Maps a start address to the entry stored for it.
        sorted: False once an entry has been appended out of order and the
            list has not been re-sorted yet.
    """

    def __init__(self):
        self.sorted_value_list = []
        self.dictionary = {}
        self.sorted = True

    def append(self, entry):
        """Add an entry, keyed by its `start` attribute.

        An entry whose start equals the most recently appended start is
        dropped.  An entry whose start is lower than the most recently
        appended one marks the table as needing a sort.
        """
        starts = self.sorted_value_list
        if starts:
            newest = starts[-1]
            if newest == entry.start:
                return
            if newest > entry.start:
                self.sorted = False
        starts.append(entry.start)
        self.dictionary[entry.start] = entry

    def find_procedure(self, address):
        """Return the entry recorded just before `address`'s insertion point.

        The start list is sorted first if necessary.  The lookup uses
        bisect_left and steps back one slot, so an address equal to a
        recorded start resolves to the preceding entry.  When the insertion
        point is 0 the index becomes -1, which selects the entry with the
        largest start; on an empty table that index raises IndexError.
        """
        if not self.sorted:
            self.sorted_value_list.sort()
            self.sorted = True
        position = bisect.bisect_left(self.sorted_value_list, address) - 1
        return self.dictionary[self.sorted_value_list[position]]
|
| +
|
| +
|
def _get_short_function_name(function):
    """Return a shortened form of a symbol name.

    Parenthesised argument lists (with any trailing "const") and
    angle-bracketed template arguments are removed.  Then any text that
    precedes a whitespace-separated "name::" qualifier is dropped, keeping
    the qualifier itself.

    Args:
        function: The symbol name string to shorten.

    Returns:
        The shortened name string.
    """
    # Both patterns exclude nested brackets, so one pass strips only the
    # innermost groups.  Repeat each substitution until it removes nothing.
    for pattern in (_ARGUMENT_TYPE_PATTERN, _TEMPLATE_ARGUMENT_PATTERN):
        replaced = True
        while replaced:
            function, replaced = pattern.subn('', function)
    # Raw string: '\g' is not a valid Python string escape.
    return _LEADING_TYPE_PATTERN.sub(r'\g<1>', function)
|
| +
|
| +
|
def _parse_nm_bsd_line(line):
    """Split one line of BSD-style nm output into its columns.

    Args:
        line: One line of nm output.

    Returns:
        A (sym_value, sym_type, name) tuple of strings.  sym_value is ''
        when the first column is a single character, in which case that
        column is taken as the symbol type and the rest as the name.
        sym_type is ' ' for a two-column line whose first column is 8 or 16
        characters long.

    Raises:
        ParsingException: If the line has fewer than two columns, or has
            two columns and the first is not 1, 8 or 16 characters long.
    """
    fields = line.rstrip().split(None, 2)
    if len(fields) == 3:
        if len(fields[0]) == 1:
            # No address column; the name itself contains whitespace, so
            # re-split keeping everything after the type together.
            sym_type, name = line.split(None, 1)
            return '', sym_type, name
        sym_value, sym_type, name = fields
        return sym_value, sym_type, name
    if len(fields) == 2:
        if len(fields[0]) == 1:
            sym_type, name = fields
            return '', sym_type, name
        if len(fields[0]) in (8, 16):
            sym_value, name = fields
            return sym_value, ' ', name
        raise ParsingException('Invalid output 1 from (eu-)nm.')
    raise ParsingException('Invalid output 2 from (eu-)nm.')


def get_procedure_boundaries_from_nm_bsd(f, mangled=False):
    """Gets procedure boundaries from a result of nm -n --format bsd.

    Each stored entry spans from one symbol's address to the address of the
    next symbol at a different address.  The first entry starts at address 0
    with an empty name, and the final entry has equal start and end.

    Args:
        f: A file object (or other iterable of lines) containing a result of
            nm.  It must be sorted and in BSD-style.
            (Use "[eu-]nm -n --format bsd")
        mangled: If false (the default), every routine name is passed through
            _get_short_function_name() before it is stored.  If true, names
            are stored as they were read, including the address tag appended
            below.

    Returns:
        A result ProcedureBoundaryTable object.

    Raises:
        ParsingException: If a line does not have a recognised column layout.
        ValueError: If a symbol value is not a hexadecimal number.
    """
    symbol_table = ProcedureBoundaryTable()

    last_start = 0
    routine = ''

    for line in f:
        sym_value, sym_type, this_routine = _parse_nm_bsd_line(line)

        # Lines without an address cannot contribute a boundary.
        if sym_value == '':
            continue

        start_val = int(sym_value, 16)

        # It's possible for two symbols to share the same address, if
        # one is a zero-length variable (like __start_google_malloc) or
        # one symbol is a weak alias to another (like __libc_malloc).
        # In such cases, we want to ignore all values except for the
        # actual symbol, which in nm-speak has type "T" (or "t").  The
        # logic below does this, though it's a bit tricky: when we have a
        # series of lines with the same address, the first one gets queued
        # up to be processed.  However, it won't *actually* be processed
        # until later, when we read a line with a different address.  That
        # means that as long as we're reading lines with the same address,
        # we have a chance to replace that item in the queue, which we do
        # whenever we see a 'T'/'t' entry.  If we never see one, we'll just
        # go ahead and process the first entry (which never got touched in
        # the queue), and ignore the others.
        if start_val == last_start:
            if sym_type in ('t', 'T'):
                # We are the 'T' symbol at this address, replace the
                # previously queued symbol.
                routine = this_routine
            # Either way, no new boundary starts at this address.
            continue

        # Tag this routine with the starting address in case the image
        # has multiple occurrences of this routine.  We use a syntax
        # that resembles template parameters, which are automatically
        # stripped out by _get_short_function_name().
        this_routine += "<%016x>" % start_val

        # Emit the queued routine, which ends where this one starts.
        if not mangled:
            routine = _get_short_function_name(routine)
        symbol_table.append(ProcedureBoundary(last_start, start_val, routine))

        last_start = start_val
        routine = this_routine

    # Flush the last queued routine; no following symbol bounds it, so its
    # end is recorded as equal to its start.
    if not mangled:
        routine = _get_short_function_name(routine)
    symbol_table.append(ProcedureBoundary(last_start, last_start, routine))
    return symbol_table
|
|
|