tools/find_runtime_symbols/procedure_boundaries.py - Issue 10795028: A tool for mapping runtime addresses to symbol names.

Side by Side Diff: tools/find_runtime_symbols/procedure_boundaries.py

Issue 10795028: A tool for mapping runtime addresses to symbol names. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: updated Created 8 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 import bisect

	6 import os

	7 import re

	8 import sys

	9

	10

	11 _ARGUMENT_TYPE_PATTERN = re.compile('\([^()]\)(\sconst)?')

	12 _TEMPLATE_ARGUMENT_PATTERN = re.compile('<[^<>]*>')

	13 _LEADING_TYPE_PATTERN = re.compile('^.*\s+(\w+::)')

	14

	15

	16 class ParsingException(Exception):

	17 def __str__(self):

	18 return repr(self.args[0])

	19

	20

	21 class ProcedureBoundary(object):

	22 """A class for a procedure symbol and an address range for the symbol."""

	23

	24 def __init__(self, start, end, name):

	25 self.start = start

	26 self.end = end

	27 self.name = name

	28

	29

	30 class ProcedureBoundaryTable(object):

	31 """A class of a set of ProcedureBoundary."""

	32

	33 def __init__(self):

	34 self.sorted_value_list = []

	35 self.dictionary = {}

	36 self.sorted = True

	37

	38 def append(self, entry):

	39 if self.sorted_value_list:

	40 if self.sorted_value_list[-1] > entry.start:

	41 self.sorted = False

	42 elif self.sorted_value_list[-1] == entry.start:

	43 return

	44 self.sorted_value_list.append(entry.start)

	45 self.dictionary[entry.start] = entry

	46

	47 def find_procedure(self, address):

	48 if not self.sorted:

	49 self.sorted_value_list.sort()

	50 self.sorted = True

	51 found_index = bisect.bisect_left(self.sorted_value_list, address)

	52 found_start_address = self.sorted_value_list[found_index - 1]

	53 return self.dictionary[found_start_address]

	54

	55

	56 def _get_short_function_name(function):

	57 while True:

	58 function, number = _ARGUMENT_TYPE_PATTERN.subn('', function)

	59 if not number:

	60 break

	61 while True:

	62 function, number = _TEMPLATE_ARGUMENT_PATTERN.subn('', function)

	63 if not number:

	64 break

	65 return _LEADING_TYPE_PATTERN.sub('\g<1>', function)

	66

	67

	68 def get_procedure_boundaries_from_nm_bsd(f, mangled=False):

	69 """Gets procedure boundaries from a result of nm -n --format bsd.

	70

	71 Args:

	72 f: A file object containing a result of nm. It must be sorted and

	73 in BSD-style. (Use "[eu-]nm -n --format bsd")

	74

	75 Returns:

	76 A result ProcedureBoundaryTable object.

	77 """

	78 symbol_table = ProcedureBoundaryTable()

	79

	80 last_start = 0

	81 routine = ''

	82

	83 for line in f:

	84 symbol_info = line.rstrip().split(None, 2)

	85 if len(symbol_info) == 3:

	86 if len(symbol_info[0]) == 1:

	87 symbol_info = line.split(None, 1)

	88 (sym_type, this_routine) = symbol_info

	89 sym_value = ''

	90 else:

	91 (sym_value, sym_type, this_routine) = symbol_info

	92 elif len(symbol_info) == 2:

	93 if len(symbol_info[0]) == 1:

	94 (sym_type, this_routine) = symbol_info

	95 sym_value = ''

	96 elif len(symbol_info[0]) == 8 or len(symbol_info[0]) == 16:

	97 (sym_value, this_routine) = symbol_info

	98 sym_type = ' '

	99 else:

	100 raise ParsingException('Invalid output 1 from (eu-)nm.')

	101 else:

	102 raise ParsingException('Invalid output 2 from (eu-)nm.')

	103

	104 if sym_value == '':

	105 continue

	106

	107 start_val = int(sym_value, 16)

	108

	109 # It's possible for two symbols to share the same address, if

	110 # one is a zero-length variable (like __start_google_malloc) or

	111 # one symbol is a weak alias to another (like __libc_malloc).

	112 # In such cases, we want to ignore all values except for the

	113 # actual symbol, which in nm-speak has type "T". The logic

	114 # below does this, though it's a bit tricky: what happens when

	115 # we have a series of lines with the same address, is the first

	116 # one gets queued up to be processed. However, it won't

	117 # actually be processed until later, when we read a line with

	118 # a different address. That means that as long as we're reading

	119 # lines with the same address, we have a chance to replace that

	120 # item in the queue, which we do whenever we see a 'T' entry --

	121 # that is, a line with type 'T'. If we never see a 'T' entry,

	122 # we'll just go ahead and process the first entry (which never

	123 # got touched in the queue), and ignore the others.

	124 if start_val == last_start and (sym_type == 't' or sym_type == 'T'):

	125 # We are the 'T' symbol at this address, replace previous symbol.

	126 routine = this_routine

	127 continue

	128 elif start_val == last_start:

	129 # We're not the 'T' symbol at this address, so ignore us.

	130 continue

	131

	132 # Tag this routine with the starting address in case the image

	133 # has multiple occurrences of this routine. We use a syntax

	134 # that resembles template paramters that are automatically

	135 # stripped out by ShortFunctionName()

	136 this_routine += "<%016x>" % start_val

	137

	138 if not mangled:

	139 routine = _get_short_function_name(routine)

	140 symbol_table.append(ProcedureBoundary(last_start, start_val, routine))

	141

	142 last_start = start_val

	143 routine = this_routine

	144

	145 if not mangled:

	146 routine = _get_short_function_name(routine)

	147 symbol_table.append(ProcedureBoundary(last_start, last_start, routine))

	148 return symbol_table

OLD	NEW

« no previous file with comments | « tools/find_runtime_symbols/prepare_symbol_info.py ('k') | tools/find_runtime_symbols/util.py » ('j') | no next file with comments »