OLD | NEW |
(Empty) | |
| 1 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 import bisect |
| 6 import os |
| 7 import re |
| 8 import sys |
| 9 |
| 10 |
# Matches one innermost parenthesized argument list together with an optional
# trailing "const" qualifier, e.g. "(int, char) const".
_ARGUMENT_TYPE_PATTERN = re.compile(r'\([^()]*\)(\s*const)?')
# Matches one innermost template argument list, e.g. "<int>".
_TEMPLATE_ARGUMENT_PATTERN = re.compile(r'<[^<>]*>')
# Matches a leading prefix that ends in whitespace followed by a "word::"
# qualifier; the qualifier itself is captured as group 1.
_LEADING_TYPE_PATTERN = re.compile(r'^.*\s+(\w+::)')
| 14 |
| 15 |
class ParsingException(Exception):
    """Error raised when a line of nm output cannot be parsed."""

    def __str__(self):
        """Return the repr of the first argument (normally the message).

        Falls back to the default Exception text when the exception was
        constructed without arguments, so that formatting the error never
        raises IndexError itself.
        """
        if not self.args:
            return Exception.__str__(self)
        return repr(self.args[0])
| 19 |
| 20 |
class ProcedureBoundary(object):
    """Pairs a procedure symbol name with the address range it occupies.

    Attributes:
      start: Start address of the range.
      end: End address of the range.
      name: Symbol name of the procedure.
    """

    def __init__(self, start, end, name):
        self.start, self.end, self.name = start, end, name
| 28 |
| 29 |
class ProcedureBoundaryTable(object):
    """A set of ProcedureBoundary entries that can be looked up by address."""

    def __init__(self):
        # Start addresses of the appended entries; re-sorted lazily on lookup.
        self.sorted_value_list = []
        # Maps each start address to its entry.
        self.dictionary = {}
        # Cleared when an entry is appended below the previous start address.
        self.sorted = True

    def append(self, entry):
        """Adds an entry, keyed by entry.start.

        An entry whose start equals the most recently stored start address is
        dropped, so the first of consecutive duplicates wins.

        Args:
          entry: An object with a 'start' attribute (a ProcedureBoundary).
        """
        if self.sorted_value_list:
            previous_start = self.sorted_value_list[-1]
            if previous_start == entry.start:
                return
            if previous_start > entry.start:
                self.sorted = False
        self.sorted_value_list.append(entry.start)
        self.dictionary[entry.start] = entry

    def find_procedure(self, address):
        """Finds the entry with the greatest start address below 'address'.

        The comparison is strict: an address equal to an entry's start
        resolves to the preceding entry.
        NOTE(review): this looks intended for return-address lookups; confirm
        against callers before switching to bisect_right.

        Args:
          address: The address to look up.

        Returns:
          The matching entry, or None when no stored start address is below
          'address' (including when the table is empty).
        """
        if not self.sorted:
            self.sorted_value_list.sort()
            self.sorted = True
        found_index = bisect.bisect_left(self.sorted_value_list, address)
        if found_index == 0:
            # Nothing precedes the address. Indexing with -1 here would wrap
            # around and hand back the unrelated highest-address entry.
            return None
        return self.dictionary[self.sorted_value_list[found_index - 1]]
| 54 |
| 55 |
def _get_short_function_name(function):
    """Shortens a demangled function name.

    Removes parenthesized argument lists (with a trailing "const" if present)
    and template argument lists, then drops any leading text that precedes a
    whitespace-separated "name::" qualifier.

    Args:
      function: A function name string.

    Returns:
      The shortened name string.
    """
    for pattern in (_ARGUMENT_TYPE_PATTERN, _TEMPLATE_ARGUMENT_PATTERN):
        # Each pattern only matches innermost groups, so keep substituting
        # until nothing is left in order to peel nested groups layer by layer.
        substitutions = 1
        while substitutions:
            function, substitutions = pattern.subn('', function)
    return _LEADING_TYPE_PATTERN.sub(r'\g<1>', function)
| 66 |
| 67 |
def get_procedure_boundaries_from_nm_bsd(f, mangled=False):
    """Builds a ProcedureBoundaryTable from "nm -n --format bsd" output.

    Args:
      f: An iterable of text lines holding the nm result. It must be sorted
          by address and in BSD style. (Use "[eu-]nm -n --format bsd")
      mangled: If true, symbol names are stored as read (including the
          address tag appended below); otherwise each name is passed through
          _get_short_function_name() before being stored.

    Returns:
      A ProcedureBoundaryTable with one entry per distinct symbol address,
      preceded by an entry that starts at address 0.

    Raises:
      ParsingException: A line has fewer than two fields, or has exactly two
          fields whose first is neither a one-character symbol type nor an
          8- or 16-character address.
      ValueError: Propagated from int() if an address field is not
          hexadecimal.
    """
    table = ProcedureBoundaryTable()

    # The symbol that is "queued": its range is only closed (and the entry
    # stored) once a line with a different address is read.
    pending_start = 0
    pending_name = ''

    for line in f:
        fields = line.rstrip().split(None, 2)
        field_count = len(fields)
        if field_count not in (2, 3):
            raise ParsingException('Invalid output 2 from (eu-)nm.')

        first_length = len(fields[0])
        if first_length == 1:
            # The line starts with a one-character symbol type, i.e. it has
            # no address column, so there is nothing to record.
            continue
        if field_count == 3:
            address_text, kind, name = fields
        elif first_length in (8, 16):
            # Address and name only; treat the symbol type as blank.
            address_text, name = fields
            kind = ' '
        else:
            raise ParsingException('Invalid output 1 from (eu-)nm.')

        address = int(address_text, 16)

        # Two symbols may share an address, e.g. when one is a zero-length
        # variable (like __start_google_malloc) or a weak alias of another
        # (like __libc_malloc). Only one name per address is kept: the queued
        # name is replaced whenever a text symbol (type 't' or 'T') shows up
        # at the same address, and any other same-address symbol is ignored.
        # If no text symbol is seen, the first name read for that address is
        # the one that stays queued.
        if address == pending_start:
            if kind in ('t', 'T'):
                pending_name = name
            continue

        # Tag the name with its starting address in case the image has
        # multiple occurrences of this routine. The tag uses a syntax that
        # resembles template parameters, which _get_short_function_name()
        # strips out automatically.
        name += "<%016x>" % address

        # A new address closes the range of the queued symbol.
        if mangled:
            finished_name = pending_name
        else:
            finished_name = _get_short_function_name(pending_name)
        table.append(ProcedureBoundary(pending_start, address, finished_name))

        pending_start = address
        pending_name = name

    # Flush the last queued symbol; its end is unknown, so it equals its start.
    if not mangled:
        pending_name = _get_short_function_name(pending_name)
    table.append(ProcedureBoundary(pending_start, pending_start, pending_name))
    return table
OLD | NEW |