OLD | NEW |
(Empty) | |
| 1 # Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 """Functions that rely on parsing output of "nm" tool.""" |
| 6 |
| 7 import collections |
| 8 import logging |
| 9 import os |
| 10 import subprocess |
| 11 import sys |
| 12 |
| 13 import concurrent |
| 14 |
| 15 |
def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and returns a dict of address->[names]"""
  names_by_address = collections.defaultdict(list)

  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  cmd = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle',
         elf_path]
  nm_output = subprocess.check_output(cmd)
  for line in nm_output.splitlines():
    address_str, section, name = line.split(' ', 2)
    # To verify that rodata does not have aliases:
    #   nm --no-sort --defined-only libchrome.so > nm.out
    #   grep -v '\$' nm.out | grep ' r ' | sort | cut -d' ' -f1 > addrs
    #   wc -l < addrs; uniq < addrs | wc -l
    if section not in 'tT' or not name or name.startswith('$'):
      continue

    address = int(address_str, 16)
    if not address:
      continue
    # Constructors often show up twice, so dedupe within each address.
    aliases = names_by_address[address]
    if name not in aliases:
      aliases.append(name)

  # Since this is run in a separate process, minimize data passing by returning
  # only aliased symbols.
  return {addr: names for addr, names in names_by_address.iteritems()
          if len(names) > 1}
| 48 |
| 49 |
def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
  """Forked-process helper: collects aliases and encodes them for IPC."""
  aliases_by_address = CollectAliasesByAddress(elf_path, tool_prefix)
  # Addresses (ints) must be stringified so the encoded form round-trips.
  return concurrent.EncodeDictOfLists(aliases_by_address, key_transform=str)
| 53 |
| 54 |
def CollectAliasesByAddressAsync(elf_path, tool_prefix):
  """Calls CollectAliasesByAddress in a helper process. Returns a Result."""
  def _Decode(encoded):
    # Inverse of the helper's encoding: keys go back from str to int.
    keys, values = encoded
    return concurrent.DecodeDictOfLists(keys, values, key_transform=int)

  return concurrent.ForkAndCall(
      _CollectAliasesByAddressAsyncHelper, (elf_path, tool_prefix),
      decode_func=_Decode)
| 63 |
| 64 |
| 65 def _ParseOneObjectFileOutput(lines): |
| 66 ret = [] |
| 67 for line in lines: |
| 68 if not line: |
| 69 break |
| 70 sep = line.find(' ') # Skip over address. |
| 71 sep = line.find(' ', sep + 1) # Skip over symbol type. |
| 72 name = line[sep + 1:] |
| 73 # Skip lines like: |
| 74 # 00000000 t $t |
| 75 # 00000000 r $d |
| 76 # 0000041b r .L.str.38 |
| 77 if name[0] not in '$.': |
| 78 ret.append(name) |
| 79 return ret |
| 80 |
| 81 |
def _BatchCollectNames(target, tool_prefix, output_directory):
  """Runs nm on an archive or a batch of object files.

  Args:
    target: Either a single archive path (string) or a list of object paths.
    tool_prefix: Prefix prepended to 'nm' to form the tool path.
    output_directory: Working directory for the nm invocation.

  Returns:
    concurrent.EncodeDictOfLists() of {path: [symbol names]}, or ('', '')
    when nm produced no output. For archives, paths take the form
    'foo/bar.a(baz.o)'.
  """
  # basestring (Python 2) distinguishes a lone archive from a list of objects.
  is_archive = isinstance(target, basestring)
  # Ensure tool_prefix is absolute so that CWD does not affect it
  if os.path.sep in tool_prefix:
    # Use abspath() on the dirname to avoid it stripping a trailing /.
    dirname = os.path.dirname(tool_prefix)
    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]

  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  output = subprocess.check_output(args, cwd=output_directory)
  lines = output.splitlines()
  if not lines:
    return '', ''
  # Multi-file nm output starts with a blank line, then "path:" headers
  # before each file's symbols.
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    # A single-file run can only come from a one-element object list.
    assert not is_archive
    path = target[0]

  ret = {}
  while True:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
    # faster to use join & split.
    # _ParseOneObjectFileOutput() consumes |lines| up to the next blank line.
    ret[path] = _ParseOneObjectFileOutput(lines)
    # Default ':' makes the next slice empty, terminating on exhausted input.
    path = next(lines, ':')[:-1]
    if not path:
      return concurrent.EncodeDictOfLists(ret)
| 119 |
| 120 |
| 121 class _BulkObjectFileAnalyzerWorker(object): |
| 122 """Runs nm on all given paths and returns a dict of name->[paths]""" |
| 123 |
| 124 def __init__(self, tool_prefix, output_directory): |
| 125 self._tool_prefix = tool_prefix |
| 126 self._output_directory = output_directory |
| 127 self._batches = [] |
| 128 self._result = None |
| 129 |
| 130 def AnalyzePaths(self, paths): |
| 131 def iter_job_params(): |
| 132 object_paths = [] |
| 133 for path in paths: |
| 134 if path.endswith('.a'): |
| 135 yield path, self._tool_prefix, self._output_directory |
| 136 else: |
| 137 object_paths.append(path) |
| 138 |
| 139 BATCH_SIZE = 50 # Chosen arbitrarily. |
| 140 for i in xrange(0, len(object_paths), BATCH_SIZE): |
| 141 batch = object_paths[i:i + BATCH_SIZE] |
| 142 yield batch, self._tool_prefix, self._output_directory |
| 143 |
| 144 paths_by_name = collections.defaultdict(list) |
| 145 params = list(iter_job_params()) |
| 146 for encoded_ret in concurrent.BulkForkAndCall(_BatchCollectNames, params): |
| 147 names_by_path = concurrent.DecodeDictOfLists(*encoded_ret) |
| 148 for path, names in names_by_path.iteritems(): |
| 149 for name in names: |
| 150 paths_by_name[name].append(path) |
| 151 self._batches.append(paths_by_name) |
| 152 |
| 153 def Close(self): |
| 154 assert self._result is None |
| 155 assert self._batches |
| 156 paths_by_name = self._batches[0] |
| 157 for batch in self._batches[1:]: |
| 158 for name, path_list in batch.iteritems(): |
| 159 paths_by_name.setdefault(name, []).extend(path_list) |
| 160 |
| 161 # It would speed up mashalling of the values by removing all entries |
| 162 # that have only 1 path. However, these entries are needed to give |
| 163 # path information to symbol aliases. |
| 164 self._result = paths_by_name |
| 165 |
| 166 def Get(self): |
| 167 assert self._result is not None |
| 168 return self._result |
| 169 |
| 170 |
| 171 class _BulkObjectFileAnalyzerMaster(object): |
| 172 """Runs BulkObjectFileAnalyzer in a subprocess.""" |
| 173 |
| 174 def __init__(self, tool_prefix, output_directory): |
| 175 self._process = None |
| 176 self._tool_prefix = tool_prefix |
| 177 self._output_directory = output_directory |
| 178 |
| 179 def _Spawn(self): |
| 180 log_level = str(logging.getLogger().getEffectiveLevel()) |
| 181 args = [sys.executable, __file__, log_level, self._tool_prefix, |
| 182 self._output_directory] |
| 183 self._process = subprocess.Popen( |
| 184 args, stdin=subprocess.PIPE, stdout=subprocess.PIPE) |
| 185 |
| 186 def AnalyzePaths(self, paths): |
| 187 if self._process is None: |
| 188 self._Spawn() |
| 189 |
| 190 logging.debug('Sending batch of %d paths to subprocess', len(paths)) |
| 191 payload = '\x01'.join(paths) |
| 192 self._process.stdin.write('{:08x}'.format(len(payload))) |
| 193 self._process.stdin.write(payload) |
| 194 |
| 195 def Close(self): |
| 196 assert not self._process.stdin.closed |
| 197 self._process.stdin.close() |
| 198 |
| 199 def Get(self): |
| 200 assert self._process.stdin.closed |
| 201 logging.debug('Decoding nm results from forked process') |
| 202 |
| 203 encoded_keys_len = int(self._process.stdout.read(8), 16) |
| 204 encoded_keys = self._process.stdout.read(encoded_keys_len) |
| 205 encoded_values = self._process.stdout.read() |
| 206 return concurrent.DecodeDictOfLists(encoded_keys, encoded_values) |
| 207 |
| 208 |
# Default to the subprocess-backed master; fall back to the in-process worker
# when the concurrent module has async disabled.
BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerMaster
if concurrent.DISABLE_ASYNC:
  BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerWorker
| 212 |
| 213 |
def _SubMain(log_level, tool_prefix, output_directory):
  """Subprocess entry point: analyzes path batches streamed over stdin.

  Each request is an 8-hex-digit payload length followed by the payload
  (paths joined by a 0x01 separator); EOF or a zero length ends the loop.
  The merged result is written to stdout as an 8-hex-digit keys length,
  the encoded keys, then the encoded values.
  """
  logging.basicConfig(level=int(log_level),
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
  analyzer = _BulkObjectFileAnalyzerWorker(tool_prefix, output_directory)
  while True:
    header = sys.stdin.read(8)
    payload_len = int(header, 16) if header else 0
    if not payload_len:
      logging.debug('nm bulk subprocess received eof.')
      break
    analyzer.AnalyzePaths(sys.stdin.read(payload_len).split('\x01'))

  analyzer.Close()
  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(analyzer.Get())
  sys.stdout.write('%08x' % len(encoded_keys))
  sys.stdout.write(encoded_keys)
  sys.stdout.write(encoded_values)
  logging.debug('nm bulk subprocess finished.')
| 233 |
| 234 |
if __name__ == '__main__':
  # Invoked by _BulkObjectFileAnalyzerMaster._Spawn() with argv =
  # [log_level, tool_prefix, output_directory].
  _SubMain(*sys.argv[1:])
OLD | NEW |