Chromium Code Reviews
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Functions that rely on parsing output of the "nm" tool."""

import collections
import logging
import os
import subprocess
import sys

import concurrent


def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and returns a dict of address->[names]."""
  names_by_address = collections.defaultdict(list)

  # About 60 MB of output; piping it takes ~30s, while loading it into RAM
  # directly takes only ~3s.
  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle',
          elf_path]
  output = subprocess.check_output(args)
  for line in output.splitlines():
    address_str, section, name = line.split(' ', 2)
    if section not in 'tT' or not name or name[0] == '$':
estevenson (2017/04/28 17:06:11): should readonly symbols be included in this?
agrieve (2017/04/28 19:26:59): Added a comment. Looks like no.
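      # nm's section letters are case-coded: 't'/'T' are local/global
      # text-section symbols, while readonly data appears as 'r'/'R' and
      # is excluded by this check (consistent with the reply above).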
      continue

    address = int(address_str, 16) & 0xfffffffffffffffe
estevenson (2017/04/28 17:06:11): This needs a comment
agrieve (2017/04/28 19:26:59): Turns out it wasn't needed :P
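    # The mask above clears the low address bit, which nm reports set for
    # ARM Thumb-mode functions (likely why it is stripped here), e.g.
    # 0x2a1001 & 0xfffffffffffffffe == 0x2a1000.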
    if not address:
      continue
    # Constructors often show up twice.
    name_list = names_by_address[address]
    if name not in name_list:
      name_list.append(name)

  # Since this is run in a separate process, minimize data passing by returning
  # only aliased symbols.
  names_by_address = {k: v for k, v in names_by_address.iteritems()
                      if len(v) > 1}

  return names_by_address

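Aside: a minimal self-contained sketch of the grouping this function performs,
using invented nm output (real names and addresses will differ):

    import collections

    SAMPLE_OUTPUT = ('002a1000 t _GLOBAL__sub_I_foo.cc\n'
                     '002a1001 t Foo()\n'  # Low bit set: a Thumb address.
                     '002a2000 T main\n')

    names_by_address = collections.defaultdict(list)
    for line in SAMPLE_OUTPUT.splitlines():
      address_str, _, name = line.split(' ', 2)
      address = int(address_str, 16) & ~1  # 0x2a1001 -> 0x2a1000.
      if address and name not in names_by_address[address]:
        names_by_address[address].append(name)
    print({k: v for k, v in names_by_address.iteritems() if len(v) > 1})
    # {2756608: ['_GLOBAL__sub_I_foo.cc', 'Foo()']}  (only the aliased address)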

def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
  result = CollectAliasesByAddress(elf_path, tool_prefix)
  return concurrent.EncodeDictOfLists(result, key_transform=str)


def CollectAliasesByAddressAsync(elf_path, tool_prefix):
  """Calls CollectAliasesByAddress in a helper process. Returns a Result."""
  def decode(encoded):
    return concurrent.DecodeDictOfLists(
        encoded[0], encoded[1], key_transform=int)
  return concurrent.ForkAndCall(
      _CollectAliasesByAddressAsyncHelper, (elf_path, tool_prefix),
      decode_func=decode)

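Aside: EncodeDictOfLists/DecodeDictOfLists live in Chromium's concurrent
helper module, which is not part of this review. A plausible minimal shape,
assuming the flat join/split encoding hinted at by the pickle comment in
_BatchCollectNames below (the real implementation may differ):

    # Assumed sketch; the separator characters are an assumption.
    def EncodeDictOfLists(d, key_transform=None):
      if key_transform:
        keys = '\x01'.join(key_transform(k) for k in d)
      else:
        keys = '\x01'.join(d)
      values = '\x02'.join('\x01'.join(v) for v in d.itervalues())
      return keys, values

    def DecodeDictOfLists(keys, values, key_transform=None):
      key_list = keys.split('\x01')
      if key_transform:
        key_list = [key_transform(k) for k in key_list]
      value_lists = (v.split('\x01') for v in values.split('\x02'))
      return dict(zip(key_list, value_lists))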

def _ParseOneObjectFileOutput(lines):
  ret = []
  for line in lines:
    if not line:
      break
    sep = line.find(' ')  # Skip over address.
    sep = line.find(' ', sep + 1)  # Skip over symbol type.
    name = line[sep + 1:]
    # Skip lines like:
    # 00000000 t $t
    # 00000000 r $d
    # 0000041b r .L.str.38
    if name[0] not in '$.':
      ret.append(name)
  return ret

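Aside: a quick demonstration with invented lines, mirroring the skip cases
listed in the comment above:

    print(_ParseOneObjectFileOutput(iter([
        '00000000 t $t',           # Mapping symbol: skipped.
        '0000041b r .L.str.38',    # Local string literal: skipped.
        '00000010 t Foo::Bar()',   # Kept.
        ''])))                     # Blank line ends this object's listing.
    # Prints: ['Foo::Bar()']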

def _BatchCollectNames(target, tool_prefix, output_directory):
  is_archive = isinstance(target, basestring)
  # Ensure tool_prefix is absolute so that CWD does not affect it.
  if os.path.sep in tool_prefix:
    # Use abspath() on the dirname to avoid it stripping a trailing /.
    dirname = os.path.dirname(tool_prefix)
    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]

  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  output = subprocess.check_output(args, cwd=output_directory)
  lines = output.splitlines()
  if not lines:
    return '', ''
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    assert not is_archive
    path = target[0]

  ret = {}
  while True:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
    # faster to use join & split.
    ret[path] = _ParseOneObjectFileOutput(lines)
    path = next(lines, ':')[:-1]
    if not path:
      return concurrent.EncodeDictOfLists(ret)

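Aside: when nm is given several inputs (or an archive), its output starts with
a blank line and groups symbols under `file:` header lines separated by blank
lines; that is the shape the is_multi_file branch and the loop above walk. A
sketch with invented output, reusing _ParseOneObjectFileOutput:

    SAMPLE_LINES = [
        '',                    # Leading blank line => multi-file output.
        'foo.o:',
        '00000000 t bar()',
        '00000004 t $t',
        '',                    # Blank line separates object files.
        'baz.o:',
        '00000000 t qux()',
    ]
    lines = iter(SAMPLE_LINES)
    next(lines)
    ret = {}
    path = next(lines)[:-1]    # Strip the trailing colon.
    while path:
      ret[path] = _ParseOneObjectFileOutput(lines)
      path = next(lines, ':')[:-1]
    print(ret)  # {'foo.o': ['bar()'], 'baz.o': ['qux()']} (order may vary)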

class _BulkObjectFileAnalyzerWorker(object):
  """Runs nm on all given paths and returns a dict of name->[paths]."""

  def __init__(self, tool_prefix, output_directory):
    self._tool_prefix = tool_prefix
    self._output_directory = output_directory
    self._batches = []
    self._result = None

  def AnalyzePaths(self, paths):
    def iter_job_params():
      object_paths = []
      for path in paths:
        if path.endswith('.a'):
          yield path, self._tool_prefix, self._output_directory
        else:
          object_paths.append(path)

      BATCH_SIZE = 50  # Chosen arbitrarily.
      for i in xrange(0, len(object_paths), BATCH_SIZE):
        batch = object_paths[i:i + BATCH_SIZE]
        yield batch, self._tool_prefix, self._output_directory

    paths_by_name = collections.defaultdict(list)
    params = list(iter_job_params())
    for encoded_ret in concurrent.BulkForkAndCall(_BatchCollectNames, params):
      names_by_path = concurrent.DecodeDictOfLists(*encoded_ret)
      for path, names in names_by_path.iteritems():
        for name in names:
          paths_by_name[name].append(path)
    self._batches.append(paths_by_name)

  def Close(self):
    assert self._result is None
    assert self._batches
    paths_by_name = self._batches[0]
    for batch in self._batches[1:]:
      for name, path_list in batch.iteritems():
        paths_by_name.setdefault(name, []).extend(path_list)

    # It would speed up marshalling of the values to remove all entries
    # that have only 1 path. However, these entries are needed to give
    # path information to symbol aliases.
    self._result = paths_by_name

  def Get(self):
    assert self._result is not None
    return self._result

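Aside: a usage sketch of the worker's synchronous lifecycle (the prefix,
paths, and output directory here are invented):

    analyzer = _BulkObjectFileAnalyzerWorker('', 'out/Release')
    analyzer.AnalyzePaths(['obj/foo.o', 'obj/libbar.a'])  # May be repeated.
    analyzer.Close()                # Merges the per-call batches.
    paths_by_name = analyzer.Get()  # E.g. {'Foo::Bar()': ['obj/foo.o']}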

class _BulkObjectFileAnalyzerMaster(object):
  """Runs BulkObjectFileAnalyzer in a subprocess."""

  def __init__(self, tool_prefix, output_directory):
    self._process = None
    self._tool_prefix = tool_prefix
    self._output_directory = output_directory

  def _Spawn(self):
    log_level = str(logging.getLogger().getEffectiveLevel())
    args = [sys.executable, __file__, log_level, self._tool_prefix,
            self._output_directory]
    self._process = subprocess.Popen(
        args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  def AnalyzePaths(self, paths):
    if self._process is None:
      self._Spawn()

    logging.debug('Sending batch of %d paths to subprocess', len(paths))
    payload = '\x01'.join(paths)
    self._process.stdin.write('{:08x}'.format(len(payload)))
    self._process.stdin.write(payload)

  def Close(self):
    assert not self._process.stdin.closed
    self._process.stdin.close()

  def Get(self):
    assert self._process.stdin.closed
    logging.debug('Decoding nm results from forked process')

    encoded_keys_len = int(self._process.stdout.read(8), 16)
    encoded_keys = self._process.stdout.read(encoded_keys_len)
    encoded_values = self._process.stdout.read()
    return concurrent.DecodeDictOfLists(encoded_keys, encoded_values)

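Aside: the parent and the worker exchange data over plain pipes using an
8-hex-digit length prefix instead of pickle. A minimal sketch of that framing
(the helper names are invented):

    def _WriteFrame(pipe, payload):
      pipe.write('{:08x}'.format(len(payload)))  # Length as 8 hex digits...
      pipe.write(payload)                        # ...then the raw payload.

    def _ReadFrame(pipe):
      length = int(pipe.read(8), 16)
      return pipe.read(length)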

BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerMaster
if concurrent.DISABLE_ASYNC:
  BulkObjectFileAnalyzer = _BulkObjectFileAnalyzerWorker


def _SubMain(log_level, tool_prefix, output_directory):
  logging.basicConfig(level=int(log_level),
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')
  bulk_analyzer = _BulkObjectFileAnalyzerWorker(tool_prefix, output_directory)
  while True:
    payload_len = int(sys.stdin.read(8) or '0', 16)
    if not payload_len:
      logging.debug('nm bulk subprocess received eof.')
      break
    paths = sys.stdin.read(payload_len).split('\x01')
    bulk_analyzer.AnalyzePaths(paths)

  bulk_analyzer.Close()
  paths_by_name = bulk_analyzer.Get()
  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(paths_by_name)
  sys.stdout.write('%08x' % len(encoded_keys))
  sys.stdout.write(encoded_keys)
  sys.stdout.write(encoded_values)
  logging.debug('nm bulk subprocess finished.')


if __name__ == '__main__':
  _SubMain(*sys.argv[1:])
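Aside: _Spawn launches this same file as the worker process, so the entry
point above is reached via an invocation equivalent to the following
(placeholders, not literal values):

    python nm.py <log_level> <tool_prefix> <output_directory>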