OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 |
| 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can be |
| 5 # found in the LICENSE file. |
| 6 |
| 7 from third_party import asan_symbolize |
| 8 |
| 9 import argparse |
| 10 import base64 |
| 11 import json |
| 12 import os |
| 13 import platform |
| 14 import re |
| 15 import subprocess |
| 16 import sys |
| 17 |
class LineBuffered(object):
    """Stream wrapper that flushes after any write containing a newline.

    Every attribute other than write() is looked up on the wrapped stream.
    """

    def __init__(self, stream):
        self.stream = stream

    def __getattr__(self, attr):
        # Only consulted for names that are not found on the wrapper itself.
        return getattr(self.stream, attr)

    def write(self, data):
        """Write |data| to the wrapped stream, flushing if a line ended."""
        wrapped = self.stream
        wrapped.write(data)
        if '\n' not in data:
            return
        wrapped.flush()
| 30 |
| 31 |
def disable_buffering():
    """Make stdout flush on every newline and export PYTHONUNBUFFERED.

    Does nothing when PYTHONUNBUFFERED is already set to a non-empty value.
    """
    if os.environ.get('PYTHONUNBUFFERED'):
        return
    # sys.stdout.write cannot simply be reassigned, so the whole stream object
    # is swapped for a wrapper that flushes after each line.
    sys.stdout = LineBuffered(sys.stdout)
    # Exported through the environment so that child processes see it too.
    os.environ['PYTHONUNBUFFERED'] = 'x'
| 39 |
| 40 |
def set_symbolizer_path():
    """Point LLVM_SYMBOLIZER_PATH at the llvm-symbolizer in the source tree.

    An already set, non-empty LLVM_SYMBOLIZER_PATH is left untouched.
    Otherwise the binary is expected under
    third_party/llvm-build/Release+Asserts/bin relative to the source root,
    and an AssertionError is raised if it is not a regular file there.
    """
    if os.environ.get('LLVM_SYMBOLIZER_PATH'):
        return
    here = os.path.dirname(os.path.abspath(__file__))
    # This script is assumed to live three levels below src/ (i.e. in
    # src/tools/valgrind/asan/).
    src_root = os.path.join(here, "..", "..", "..")
    llvm_bin_dir = os.path.join(
        src_root, 'third_party', 'llvm-build', 'Release+Asserts', 'bin')
    symbolizer = os.path.join(llvm_bin_dir, 'llvm-symbolizer')
    assert os.path.isfile(symbolizer)
    os.environ['LLVM_SYMBOLIZER_PATH'] = os.path.abspath(symbolizer)
| 52 |
| 53 |
def is_hash_name(name):
    """Return True if |name| consists solely of lowercase hex digits.

    The pattern is anchored with \\Z rather than $, because $ also matches
    just before a trailing newline and would accept e.g. 'abc\\n'.
    """
    return re.match(r'[0-9a-f]+\Z', name) is not None
| 57 |
| 58 |
def split_path(path):
    """Break |path| into a list of its components.

    os.path.split() is applied repeatedly until it can no longer shorten the
    path. That irreducible remainder becomes the first element of the result,
    followed by the components in their original order.
    """
    reversed_parts = []
    head, tail = os.path.split(path)
    while head != path:
        # Components come off the end first, so collect them back to front.
        reversed_parts.append(tail)
        path = head
        head, tail = os.path.split(path)
    return [head] + reversed_parts[::-1]
| 66 |
| 67 |
def chrome_product_dir_path(exe_path):
    """Return the product dir that contains the executable at |exe_path|.

    The product dir is everything before the first '.app' component of the
    path. If the path has no such component, the executable is a command-line
    binary that resides right in the product dir, so its parent directory is
    returned.

    Returns None when |exe_path| is None. Never returns an empty string: when
    no directory part is left (e.g. |exe_path| is a bare file name or a
    relative path that starts with the bundle), '.' is returned instead.
    """
    if exe_path is None:
        return None
    path_parts = split_path(exe_path)
    for index, part in enumerate(path_parts):
        if part.endswith('.app'):
            dir_parts = path_parts[:index]
            break
    else:
        # Not inside an .app bundle: drop the binary name itself.
        dir_parts = path_parts[:-1]
    product_dir = os.path.join(*dir_parts) if dir_parts else ''
    # split_path() puts '' in front of relative paths, so a relative
    # |exe_path| can leave nothing but that empty component here. An empty
    # product dir would be treated as "not set" by the binary name filter, so
    # use the current directory.
    return product_dir or '.'
| 82 |
| 83 |
# Maps an inode number to the result of the `find` lookup made for it (a path,
# or None when nothing was found), so each inode is searched for only once.
inode_path_cache = {}


def find_inode_at_path(inode, path):
    """Return a path under |path| that refers to |inode|, or None.

    The lookup runs `find <path> -inum <inode>` and takes the first line of
    its output. Results, including misses, are cached in |inode_path_cache|
    keyed by |inode| alone, so later calls for the same inode return the
    cached value without looking at |path|.

    Raises:
        subprocess.CalledProcessError: if `find` exits with a non-zero status.
    """
    if inode in inode_path_cache:
        return inode_path_cache[inode]
    cmd = ['find', path, '-inum', str(inode)]
    output = subprocess.check_output(cmd)
    if not isinstance(output, str):
        # Python 3 returns bytes here; the rest of the script works on text.
        output = output.decode('utf-8', 'replace')
    # ''.split('\n') is [''], so empty lines have to be filtered out
    # explicitly for "no match" to be detected.
    matches = [line for line in output.split('\n') if line]
    # `find` may give us several paths (e.g. 'Chromium Framework' in the
    # product dir and 'Chromium Framework' inside 'Chromium.app');
    # chrome_dsym_hints() will produce the correct .dSYM path for any of them.
    ret = matches[0] if matches else None
    inode_path_cache[inode] = ret
    return ret
| 101 |
| 102 |
# Create a binary name filter that works around https://crbug.com/444835.
# When running tests on OSX swarming servers, ASan sometimes prints paths to
# files in cache (ending with SHA1 filenames) instead of paths to hardlinks to
# those files in the product dir.
def make_chrome_osx_binary_name_filter(product_dir_path=''):
    """Build a filter that maps cached binaries back into the product dir.

    Args:
        product_dir_path: directory to search for hardlinks. When it is empty
            or None the returned filter passes every path through unchanged.

    Returns:
        A function taking |binary_path|. If the basename of |binary_path|
        looks like a hash, the function returns one of the paths under
        |product_dir_path| that share its inode; in every other case it
        returns |binary_path| unchanged.
    """
    def chrome_osx_binary_name_filter(binary_path):
        # Cheapest check first: without a product dir there is nothing to
        # search.
        if not product_dir_path:
            return binary_path
        if not is_hash_name(os.path.basename(binary_path)):
            return binary_path
        try:
            inode = os.stat(binary_path).st_ino
        except OSError:
            # The file cannot be inspected (e.g. it does not exist on this
            # machine). Keep the original path instead of aborting the whole
            # symbolization run.
            return binary_path
        return find_inode_at_path(inode, product_dir_path) or binary_path
    return chrome_osx_binary_name_filter
| 119 |
| 120 |
# Construct a path to the .dSYM bundle for the given binary.
# There are three possible cases for binary location in Chromium:
# 1. The binary is a standalone executable or dynamic library in the product
#    dir, the debug info is in "binary.dSYM" in the product dir.
# 2. The binary is a standalone framework or .app bundle, the debug info is in
#    "Framework.framework.dSYM" or "App.app.dSYM" in the product dir.
# 3. The binary is a framework or an .app bundle within another .app bundle
#    (e.g. Outer.app/Contents/Versions/1.2.3.4/Inner.app), and the debug info
#    is in Inner.app.dSYM in the product dir.
# The first case is handled by llvm-symbolizer, so we only need to construct
# .dSYM paths for .app bundles and frameworks.
# We're assuming that there are no more than two nested bundles in the binary
# path. Only one of these bundles may be a framework and frameworks cannot
# contain other bundles.
def chrome_dsym_hints(binary):
    """Return a list with the .dSYM path for |binary|, or [] if not bundled.

    Raises AssertionError when the path contains more than two bundles, or
    one app bundle nested inside one framework.
    """
    path_parts = split_path(binary)
    app_positions = [
        index for index, part in enumerate(path_parts)
        if part.endswith('.app')]
    framework_positions = [
        index for index, part in enumerate(path_parts)
        if part.endswith('.framework')]
    bundle_positions = sorted(app_positions + framework_positions)
    assert len(bundle_positions) <= 2, (
        "The path contains more than two nested bundles: %s" % binary)
    if not bundle_positions:
        # Case 1: this is a standalone executable or dylib.
        return []
    app_inside_framework = (
        len(app_positions) == 1 and
        len(framework_positions) == 1 and
        app_positions[0] > framework_positions[0])
    assert not app_inside_framework, (
        "The path contains an app bundle inside a framework: %s" % binary)
    # Cases 2 and 3. The outermost bundle (the only bundle in case 2) is
    # located in the product dir, and the .dSYM is named after the innermost
    # bundle (which in case 2 is that same bundle).
    outermost_bundle = bundle_positions[0]
    innermost_bundle = bundle_positions[-1]
    dsym_parts = path_parts[:outermost_bundle] + [path_parts[innermost_bundle]]
    return [os.path.join(*dsym_parts) + '.dSYM']
| 164 |
| 165 |
# We want our output to match base::EscapeJSONString(), which produces
# doubly-escaped strings. The first escaping pass is handled by this class. The
# second pass happens when JSON data is dumped to file.
class StringEncoder(json.JSONEncoder):
    """JSON-escapes a single string and strips the surrounding quotes."""

    def __init__(self):
        json.JSONEncoder.__init__(self)

    def encode(self, s):
        """Return |s| JSON-escaped, without the enclosing double quotes.

        In addition to the standard JSON escaping, every '<' is replaced by
        the six-character sequence backslash-u-0-0-3-C.

        Raises AssertionError if |s| is not a string.
        """
        try:
            string_types = basestring  # Python 2: covers str and unicode.
        except NameError:
            string_types = str  # Python 3 has no basestring.
        assert isinstance(s, string_types)
        encoded = json.JSONEncoder.encode(self, s)
        assert len(encoded) >= 2
        assert encoded[0] == '"'
        assert encoded[-1] == '"'
        encoded = encoded[1:-1]
        # Special case from base::EscapeJSONString(). The backslash is escaped
        # explicitly: an unescaped '\u003C' is the character '<' itself in a
        # Python 3 (or unicode) literal, which would make this a no-op.
        encoded = encoded.replace('<', '\\u003C')
        return encoded
| 183 |
| 184 |
class JSONTestRunSymbolizer(object):
    """Symbolizes the output snippet stored in a test-run dictionary."""

    def __init__(self, symbolization_loop):
        self.string_encoder = StringEncoder()
        self.symbolization_loop = symbolization_loop

    def symbolize_snippet(self, snippet):
        """Run every line of |snippet| through the symbolization loop.

        process_line() returns a list of output lines for each input line;
        all of them are joined back together with newlines.
        """
        symbolized_lines = []
        for line in snippet.split('\n'):
            symbolized_lines.extend(self.symbolization_loop.process_line(line))
        return '\n'.join(symbolized_lines)

    def symbolize(self, test_run):
        """Symbolize |test_run| in place.

        Reads 'output_snippet_base64'. If symbolization changes nothing,
        |test_run| is left untouched. Otherwise the original 'output_snippet'
        and 'output_snippet_base64' values are preserved under
        'original_'-prefixed keys and replaced with the symbolized snippet.
        """
        original_snippet = base64.b64decode(test_run['output_snippet_base64'])
        if not isinstance(original_snippet, str):
            # Python 3: b64decode() returns bytes, but lines are processed as
            # text.
            original_snippet = original_snippet.decode('utf-8', 'replace')
        symbolized_snippet = self.symbolize_snippet(original_snippet)
        if symbolized_snippet == original_snippet:
            # No sanitizer reports in snippet.
            return

        test_run['original_output_snippet'] = test_run['output_snippet']
        test_run['original_output_snippet_base64'] = (
            test_run['output_snippet_base64'])

        # Reuse the encoder created in __init__ instead of building a new one
        # for every test run.
        test_run['output_snippet'] = self.string_encoder.encode(
            symbolized_snippet)
        raw_snippet = symbolized_snippet
        if not isinstance(raw_snippet, bytes):
            # b64encode() needs bytes.
            raw_snippet = raw_snippet.encode('utf-8')
        encoded_snippet = base64.b64encode(raw_snippet)
        if not isinstance(encoded_snippet, str):
            # Store text, not bytes, so the value can be dumped as JSON.
            encoded_snippet = encoded_snippet.decode('ascii')
        test_run['output_snippet_base64'] = encoded_snippet
        test_run['snippet_processed_by'] = 'asan_symbolize.py'
        # Originally, "lossless" refers to "no Unicode data lost while encoding
        # the string". However, since we're applying another kind of
        # transformation (symbolization), it doesn't seem right to consider the
        # snippet lossless.
        # NOTE(review): the key spelling 'losless_snippet' is kept as is;
        # presumably it matches the key used by the JSON producer - confirm
        # before changing.
        test_run['losless_snippet'] = False
| 216 |
| 217 |
def symbolize_snippets_in_json(filename, symbolization_loop):
    """Symbolize every test run in the JSON file |filename|, in place.

    The file is expected to hold an object whose 'per_iteration_data' entry
    is a list of dictionaries mapping a test name to a list of test runs.
    After all runs have been processed the whole document is written back to
    |filename|.
    """
    with open(filename, 'r') as f:
        json_data = json.load(f)

    test_run_symbolizer = JSONTestRunSymbolizer(symbolization_loop)
    for iteration_data in json_data['per_iteration_data']:
        # Only the runs are needed, not the test names. values() also exists
        # on both Python 2 and Python 3, unlike iteritems().
        for test_runs in iteration_data.values():
            for test_run in test_runs:
                test_run_symbolizer.symbolize(test_run)

    with open(filename, 'w') as f:
        json.dump(json_data, f, indent=3, sort_keys=True)
| 230 |
| 231 |
def main():
    """Parse the command line and symbolize sanitizer reports.

    With --test-summary-json-file the snippets inside that JSON file are
    symbolized and the file is rewritten; otherwise the log is read from
    standard input.
    """
    parser = argparse.ArgumentParser(
        description='Symbolize sanitizer reports.')
    parser.add_argument('--test-summary-json-file',
        help='Path to a JSON file produced by the test launcher. The script '
             'will ignore standard input and instead symbolize the output '
             'snippets inside the JSON file. The result will be written back '
             'to the JSON file.')
    parser.add_argument('strip_path_prefix', nargs='*',
        help='When printing source file names, the longest prefix ending in '
             'one of these substrings will be stripped. E.g.: '
             '"Release/../../".')
    parser.add_argument('--executable-path',
        help='Path to program executable. Used on OSX swarming bots to locate '
             'dSYM bundles for associated frameworks and bundles.')
    args = parser.parse_args()

    disable_buffering()
    set_symbolizer_path()
    asan_symbolize.demangle = True
    asan_symbolize.fix_filename_patterns = args.strip_path_prefix
    # Most source paths for Chromium binaries start with
    # /path/to/src/out/Release/../../
    asan_symbolize.fix_filename_patterns.append('Release/../../')
    binary_name_filter = None
    if platform.uname()[0] == 'Darwin':
        # The hash-name workaround is only installed on OSX.
        binary_name_filter = make_chrome_osx_binary_name_filter(
            chrome_product_dir_path(args.executable_path))
    loop = asan_symbolize.SymbolizationLoop(
        binary_name_filter=binary_name_filter,
        dsym_hint_producer=chrome_dsym_hints)

    if args.test_summary_json_file:
        symbolize_snippets_in_json(args.test_summary_json_file, loop)
    else:
        # Process stdin.
        asan_symbolize.logfile = sys.stdin
        loop.process_logfile()


if __name__ == '__main__':
    main()
OLD | NEW |