| Index: tools/valgrind/asan/asan_symbolize.py
|
| diff --git a/tools/valgrind/asan/asan_symbolize.py b/tools/valgrind/asan/asan_symbolize.py
|
| new file mode 100755
|
| index 0000000000000000000000000000000000000000..010740671aae94880bfaa87f659a55054ff983ff
|
| --- /dev/null
|
| +++ b/tools/valgrind/asan/asan_symbolize.py
|
| @@ -0,0 +1,270 @@
|
| +#!/usr/bin/env python
|
| +
|
| +# Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| +# Use of this source code is governed by a BSD-style license that can be
|
| +# found in the LICENSE file.
|
| +
|
| +from third_party import asan_symbolize
|
| +
|
| +import argparse
|
| +import base64
|
| +import json
|
| +import os
|
| +import platform
|
| +import re
|
| +import subprocess
|
| +import sys
|
| +
|
| +class LineBuffered(object):
|
| + """Disable buffering on a file object."""
|
| + def __init__(self, stream):
|
| + self.stream = stream
|
| +
|
| + def write(self, data):
|
| + self.stream.write(data)
|
| + if '\n' in data:
|
| + self.stream.flush()
|
| +
|
| + def __getattr__(self, attr):
|
| + return getattr(self.stream, attr)
|
| +
|
| +
|
| +def disable_buffering():
|
| + """Makes this process and child processes stdout unbuffered."""
|
| + if not os.environ.get('PYTHONUNBUFFERED'):
|
| + # Since sys.stdout is a C++ object, it's impossible to do
|
| + # sys.stdout.write = lambda...
|
| + sys.stdout = LineBuffered(sys.stdout)
|
| + os.environ['PYTHONUNBUFFERED'] = 'x'
|
| +
|
| +
|
| +def set_symbolizer_path():
|
| + """Set the path to the llvm-symbolize binary in the Chromium source tree."""
|
| + if not os.environ.get('LLVM_SYMBOLIZER_PATH'):
|
| + script_dir = os.path.dirname(os.path.abspath(__file__))
|
| + # Assume this script resides three levels below src/ (i.e.
|
| + # src/tools/valgrind/asan/).
|
| + src_root = os.path.join(script_dir, "..", "..", "..")
|
| + symbolizer_path = os.path.join(src_root, 'third_party',
|
| + 'llvm-build', 'Release+Asserts', 'bin', 'llvm-symbolizer')
|
| + assert(os.path.isfile(symbolizer_path))
|
| + os.environ['LLVM_SYMBOLIZER_PATH'] = os.path.abspath(symbolizer_path)
|
| +
|
| +
|
| +def is_hash_name(name):
|
| + match = re.match('[0-9a-f]+$', name)
|
| + return bool(match)
|
| +
|
| +
|
| +def split_path(path):
|
| + ret = []
|
| + while True:
|
| + head, tail = os.path.split(path)
|
| + if head == path:
|
| + return [head] + ret
|
| + ret, path = [tail] + ret, head
|
| +
|
| +
|
| +def chrome_product_dir_path(exe_path):
|
| + if exe_path is None:
|
| + return None
|
| + path_parts = split_path(exe_path)
|
| + # Make sure the product dir path isn't empty if |exe_path| consists of
|
| + # a single component.
|
| + if len(path_parts) == 1:
|
| + path_parts = ['.'] + path_parts
|
| + for index, part in enumerate(path_parts):
|
| + if part.endswith('.app'):
|
| + return os.path.join(*path_parts[:index])
|
| + # If the executable isn't an .app bundle, it's a commandline binary that
|
| + # resides right in the product dir.
|
| + return os.path.join(*path_parts[:-1])
|
| +
|
| +
|
| +inode_path_cache = {}
|
| +
|
| +
|
| +def find_inode_at_path(inode, path):
|
| + if inode in inode_path_cache:
|
| + return inode_path_cache[inode]
|
| + cmd = ['find', path, '-inum', str(inode)]
|
| + find_line = subprocess.check_output(cmd).rstrip()
|
| + lines = find_line.split('\n')
|
| + ret = None
|
| + if lines:
|
| + # `find` may give us several paths (e.g. 'Chromium Framework' in the
|
| + # product dir and 'Chromium Framework' inside 'Chromium.app',
|
| + # chrome_dsym_hints() will produce correct .dSYM path for any of them.
|
| + ret = lines[0]
|
| + inode_path_cache[inode] = ret
|
| + return ret
|
| +
|
| +
|
| +# Create a binary name filter that works around https://crbug.com/444835.
|
| +# When running tests on OSX swarming servers, ASan sometimes prints paths to
|
| +# files in cache (ending with SHA1 filenames) instead of paths to hardlinks to
|
| +# those files in the product dir.
|
| +# For a given |binary_path| chrome_osx_binary_name_filter() returns one of the
|
| +# hardlinks to the same inode in |product_dir_path|.
|
| +def make_chrome_osx_binary_name_filter(product_dir_path=''):
|
| + def chrome_osx_binary_name_filter(binary_path):
|
| + basename = os.path.basename(binary_path)
|
| + if is_hash_name(basename) and product_dir_path:
|
| + inode = os.stat(binary_path).st_ino
|
| + new_binary_path = find_inode_at_path(inode, product_dir_path)
|
| + if new_binary_path:
|
| + return new_binary_path
|
| + return binary_path
|
| + return chrome_osx_binary_name_filter
|
| +
|
| +
|
| +# Construct a path to the .dSYM bundle for the given binary.
|
| +# There are three possible cases for binary location in Chromium:
|
| +# 1. The binary is a standalone executable or dynamic library in the product
|
| +# dir, the debug info is in "binary.dSYM" in the product dir.
|
| +# 2. The binary is a standalone framework or .app bundle, the debug info is in
|
| +# "Framework.framework.dSYM" or "App.app.dSYM" in the product dir.
|
| +# 3. The binary is a framework or an .app bundle within another .app bundle
|
| +# (e.g. Outer.app/Contents/Versions/1.2.3.4/Inner.app), and the debug info
|
| +# is in Inner.app.dSYM in the product dir.
|
| +# The first case is handled by llvm-symbolizer, so we only need to construct
|
| +# .dSYM paths for .app bundles and frameworks.
|
| +# We're assuming that there're no more than two nested bundles in the binary
|
| +# path. Only one of these bundles may be a framework and frameworks cannot
|
| +# contain other bundles.
|
| +def chrome_dsym_hints(binary):
|
| + path_parts = split_path(binary)
|
| + app_positions = []
|
| + framework_positions = []
|
| + for index, part in enumerate(path_parts):
|
| + if part.endswith('.app'):
|
| + app_positions.append(index)
|
| + elif part.endswith('.framework'):
|
| + framework_positions.append(index)
|
| + bundle_positions = app_positions + framework_positions
|
| + bundle_positions.sort()
|
| + assert len(bundle_positions) <= 2, \
|
| + "The path contains more than two nested bundles: %s" % binary
|
| + if len(bundle_positions) == 0:
|
| + # Case 1: this is a standalone executable or dylib.
|
| + return []
|
| + assert (not (len(app_positions) == 1 and
|
| + len(framework_positions) == 1 and
|
| + app_positions[0] > framework_positions[0])), \
|
| + "The path contains an app bundle inside a framework: %s" % binary
|
| + # Cases 2 and 3. The outermost bundle (which is the only bundle in the case 2)
|
| + # is located in the product dir.
|
| + outermost_bundle = bundle_positions[0]
|
| + product_dir = path_parts[:outermost_bundle]
|
| + # In case 2 this is the same as |outermost_bundle|.
|
| + innermost_bundle = bundle_positions[-1]
|
| + dsym_path = product_dir + [path_parts[innermost_bundle]]
|
| + result = '%s.dSYM' % os.path.join(*dsym_path)
|
| + return [result]
|
| +
|
| +
|
| +# We want our output to match base::EscapeJSONString(), which produces
|
| +# doubly-escaped strings. The first escaping pass is handled by this class. The
|
| +# second pass happens when JSON data is dumped to file.
|
| +class StringEncoder(json.JSONEncoder):
|
| + def __init__(self):
|
| + json.JSONEncoder.__init__(self)
|
| +
|
| + def encode(self, s):
|
| + assert(isinstance(s, basestring))
|
| + encoded = json.JSONEncoder.encode(self, s)
|
| + assert(len(encoded) >= 2)
|
| + assert(encoded[0] == '"')
|
| + assert(encoded[-1] == '"')
|
| + encoded = encoded[1:-1]
|
| + # Special case from base::EscapeJSONString().
|
| + encoded = encoded.replace('<', '\u003C')
|
| + return encoded
|
| +
|
| +
|
| +class JSONTestRunSymbolizer(object):
|
| + def __init__(self, symbolization_loop):
|
| + self.string_encoder = StringEncoder()
|
| + self.symbolization_loop = symbolization_loop
|
| +
|
| + def symbolize_snippet(self, snippet):
|
| + symbolized_lines = []
|
| + for line in snippet.split('\n'):
|
| + symbolized_lines += self.symbolization_loop.process_line(line)
|
| + return '\n'.join(symbolized_lines)
|
| +
|
| + def symbolize(self, test_run):
|
| + original_snippet = base64.b64decode(test_run['output_snippet_base64'])
|
| + symbolized_snippet = self.symbolize_snippet(original_snippet)
|
| + if symbolized_snippet == original_snippet:
|
| + # No sanitizer reports in snippet.
|
| + return
|
| +
|
| + test_run['original_output_snippet'] = test_run['output_snippet']
|
| + test_run['original_output_snippet_base64'] = \
|
| + test_run['output_snippet_base64']
|
| +
|
| + escaped_snippet = StringEncoder().encode(symbolized_snippet)
|
| + test_run['output_snippet'] = escaped_snippet
|
| + test_run['output_snippet_base64'] = \
|
| + base64.b64encode(symbolized_snippet)
|
| + test_run['snippet_processed_by'] = 'asan_symbolize.py'
|
| + # Originally, "lossless" refers to "no Unicode data lost while encoding the
|
| + # string". However, since we're applying another kind of transformation
|
| + # (symbolization), it doesn't seem right to consider the snippet lossless.
|
| + test_run['losless_snippet'] = False
|
| +
|
| +
|
| +def symbolize_snippets_in_json(filename, symbolization_loop):
|
| + with open(filename, 'r') as f:
|
| + json_data = json.load(f)
|
| +
|
| + test_run_symbolizer = JSONTestRunSymbolizer(symbolization_loop)
|
| + for iteration_data in json_data['per_iteration_data']:
|
| + for test_name, test_runs in iteration_data.iteritems():
|
| + for test_run in test_runs:
|
| + test_run_symbolizer.symbolize(test_run)
|
| +
|
| + with open(filename, 'w') as f:
|
| + json.dump(json_data, f, indent=3, sort_keys=True)
|
| +
|
| +
|
| +def main():
|
| + parser = argparse.ArgumentParser(description='Symbolize sanitizer reports.')
|
| + parser.add_argument('--test-summary-json-file',
|
| + help='Path to a JSON file produced by the test launcher. The script will '
|
| + 'ignore stdandard input and instead symbolize the output stnippets '
|
| + 'inside the JSON file. The result will be written back to the JSON '
|
| + 'file.')
|
| + parser.add_argument('strip_path_prefix', nargs='*',
|
| + help='When printing source file names, the longest prefix ending in one '
|
| + 'of these substrings will be stripped. E.g.: "Release/../../".')
|
| + parser.add_argument('--executable-path',
|
| + help='Path to program executable. Used on OSX swarming bots to locate '
|
| + 'dSYM bundles for associated frameworks and bundles.')
|
| + args = parser.parse_args()
|
| +
|
| + disable_buffering()
|
| + set_symbolizer_path()
|
| + asan_symbolize.demangle = True
|
| + asan_symbolize.fix_filename_patterns = args.strip_path_prefix
|
| + # Most source paths for Chromium binaries start with
|
| + # /path/to/src/out/Release/../../
|
| + asan_symbolize.fix_filename_patterns.append('Release/../../')
|
| + binary_name_filter = None
|
| + if platform.uname()[0] == 'Darwin':
|
| + binary_name_filter = make_chrome_osx_binary_name_filter(
|
| + chrome_product_dir_path(args.executable_path))
|
| + loop = asan_symbolize.SymbolizationLoop(
|
| + binary_name_filter=binary_name_filter,
|
| + dsym_hint_producer=chrome_dsym_hints)
|
| +
|
| + if args.test_summary_json_file:
|
| + symbolize_snippets_in_json(args.test_summary_json_file, loop)
|
| + else:
|
| + # Process stdin.
|
| + asan_symbolize.logfile = sys.stdin
|
| + loop.process_logfile()
|
| +
|
| +if __name__ == '__main__':
|
| + main()
|
|
|