Chromium Code Reviews| Index: tools/isolate/trace_inputs.py |
| diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..97a3928a1fe0c580906e35a209507aa277bcbbb9 |
| --- /dev/null |
| +++ b/tools/isolate/trace_inputs.py |
| @@ -0,0 +1,354 @@ |
| +#!/usr/bin/env python |
| +# Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +"""Runs strace or dtrace on a test and processes the logs to extract the |
| +dependencies from the source tree. |
| + |
| +Automatically extracts directories where all the files are used to make the |
| +dependencies list more compact. |
| +""" |
| + |
| +import logging |
| +import optparse |
| +import os |
| +import re |
| +import subprocess |
| +import sys |
| + |
| + |
| +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) |
| +ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) |
| + |
| + |
| +def isEnabledFor(level): |
| + return logging.getLogger().isEnabledFor(level) |
| + |
| + |
| +class Strace(object): |
| + """strace implies linux.""" |
| + IGNORED = ( |
| + '/dev', |
| + '/etc', |
| + '/lib', |
| + '/proc', |
| + '/sys', |
| + '/tmp', |
| + '/usr', |
| + '/var', |
| + ) |
| + |
| + @staticmethod |
| + def gen_trace(cmd, cwd, logname): |
| + """Runs strace on an executable.""" |
| + silent = not isEnabledFor(logging.DEBUG) |
| + stdout = stderr = None |
| + if silent: |
| + stdout = subprocess.PIPE |
| + stderr = subprocess.PIPE |
| + trace_cmd = ['strace', '-f', '-e', 'trace=open', '-o', logname] |
| + cmd = [os.path.normpath(os.path.join(cwd, c)) for c in cmd] |
| + p = subprocess.Popen( |
| + trace_cmd + cmd, cwd=cwd, stdout=stdout, stderr=stderr) |
| + out, err = p.communicate() |
| + if p.returncode != 0: |
| + print 'Failure: %d' % p.returncode |
| + # pylint: disable=E1103 |
| + print ''.join(out.splitlines(True)[-100:]) |
| + print ''.join(err.splitlines(True)[-100:]) |
| + return p.returncode |
| + |
| + @staticmethod |
| + def parse_log(filename, blacklist): |
| + """Processes a strace log and returns the files opened and the files that do |
| + not exist. |
| + |
| + Most of the time, files that do not exist are temporary test files that |
| + should be put in /tmp instead. See http://crbug.com/116251 |
| + |
| + TODO(maruel): Process chdir() calls so relative paths can be processed. |
| + """ |
| + files = set() |
| + non_existent = set() |
| + # 1=pid, 2=filepath, 3=mode, 4=result |
| + RE = re.compile( |
| + # PID open(PATH, MODE) = RESULT |
| + r'^(\d+)\s+open\("([^"]+)", ([^\)]+)\)\s+= (.+)$') |
| + for line in open(filename): |
| + m = RE.match(line) |
| + if not m: |
| + continue |
| + if m.group(4).startswith('-1') or 'O_DIRECTORY' in m.group(3): |
| + # Not present or a directory. |
| + continue |
| + filepath = m.group(2) |
| + if blacklist(filepath): |
| + continue |
| + if filepath not in files and filepath not in non_existent: |
| + if os.path.isfile(filepath): |
| + files.add(filepath) |
| + else: |
| + non_existent.add(filepath) |
| + return files, non_existent |
| + |
| + |
| +class Dtrace(object): |
| + """Uses DTrace framework through dtrace. Requires root access. |
| + |
| + Implies Mac OSX. |
| + |
| + dtruss can't be used because it has compatibility issues with python. |
| + """ |
| + IGNORED = ( |
| + '/.vol', |
| + '/Library', |
| + '/System', |
| + '/dev', |
| + '/etc', |
| + '/private/var', |
| + '/tmp', |
| + '/usr', |
| + '/var', |
| + ) |
| + |
| + # pylint: disable=C0301 |
| + # To understand the following code, you'll want to take a look at: |
| + # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.html |
| + # and |
| + # https://wikis.oracle.com/display/DTrace/Variables |
| + D_CODE = """ |
| + /* Child process tracking. |
| + * I'm really depressed that I need to do it myself. */ |
| + dtrace:::BEGIN { |
|
Roger Tawa OOO till Jul 10th
2012/03/15 18:07:16
triple ::: ? is that correct? same at line 134.
M-A Ruel
2012/03/15 18:09:55
Yep, that's how it works.
|
| + trackedpid[ppid] = 1; |
| + trackedpid[pid] = 1; |
| + /* Signal gen_trace() that we are ready to trace. */ |
| + printf("Go! 1 %d:%d %s", ppid, pid, execname); |
| + } |
| + |
| + /* Make sure all child processes are tracked. This is not very efficient |
| + * but for our use case, it's fine enough. |
| + * TODO(maruel): We should properly track fork, execve, vfork and friends |
| + * instead. */ |
| + syscall:::entry /trackedpid[ppid]/ { |
| + trackedpid[pid] = 1; |
|
Roger Tawa OOO till Jul 10th
2012/03/15 18:07:16
should you be using ppid here?
M-A Ruel
2012/03/15 18:09:55
No. What it is doing is: if the parent's process i
|
| + } |
| + syscall::exit:entry /trackedpid[pid]/ { |
| + trackedpid[pid] = 0; |
| + } |
| + |
| + /* Finally what we care about! */ |
| + syscall::open*:entry /trackedpid[pid]/ { |
| + self->arg0 = copyinstr(arg0); |
| + self->arg1 = arg1; |
| + self->arg2 = arg2; |
| + } |
| + syscall::open*:return /trackedpid[pid]/ { |
| + printf("%d:%d \\"%s\\"; \\"%s\\"; \\"%d\\"; \\"%d\\" = %d", |
| + ppid, pid, execname, self->arg0, self->arg1, self->arg2, errno); |
| + self->arg0 = 0; |
| + self->arg1 = 0; |
| + self->arg2 = 0; |
| + } |
| + """ |
| + |
| + @classmethod |
| + def gen_trace(cls, cmd, cwd, logname): |
| + """Runs dtrace on an executable.""" |
| + silent = not isEnabledFor(logging.DEBUG) |
| + print 'Running: %s' % cmd |
| + signal = 'Go!' |
| + logging.debug('Our pid: %d' % os.getpid()) |
| + |
| + # Part 1: start the child process. |
| + stdout = stderr = None |
| + if silent: |
| + stdout = subprocess.PIPE |
| + stderr = subprocess.PIPE |
| + child_cmd = [ |
| + sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'), |
| + ] |
| + child = subprocess.Popen( |
| + child_cmd + cmd, |
| + stdin=subprocess.PIPE, |
| + stdout=stdout, |
| + stderr=stderr, |
| + cwd=cwd) |
| + logging.debug('Started child pid: %d' % child.pid) |
| + |
| + # Part 2: start dtrace process. |
| + trace_cmd = [ |
| + 'sudo', |
| + 'dtrace', |
| + '-x', 'dynvarsize=4m', |
| + '-x', 'evaltime=exec', |
| + '-n', cls.D_CODE, |
| + '-o', '/dev/stderr', |
| + '-p', str(child.pid), |
| + ] |
| + with open(logname, 'w') as logfile: |
| + dtrace = subprocess.Popen( |
| + trace_cmd, stdout=logfile, stderr=subprocess.STDOUT) |
| + logging.debug('Started dtrace pid: %d' % dtrace.pid) |
| + |
| + # Part 3: Read until the Go! signal is sent. |
| + with open(logname, 'r') as logfile: |
| + while True: |
| + x = logfile.readline() |
| + if signal in x: |
| + break |
| + |
| + # Part 4: We can now tell our child to go. |
| + child.communicate(signal) |
| + |
| + dtrace.wait() |
| + if dtrace.returncode != 0: |
| + print 'Failure: %d' % dtrace.returncode |
| + with open(logname) as logfile: |
| + print ''.join(logfile.readlines()[-100:]) |
| + # Find a better way. |
| + os.remove(logname) |
| + return dtrace.returncode |
| + |
| + @staticmethod |
| + def parse_log(filename, blacklist): |
| + """Processes a dtrace log and returns the files opened and the files that do |
| + not exist. |
| + |
| + Most of the time, files that do not exist are temporary test files that |
| + should be put in /tmp instead. See http://crbug.com/116251 |
| + |
| + TODO(maruel): Process chdir() calls so relative paths can be processed. |
| + """ |
| + files = set() |
| + non_existent = set() |
| + ignored = set() |
| + # 1=filepath, 2=flags, 3=mode, 4=result |
| + RE = re.compile( |
| + # CPU ID PROBE PPID PID EXECNAME |
| + r'^\s+\d+\s+\d+\s+open[^\:]+:return \d+:\d+ \"[^\"]+\"; ' |
| + # PATH FLAGS MODE RESULT |
| + r'\"([^\"]+)\"; \"([^\"]+)\"; \"([^\"]+)\" \= (.+)$') |
| + for line in open(filename): |
| + m = RE.match(line) |
| + if not m: |
| + continue |
| + if not m.group(4).startswith('0'): |
| + # Called failed. |
| + continue |
| + filepath = m.group(1) |
| + if blacklist(filepath): |
| + continue |
| + if (filepath not in files and |
| + filepath not in non_existent and |
| + filepath not in ignored): |
| + if os.path.isfile(filepath): |
| + files.add(filepath) |
| + elif not os.path.isdir(filepath): |
| + # Silently ignore directories. |
| + non_existent.add(filepath) |
| + else: |
| + ignored.add(filepath) |
| + return files, non_existent |
| + |
| + |
| +def relevant_files(files, root): |
| + """Trims the list of files to keep the expected files and unexpected files. |
| + |
| + Unexpected files are files that are not based inside the |root| directory. |
| + """ |
| + expected = [] |
| + unexpected = [] |
| + for f in files: |
| + if f.startswith(root): |
| + expected.append(f[len(root):]) |
| + else: |
| + unexpected.append(f) |
| + return sorted(set(expected)), sorted(set(unexpected)) |
| + |
| + |
| +def extract_directories(files, root): |
| + """Detects if all the files in a directory were loaded and if so, replace the |
| + individual files by the directory entry. |
| + """ |
| + directories = set(os.path.dirname(f) for f in files) |
| + files = set(files) |
| + for directory in sorted(directories, reverse=True): |
| + actual = set( |
| + os.path.join(directory, f) for f in |
| + os.listdir(os.path.join(root, directory)) |
| + if not f.endswith(('.svn', '.pyc')) |
| + ) |
| + if not (actual - files): |
| + files -= actual |
| + files.add(directory + '/') |
| + return sorted(files) |
| + |
| + |
| +def trace_inputs(log, cmd, api): |
| + """Tries to load the logs if available. If not, trace the test.""" |
| + logname = os.path.join(BASE_DIR, os.path.basename(log)) |
| + if not os.path.isfile(logname): |
| + print 'Tracing... %s' % cmd |
| + returncode = api.gen_trace(cmd, ROOT_DIR, logname) |
| + if returncode: |
| + return returncode |
| + |
| + def blacklist(f): |
| + """Strips ignored paths.""" |
| + return f.startswith(api.IGNORED) or f.endswith('.pyc') |
| + |
| + print 'Loading traces... %s' % logname |
| + files, non_existent = api.parse_log(logname, blacklist) |
| + print('Total: %d' % len(files)) |
| + print('Non existent: %d' % len(non_existent)) |
| + for f in non_existent: |
| + print(' %s' % f) |
| + |
| + expected, unexpected = relevant_files(files, ROOT_DIR + '/') |
| + if unexpected: |
| + print('Unexpected: %d' % len(unexpected)) |
| + for f in unexpected: |
| + print(' %s' % f) |
| + |
| + simplified = extract_directories(expected, ROOT_DIR) |
| + print('Interesting: %d reduced to %d' % (len(expected), len(simplified))) |
| + for f in simplified: |
| + print(' %s' % f) |
| + |
| + return 0 |
| + |
| + |
| +def main(): |
| + parser = optparse.OptionParser( |
| + usage='%prog <options> [cmd line...]') |
| + parser.allow_interspersed_args = False |
| + parser.add_option( |
| + '-v', '--verbose', action='count', default=0, help='Use multiple times') |
| + parser.add_option('-l', '--log', help='Log file') |
| + |
| + options, args = parser.parse_args() |
| + level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)] |
| + logging.basicConfig( |
| + level=level, |
| + format='%(levelname)5s %(module)15s(%(lineno)3d):%(message)s') |
| + |
| + if not args: |
| + parser.error('Must supply a command to run') |
| + if not options.log: |
| + parser.error('Must supply a log file with -l') |
| + |
| + if sys.platform == 'linux2': |
| + api = Strace |
| + elif sys.platform == 'darwin': |
| + api = Dtrace |
| + else: |
| + print >> sys.stderr, 'Unsupported platform' |
| + return 1 |
| + |
| + return trace_inputs(options.log, args, api) |
| + |
| + |
| +if __name__ == '__main__': |
| + sys.exit(main()) |