| Index: tools/isolate/trace_inputs.py
|
| diff --git a/tools/isolate/trace_inputs.py b/tools/isolate/trace_inputs.py
|
| index 040652556353461b59e5e5ea127f01d943c437be..512a4ec67f33ce0893532866a623cdd6be9f3cf4 100755
|
| --- a/tools/isolate/trace_inputs.py
|
| +++ b/tools/isolate/trace_inputs.py
|
| @@ -29,6 +29,7 @@ def isEnabledFor(level):
|
| class Strace(object):
|
| """strace implies linux."""
|
| IGNORED = (
|
| + '/bin',
|
| '/dev',
|
| '/etc',
|
| '/lib',
|
| @@ -39,8 +40,146 @@ class Strace(object):
|
| '/var',
|
| )
|
|
|
| - @staticmethod
|
| - def gen_trace(cmd, cwd, logname):
|
| + class _Context(object):
|
| + """Processes strace log lines and keeps the sets of existing and
|
| + non-existent files accessed.
|
| +
|
| + Ignores directories.
|
| + """
|
| + # This is the most common format. pid function(args) = result
|
| + RE_HEADER = re.compile(r'^(\d+)\s+([^\(]+)\((.+?)\)\s+= (.+)$')
|
| + # An interrupted function call, only grab the minimal header.
|
| + RE_UNFINISHED = re.compile(r'^(\d+)\s+([^\(]+).*$')
|
| + UNFINISHED = ' <unfinished ...>'
|
| + # A resumed function call.
|
| + RE_RESUMED = re.compile(r'^(\d+)\s+<\.\.\. ([^ ]+) resumed> (.+)$')
|
| + # A process received a signal.
|
| + RE_SIGNAL = re.compile(r'^\d+\s+--- SIG[A-Z]+ .+ ---')
|
| + # A process didn't handle a signal.
|
| + RE_KILLED = re.compile(r'^(\d+) \+\+\+ killed by ([A-Z]+) \+\+\+$')
|
| +
|
| + # Arguments parsing.
|
| + RE_CHDIR = re.compile(r'^\"(.+?)\"$')
|
| + RE_EXECVE = re.compile(r'^\"(.+?)\", \[.+?\], \[.+?\]$')
|
| + RE_OPEN2 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+)$')
|
| + RE_OPEN3 = re.compile(r'^\"(.*?)\", ([A-Z\_\|]+), (\d+)$')
|
| + RE_RENAME = re.compile(r'^\"(.+?)\", \"(.+?)\"$')
|
| +
|
| + def __init__(self, blacklist):
|
| + self._cwd = {}
|
| + self.blacklist = blacklist
|
| + self.files = set()
|
| + self.non_existent = set()
|
| + # Key is tuple(pid as str, function name); value is the partial line.
|
| + self._pending_calls = {}
|
| +
|
| + @classmethod
|
| + def traces(cls):
|
| + prefix = 'handle_'
|
| + return [i[len(prefix):] for i in dir(cls) if i.startswith(prefix)]
|
| +
|
| + def on_line(self, line):
|
| + line = line.strip()
|
| + if self.RE_SIGNAL.match(line):
|
| + # Ignore signals.
|
| + return
|
| +
|
| + m = self.RE_KILLED.match(line)
|
| + if m:
|
| + self.handle_exit_group(int(m.group(1)), m.group(2), None, None)
|
| + return
|
| +
|
| + if line.endswith(self.UNFINISHED):
|
| + line = line[:-len(self.UNFINISHED)]
|
| + m = self.RE_UNFINISHED.match(line)
|
| + assert m, line
|
| + self._pending_calls[(m.group(1), m.group(2))] = line
|
| + return
|
| +
|
| + m = self.RE_RESUMED.match(line)
|
| + if m:
|
| + pending = self._pending_calls.pop((m.group(1), m.group(2)))
|
| + # Reconstruct the line.
|
| + line = pending + m.group(3)
|
| +
|
| + m = self.RE_HEADER.match(line)
|
| + assert m, line
|
| + return getattr(self, 'handle_%s' % m.group(2))(
|
| + int(m.group(1)),
|
| + m.group(2),
|
| + m.group(3),
|
| + m.group(4))
|
| +
|
| + def handle_chdir(self, pid, _function, args, result):
|
| + """Updates cwd."""
|
| + if result.startswith('0'):
|
| + cwd = self.RE_CHDIR.match(args).group(1)
|
| + if not cwd.startswith('/'):
|
| + cwd2 = os.path.join(self._cwd[pid], cwd)
|
| + logging.debug('handle_chdir(%d, %s) -> %s' % (pid, cwd, cwd2))
|
| + self._cwd[pid] = cwd2
|
| + else:
|
| + logging.debug('handle_chdir(%d, %s)' % (pid, cwd))
|
| + self._cwd[pid] = cwd
|
| + else:
|
| + assert False, 'Unexecpected fail: %s' % result
|
| +
|
| + def handle_clone(self, pid, _function, _args, result):
|
| + """Transfers cwd."""
|
| + if result == '? ERESTARTNOINTR (To be restarted)':
|
| + return
|
| + self._cwd[int(result)] = self._cwd[pid]
|
| +
|
| + def handle_execve(self, pid, _function, args, result):
|
| + self._handle_file(pid, self.RE_EXECVE.match(args).group(1), result)
|
| +
|
| + def handle_exit_group(self, pid, _function, _args, _result):
|
| + """Removes cwd."""
|
| + del self._cwd[pid]
|
| +
|
| + @staticmethod
|
| + def handle_fork(_pid, _function, args, result):
|
| + assert False, (args, result)
|
| +
|
| + def handle_open(self, pid, _function, args, result):
|
| + args = (self.RE_OPEN3.match(args) or self.RE_OPEN2.match(args)).groups()
|
| + if 'O_DIRECTORY' in args[1]:
|
| + return
|
| + self._handle_file(pid, args[0], result)
|
| +
|
| + def handle_rename(self, pid, _function, args, result):
|
| + args = self.RE_RENAME.match(args).groups()
|
| + self._handle_file(pid, args[0], result)
|
| + self._handle_file(pid, args[1], result)
|
| +
|
| + @staticmethod
|
| + def handle_stat64(_pid, _function, args, result):
|
| + assert False, (args, result)
|
| +
|
| + @staticmethod
|
| + def handle_vfork(_pid, _function, args, result):
|
| + assert False, (args, result)
|
| +
|
| + def _handle_file(self, pid, filepath, result):
|
| + if result.startswith('-1'):
|
| + return
|
| + if not filepath.startswith('/'):
|
| + filepath2 = os.path.join(self._cwd[pid], filepath)
|
| + logging.debug('_handle_file(%d, %s) -> %s' % (pid, filepath, filepath2))
|
| + filepath = filepath2
|
| + else:
|
| + logging.debug('_handle_file(%d, %s)' % (pid, filepath))
|
| +
|
| + if self.blacklist(filepath):
|
| + return
|
| + if filepath not in self.files and filepath not in self.non_existent:
|
| + if os.path.isfile(filepath):
|
| + self.files.add(filepath)
|
| + else:
|
| + self.non_existent.add(filepath)
|
| +
|
| + @classmethod
|
| + def gen_trace(cls, cmd, cwd, logname):
|
| """Runs strace on an executable."""
|
| logging.info('gen_trace(%s, %s, %s)' % (cmd, cwd, logname))
|
| silent = not isEnabledFor(logging.INFO)
|
| @@ -48,7 +187,8 @@ class Strace(object):
|
| if silent:
|
| stdout = subprocess.PIPE
|
| stderr = subprocess.PIPE
|
| - trace_cmd = ['strace', '-f', '-e', 'trace=open,chdir', '-o', logname]
|
| + traces = ','.join(cls._Context.traces())
|
| + trace_cmd = ['strace', '-f', '-e', 'trace=%s' % traces, '-o', logname]
|
| p = subprocess.Popen(
|
| trace_cmd + cmd, cwd=cwd, stdout=stdout, stderr=stderr)
|
| out, err = p.communicate()
|
| @@ -60,7 +200,8 @@ class Strace(object):
|
| with open(logname) as f:
|
| content = f.read()
|
| with open(logname, 'w') as f:
|
| - f.write('0 chdir("%s") = 0\n' % cwd)
|
| + pid = content.split(' ', 1)[0]
|
| + f.write('%s chdir("%s") = 0\n' % (pid, cwd))
|
| f.write(content)
|
|
|
| if p.returncode != 0:
|
| @@ -72,53 +213,24 @@ class Strace(object):
|
| print ''.join(err.splitlines(True)[-100:])
|
| return p.returncode
|
|
|
| - @staticmethod
|
| - def parse_log(filename, blacklist):
|
| + @classmethod
|
| + def parse_log(cls, filename, blacklist):
|
| """Processes a strace log and returns the files opened and the files that do
|
| not exist.
|
|
|
| + It does not track directories.
|
| +
|
| Most of the time, files that do not exist are temporary test files that
|
| should be put in /tmp instead. See http://crbug.com/116251
|
| """
|
| logging.info('parse_log(%s, %s)' % (filename, blacklist))
|
| - files = set()
|
| - non_existent = set()
|
| - # 1=pid, 2=filepath, 3=mode, 4=result
|
| - re_open = re.compile(
|
| - # PID open(PATH, MODE) = RESULT
|
| - r'^(\d+)\s+open\("([^"]+)", ([^\)]+)\)\s+= (.+)$')
|
| - # 1=pid 2=path 3=result
|
| - re_chdir = re.compile(
|
| - # PID chdir(PATH) = RESULT
|
| - r'^(\d+)\s+chdir\("([^"]+)"\)\s+= (.+)$')
|
| -
|
| - # TODO(maruel): This code is totally wrong. cwd is a process local variable
|
| - # so this needs to be a dict with key = pid.
|
| - cwd = None
|
| + context = cls._Context(blacklist)
|
| for line in open(filename):
|
| - m = re_open.match(line)
|
| - if m:
|
| - if m.group(4).startswith('-1') or 'O_DIRECTORY' in m.group(3):
|
| - # Not present or a directory.
|
| - continue
|
| - filepath = m.group(2)
|
| - if not filepath.startswith('/'):
|
| - filepath = os.path.join(cwd, filepath)
|
| - if blacklist(filepath):
|
| - continue
|
| - if filepath not in files and filepath not in non_existent:
|
| - if os.path.isfile(filepath):
|
| - files.add(filepath)
|
| - else:
|
| - non_existent.add(filepath)
|
| - m = re_chdir.match(line)
|
| - if m:
|
| - if m.group(3).startswith('0'):
|
| - cwd = m.group(2)
|
| - else:
|
| - assert False, 'Unexecpected fail: %s' % line
|
| -
|
| - return files, non_existent
|
| + context.on_line(line)
|
| + # Resolve any symlink we hit.
|
| + return (
|
| + set(os.path.realpath(f) for f in context.files),
|
| + set(os.path.realpath(f) for f in context.non_existent))
|
|
|
|
|
| class Dtrace(object):
|
| @@ -316,7 +428,9 @@ def relevant_files(files, root):
|
| unexpected = []
|
| for f in files:
|
| if f.startswith(root):
|
| - expected.append(f[len(root):])
|
| + f = f[len(root):]
|
| + assert f
|
| + expected.append(f)
|
| else:
|
| unexpected.append(f)
|
| return sorted(set(expected)), sorted(set(unexpected))
|
| @@ -347,13 +461,17 @@ def trace_inputs(
|
| Symlinks are not processed at all.
|
| """
|
| logging.debug(
|
| - 'trace_inputs(%s, %s, %s, %s, %s)' % (
|
| - logfile, cmd, root_dir, gyp_proj_dir, product_dir))
|
| + 'trace_inputs(%s, %s, %s, %s, %s, %s)' % (
|
| + logfile, cmd, root_dir, gyp_proj_dir, product_dir, force_trace))
|
|
|
| # It is important to have unambiguous path.
|
| assert os.path.isabs(root_dir), root_dir
|
| assert os.path.isabs(logfile), logfile
|
| - assert os.path.isabs(cmd[0]), cmd[0]
|
| + assert (
|
| + (os.path.isfile(logfile) and not force_trace) or os.path.isabs(cmd[0])
|
| + ), cmd[0]
|
| + # Resolve any symlink; Strace.parse_log() returns real paths too.
|
| + root_dir = os.path.realpath(root_dir)
|
|
|
| def print_if(txt):
|
| if gyp_proj_dir is None:
|
| @@ -371,7 +489,11 @@ def trace_inputs(
|
| if os.path.isfile(logfile):
|
| os.remove(logfile)
|
| print_if('Tracing... %s' % cmd)
|
| - returncode = api.gen_trace(cmd, root_dir, logfile)
|
| + cwd = root_dir
|
| + # TODO(maruel): If --gyp is specified, use it as the cwd.
|
| + #if gyp_proj_dir:
|
| + # cwd = os.path.join(cwd, gyp_proj_dir)
|
| + returncode = api.gen_trace(cmd, cwd, logfile)
|
| if returncode and not force_trace:
|
| return returncode
|
|
|
| @@ -400,6 +522,7 @@ def trace_inputs(
|
|
|
| if gyp_proj_dir is not None:
|
| def cleanuppath(x):
|
| + """Cleans up a relative path."""
|
| if x:
|
| x = x.rstrip('/')
|
| if x == '.':
|
| @@ -413,10 +536,12 @@ def trace_inputs(
|
|
|
| def fix(f):
|
| """Bases the file on the most restrictive variable."""
|
| + logging.debug('fix(%s)' % f)
|
| if product_dir and f.startswith(product_dir):
|
| return '<(PRODUCT_DIR)/%s' % f[len(product_dir):]
|
| elif gyp_proj_dir and f.startswith(gyp_proj_dir):
|
| - return f[len(gyp_proj_dir):]
|
| + # May be empty if the whole directory containing the gyp file is needed.
|
| + return f[len(gyp_proj_dir):] or './'
|
| else:
|
| return '<(DEPTH)/%s' % f
|
|
|
| @@ -456,7 +581,8 @@ def main():
|
| parser.add_option(
|
| '--root-dir', default=ROOT_DIR,
|
| help='Root directory to base everything off. Default: %default')
|
| - parser.add_option('-f', '--force', help='Force to retrace the file')
|
| + parser.add_option(
|
| + '-f', '--force', action='store_true', help='Force to retrace the file')
|
|
|
| options, args = parser.parse_args()
|
| level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]
|
| @@ -464,12 +590,17 @@ def main():
|
| level=level,
|
| format='%(levelname)5s %(module)15s(%(lineno)3d):%(message)s')
|
|
|
| - if not args:
|
| - parser.error('Must supply a command to run')
|
| if not options.log:
|
| parser.error('Must supply a log file with -l')
|
| + if not args:
|
| + if not os.path.isfile(options.log) or options.force:
|
| + parser.error('Must supply a command to run')
|
| + else:
|
| + args[0] = os.path.abspath(args[0])
|
| +
|
| + if options.root_dir:
|
| + options.root_dir = os.path.abspath(options.root_dir)
|
|
|
| - args[0] = os.path.abspath(args[0])
|
| return trace_inputs(
|
| os.path.abspath(options.log),
|
| args,
|
|
|