Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python | |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """Runs strace or dtrace on a test and processes the logs to extract the | |
| 7 dependencies from the source tree. | |
| 8 | |
| 9 Automatically extracts directories where all the files are used to make the | |
| 10 dependencies list more compact. | |
| 11 """ | |
| 12 | |
| 13 import logging | |
| 14 import optparse | |
| 15 import os | |
| 16 import re | |
| 17 import subprocess | |
| 18 import sys | |
| 19 | |
| 20 | |
| 21 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | |
| 22 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) | |
| 23 | |
| 24 | |
| 25 def isEnabledFor(level): | |
| 26 return logging.getLogger().isEnabledFor(level) | |
| 27 | |
| 28 | |
| 29 class Strace(object): | |
| 30 """strace implies linux.""" | |
| 31 IGNORED = ( | |
| 32 '/dev', | |
| 33 '/etc', | |
| 34 '/lib', | |
| 35 '/proc', | |
| 36 '/sys', | |
| 37 '/tmp', | |
| 38 '/usr', | |
| 39 '/var', | |
| 40 ) | |
| 41 | |
| 42 @staticmethod | |
| 43 def gen_trace(cmd, cwd, logname): | |
| 44 """Runs strace on an executable.""" | |
| 45 silent = not isEnabledFor(logging.DEBUG) | |
| 46 stdout = stderr = None | |
| 47 if silent: | |
| 48 stdout = subprocess.PIPE | |
| 49 stderr = subprocess.PIPE | |
| 50 trace_cmd = ['strace', '-f', '-e', 'trace=open', '-o', logname] | |
| 51 cmd = [os.path.normpath(os.path.join(cwd, c)) for c in cmd] | |
| 52 p = subprocess.Popen( | |
| 53 trace_cmd + cmd, cwd=cwd, stdout=stdout, stderr=stderr) | |
| 54 out, err = p.communicate() | |
| 55 if p.returncode != 0: | |
| 56 print 'Failure: %d' % p.returncode | |
| 57 # pylint: disable=E1103 | |
| 58 print ''.join(out.splitlines(True)[-100:]) | |
| 59 print ''.join(err.splitlines(True)[-100:]) | |
| 60 return p.returncode | |
| 61 | |
| 62 @staticmethod | |
| 63 def parse_log(filename, blacklist): | |
| 64 """Processes a strace log and returns the files opened and the files that do | |
| 65 not exist. | |
| 66 | |
| 67 Most of the time, files that do not exist are temporary test files that | |
| 68 should be put in /tmp instead. See http://crbug.com/116251 | |
| 69 | |
| 70 TODO(maruel): Process chdir() calls so relative paths can be processed. | |
| 71 """ | |
| 72 files = set() | |
| 73 non_existent = set() | |
| 74 # 1=pid, 2=filepath, 3=mode, 4=result | |
| 75 RE = re.compile( | |
| 76 # PID open(PATH, MODE) = RESULT | |
| 77 r'^(\d+)\s+open\("([^"]+)", ([^\)]+)\)\s+= (.+)$') | |
| 78 for line in open(filename): | |
| 79 m = RE.match(line) | |
| 80 if not m: | |
| 81 continue | |
| 82 if m.group(4).startswith('-1') or 'O_DIRECTORY' in m.group(3): | |
| 83 # Not present or a directory. | |
| 84 continue | |
| 85 filepath = m.group(2) | |
| 86 if blacklist(filepath): | |
| 87 continue | |
| 88 if filepath not in files and filepath not in non_existent: | |
| 89 if os.path.isfile(filepath): | |
| 90 files.add(filepath) | |
| 91 else: | |
| 92 non_existent.add(filepath) | |
| 93 return files, non_existent | |
| 94 | |
| 95 | |
| 96 class Dtrace(object): | |
| 97 """Uses DTrace framework through dtrace. Requires root access. | |
| 98 | |
| 99 Implies Mac OSX. | |
| 100 | |
| 101 dtruss can't be used because it has compatibility issues with python. | |
| 102 """ | |
| 103 IGNORED = ( | |
| 104 '/.vol', | |
| 105 '/Library', | |
| 106 '/System', | |
| 107 '/dev', | |
| 108 '/etc', | |
| 109 '/private/var', | |
| 110 '/tmp', | |
| 111 '/usr', | |
| 112 '/var', | |
| 113 ) | |
| 114 | |
| 115 # pylint: disable=C0301 | |
| 116 # To understand the following code, you'll want to take a look at: | |
| 117 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.h tml | |
| 118 # and | |
| 119 # https://wikis.oracle.com/display/DTrace/Variables | |
| 120 D_CODE = """ | |
| 121 /* Child process tracking. | |
| 122 * I'm really depressed that I need to do it myself. */ | |
| 123 dtrace:::BEGIN { | |
|
Roger Tawa OOO till Jul 10th
2012/03/15 18:07:16
triple ::: ? is that correct? same at line 134.
M-A Ruel
2012/03/15 18:09:55
Yep, that's how it works.
| |
| 124 trackedpid[ppid] = 1; | |
| 125 trackedpid[pid] = 1; | |
| 126 /* Signal gen_trace() that we are ready to trace. */ | |
| 127 printf("Go! 1 %d:%d %s", ppid, pid, execname); | |
| 128 } | |
| 129 | |
| 130 /* Make sure all child processes are tracked. This is not very efficient | |
| 131 * but for our use case, it's fine enough. | |
| 132 * TODO(maruel): We should properly track fork, execve, vfork and friends | |
| 133 * instead. */ | |
| 134 syscall:::entry /trackedpid[ppid]/ { | |
| 135 trackedpid[pid] = 1; | |
|
Roger Tawa OOO till Jul 10th
2012/03/15 18:07:16
should you be using ppid here?
M-A Ruel
2012/03/15 18:09:55
No. What it is doing is: if the parent's process i
| |
| 136 } | |
| 137 syscall::exit:entry /trackedpid[pid]/ { | |
| 138 trackedpid[pid] = 0; | |
| 139 } | |
| 140 | |
| 141 /* Finally what we care about! */ | |
| 142 syscall::open*:entry /trackedpid[pid]/ { | |
| 143 self->arg0 = copyinstr(arg0); | |
| 144 self->arg1 = arg1; | |
| 145 self->arg2 = arg2; | |
| 146 } | |
| 147 syscall::open*:return /trackedpid[pid]/ { | |
| 148 printf("%d:%d \\"%s\\"; \\"%s\\"; \\"%d\\"; \\"%d\\" = %d", | |
| 149 ppid, pid, execname, self->arg0, self->arg1, self->arg2, errno); | |
| 150 self->arg0 = 0; | |
| 151 self->arg1 = 0; | |
| 152 self->arg2 = 0; | |
| 153 } | |
| 154 """ | |
| 155 | |
| 156 @classmethod | |
| 157 def gen_trace(cls, cmd, cwd, logname): | |
| 158 """Runs dtrace on an executable.""" | |
| 159 silent = not isEnabledFor(logging.DEBUG) | |
| 160 print 'Running: %s' % cmd | |
| 161 signal = 'Go!' | |
| 162 logging.debug('Our pid: %d' % os.getpid()) | |
| 163 | |
| 164 # Part 1: start the child process. | |
| 165 stdout = stderr = None | |
| 166 if silent: | |
| 167 stdout = subprocess.PIPE | |
| 168 stderr = subprocess.PIPE | |
| 169 child_cmd = [ | |
| 170 sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'), | |
| 171 ] | |
| 172 child = subprocess.Popen( | |
| 173 child_cmd + cmd, | |
| 174 stdin=subprocess.PIPE, | |
| 175 stdout=stdout, | |
| 176 stderr=stderr, | |
| 177 cwd=cwd) | |
| 178 logging.debug('Started child pid: %d' % child.pid) | |
| 179 | |
| 180 # Part 2: start dtrace process. | |
| 181 trace_cmd = [ | |
| 182 'sudo', | |
| 183 'dtrace', | |
| 184 '-x', 'dynvarsize=4m', | |
| 185 '-x', 'evaltime=exec', | |
| 186 '-n', cls.D_CODE, | |
| 187 '-o', '/dev/stderr', | |
| 188 '-p', str(child.pid), | |
| 189 ] | |
| 190 with open(logname, 'w') as logfile: | |
| 191 dtrace = subprocess.Popen( | |
| 192 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT) | |
| 193 logging.debug('Started dtrace pid: %d' % dtrace.pid) | |
| 194 | |
| 195 # Part 3: Read until the Go! signal is sent. | |
| 196 with open(logname, 'r') as logfile: | |
| 197 while True: | |
| 198 x = logfile.readline() | |
| 199 if signal in x: | |
| 200 break | |
| 201 | |
| 202 # Part 4: We can now tell our child to go. | |
| 203 child.communicate(signal) | |
| 204 | |
| 205 dtrace.wait() | |
| 206 if dtrace.returncode != 0: | |
| 207 print 'Failure: %d' % dtrace.returncode | |
| 208 with open(logname) as logfile: | |
| 209 print ''.join(logfile.readlines()[-100:]) | |
| 210 # Find a better way. | |
| 211 os.remove(logname) | |
| 212 return dtrace.returncode | |
| 213 | |
| 214 @staticmethod | |
| 215 def parse_log(filename, blacklist): | |
| 216 """Processes a dtrace log and returns the files opened and the files that do | |
| 217 not exist. | |
| 218 | |
| 219 Most of the time, files that do not exist are temporary test files that | |
| 220 should be put in /tmp instead. See http://crbug.com/116251 | |
| 221 | |
| 222 TODO(maruel): Process chdir() calls so relative paths can be processed. | |
| 223 """ | |
| 224 files = set() | |
| 225 non_existent = set() | |
| 226 ignored = set() | |
| 227 # 1=filepath, 2=flags, 3=mode, 4=result | |
| 228 RE = re.compile( | |
| 229 # CPU ID PROBE PPID PID EXECNAME | |
| 230 r'^\s+\d+\s+\d+\s+open[^\:]+:return \d+:\d+ \"[^\"]+\"; ' | |
| 231 # PATH FLAGS MODE RESULT | |
| 232 r'\"([^\"]+)\"; \"([^\"]+)\"; \"([^\"]+)\" \= (.+)$') | |
| 233 for line in open(filename): | |
| 234 m = RE.match(line) | |
| 235 if not m: | |
| 236 continue | |
| 237 if not m.group(4).startswith('0'): | |
| 238 # Called failed. | |
| 239 continue | |
| 240 filepath = m.group(1) | |
| 241 if blacklist(filepath): | |
| 242 continue | |
| 243 if (filepath not in files and | |
| 244 filepath not in non_existent and | |
| 245 filepath not in ignored): | |
| 246 if os.path.isfile(filepath): | |
| 247 files.add(filepath) | |
| 248 elif not os.path.isdir(filepath): | |
| 249 # Silently ignore directories. | |
| 250 non_existent.add(filepath) | |
| 251 else: | |
| 252 ignored.add(filepath) | |
| 253 return files, non_existent | |
| 254 | |
| 255 | |
| 256 def relevant_files(files, root): | |
| 257 """Trims the list of files to keep the expected files and unexpected files. | |
| 258 | |
| 259 Unexpected files are files that are not based inside the |root| directory. | |
| 260 """ | |
| 261 expected = [] | |
| 262 unexpected = [] | |
| 263 for f in files: | |
| 264 if f.startswith(root): | |
| 265 expected.append(f[len(root):]) | |
| 266 else: | |
| 267 unexpected.append(f) | |
| 268 return sorted(set(expected)), sorted(set(unexpected)) | |
| 269 | |
| 270 | |
| 271 def extract_directories(files, root): | |
| 272 """Detects if all the files in a directory were loaded and if so, replace the | |
| 273 individual files by the directory entry. | |
| 274 """ | |
| 275 directories = set(os.path.dirname(f) for f in files) | |
| 276 files = set(files) | |
| 277 for directory in sorted(directories, reverse=True): | |
| 278 actual = set( | |
| 279 os.path.join(directory, f) for f in | |
| 280 os.listdir(os.path.join(root, directory)) | |
| 281 if not f.endswith(('.svn', '.pyc')) | |
| 282 ) | |
| 283 if not (actual - files): | |
| 284 files -= actual | |
| 285 files.add(directory + '/') | |
| 286 return sorted(files) | |
| 287 | |
| 288 | |
| 289 def trace_inputs(log, cmd, api): | |
| 290 """Tries to load the logs if available. If not, trace the test.""" | |
| 291 logname = os.path.join(BASE_DIR, os.path.basename(log)) | |
| 292 if not os.path.isfile(logname): | |
| 293 print 'Tracing... %s' % cmd | |
| 294 returncode = api.gen_trace(cmd, ROOT_DIR, logname) | |
| 295 if returncode: | |
| 296 return returncode | |
| 297 | |
| 298 def blacklist(f): | |
| 299 """Strips ignored paths.""" | |
| 300 return f.startswith(api.IGNORED) or f.endswith('.pyc') | |
| 301 | |
| 302 print 'Loading traces... %s' % logname | |
| 303 files, non_existent = api.parse_log(logname, blacklist) | |
| 304 print('Total: %d' % len(files)) | |
| 305 print('Non existent: %d' % len(non_existent)) | |
| 306 for f in non_existent: | |
| 307 print(' %s' % f) | |
| 308 | |
| 309 expected, unexpected = relevant_files(files, ROOT_DIR + '/') | |
| 310 if unexpected: | |
| 311 print('Unexpected: %d' % len(unexpected)) | |
| 312 for f in unexpected: | |
| 313 print(' %s' % f) | |
| 314 | |
| 315 simplified = extract_directories(expected, ROOT_DIR) | |
| 316 print('Interesting: %d reduced to %d' % (len(expected), len(simplified))) | |
| 317 for f in simplified: | |
| 318 print(' %s' % f) | |
| 319 | |
| 320 return 0 | |
| 321 | |
| 322 | |
| 323 def main(): | |
| 324 parser = optparse.OptionParser( | |
| 325 usage='%prog <options> [cmd line...]') | |
| 326 parser.allow_interspersed_args = False | |
| 327 parser.add_option( | |
| 328 '-v', '--verbose', action='count', default=0, help='Use multiple times') | |
| 329 parser.add_option('-l', '--log', help='Log file') | |
| 330 | |
| 331 options, args = parser.parse_args() | |
| 332 level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)] | |
| 333 logging.basicConfig( | |
| 334 level=level, | |
| 335 format='%(levelname)5s %(module)15s(%(lineno)3d):%(message)s') | |
| 336 | |
| 337 if not args: | |
| 338 parser.error('Must supply a command to run') | |
| 339 if not options.log: | |
| 340 parser.error('Must supply a log file with -l') | |
| 341 | |
| 342 if sys.platform == 'linux2': | |
| 343 api = Strace | |
| 344 elif sys.platform == 'darwin': | |
| 345 api = Dtrace | |
| 346 else: | |
| 347 print >> sys.stderr, 'Unsupported platform' | |
| 348 return 1 | |
| 349 | |
| 350 return trace_inputs(options.log, args, api) | |
| 351 | |
| 352 | |
| 353 if __name__ == '__main__': | |
| 354 sys.exit(main()) | |
| OLD | NEW |