Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(87)

Side by Side Diff: tools/isolate/trace_inputs.py

Issue 9706058: Implement dtrace log parsing for MacOSX. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « tools/isolate/trace_child_process.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Runs strace or dtrace on a test and processes the logs to extract the
7 dependencies from the source tree.
8
9 Automatically extracts directories where all the files are used to make the
10 dependencies list more compact.
11 """
12
13 import logging
14 import optparse
15 import os
16 import re
17 import subprocess
18 import sys
19
20
21 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
22 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR))
23
24
25 def isEnabledFor(level):
26 return logging.getLogger().isEnabledFor(level)
27
28
29 class Strace(object):
30 """strace implies linux."""
31 IGNORED = (
32 '/dev',
33 '/etc',
34 '/lib',
35 '/proc',
36 '/sys',
37 '/tmp',
38 '/usr',
39 '/var',
40 )
41
42 @staticmethod
43 def gen_trace(cmd, cwd, logname):
44 """Runs strace on an executable."""
45 silent = not isEnabledFor(logging.DEBUG)
46 stdout = stderr = None
47 if silent:
48 stdout = subprocess.PIPE
49 stderr = subprocess.PIPE
50 trace_cmd = ['strace', '-f', '-e', 'trace=open', '-o', logname]
51 cmd = [os.path.normpath(os.path.join(cwd, c)) for c in cmd]
52 p = subprocess.Popen(
53 trace_cmd + cmd, cwd=cwd, stdout=stdout, stderr=stderr)
54 out, err = p.communicate()
55 if p.returncode != 0:
56 print 'Failure: %d' % p.returncode
57 # pylint: disable=E1103
58 print ''.join(out.splitlines(True)[-100:])
59 print ''.join(err.splitlines(True)[-100:])
60 return p.returncode
61
62 @staticmethod
63 def parse_log(filename, blacklist):
64 """Processes a strace log and returns the files opened and the files that do
65 not exist.
66
67 Most of the time, files that do not exist are temporary test files that
68 should be put in /tmp instead. See http://crbug.com/116251
69
70 TODO(maruel): Process chdir() calls so relative paths can be processed.
71 """
72 files = set()
73 non_existent = set()
74 # 1=pid, 2=filepath, 3=mode, 4=result
75 RE = re.compile(
76 # PID open(PATH, MODE) = RESULT
77 r'^(\d+)\s+open\("([^"]+)", ([^\)]+)\)\s+= (.+)$')
78 for line in open(filename):
79 m = RE.match(line)
80 if not m:
81 continue
82 if m.group(4).startswith('-1') or 'O_DIRECTORY' in m.group(3):
83 # Not present or a directory.
84 continue
85 filepath = m.group(2)
86 if blacklist(filepath):
87 continue
88 if filepath not in files and filepath not in non_existent:
89 if os.path.isfile(filepath):
90 files.add(filepath)
91 else:
92 non_existent.add(filepath)
93 return files, non_existent
94
95
96 class Dtrace(object):
97 """Uses DTrace framework through dtrace. Requires root access.
98
99 Implies Mac OSX.
100
101 dtruss can't be used because it has compatibility issues with python.
102 """
103 IGNORED = (
104 '/.vol',
105 '/Library',
106 '/System',
107 '/dev',
108 '/etc',
109 '/private/var',
110 '/tmp',
111 '/usr',
112 '/var',
113 )
114
115 # pylint: disable=C0301
116 # To understand the following code, you'll want to take a look at:
117 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.h tml
118 # and
119 # https://wikis.oracle.com/display/DTrace/Variables
120 D_CODE = """
121 /* Child process tracking.
122 * I'm really depressed that I need to do it myself. */
123 dtrace:::BEGIN {
Roger Tawa OOO till Jul 10th 2012/03/15 18:07:16 triple ::: ? is that correct? same at line 134.
M-A Ruel 2012/03/15 18:09:55 Yep, that's how it works.
124 trackedpid[ppid] = 1;
125 trackedpid[pid] = 1;
126 /* Signal gen_trace() that we are ready to trace. */
127 printf("Go! 1 %d:%d %s", ppid, pid, execname);
128 }
129
130 /* Make sure all child processes are tracked. This is not very efficient
131 * but for our use case, it's fine enough.
132 * TODO(maruel): We should properly track fork, execve, vfork and friends
133 * instead. */
134 syscall:::entry /trackedpid[ppid]/ {
135 trackedpid[pid] = 1;
Roger Tawa OOO till Jul 10th 2012/03/15 18:07:16 should you be using ppid here?
M-A Ruel 2012/03/15 18:09:55 No. What it is doing is: if the parent's process i
136 }
137 syscall::exit:entry /trackedpid[pid]/ {
138 trackedpid[pid] = 0;
139 }
140
141 /* Finally what we care about! */
142 syscall::open*:entry /trackedpid[pid]/ {
143 self->arg0 = copyinstr(arg0);
144 self->arg1 = arg1;
145 self->arg2 = arg2;
146 }
147 syscall::open*:return /trackedpid[pid]/ {
148 printf("%d:%d \\"%s\\"; \\"%s\\"; \\"%d\\"; \\"%d\\" = %d",
149 ppid, pid, execname, self->arg0, self->arg1, self->arg2, errno);
150 self->arg0 = 0;
151 self->arg1 = 0;
152 self->arg2 = 0;
153 }
154 """
155
156 @classmethod
157 def gen_trace(cls, cmd, cwd, logname):
158 """Runs dtrace on an executable."""
159 silent = not isEnabledFor(logging.DEBUG)
160 print 'Running: %s' % cmd
161 signal = 'Go!'
162 logging.debug('Our pid: %d' % os.getpid())
163
164 # Part 1: start the child process.
165 stdout = stderr = None
166 if silent:
167 stdout = subprocess.PIPE
168 stderr = subprocess.PIPE
169 child_cmd = [
170 sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'),
171 ]
172 child = subprocess.Popen(
173 child_cmd + cmd,
174 stdin=subprocess.PIPE,
175 stdout=stdout,
176 stderr=stderr,
177 cwd=cwd)
178 logging.debug('Started child pid: %d' % child.pid)
179
180 # Part 2: start dtrace process.
181 trace_cmd = [
182 'sudo',
183 'dtrace',
184 '-x', 'dynvarsize=4m',
185 '-x', 'evaltime=exec',
186 '-n', cls.D_CODE,
187 '-o', '/dev/stderr',
188 '-p', str(child.pid),
189 ]
190 with open(logname, 'w') as logfile:
191 dtrace = subprocess.Popen(
192 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT)
193 logging.debug('Started dtrace pid: %d' % dtrace.pid)
194
195 # Part 3: Read until the Go! signal is sent.
196 with open(logname, 'r') as logfile:
197 while True:
198 x = logfile.readline()
199 if signal in x:
200 break
201
202 # Part 4: We can now tell our child to go.
203 child.communicate(signal)
204
205 dtrace.wait()
206 if dtrace.returncode != 0:
207 print 'Failure: %d' % dtrace.returncode
208 with open(logname) as logfile:
209 print ''.join(logfile.readlines()[-100:])
210 # Find a better way.
211 os.remove(logname)
212 return dtrace.returncode
213
214 @staticmethod
215 def parse_log(filename, blacklist):
216 """Processes a dtrace log and returns the files opened and the files that do
217 not exist.
218
219 Most of the time, files that do not exist are temporary test files that
220 should be put in /tmp instead. See http://crbug.com/116251
221
222 TODO(maruel): Process chdir() calls so relative paths can be processed.
223 """
224 files = set()
225 non_existent = set()
226 ignored = set()
227 # 1=filepath, 2=flags, 3=mode, 4=result
228 RE = re.compile(
229 # CPU ID PROBE PPID PID EXECNAME
230 r'^\s+\d+\s+\d+\s+open[^\:]+:return \d+:\d+ \"[^\"]+\"; '
231 # PATH FLAGS MODE RESULT
232 r'\"([^\"]+)\"; \"([^\"]+)\"; \"([^\"]+)\" \= (.+)$')
233 for line in open(filename):
234 m = RE.match(line)
235 if not m:
236 continue
237 if not m.group(4).startswith('0'):
238 # Called failed.
239 continue
240 filepath = m.group(1)
241 if blacklist(filepath):
242 continue
243 if (filepath not in files and
244 filepath not in non_existent and
245 filepath not in ignored):
246 if os.path.isfile(filepath):
247 files.add(filepath)
248 elif not os.path.isdir(filepath):
249 # Silently ignore directories.
250 non_existent.add(filepath)
251 else:
252 ignored.add(filepath)
253 return files, non_existent
254
255
256 def relevant_files(files, root):
257 """Trims the list of files to keep the expected files and unexpected files.
258
259 Unexpected files are files that are not based inside the |root| directory.
260 """
261 expected = []
262 unexpected = []
263 for f in files:
264 if f.startswith(root):
265 expected.append(f[len(root):])
266 else:
267 unexpected.append(f)
268 return sorted(set(expected)), sorted(set(unexpected))
269
270
271 def extract_directories(files, root):
272 """Detects if all the files in a directory were loaded and if so, replace the
273 individual files by the directory entry.
274 """
275 directories = set(os.path.dirname(f) for f in files)
276 files = set(files)
277 for directory in sorted(directories, reverse=True):
278 actual = set(
279 os.path.join(directory, f) for f in
280 os.listdir(os.path.join(root, directory))
281 if not f.endswith(('.svn', '.pyc'))
282 )
283 if not (actual - files):
284 files -= actual
285 files.add(directory + '/')
286 return sorted(files)
287
288
289 def trace_inputs(log, cmd, api):
290 """Tries to load the logs if available. If not, trace the test."""
291 logname = os.path.join(BASE_DIR, os.path.basename(log))
292 if not os.path.isfile(logname):
293 print 'Tracing... %s' % cmd
294 returncode = api.gen_trace(cmd, ROOT_DIR, logname)
295 if returncode:
296 return returncode
297
298 def blacklist(f):
299 """Strips ignored paths."""
300 return f.startswith(api.IGNORED) or f.endswith('.pyc')
301
302 print 'Loading traces... %s' % logname
303 files, non_existent = api.parse_log(logname, blacklist)
304 print('Total: %d' % len(files))
305 print('Non existent: %d' % len(non_existent))
306 for f in non_existent:
307 print(' %s' % f)
308
309 expected, unexpected = relevant_files(files, ROOT_DIR + '/')
310 if unexpected:
311 print('Unexpected: %d' % len(unexpected))
312 for f in unexpected:
313 print(' %s' % f)
314
315 simplified = extract_directories(expected, ROOT_DIR)
316 print('Interesting: %d reduced to %d' % (len(expected), len(simplified)))
317 for f in simplified:
318 print(' %s' % f)
319
320 return 0
321
322
323 def main():
324 parser = optparse.OptionParser(
325 usage='%prog <options> [cmd line...]')
326 parser.allow_interspersed_args = False
327 parser.add_option(
328 '-v', '--verbose', action='count', default=0, help='Use multiple times')
329 parser.add_option('-l', '--log', help='Log file')
330
331 options, args = parser.parse_args()
332 level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]
333 logging.basicConfig(
334 level=level,
335 format='%(levelname)5s %(module)15s(%(lineno)3d):%(message)s')
336
337 if not args:
338 parser.error('Must supply a command to run')
339 if not options.log:
340 parser.error('Must supply a log file with -l')
341
342 if sys.platform == 'linux2':
343 api = Strace
344 elif sys.platform == 'darwin':
345 api = Dtrace
346 else:
347 print >> sys.stderr, 'Unsupported platform'
348 return 1
349
350 return trace_inputs(options.log, args, api)
351
352
353 if __name__ == '__main__':
354 sys.exit(main())
OLDNEW
« no previous file with comments | « tools/isolate/trace_child_process.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698