OLD | NEW |
---|---|
(Empty) | |
1 #!/usr/bin/env python | |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 """Runs strace or dtrace on a test and processes the logs to extract the | |
7 dependencies from the source tree. | |
8 | |
9 Automatically extracts directories where all the files are used to make the | |
10 dependencies list more compact. | |
11 """ | |
12 | |
13 import logging | |
14 import optparse | |
15 import os | |
16 import re | |
17 import subprocess | |
18 import sys | |
19 | |
20 | |
21 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | |
22 ROOT_DIR = os.path.dirname(os.path.dirname(BASE_DIR)) | |
23 | |
24 | |
25 def isEnabledFor(level): | |
26 return logging.getLogger().isEnabledFor(level) | |
27 | |
28 | |
29 class Strace(object): | |
30 """strace implies linux.""" | |
31 IGNORED = ( | |
32 '/dev', | |
33 '/etc', | |
34 '/lib', | |
35 '/proc', | |
36 '/sys', | |
37 '/tmp', | |
38 '/usr', | |
39 '/var', | |
40 ) | |
41 | |
42 @staticmethod | |
43 def gen_trace(cmd, cwd, logname): | |
44 """Runs strace on an executable.""" | |
45 silent = not isEnabledFor(logging.DEBUG) | |
46 stdout = stderr = None | |
47 if silent: | |
48 stdout = subprocess.PIPE | |
49 stderr = subprocess.PIPE | |
50 trace_cmd = ['strace', '-f', '-e', 'trace=open', '-o', logname] | |
51 cmd = [os.path.normpath(os.path.join(cwd, c)) for c in cmd] | |
52 p = subprocess.Popen( | |
53 trace_cmd + cmd, cwd=cwd, stdout=stdout, stderr=stderr) | |
54 out, err = p.communicate() | |
55 if p.returncode != 0: | |
56 print 'Failure: %d' % p.returncode | |
57 # pylint: disable=E1103 | |
58 print ''.join(out.splitlines(True)[-100:]) | |
59 print ''.join(err.splitlines(True)[-100:]) | |
60 return p.returncode | |
61 | |
62 @staticmethod | |
63 def parse_log(filename, blacklist): | |
64 """Processes a strace log and returns the files opened and the files that do | |
65 not exist. | |
66 | |
67 Most of the time, files that do not exist are temporary test files that | |
68 should be put in /tmp instead. See http://crbug.com/116251 | |
69 | |
70 TODO(maruel): Process chdir() calls so relative paths can be processed. | |
71 """ | |
72 files = set() | |
73 non_existent = set() | |
74 # 1=pid, 2=filepath, 3=mode, 4=result | |
75 RE = re.compile( | |
76 # PID open(PATH, MODE) = RESULT | |
77 r'^(\d+)\s+open\("([^"]+)", ([^\)]+)\)\s+= (.+)$') | |
78 for line in open(filename): | |
79 m = RE.match(line) | |
80 if not m: | |
81 continue | |
82 if m.group(4).startswith('-1') or 'O_DIRECTORY' in m.group(3): | |
83 # Not present or a directory. | |
84 continue | |
85 filepath = m.group(2) | |
86 if blacklist(filepath): | |
87 continue | |
88 if filepath not in files and filepath not in non_existent: | |
89 if os.path.isfile(filepath): | |
90 files.add(filepath) | |
91 else: | |
92 non_existent.add(filepath) | |
93 return files, non_existent | |
94 | |
95 | |
96 class Dtrace(object): | |
97 """Uses DTrace framework through dtrace. Requires root access. | |
98 | |
99 Implies Mac OSX. | |
100 | |
101 dtruss can't be used because it has compatibility issues with python. | |
102 """ | |
103 IGNORED = ( | |
104 '/.vol', | |
105 '/Library', | |
106 '/System', | |
107 '/dev', | |
108 '/etc', | |
109 '/private/var', | |
110 '/tmp', | |
111 '/usr', | |
112 '/var', | |
113 ) | |
114 | |
115 # pylint: disable=C0301 | |
116 # To understand the following code, you'll want to take a look at: | |
117 # http://developers.sun.com/solaris/articles/dtrace_quickref/dtrace_quickref.h tml | |
118 # and | |
119 # https://wikis.oracle.com/display/DTrace/Variables | |
120 D_CODE = """ | |
121 /* Child process tracking. | |
122 * I'm really depressed that I need to do it myself. */ | |
123 dtrace:::BEGIN { | |
Roger Tawa OOO till Jul 10th
2012/03/15 18:07:16
triple ::: ? is that correct? same at line 134.
M-A Ruel
2012/03/15 18:09:55
Yep, that's how it works.
| |
124 trackedpid[ppid] = 1; | |
125 trackedpid[pid] = 1; | |
126 /* Signal gen_trace() that we are ready to trace. */ | |
127 printf("Go! 1 %d:%d %s", ppid, pid, execname); | |
128 } | |
129 | |
130 /* Make sure all child processes are tracked. This is not very efficient | |
131 * but for our use case, it's fine enough. | |
132 * TODO(maruel): We should properly track fork, execve, vfork and friends | |
133 * instead. */ | |
134 syscall:::entry /trackedpid[ppid]/ { | |
135 trackedpid[pid] = 1; | |
Roger Tawa OOO till Jul 10th
2012/03/15 18:07:16
should you be using ppid here?
M-A Ruel
2012/03/15 18:09:55
No. What it is doing is: if the parent's process i
| |
136 } | |
137 syscall::exit:entry /trackedpid[pid]/ { | |
138 trackedpid[pid] = 0; | |
139 } | |
140 | |
141 /* Finally what we care about! */ | |
142 syscall::open*:entry /trackedpid[pid]/ { | |
143 self->arg0 = copyinstr(arg0); | |
144 self->arg1 = arg1; | |
145 self->arg2 = arg2; | |
146 } | |
147 syscall::open*:return /trackedpid[pid]/ { | |
148 printf("%d:%d \\"%s\\"; \\"%s\\"; \\"%d\\"; \\"%d\\" = %d", | |
149 ppid, pid, execname, self->arg0, self->arg1, self->arg2, errno); | |
150 self->arg0 = 0; | |
151 self->arg1 = 0; | |
152 self->arg2 = 0; | |
153 } | |
154 """ | |
155 | |
156 @classmethod | |
157 def gen_trace(cls, cmd, cwd, logname): | |
158 """Runs dtrace on an executable.""" | |
159 silent = not isEnabledFor(logging.DEBUG) | |
160 print 'Running: %s' % cmd | |
161 signal = 'Go!' | |
162 logging.debug('Our pid: %d' % os.getpid()) | |
163 | |
164 # Part 1: start the child process. | |
165 stdout = stderr = None | |
166 if silent: | |
167 stdout = subprocess.PIPE | |
168 stderr = subprocess.PIPE | |
169 child_cmd = [ | |
170 sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'), | |
171 ] | |
172 child = subprocess.Popen( | |
173 child_cmd + cmd, | |
174 stdin=subprocess.PIPE, | |
175 stdout=stdout, | |
176 stderr=stderr, | |
177 cwd=cwd) | |
178 logging.debug('Started child pid: %d' % child.pid) | |
179 | |
180 # Part 2: start dtrace process. | |
181 trace_cmd = [ | |
182 'sudo', | |
183 'dtrace', | |
184 '-x', 'dynvarsize=4m', | |
185 '-x', 'evaltime=exec', | |
186 '-n', cls.D_CODE, | |
187 '-o', '/dev/stderr', | |
188 '-p', str(child.pid), | |
189 ] | |
190 with open(logname, 'w') as logfile: | |
191 dtrace = subprocess.Popen( | |
192 trace_cmd, stdout=logfile, stderr=subprocess.STDOUT) | |
193 logging.debug('Started dtrace pid: %d' % dtrace.pid) | |
194 | |
195 # Part 3: Read until the Go! signal is sent. | |
196 with open(logname, 'r') as logfile: | |
197 while True: | |
198 x = logfile.readline() | |
199 if signal in x: | |
200 break | |
201 | |
202 # Part 4: We can now tell our child to go. | |
203 child.communicate(signal) | |
204 | |
205 dtrace.wait() | |
206 if dtrace.returncode != 0: | |
207 print 'Failure: %d' % dtrace.returncode | |
208 with open(logname) as logfile: | |
209 print ''.join(logfile.readlines()[-100:]) | |
210 # Find a better way. | |
211 os.remove(logname) | |
212 return dtrace.returncode | |
213 | |
214 @staticmethod | |
215 def parse_log(filename, blacklist): | |
216 """Processes a dtrace log and returns the files opened and the files that do | |
217 not exist. | |
218 | |
219 Most of the time, files that do not exist are temporary test files that | |
220 should be put in /tmp instead. See http://crbug.com/116251 | |
221 | |
222 TODO(maruel): Process chdir() calls so relative paths can be processed. | |
223 """ | |
224 files = set() | |
225 non_existent = set() | |
226 ignored = set() | |
227 # 1=filepath, 2=flags, 3=mode, 4=result | |
228 RE = re.compile( | |
229 # CPU ID PROBE PPID PID EXECNAME | |
230 r'^\s+\d+\s+\d+\s+open[^\:]+:return \d+:\d+ \"[^\"]+\"; ' | |
231 # PATH FLAGS MODE RESULT | |
232 r'\"([^\"]+)\"; \"([^\"]+)\"; \"([^\"]+)\" \= (.+)$') | |
233 for line in open(filename): | |
234 m = RE.match(line) | |
235 if not m: | |
236 continue | |
237 if not m.group(4).startswith('0'): | |
238 # Called failed. | |
239 continue | |
240 filepath = m.group(1) | |
241 if blacklist(filepath): | |
242 continue | |
243 if (filepath not in files and | |
244 filepath not in non_existent and | |
245 filepath not in ignored): | |
246 if os.path.isfile(filepath): | |
247 files.add(filepath) | |
248 elif not os.path.isdir(filepath): | |
249 # Silently ignore directories. | |
250 non_existent.add(filepath) | |
251 else: | |
252 ignored.add(filepath) | |
253 return files, non_existent | |
254 | |
255 | |
256 def relevant_files(files, root): | |
257 """Trims the list of files to keep the expected files and unexpected files. | |
258 | |
259 Unexpected files are files that are not based inside the |root| directory. | |
260 """ | |
261 expected = [] | |
262 unexpected = [] | |
263 for f in files: | |
264 if f.startswith(root): | |
265 expected.append(f[len(root):]) | |
266 else: | |
267 unexpected.append(f) | |
268 return sorted(set(expected)), sorted(set(unexpected)) | |
269 | |
270 | |
271 def extract_directories(files, root): | |
272 """Detects if all the files in a directory were loaded and if so, replace the | |
273 individual files by the directory entry. | |
274 """ | |
275 directories = set(os.path.dirname(f) for f in files) | |
276 files = set(files) | |
277 for directory in sorted(directories, reverse=True): | |
278 actual = set( | |
279 os.path.join(directory, f) for f in | |
280 os.listdir(os.path.join(root, directory)) | |
281 if not f.endswith(('.svn', '.pyc')) | |
282 ) | |
283 if not (actual - files): | |
284 files -= actual | |
285 files.add(directory + '/') | |
286 return sorted(files) | |
287 | |
288 | |
289 def trace_inputs(log, cmd, api): | |
290 """Tries to load the logs if available. If not, trace the test.""" | |
291 logname = os.path.join(BASE_DIR, os.path.basename(log)) | |
292 if not os.path.isfile(logname): | |
293 print 'Tracing... %s' % cmd | |
294 returncode = api.gen_trace(cmd, ROOT_DIR, logname) | |
295 if returncode: | |
296 return returncode | |
297 | |
298 def blacklist(f): | |
299 """Strips ignored paths.""" | |
300 return f.startswith(api.IGNORED) or f.endswith('.pyc') | |
301 | |
302 print 'Loading traces... %s' % logname | |
303 files, non_existent = api.parse_log(logname, blacklist) | |
304 print('Total: %d' % len(files)) | |
305 print('Non existent: %d' % len(non_existent)) | |
306 for f in non_existent: | |
307 print(' %s' % f) | |
308 | |
309 expected, unexpected = relevant_files(files, ROOT_DIR + '/') | |
310 if unexpected: | |
311 print('Unexpected: %d' % len(unexpected)) | |
312 for f in unexpected: | |
313 print(' %s' % f) | |
314 | |
315 simplified = extract_directories(expected, ROOT_DIR) | |
316 print('Interesting: %d reduced to %d' % (len(expected), len(simplified))) | |
317 for f in simplified: | |
318 print(' %s' % f) | |
319 | |
320 return 0 | |
321 | |
322 | |
323 def main(): | |
324 parser = optparse.OptionParser( | |
325 usage='%prog <options> [cmd line...]') | |
326 parser.allow_interspersed_args = False | |
327 parser.add_option( | |
328 '-v', '--verbose', action='count', default=0, help='Use multiple times') | |
329 parser.add_option('-l', '--log', help='Log file') | |
330 | |
331 options, args = parser.parse_args() | |
332 level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)] | |
333 logging.basicConfig( | |
334 level=level, | |
335 format='%(levelname)5s %(module)15s(%(lineno)3d):%(message)s') | |
336 | |
337 if not args: | |
338 parser.error('Must supply a command to run') | |
339 if not options.log: | |
340 parser.error('Must supply a log file with -l') | |
341 | |
342 if sys.platform == 'linux2': | |
343 api = Strace | |
344 elif sys.platform == 'darwin': | |
345 api = Dtrace | |
346 else: | |
347 print >> sys.stderr, 'Unsupported platform' | |
348 return 1 | |
349 | |
350 return trace_inputs(options.log, args, api) | |
351 | |
352 | |
353 if __name__ == '__main__': | |
354 sys.exit(main()) | |
OLD | NEW |